Skip to content
Snippets Groups Projects
Commit e906b662 authored by Kristina Doskich's avatar Kristina Doskich
Browse files

Add worker to import gist to snippet

It is a first step of Import all GitHub gists into Snippets.
This commit adds a new Gitlab::GithubGistsImport::ImportGistWorker
that responsible for GistImporter execution that creates snippet
from GitHub gist attributes.

Details: #371099
Changelog: added
parent cc8cecec
No related branches found
No related tags found
1 merge request!103761Add worker to import gist to snippet
......@@ -1038,6 +1038,15 @@
:weight: 1
:idempotent: false
:tags: []
- :name: github_gists_importer:github_gists_import_import_gist
:worker_name: Gitlab::GithubGistsImport::ImportGistWorker
:feature_category: :importers
:has_external_dependencies: false
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: false
:tags: []
- :name: github_importer:github_import_attachments_import_issue
:worker_name: Gitlab::GithubImport::Attachments::ImportIssueWorker
:feature_category: :importers
......
# frozen_string_literal: true
module Gitlab
module GithubGistsImport
class ImportGistWorker # rubocop:disable Scalability/IdempotentWorker
include ApplicationWorker
include Gitlab::NotifyUponDeath
data_consistency :always
queue_namespace :github_gists_importer
feature_category :importers
sidekiq_options dead: false, retry: 5
def perform(user_id, gist_hash, notify_key)
gist = ::Gitlab::GithubGistsImport::Representation::Gist.from_json_hash(gist_hash)
with_logging(user_id, gist.github_identifiers) do
result = importer_class.new(gist, user_id).execute
error(user_id, result.errors, gist.github_identifiers) unless result.success?
JobWaiter.notify(notify_key, jid)
end
rescue StandardError => e
log_and_track_error(user_id, e, gist.github_identifiers)
raise
end
private
def importer_class
::Gitlab::GithubGistsImport::Importer::GistImporter
end
def with_logging(user_id, gist_id)
info(user_id, 'start importer', gist_id)
yield
info(user_id, 'importer finished', gist_id)
end
def log_and_track_error(user_id, exception, gist_id)
error(user_id, exception.message, gist_id)
Gitlab::ErrorTracking.track_exception(exception,
import_type: :github_gists,
user_id: user_id
)
end
def error(user_id, error_message, gist_id)
attributes = {
user_id: user_id,
github_identifiers: gist_id,
message: 'importer failed',
'error.message': error_message
}
Gitlab::GithubImport::Logger.error(structured_payload(attributes))
end
def info(user_id, message, gist_id)
attributes = {
user_id: user_id,
message: message,
github_identifiers: gist_id
}
Gitlab::GithubImport::Logger.info(structured_payload(attributes))
end
end
end
end
......@@ -193,6 +193,8 @@
- 1
- - geo
- 1
- - github_gists_importer
- 1
- - github_import_advance_stage
- 1
- - github_importer
......
# frozen_string_literal: true
module Gitlab
module GithubGistsImport
module Importer
class GistImporter
attr_reader :gist, :user
FileCountLimitError = Class.new(StandardError)
# gist - An instance of `Gitlab::GithubGistsImport::Representation::Gist`.
def initialize(gist, user_id)
@gist = gist
@user = User.find(user_id)
end
def execute
snippet = build_snippet
import_repository(snippet) if snippet.save!
return ServiceResponse.success unless max_snippet_files_count_exceeded?(snippet)
fail_and_track(snippet)
end
private
def build_snippet
attrs = {
title: gist.truncated_title,
visibility_level: gist.visibility_level,
content: gist.first_file[:file_content],
file_name: gist.first_file[:file_name],
author: user,
created_at: gist.created_at,
updated_at: gist.updated_at
}
PersonalSnippet.new(attrs)
end
def import_repository(snippet)
resolved_address = get_resolved_address
snippet.create_repository
snippet.repository.fetch_as_mirror(gist.git_pull_url, forced: true, resolved_address: resolved_address)
rescue StandardError
remove_snippet_and_repository(snippet)
raise
end
def get_resolved_address
validated_pull_url, host = Gitlab::UrlBlocker.validate!(gist.git_pull_url,
schemes: Project::VALID_IMPORT_PROTOCOLS,
ports: Project::VALID_IMPORT_PORTS,
allow_localhost: allow_local_requests?,
allow_local_network: allow_local_requests?)
host.present? ? validated_pull_url.host.to_s : ''
end
def max_snippet_files_count_exceeded?(snippet)
snippet.all_files.size > Snippet.max_file_limit
end
def remove_snippet_and_repository(snippet)
snippet.repository.remove if snippet.repository_exists?
snippet.destroy
end
def allow_local_requests?
Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services?
end
def fail_and_track(snippet)
remove_snippet_and_repository(snippet)
ServiceResponse.error(message: 'Snippet max file count exceeded').track_exception(as: FileCountLimitError)
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module GithubGistsImport
module Representation
class Gist
include Gitlab::GithubImport::Representation::ToHash
include Gitlab::GithubImport::Representation::ExposeAttribute
attr_reader :attributes
expose_attribute :id, :description, :is_public, :created_at, :updated_at, :files, :git_pull_url
# Builds a gist from a GitHub API response.
#
# gist - An instance of `Hash` containing the gist
# details.
def self.from_api_response(gist, additional_data = {})
hash = {
id: gist[:id],
description: gist[:description],
is_public: gist[:public],
files: gist[:files],
git_pull_url: gist[:git_pull_url],
created_at: gist[:created_at],
updated_at: gist[:updated_at]
}
new(hash)
end
# Builds a new gist using a Hash that was built from a JSON payload.
def self.from_json_hash(raw_hash)
new(Gitlab::GithubImport::Representation.symbolize_hash(raw_hash))
end
# attributes - A hash containing the raw gist details. The keys of this
# Hash (and any nested hashes) must be symbols.
def initialize(attributes)
@attributes = attributes
end
# Gist description can be an empty string, so we returning nil to use first file
# name as a title in such case on snippet creation
# Gist description has a limit of 256, while the snippet's title can be up to 255
def truncated_title
title = description.presence || first_file[:file_name]
title.truncate(255)
end
def visibility_level
is_public ? Gitlab::VisibilityLevel::PUBLIC : Gitlab::VisibilityLevel::PRIVATE
end
def first_file
_key, value = files.first
{
file_name: value[:filename],
file_content: Gitlab::HTTP.try_get(value[:raw_url])&.body
}
end
def github_identifiers
{ id: id }
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::GithubGistsImport::Importer::GistImporter, feature_category: :importer do
subject { described_class.new(gist_object, user.id).execute }
let_it_be(:user) { create(:user) }
let(:created_at) { Time.utc(2022, 1, 9, 12, 15) }
let(:updated_at) { Time.utc(2022, 5, 9, 12, 17) }
let(:gist_file) { { file_name: '_Summary.md', file_content: 'File content' } }
let(:url) { 'https://host.com/gistid.git' }
let(:gist_object) do
instance_double('Gitlab::GithubGistsImport::Representation::Gist',
truncated_title: 'My Gist',
visibility_level: 0,
files: { '_Summary.md': gist_file },
first_file: gist_file,
git_pull_url: url,
created_at: created_at,
updated_at: updated_at
)
end
let(:expected_snippet_attrs) do
{
title: 'My Gist',
visibility_level: 0,
content: 'File content',
file_name: '_Summary.md',
author_id: user.id,
created_at: gist_object.created_at,
updated_at: gist_object.updated_at
}.stringify_keys
end
describe '#execute' do
context 'when success' do
it 'creates expected snippet and snippet repository' do
expect_next_instance_of(Repository) do |repository|
expect(repository).to receive(:fetch_as_mirror)
end
expect { subject }.to change { user.snippets.count }.by(1)
expect(user.snippets[0].attributes).to include expected_snippet_attrs
end
end
context 'when file size limit exeeded' do
before do
files = [].tap { |array| 11.times { |n| array << ["file#{n}.txt", {}] } }.to_h
allow(gist_object).to receive(:files).and_return(files)
allow_next_instance_of(Repository) do |repository|
allow(repository).to receive(:fetch_as_mirror)
allow(repository).to receive(:empty?).and_return(false)
allow(repository).to receive(:ls_files).and_return(files.keys)
end
end
it 'returns error' do
result = subject
expect(user.snippets.count).to eq(0)
expect(result.error?).to eq(true)
expect(result.errors).to match_array(['Snippet max file count exceeded'])
end
end
context 'when invalid attributes' do
let(:gist_file) { { file_name: '_Summary.md', file_content: nil } }
it 'raises an error' do
expect { subject }.to raise_error(ActiveRecord::RecordInvalid, "Validation failed: Content can't be blank")
end
end
context 'when repository cloning fails' do
it 'returns error' do
expect_next_instance_of(Repository) do |repository|
expect(repository).to receive(:fetch_as_mirror).and_raise(Gitlab::Shell::Error)
expect(repository).to receive(:remove)
end
expect { subject }.to raise_error(Gitlab::Shell::Error)
expect(user.snippets.count).to eq(0)
end
end
context 'when url is invalid' do
let(:url) { 'invalid' }
context 'when local network is allowed' do
before do
allow(::Gitlab::CurrentSettings)
.to receive(:allow_local_requests_from_web_hooks_and_services?).and_return(true)
end
it 'raises error' do
expect(Gitlab::UrlBlocker)
.to receive(:validate!)
.with(url, ports: [80, 443], schemes: %w[http https git],
allow_localhost: true, allow_local_network: true)
.and_raise(Gitlab::UrlBlocker::BlockedUrlError)
expect { subject }.to raise_error(Gitlab::UrlBlocker::BlockedUrlError)
end
end
context 'when local network is not allowed' do
before do
allow(::Gitlab::CurrentSettings)
.to receive(:allow_local_requests_from_web_hooks_and_services?).and_return(false)
end
it 'raises error' do
expect(Gitlab::UrlBlocker)
.to receive(:validate!)
.with(url, ports: [80, 443], schemes: %w[http https git],
allow_localhost: false, allow_local_network: false)
.and_raise(Gitlab::UrlBlocker::BlockedUrlError)
expect { subject }.to raise_error(Gitlab::UrlBlocker::BlockedUrlError)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::GithubGistsImport::Representation::Gist do
shared_examples 'a Gist' do
it 'returns an instance of Gist' do
expect(gist).to be_an_instance_of(described_class)
end
context 'with Gist' do
it 'includes gist attributes' do
expect(gist).to have_attributes(
id: '1',
description: 'Gist title',
is_public: true,
files: { '_Summary.md': { filename: '_Summary.md', raw_url: 'https://some_url' } },
git_pull_url: 'https://gist.github.com/gistid.git'
)
end
end
end
describe '.from_api_response' do
let(:response) do
{
id: '1',
description: 'Gist title',
public: true,
created_at: '2022-04-26 18:30:53 UTC',
updated_at: '2022-04-26 18:30:53 UTC',
files: { '_Summary.md': { filename: '_Summary.md', raw_url: 'https://some_url' } },
git_pull_url: 'https://gist.github.com/gistid.git'
}
end
it_behaves_like 'a Gist' do
let(:gist) { described_class.from_api_response(response) }
end
end
describe '.from_json_hash' do
it_behaves_like 'a Gist' do
let(:hash) do
{
'id' => '1',
'description' => 'Gist title',
'is_public' => true,
'files' => { '_Summary.md': { filename: '_Summary.md', raw_url: 'https://some_url' } },
'git_pull_url' => 'https://gist.github.com/gistid.git'
}
end
let(:gist) { described_class.from_json_hash(hash) }
end
end
describe '#truncated_title' do
it 'truncates the title to 255 characters' do
object = described_class.new(description: 'm' * 300)
expect(object.truncated_title.length).to eq(255)
end
it 'does not truncate the title if it is shorter than 255 characters' do
object = described_class.new(description: 'foo')
expect(object.truncated_title).to eq('foo')
end
end
describe '#github_identifiers' do
it 'returns a hash with needed identifiers' do
github_identifiers = { id: 1 }
gist = described_class.new(github_identifiers.merge(something_else: '_something_else_'))
expect(gist.github_identifiers).to eq(github_identifiers)
end
end
describe '#visibility_level' do
it 'returns 20 when public' do
visibility = { is_public: true }
gist = described_class.new(visibility.merge(something_else: '_something_else_'))
expect(gist.visibility_level).to eq(20)
end
it 'returns 0 when private' do
visibility = { is_public: false }
gist = described_class.new(visibility.merge(something_else: '_something_else_'))
expect(gist.visibility_level).to eq(0)
end
end
describe '#first_file' do
let(:http_response) { instance_double('HTTParty::Response', body: 'File content') }
before do
allow(Gitlab::HTTP).to receive(:try_get).and_return(http_response)
end
it 'returns a hash with needed identifiers' do
files = { files: { '_Summary.md': { filename: '_Summary.md', raw_url: 'https://some_url' } } }
gist = described_class.new(files.merge(something_else: '_something_else_'))
expect(gist.first_file).to eq(file_name: '_Summary.md', file_content: 'File content')
end
end
end
......@@ -286,6 +286,7 @@
'Gitlab::GithubImport::Stage::ImportPullRequestsReviewsWorker' => 5,
'Gitlab::GithubImport::Stage::ImportPullRequestsWorker' => 5,
'Gitlab::GithubImport::Stage::ImportRepositoryWorker' => 5,
'Gitlab::GithubGistsImport::ImportGistWorker' => 5,
'Gitlab::JiraImport::AdvanceStageWorker' => 5,
'Gitlab::JiraImport::ImportIssueWorker' => 5,
'Gitlab::JiraImport::Stage::FinishImportWorker' => 5,
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::GithubGistsImport::ImportGistWorker, feature_category: :importer do
subject { described_class.new }
let_it_be(:user) { create(:user) }
let(:token) { 'token' }
let(:gist_hash) do
{
id: '055b70',
git_pull_url: 'https://gist.github.com/foo/bar.git',
files: {
'random.txt': {
filename: 'random.txt',
type: 'text/plain',
language: 'Text',
raw_url: 'https://gist.githubusercontent.com/user_name/055b70/raw/66a7be0d/random.txt',
size: 166903
}
},
is_public: false,
created_at: '2022-09-06T11:38:18Z',
updated_at: '2022-09-06T11:38:18Z',
description: 'random text'
}
end
let(:importer) { instance_double('Gitlab::GithubGistsImport::Importer::GistImporter') }
let(:importer_result) { instance_double('ServiceResponse', success?: true) }
let(:gist_object) do
instance_double('Gitlab::GithubGistsImport::Representation::Gist',
gist_hash.merge(github_identifiers: { id: '055b70' }, truncated_title: 'random text', visibility_level: 0))
end
let(:log_attributes) do
{
'user_id' => user.id,
'github_identifiers' => { 'id': gist_object.id },
'class' => 'Gitlab::GithubGistsImport::ImportGistWorker',
'correlation_id' => 'new-correlation-id',
'jid' => nil,
'job_status' => 'running',
'queue' => 'github_gists_importer:github_gists_import_import_gist'
}
end
describe '#perform' do
before do
allow(Gitlab::GithubGistsImport::Representation::Gist)
.to receive(:from_json_hash)
.with(gist_hash)
.and_return(gist_object)
allow(Gitlab::GithubGistsImport::Importer::GistImporter)
.to receive(:new)
.with(gist_object, user.id)
.and_return(importer)
allow(Gitlab::ApplicationContext).to receive(:current).and_return('correlation_id' => 'new-correlation-id')
allow(described_class).to receive(:queue).and_return('github_gists_importer:github_gists_import_import_gist')
end
context 'when success' do
it 'imports gist' do
expect(Gitlab::GithubImport::Logger)
.to receive(:info)
.with(log_attributes.merge('message' => 'start importer'))
expect(importer).to receive(:execute).and_return(importer_result)
expect(Gitlab::JobWaiter).to receive(:notify).with('some_key', subject.jid)
expect(Gitlab::GithubImport::Logger)
.to receive(:info)
.with(log_attributes.merge('message' => 'importer finished'))
subject.perform(user.id, gist_hash, 'some_key')
end
end
context 'when importer raised an error' do
it 'raises an error' do
exception = StandardError.new('_some_error_')
expect(importer).to receive(:execute).and_raise(exception)
expect(Gitlab::GithubImport::Logger)
.to receive(:error)
.with(log_attributes.merge('message' => 'importer failed', 'error.message' => '_some_error_'))
expect(Gitlab::ErrorTracking).to receive(:track_exception)
expect { subject.perform(user.id, gist_hash, 'some_key') }.to raise_error(StandardError)
end
end
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment