Commit 92249f1a authored by Douwe Maan
Browse files

Merge branch 'github-importer-refactor' into 'master'

Rewrite the GitHub importer to perform work in parallel and greatly improve performance

Closes #33135, #38621, and #39361

See merge request gitlab-org/gitlab-ce!14731
parents 3c369ba1 6e242e82
Loading
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ def create
    @target_namespace = find_or_create_namespace(namespace_path, current_user.namespace_path)

    if can?(current_user, :create_projects, @target_namespace)
      @project = Gitlab::GithubImport::ProjectCreator.new(repo, @project_name, @target_namespace, current_user, access_params, type: provider).execute
      @project = Gitlab::LegacyGithubImport::ProjectCreator.new(repo, @project_name, @target_namespace, current_user, access_params, type: provider).execute
    else
      render 'unauthorized'
    end
@@ -52,7 +52,7 @@ def create
  private

  # Returns the memoized import client.
  #
  # The client is a `Gitlab::LegacyGithubImport::Client` built from the access
  # token stored in the session under `access_token_key` and from
  # `client_options`. Only one assignment is kept: with two consecutive `||=`
  # lines the first one always wins, which left the legacy client unused.
  def client
    @client ||= Gitlab::LegacyGithubImport::Client.new(session[access_token_key], client_options)
  end

  def verify_import_enabled
+41 −0
Original line number Diff line number Diff line
@@ -365,6 +365,7 @@ def self.with_feature_available_for_user(feature, user)
  # Projects whose `last_activity_at` is more than six months in the past.
  scope :abandoned, -> { where('projects.last_activity_at < ?', 6.months.ago) }

  # Every project except the given one (matched on `id`).
  scope :excluding_project, ->(project) { where.not(id: project) }
  # Projects whose `import_status` column is 'started'.
  scope :import_started, -> { where(import_status: 'started') }

  state_machine :import_status, initial: :none do
    event :import_schedule do
@@ -1190,6 +1191,10 @@ def repository_exists?
    !!repository.exists?
  end

  # Reports whether the project's wiki has a repository, by delegating to
  # `wiki.repository_exists?`.
  def wiki_repository_exists?
    wiki.repository_exists?
  end

  # update visibility_level of forks
  def update_forks_visibility_level
    return unless visibility_level < visibility_level_was
@@ -1433,6 +1438,31 @@ def rename_repo_notify!
    reload_repository!
  end

  # Runs the post-import steps, in this order: the repository's own
  # `after_import`, `import_finish`, removal of the stored import job ID, and a
  # refresh of the project counter caches.
  def after_import
    repository.after_import
    # NOTE(review): presumably the `import_status` state machine event that
    # marks the import as finished -- confirm against the state machine.
    import_finish
    remove_import_jid
    update_project_counter_caches
  end

  # Refreshes the cached open-issue and open-merge-request counters of this
  # project by instantiating each count service with the project and calling
  # `refresh_cache` on it.
  #
  # Returns the list of service classes that were refreshed.
  def update_project_counter_caches
    [
      Projects::OpenIssuesCountService,
      Projects::OpenMergeRequestsCountService
    ].each { |count_service| count_service.new(self).refresh_cache }
  end

  # Clears the import job ID of this project.
  #
  # Does nothing (and returns nil) when no job ID is stored. Otherwise the ID
  # is unset in Gitlab::SidekiqStatus and the `import_jid` column is set to nil
  # through `update_column`.
  def remove_import_jid
    jid = import_jid
    return unless jid

    Gitlab::SidekiqStatus.unset(jid)
    update_column(:import_jid, nil)
  end

  def running_or_pending_build_count(force: false)
    Rails.cache.fetch(['projects', id, 'running_or_pending_build_count'], force: force) do
      builds.running_or_pending.count(:all)
@@ -1690,6 +1720,17 @@ def reference_counter(wiki: false)
    Gitlab::ReferenceCounter.new(gl_repository(is_wiki: wiki))
  end

  # Refreshes the expiration time of the associated import job ID.
  #
  # This method can be used by asynchronous importers to refresh the status,
  # preventing the StuckImportJobsWorker from marking the import as failed.
  def refresh_import_jid_expiration
    jid = import_jid
    # Nothing to refresh when no import job ID is stored.
    return unless jid

    # Re-register the job ID with the stuck-import expiration window.
    Gitlab::SidekiqStatus.set(jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION)
  end

  private

  def storage
+4 −0
Original line number Diff line number Diff line
@@ -973,6 +973,10 @@ def fetch_source_branch!(source_repository, source_branch, local_ref)
    raw_repository.fetch_source_branch!(source_repository.raw_repository, source_branch, local_ref)
  end

  # Reports whether a remote called `name` exists, by delegating to
  # `raw_repository.remote_exists?`.
  def remote_exists?(name)
    raw_repository.remote_exists?(name)
  end

  # Delegates the branch comparison to `raw_repository.compare_source_branch`.
  #
  # The raw repository of `source_repository` is passed on rather than the
  # wrapper itself, and the `straight:` keyword is forwarded unchanged.
  def compare_source_branch(target_branch_name, source_repository, source_branch_name, straight:)
    raw_repository.compare_source_branch(target_branch_name, source_repository.raw_repository, source_branch_name, straight: straight)
  end
+14 −9
Original line number Diff line number Diff line
@@ -267,18 +267,23 @@ def sort(method)
      end
    end

    # Returns the users that have a GitHub identity whose `extern_uid` equals
    # the given ID. The ID is converted to a String before the comparison.
    def for_github_id(id)
      github_identity = { provider: :github, extern_uid: id.to_s }

      joins(:identities).where(identities: github_identity)
    end

    # Find a User by their primary email or any associated secondary email
    def find_by_any_email(email)
      # Reuse the relation built by `by_any_email` and take a single record
      # from it; no hand-written SQL is needed here.
      by_any_email(email).take
    end

    # Returns a relation containing all the users for the given Email address
    def by_any_email(email)
      # Users matched on their primary email column.
      users = where(email: email)
      # Users matched through a row in the associated `emails` table.
      emails = joins(:emails).where(emails: { email: email })
      union = Gitlab::SQL::Union.new([users, emails])

      # Select from the UNION as a sub-query aliased to the model's table name,
      # so callers can keep chaining on the result (e.g. `.take`).
      from("(#{union.to_sql}) #{table_name}")
    end

    def filter(filter_name)
+17 −1
Original line number Diff line number Diff line
@@ -4,6 +4,18 @@ class ImportService < BaseService

    Error = Class.new(StandardError)

    # Returns true if this importer is supposed to perform its work in the
    # background.
    #
    # This method will only return `true` if async importing is explicitly
    # supported by an importer class (`Gitlab::GithubImport::ParallelImporter`
    # for example).
    def async?
      # Without a registered importer there is nothing that could run async.
      return false unless has_importer?

      # `try` yields nil when the importer class does not define `async?`;
      # normalise whatever comes back to a strict boolean.
      supports_async = importer_class.try(:async?)
      supports_async ? true : false
    end

    def execute
      add_repository_to_project unless project.gitlab_project_import?

@@ -75,12 +87,16 @@ def import_data
      end
    end

    # Looks up the importer registered for the project's `import_type` through
    # `Gitlab::ImportSources.importer`.
    #
    # NOTE(review): presumably returns a class (or nil for unknown types) --
    # confirm against Gitlab::ImportSources.
    def importer_class
      Gitlab::ImportSources.importer(project.import_type)
    end

    # True when the project's `import_type` is one of the names listed by
    # `Gitlab::ImportSources.importer_names`.
    def has_importer?
      Gitlab::ImportSources.importer_names.include?(project.import_type)
    end

    # Builds a new importer instance for the project.
    #
    # The class is resolved through `importer_class` and instantiated exactly
    # once; a second, discarded instantiation would only repeat whatever work
    # the importer's constructor does.
    def importer
      importer_class.new(project)
    end

    def unknown_url?
Loading