Commit bddbcaef authored by Tiago Botelho's avatar Tiago Botelho

Backports every CE related change from ee-44542 to CE

parent 7603beff
Pipeline #21527300 failed with stages
in 30 minutes and 55 seconds
class Import::BaseController < ApplicationController
private
def find_already_added_projects(import_type)
current_user.created_projects.where(import_type: import_type).includes(:import_state)
end
def find_jobs(import_type)
current_user.created_projects
.includes(:import_state)
.where(import_type: import_type)
.to_json(only: [:id], methods: [:import_status])
end
def find_or_create_namespace(names, owner)
names = params[:target_namespace].presence || names
......
......@@ -22,16 +22,14 @@ class Import::BitbucketController < Import::BaseController
@repos, @incompatible_repos = repos.partition { |repo| repo.valid? }
@already_added_projects = current_user.created_projects.where(import_type: 'bitbucket')
@already_added_projects = find_already_added_projects('bitbucket')
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos.to_a.reject! { |repo| already_added_projects_names.include?(repo.full_name) }
end
def jobs
render json: current_user.created_projects
.where(import_type: 'bitbucket')
.to_json(only: [:id, :import_status])
render json: find_jobs('bitbucket')
end
def create
......
......@@ -46,15 +46,14 @@ class Import::FogbugzController < Import::BaseController
@repos = client.repos
@already_added_projects = current_user.created_projects.where(import_type: 'fogbugz')
@already_added_projects = find_already_added_projects('fogbugz')
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos.reject! { |repo| already_added_projects_names.include? repo.name }
end
def jobs
jobs = current_user.created_projects.where(import_type: 'fogbugz').to_json(only: [:id, :import_status])
render json: jobs
render json: find_jobs('fogbugz')
end
def create
......
......@@ -24,15 +24,14 @@ class Import::GithubController < Import::BaseController
def status
@repos = client.repos
@already_added_projects = current_user.created_projects.where(import_type: provider)
@already_added_projects = find_already_added_projects(provider)
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos.reject! { |repo| already_added_projects_names.include? repo.full_name }
end
def jobs
jobs = current_user.created_projects.where(import_type: provider).to_json(only: [:id, :import_status])
render json: jobs
render json: find_jobs(provider)
end
def create
......
......@@ -12,15 +12,14 @@ class Import::GitlabController < Import::BaseController
def status
@repos = client.projects
@already_added_projects = current_user.created_projects.where(import_type: "gitlab")
@already_added_projects = find_already_added_projects('gitlab')
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos = @repos.to_a.reject { |repo| already_added_projects_names.include? repo["path_with_namespace"] }
end
def jobs
jobs = current_user.created_projects.where(import_type: "gitlab").to_json(only: [:id, :import_status])
render json: jobs
render json: find_jobs('gitlab')
end
def create
......
......@@ -73,15 +73,14 @@ class Import::GoogleCodeController < Import::BaseController
@repos = client.repos
@incompatible_repos = client.incompatible_repos
@already_added_projects = current_user.created_projects.where(import_type: "google_code")
@already_added_projects = find_already_added_projects('google_code')
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos.reject! { |repo| already_added_projects_names.include? repo.name }
end
def jobs
jobs = current_user.created_projects.where(import_type: "google_code").to_json(only: [:id, :import_status])
render json: jobs
render json: find_jobs('google_code')
end
def create
......
......@@ -67,6 +67,9 @@ class Project < ActiveRecord::Base
before_save :ensure_runners_token
after_save :update_project_statistics, if: :namespace_id_changed?
after_save :create_import_state, if: ->(project) { project.import? && project.import_state.nil? }
after_create :create_project_feature, unless: :project_feature
after_create :create_ci_cd_settings,
......@@ -157,6 +160,8 @@ class Project < ActiveRecord::Base
has_one :fork_network_member
has_one :fork_network, through: :fork_network_member
has_one :import_state, autosave: true, class_name: 'ProjectImportState', inverse_of: :project
# Merge Requests for target project should be removed with it
has_many :merge_requests, foreign_key: 'target_project_id'
has_many :source_of_merge_requests, foreign_key: 'source_project_id', class_name: 'MergeRequest'
......@@ -385,55 +390,9 @@ class Project < ActiveRecord::Base
scope :abandoned, -> { where('projects.last_activity_at < ?', 6.months.ago) }
scope :excluding_project, ->(project) { where.not(id: project) }
scope :import_started, -> { where(import_status: 'started') }
state_machine :import_status, initial: :none do
event :import_schedule do
transition [:none, :finished, :failed] => :scheduled
end
event :force_import_start do
transition [:none, :finished, :failed] => :started
end
event :import_start do
transition scheduled: :started
end
event :import_finish do
transition started: :finished
end
event :import_fail do
transition [:scheduled, :started] => :failed
end
event :import_retry do
transition failed: :started
end
state :scheduled
state :started
state :finished
state :failed
after_transition [:none, :finished, :failed] => :scheduled do |project, _|
project.run_after_commit do
job_id = add_import_job
update(import_jid: job_id) if job_id
end
end
after_transition started: :finished do |project, _|
project.reset_cache_and_import_attrs
if Gitlab::ImportSources.importer_names.include?(project.import_type) && project.repo_exists?
project.run_after_commit do
Projects::AfterImportService.new(project).execute
end
end
end
end
scope :joins_import_state, -> { joins("LEFT JOIN project_mirror_data import_state ON import_state.project_id = projects.id") }
scope :import_started, -> { joins_import_state.where("import_state.status = 'started' OR projects.import_status = 'started'") }
class << self
# Searches for a list of projects based on the query given in `query`.
......@@ -663,10 +622,6 @@ class Project < ActiveRecord::Base
external_import? || forked? || gitlab_project_import? || bare_repository_import?
end
def no_import?
import_status == 'none'
end
def external_import?
import_url.present?
end
......@@ -679,6 +634,93 @@ class Project < ActiveRecord::Base
import_started? || import_scheduled?
end
def import_state_args
{
status: self[:import_status],
jid: self[:import_jid],
last_error: self[:import_error]
}
end
def ensure_import_state
return if self[:import_status] == 'none' || self[:import_status].nil?
return unless import_state.nil?
create_import_state(import_state_args)
update_column(:import_status, 'none')
end
def import_schedule
ensure_import_state
import_state&.schedule
end
def force_import_start
ensure_import_state
import_state&.force_start
end
def import_start
ensure_import_state
import_state&.start
end
def import_fail
ensure_import_state
import_state&.fail_op
end
def import_finish
ensure_import_state
import_state&.finish
end
def import_jid=(new_jid)
ensure_import_state
import_state&.jid = new_jid
end
def import_jid
ensure_import_state
import_state&.jid
end
def import_error=(new_error)
ensure_import_state
import_state&.last_error = new_error
end
def import_error
ensure_import_state
import_state&.last_error
end
def import_status=(new_status)
ensure_import_state
import_state&.status = new_status
end
def import_status
ensure_import_state
import_state&.status || 'none'
end
def no_import?
import_status == 'none'
end
def import_started?
# import? does SQL work so only run it if it looks like there's an import running
import_status == 'started' && import?
......@@ -1480,7 +1522,7 @@ class Project < ActiveRecord::Base
def rename_repo_notify!
# When we import a project overwriting the original project, there
# is a move operation. In that case we don't want to send the instructions.
send_move_instructions(full_path_was) unless started?
send_move_instructions(full_path_was) unless import_started?
expires_full_path_cache
self.old_path_with_namespace = full_path_was
......@@ -1534,7 +1576,8 @@ class Project < ActiveRecord::Base
return unless import_jid
Gitlab::SidekiqStatus.unset(import_jid)
update_column(:import_jid, nil)
import_state.update_column(:jid, nil)
end
def running_or_pending_build_count(force: false)
......@@ -1553,7 +1596,8 @@ class Project < ActiveRecord::Base
sanitized_message = Gitlab::UrlSanitizer.sanitize(error_message)
import_fail
update_column(:import_error, sanitized_message)
import_state.update_column(:last_error, sanitized_message)
rescue ActiveRecord::ActiveRecordError => e
Rails.logger.error("Error setting import status to failed: #{e.message}. Original error: #{sanitized_message}")
ensure
......
class ProjectImportState < ActiveRecord::Base
include AfterCommitQueue
self.table_name = "project_mirror_data"
belongs_to :project, inverse_of: :import_state
validates :project, presence: true
state_machine :status, initial: :none do
event :schedule do
transition [:none, :finished, :failed] => :scheduled
end
event :force_start do
transition [:none, :finished, :failed] => :started
end
event :start do
transition scheduled: :started
end
event :finish do
transition started: :finished
end
event :fail_op do
transition [:scheduled, :started] => :failed
end
state :scheduled
state :started
state :finished
state :failed
after_transition [:none, :finished, :failed] => :scheduled do |state, _|
state.run_after_commit do
job_id = project.add_import_job
update(jid: job_id) if job_id
end
end
after_transition started: :finished do |state, _|
project = state.project
project.reset_cache_and_import_attrs
if Gitlab::ImportSources.importer_names.include?(project.import_type) && project.repo_exists?
state.run_after_commit do
Projects::AfterImportService.new(project).execute
end
end
end
end
end
......@@ -142,7 +142,7 @@ module Projects
if @project
@project.errors.add(:base, message)
@project.mark_import_as_failed(message) if @project.import?
@project.mark_import_as_failed(message) if @project.persisted? && @project.import?
end
@project
......
......@@ -63,11 +63,10 @@ module Gitlab
end
def find_project(id)
# We only care about the import JID so we can refresh it. We also only
# want the project if it hasn't been marked as failed yet. It's possible
# the import gets marked as stuck when jobs of the current stage failed
# somehow.
Project.select(:import_jid).import_started.find_by(id: id)
# TODO: Only select the JID
# This is due to the fact that the JID could be present in either the project record or
# its associated import_state record
Project.import_started.find_by(id: id)
end
end
end
......
......@@ -31,7 +31,10 @@ module Gitlab
end
def find_project(id)
Project.select(:import_jid).import_started.find_by(id: id)
# TODO: Only select the JID
# This is due to the fact that the JID could be present in either the project record or
# its associated import_state record
Project.import_started.find_by(id: id)
end
end
end
......
......@@ -22,7 +22,8 @@ class StuckImportJobsWorker
end
def mark_projects_with_jid_as_failed!
jids_and_ids = enqueued_projects_with_jid.pluck(:import_jid, :id).to_h
# TODO: Rollback this change to use SQL through #pluck
jids_and_ids = enqueued_projects_with_jid.map { |project| [project.import_jid, project.id] }.to_h
# Find the jobs that aren't currently running or that exceeded the threshold.
completed_jids = Gitlab::SidekiqStatus.completed_jids(jids_and_ids.keys)
......@@ -42,15 +43,15 @@ class StuckImportJobsWorker
end
def enqueued_projects
Project.with_import_status(:scheduled, :started)
Project.joins_import_state.where("(import_state.status = 'scheduled' OR import_state.status = 'started') OR (projects.import_status = 'scheduled' OR projects.import_status = 'started')")
end
def enqueued_projects_with_jid
enqueued_projects.where.not(import_jid: nil)
enqueued_projects.where.not("import_state.jid IS NULL AND projects.import_jid IS NULL")
end
def enqueued_projects_without_jid
enqueued_projects.where(import_jid: nil)
enqueued_projects.where("import_state.jid IS NULL AND projects.import_jid IS NULL")
end
def error_message
......
class CreateProjectMirrorData < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def up
return if table_exists?(:project_mirror_data)
create_table :project_mirror_data do |t|
t.references :project, index: true, foreign_key: { on_delete: :cascade }
t.string :status
t.string :jid
t.text :last_error
end
end
def down
drop_table(:project_mirror_data) if table_exists?(:project_mirror_data)
end
end
class AddIndexesToProjectMirrorData < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index :project_mirror_data, :jid
add_concurrent_index :project_mirror_data, :status
end
def down
remove_index :project_mirror_data, :jid if index_exists? :project_mirror_data, :jid
remove_index :project_mirror_data, :status if index_exists? :project_mirror_data, :status
end
end
class MigrateImportAttributesDataFromProjectsToProjectMirrorData < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
UP_MIGRATION = 'PopulateImportState'.freeze
DOWN_MIGRATION = 'RollbackImportStateData'.freeze
BATCH_SIZE = 1000
DELAY_INTERVAL = 5.minutes
disable_ddl_transaction!
class Project < ActiveRecord::Base
include EachBatch
self.table_name = 'projects'
end
class ProjectImportState < ActiveRecord::Base
include EachBatch
self.table_name = 'project_mirror_data'
end
def up
projects = Project.where.not(import_status: :none)
queue_background_migration_jobs_by_range_at_intervals(projects, UP_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
end
def down
import_state = ProjectImportState.where.not(status: :none)
queue_background_migration_jobs_by_range_at_intervals(import_state, DOWN_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180503150427) do
ActiveRecord::Schema.define(version: 20180503175054) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -1518,6 +1518,17 @@ ActiveRecord::Schema.define(version: 20180503150427) do
add_index "project_import_data", ["project_id"], name: "index_project_import_data_on_project_id", using: :btree
create_table "project_mirror_data", force: :cascade do |t|
t.integer "project_id"
t.string "status"
t.string "jid"
t.text "last_error"
end
add_index "project_mirror_data", ["jid"], name: "index_project_mirror_data_on_jid", using: :btree
add_index "project_mirror_data", ["project_id"], name: "index_project_mirror_data_on_project_id", using: :btree
add_index "project_mirror_data", ["status"], name: "index_project_mirror_data_on_status", using: :btree
create_table "project_statistics", force: :cascade do |t|
t.integer "project_id", null: false
t.integer "namespace_id", null: false
......@@ -2211,6 +2222,7 @@ ActiveRecord::Schema.define(version: 20180503150427) do
add_foreign_key "project_features", "projects", name: "fk_18513d9b92", on_delete: :cascade
add_foreign_key "project_group_links", "projects", name: "fk_daa8cee94c", on_delete: :cascade
add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade
add_foreign_key "project_mirror_data", "projects", on_delete: :cascade
add_foreign_key "project_statistics", "projects", on_delete: :cascade
add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade
add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade
......
......@@ -136,6 +136,7 @@ module API
def self.preload_relation(projects_relation, options = {})
projects_relation.preload(:project_feature, :route)
.preload(:import_state)
.preload(namespace: [:route, :owner],
tags: :taggings)
end
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# This background migration creates all the records on the
# import state table for projects that are considered imports or forks
class PopulateImportState
def perform(start_id, end_id)
move_attributes_data_to_import_state(start_id, end_id)
rescue ActiveRecord::RecordNotUnique
retry
end
def move_attributes_data_to_import_state(start_id, end_id)
Rails.logger.info("#{self.class.name} - Moving import attributes data to project mirror data table: #{start_id} - #{end_id}")
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO project_mirror_data (project_id, status, jid, last_error)
SELECT id, import_status, import_jid, import_error
FROM projects
WHERE projects.import_status != 'none'
AND projects.id BETWEEN #{start_id} AND #{end_id}
AND NOT EXISTS (
SELECT id
FROM project_mirror_data
WHERE project_id = projects.id
)
SQL
ActiveRecord::Base.connection.execute <<~SQL
UPDATE projects
SET import_status = 'none'
WHERE import_status != 'none'
AND id BETWEEN #{start_id} AND #{end_id}
SQL
end
end
end
end
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# This background migration migrates all the data of import_state
# back to the projects table for projects that are considered imports or forks
class RollbackImportStateData
def perform(start_id, end_id)
move_attributes_data_to_project(start_id, end_id)
end
def move_attributes_data_to_project(start_id, end_id)
Rails.logger.info("#{self.class.name} - Moving import attributes data to projects table: #{start_id} - #{end_id}")
if Gitlab::Database.mysql?
ActiveRecord::Base.connection.execute <<~SQL
UPDATE projects, project_mirror_data
SET
projects.import_status = project_mirror_data.status,
projects.import_jid = project_mirror_data.jid,
projects.import_error = project_mirror_data.last_error
WHERE project_mirror_data.project_id = projects.id
AND project_mirror_data.id BETWEEN #{start_id} AND #{end_id}
SQL
else
ActiveRecord::Base.connection.execute <<~SQL
UPDATE projects
SET
import_status = project_mirror_data.status,
import_jid = project_mirror_data.jid,
import_error = project_mirror_data.last_error
FROM project_mirror_data
WHERE project_mirror_data.project_id = projects.id
AND project_mirror_data.id BETWEEN #{start_id} AND #{end_id}
SQL
end
end
end
end
end
......@@ -32,7 +32,8 @@ module Gitlab
Gitlab::SidekiqStatus
.set(jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION)
project.update_column(:import_jid, jid)
project.ensure_import_state
project.import_state&.update_column(:jid, jid)
Stage::ImportRepositoryWorker
.perform_async(project.id)
......
......@@ -78,7 +78,8 @@ module Gitlab
def handle_errors
return unless errors.any?
project.update_column(:import_error, {
project.ensure_import_state
project.import_state&.update_column(:last_error, {
message: 'The remote data could not be fully imported.',
errors: errors
}.to_json)
......
FactoryBot.define do
factory :import_state, class: ProjectImportState do
status :none
association :project, factory: :project
transient do
import_url { generate(:url) }
end
trait :repository do
association :project, factory: [:project, :repository]
end
trait :none do
status :none
end
trait :scheduled do
status :scheduled
end
trait :started do
status :started
end
trait :finished do
status :finished
end
trait :failed do
status :failed
end