Commit 32c9b2dc authored by Stan Hu, committed by Mayra Cabrera
Browse files

Merge branch 'da-scheduler-rescheduling-the-same-project' into 'master'

Geo - Avoid rescheduling the same project again in a backfill condition

See merge request gitlab-org/gitlab-ee!5069
parent 5ea5e9fd
Loading
Loading
Loading
Loading
+7 −1
Original line number Diff line number Diff line
@@ -39,7 +39,11 @@ def max_capacity
    # Enqueues a Geo::ProjectSyncWorker job for the given project, passing the
    # current time as the second worker argument.
    #
    # @param project_id [Integer] id of the project to sync
    # @return [Hash, nil] `{ project_id:, job_id: }` when the worker returned a
    #   job id, or nil when it did not.
    def schedule_job(project_id)
      job_id = Geo::ProjectSyncWorker.perform_async(project_id, Time.now)

      # The :project_id key is what scheduled_project_ids reads back, so only
      # this shape is built; no other hash is constructed and thrown away.
      { project_id: project_id, job_id: job_id } if job_id
    end

    # Collects the :project_id value of every entry in scheduled_jobs,
    # in the same order as the entries themselves.
    #
    # @return [Array] one element per scheduled job (nil for an entry that
    #   has no :project_id key)
    def scheduled_project_ids
      scheduled_jobs.map do |job|
        job[:project_id]
      end
    end

    def finder
@@ -59,12 +63,14 @@ def load_pending_resources

    # Returns the ids of unsynced projects that do not already have a job
    # scheduled, most recently updated repositories first.
    #
    # @param batch_size [Integer] forwarded to finder.find_unsynced_projects
    # @return [Array] project ids as returned by `pluck(:id)`
    def find_project_ids_not_synced(batch_size:)
      unsynced = finder.find_unsynced_projects(batch_size: batch_size)

      # Leave out projects that already appear in scheduled_jobs so they are
      # not picked up a second time.
      candidates = shard_restriction(unsynced).where.not(id: scheduled_project_ids)

      candidates.reorder(last_repository_updated_at: :desc).pluck(:id)
    end

    # Returns the ids of recently updated projects that do not already have a
    # job scheduled. Rows are ordered by registry sync time ascending with
    # NULLs first, then by the project's last repository update ascending.
    #
    # @param batch_size [Integer] forwarded to finder.find_projects_updated_recently
    # @return [Array] project ids as returned by `pluck(:id)`
    def find_project_ids_updated_recently(batch_size:)
      recently_updated = finder.find_projects_updated_recently(batch_size: batch_size)

      # Leave out projects that already appear in scheduled_jobs so they are
      # not picked up a second time.
      not_yet_scheduled = shard_restriction(recently_updated).where.not(id: scheduled_project_ids)

      not_yet_scheduled
        .order('project_registry.last_repository_synced_at ASC NULLS FIRST, projects.last_repository_updated_at ASC')
        .pluck(:id)
    end
+2 −7
Original line number Diff line number Diff line
@@ -145,7 +145,7 @@ def update_jobs_in_progress
        status = Gitlab::SidekiqStatus.job_status(scheduled_job_ids)

        # SidekiqStatus returns an array of booleans: true if the job is still running, false otherwise.
        # For each entry, first use `zip` to make { job_id: 123, id: 10 } -> [ { job_id: 123, id: 10 }, bool ]
        # For each entry, first use `zip` to make { job_id: 123 } -> [ { job_id: 123 }, bool ]
        # Next, filter out the jobs that have completed.
        @scheduled_jobs = @scheduled_jobs.zip(status).map { |(job, running)| job if running }.compact
      end
@@ -160,12 +160,7 @@ def schedule_jobs
        num_to_schedule = 0 if num_to_schedule < 0

        to_schedule = pending_resources.shift(num_to_schedule)

        scheduled = to_schedule.map do |args|
          job = schedule_job(*args)
          job if job&.fetch(:job_id, nil).present?
        end.compact

        scheduled = to_schedule.map { |args| schedule_job(*args) }.compact
        scheduled_jobs.concat(scheduled)

        log_info("Loop #{loops}", enqueued: scheduled.length, pending: pending_resources.length, scheduled: scheduled_jobs.length, capacity: capacity)
+5 −0
Original line number Diff line number Diff line
---
title: Geo - Avoid rescheduling the same project again in a backfill condition
merge_request: 5069
author:
type: fixed
+12 −0
Original line number Diff line number Diff line
@@ -64,6 +64,18 @@
      subject.perform(shard_name)
    end

    it 'does not schedule a job twice for the same project' do
      # Stub scheduled_jobs so that both projects appear to have a job
      # scheduled already (job ids 1 and 2, in that order).
      projects = [unsynced_project, unsynced_project_in_restricted_group]
      already_scheduled = projects.each_with_index.map do |project, index|
        { job_id: index + 1, project_id: project.id }
      end

      expect(subject).to receive(:scheduled_jobs).and_return(already_scheduled).at_least(:once)

      # With every candidate already scheduled, nothing new may be enqueued.
      expect(subject).not_to receive(:schedule_job)

      Sidekiq::Testing.inline! do
        subject.perform(shard_name)
      end
    end

    it 'does not perform Geo::ProjectSyncWorker when no geo database is configured' do
      allow(Gitlab::Geo).to receive(:geo_database_configured?) { false }