Move logic to skip backfilled repositories to the backfill service

parent 02ff3334
Pipeline #7086795 passed with stages
in 104 minutes and 24 seconds
......@@ -10,6 +10,8 @@ module Geo
end
def execute
return if backfilled?(project)
try_obtain_lease do
log('Started repository sync')
started_at, finished_at = fetch_repositories
......@@ -81,6 +83,29 @@ module Geo
Gitlab::ExclusiveLease.cancel(lease_key, repository_lease)
end
def backfilled?(project)
return false unless project.repository.exists?
return false if project.repository.exists? && project.repository.empty?
return true if registry_exists?(project)
# When Geo customers upgrade to 9.0, the secondaries nodes that are
# enabled will start the backfilling process automatically. We need
# to populate the tracking database correctly for projects synced
# before the process being started or projects created during the
# backfilling. Otherwise, the query to retrieve the projects will
# always return the same projects because they don't have entries
# in the tracking database
update_registry(DateTime.now, DateTime.now)
true
end
def registry_exists?(project)
Geo::ProjectRegistry.where(project_id: project.id)
.where.not(last_repository_synced_at: nil)
.any?
end
def update_registry(started_at, finished_at)
log('Updating registry information')
registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id)
......
......@@ -18,9 +18,6 @@ class GeoBackfillWorker
break if over_time?(start_time)
break unless Gitlab::Geo.current_node_enabled?
project = Project.find(project_id)
next if synced?(project)
# We try to obtain a lease here for the entire backfilling process
# because backfill the repositories continuously at a controlled rate
# instead of hammering the primary node. Initially, we are backfilling
......@@ -50,37 +47,6 @@ class GeoBackfillWorker
Time.now - start_time >= RUN_TIME
end
def synced?(project)
return true if registry_exists?(project)
return false unless project.repository.exists?
return false if project.repository.exists? && project.repository.empty?
# When Geo customers upgrade to 9.0, the secondaries nodes that are enabled
# will start the backfilling process automatically. We need to populate the
# tracking database correctly for projects synced before. Otherwise, the
# query to retrieve the projects will always return the same 100 projects
# because they don't have entries in the tracking database.
create_missing_registry(project)
true
end
def registry_exists?(project)
Geo::ProjectRegistry.where(project_id: project.id)
.where.not(last_repository_synced_at: nil)
.any?
end
def create_missing_registry(project)
logger.info "Creating missing registry for project #{project.path_with_namespace} (#{project.id})"
Geo::ProjectRegistry.create(
project_id: project.id,
last_repository_synced_at: DateTime.now,
last_repository_successful_sync_at: DateTime.now
)
end
def try_obtain_lease
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: lease_timeout).try_obtain
......
......@@ -2,62 +2,117 @@ require 'spec_helper'
describe Geo::RepositoryBackfillService, services: true do
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
let(:project) { create(:empty_project) }
subject { described_class.new(project.id) }
describe '#execute' do
it 'fetches project repositories' do
fetch_count = 0
context 'when repository is empty' do
let(:project) { create(:project_empty_repo) }
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do
fetch_count += 1
it 'fetches project repositories' do
fetch_count = 0
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do
fetch_count += 1
end
subject.execute
expect(fetch_count).to eq 2
end
subject.execute
it 'expires repository caches' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true }
expect(fetch_count).to eq 2
end
expect_any_instance_of(Repository).to receive(:expire_all_method_caches).once
expect_any_instance_of(Repository).to receive(:expire_branch_cache).once
expect_any_instance_of(Repository).to receive(:expire_content_cache).once
it 'expires repository caches' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true }
subject.execute
end
expect_any_instance_of(Repository).to receive(:expire_all_method_caches).once
expect_any_instance_of(Repository).to receive(:expire_branch_cache).once
expect_any_instance_of(Repository).to receive(:expire_content_cache).once
it 'releases lease' do
expect(Gitlab::ExclusiveLease).to receive(:cancel).once.and_call_original
subject.execute
end
subject.execute
end
it 'releases lease' do
expect(Gitlab::ExclusiveLease).to receive(:cancel).once.and_call_original
context 'tracking database' do
it 'tracks repository sync' do
expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1)
end
subject.execute
end
it 'stores last_repository_successful_sync_at when succeed' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true }
subject.execute
registry = Geo::ProjectRegistry.find_by(project_id: project.id)
expect(registry.last_repository_successful_sync_at).not_to be_nil
end
context 'tracking database' do
it 'tracks repository sync' do
expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1)
it 'reset last_repository_successful_sync_at when fail' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { raise Gitlab::Shell::Error }
subject.execute
registry = Geo::ProjectRegistry.find_by(project_id: project.id)
expect(registry.last_repository_successful_sync_at).to be_nil
end
end
end
it 'stores last_repository_successful_sync_at when succeed' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true }
context 'when repository exists and is not empty' do
let(:project) { create(:project) }
it 'does not fetch the project repositories' do
fetch_count = 0
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do
fetch_count += 1
end
subject.execute
registry = Geo::ProjectRegistry.find_by(project_id: project.id)
expect(fetch_count).to eq 0
end
expect(registry.last_repository_successful_sync_at).not_to be_nil
context 'tracking database' do
it 'tracks missing repository sync' do
expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1)
end
end
end
it 'reset last_repository_successful_sync_at when fail' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { raise Gitlab::Shell::Error }
context 'when repository was backfilled' do
let(:project) { create(:project) }
let!(:registry) do
Geo::ProjectRegistry.create(
project: project,
last_repository_synced_at: DateTime.now,
last_repository_successful_sync_at: DateTime.now
)
end
it 'does not fetch the project repositories' do
fetch_count = 0
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do
fetch_count += 1
end
subject.execute
registry = Geo::ProjectRegistry.find_by(project_id: project.id)
expect(fetch_count).to eq 0
end
expect(registry.last_repository_successful_sync_at).to be_nil
context 'tracking database' do
it 'does not track repository sync' do
expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count)
end
end
end
end
......
......@@ -26,30 +26,6 @@ describe Geo::GeoBackfillWorker, services: true do
subject.perform
end
context 'when repository exists' do
it 'does not perform Geo::RepositoryBackfillService for non empty repositories' do
create_list(:project, 2)
expect(Geo::RepositoryBackfillService).to receive(:new).twice.and_return(spy)
subject.perform
end
it 'performs Geo::RepositoryBackfillService for empty repositories' do
create(:empty_project)
expect(Geo::RepositoryBackfillService).to receive(:new).exactly(3).times.and_return(spy)
subject.perform
end
it 'creates missing registry for non empty repositories' do
create(:project)
expect { subject.perform }.to change(Geo::ProjectRegistry, :count).by(3)
end
end
it 'does not perform Geo::RepositoryBackfillService when can not obtain a lease' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { false }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment