Commit 6f6c3c6f authored by Toon Claes's avatar Toon Claes Committed by Douglas Barbosa Alexandre

Use Geo log to remove files when migrated to object storage

parent a85c4cf4
......@@ -63,6 +63,12 @@ class GitlabUploader < CarrierWave::Uploader::Base
super || file&.filename
end
def relative_path
return path if pathname.relative?
pathname.relative_path_from(Pathname.new(root))
end
def model_valid?
!!model
end
......@@ -115,4 +121,8 @@ class GitlabUploader < CarrierWave::Uploader::Base
# the cache directory.
File.join(work_dir, cache_id, version_name.to_s, for_file)
end
def pathname
@pathname ||= Pathname.new(path)
end
end
......@@ -9,6 +9,8 @@ class JobArtifactUploader < GitlabUploader
storage_options Gitlab.config.artifacts
alias_method :upload, :model
def cached_size
return model.size if model.size.present? && !model.file_changed?
......
......@@ -8,6 +8,8 @@ class LegacyArtifactUploader < GitlabUploader
storage_options Gitlab.config.artifacts
alias_method :upload, :model
def store_dir
dynamic_segment
end
......
......@@ -6,6 +6,8 @@ class LfsObjectUploader < GitlabUploader
storage_options Gitlab.config.lfs
alias_method :upload, :model
def filename
model.oid[4..-1]
end
......
......@@ -137,6 +137,8 @@ module ObjectStorage
included do |base|
base.include(ObjectStorage)
include ::EE::ObjectStorage::Concern
after :migrate, :delete_migrated_file
end
......
......@@ -81,6 +81,12 @@ module EE
has_artifact?(DAST_FILE)
end
def log_geo_deleted_event
# It is not needed to generate a Geo deleted event
# since Legacy Artifacts are migrated to multi-build artifacts
# See https://gitlab.com/gitlab-org/gitlab-ce/issues/46652
end
private
def has_artifact?(name)
......
......@@ -7,15 +7,13 @@ module EE
extend ActiveSupport::Concern
prepended do
after_destroy :log_geo_event
after_destroy :log_geo_deleted_event
scope :not_expired, -> { where('expire_at IS NULL OR expire_at > ?', Time.current) }
scope :geo_syncable, -> { with_files_stored_locally.not_expired }
end
private
def log_geo_event
def log_geo_deleted_event
::Geo::JobArtifactDeletedEventStore.new(self).create!
end
end
......
......@@ -7,15 +7,13 @@ module EE
extend ActiveSupport::Concern
prepended do
after_destroy :log_geo_event
after_destroy :log_geo_deleted_event
scope :geo_syncable, -> { with_files_stored_locally }
scope :with_files_stored_remotely, -> { where(file_store: LfsObjectUploader::Store::REMOTE) }
end
private
def log_geo_event
def log_geo_deleted_event
::Geo::LfsObjectDeletedEventStore.new(self).create!
end
end
......
......@@ -4,7 +4,7 @@ module EE
extend ::Gitlab::Utils::Override
prepended do
include ObjectStorage::BackgroundMove
include ::ObjectStorage::BackgroundMove
end
def for_epic?
......
......@@ -7,14 +7,12 @@ module EE
extend ActiveSupport::Concern
prepended do
after_destroy :log_geo_event
after_destroy :log_geo_deleted_event
scope :geo_syncable, -> { with_files_stored_locally }
end
private
def log_geo_event
def log_geo_deleted_event
::Geo::UploadDeletedEventStore.new(self).create!
end
end
......
# frozen_string_literal: true
module Geo
# This class is responsible for:
# * Finding the appropriate Downloader class for a FileRegistry record
......@@ -50,7 +52,7 @@ module Geo
# rubocop: disable CodeReuse/ActiveRecord
def update_registry(bytes_downloaded, mark_as_synced:, missing_on_primary: false)
registry =
if object_type.to_sym == :job_artifact
if job_artifact?
Geo::JobArtifactRegistry.find_or_initialize_by(artifact_id: object_db_id)
else
Geo::FileRegistry.find_or_initialize_by(
......
# frozen_string_literal: true
module Geo
class FileRegistryRemovalService < FileService
include ::Gitlab::Utils::StrongMemoize
......@@ -25,7 +27,7 @@ module Geo
log_info('Local file & registry removed')
end
rescue SystemCallError
rescue SystemCallError => e
log_error('Could not remove file', e.message)
raise
end
......@@ -35,7 +37,7 @@ module Geo
# rubocop: disable CodeReuse/ActiveRecord
def file_registry
strong_memoize(:file_registry) do
if object_type.to_sym == :job_artifact
if job_artifact?
::Geo::JobArtifactRegistry.find_by(artifact_id: object_db_id)
else
::Geo::FileRegistry.find_by(file_type: object_type, file_id: object_db_id)
......@@ -59,16 +61,15 @@ module Geo
end
end
# rubocop: disable CodeReuse/ActiveRecord
def file_uploader
strong_memoize(:file_uploader) do
case object_type.to_s
when 'lfs'
LfsObject.find_by!(id: object_db_id).file
when 'job_artifact'
Ci::JobArtifact.find_by!(id: object_db_id).file
case object_type
when :lfs
LfsObject.find(object_db_id).file
when :job_artifact
Ci::JobArtifact.find(object_db_id).file
when *Geo::FileService::DEFAULT_OBJECT_TYPES
Upload.find_by!(id: object_db_id).build_uploader
Upload.find(object_db_id).build_uploader
else
raise NameError, "Unrecognized type: #{object_type}"
end
......@@ -77,11 +78,6 @@ module Geo
log_error('Could not build uploader', err.message)
raise
end
# rubocop: enable CodeReuse/ActiveRecord
def upload?
Geo::FileService::DEFAULT_OBJECT_TYPES.include?(object_type.to_s)
end
def lease_key
"file_registry_removal_service:#{object_type}:#{object_db_id}"
......
# frozen_string_literal: true
module Geo
class FileService
include ExclusiveLeaseGuard
......@@ -5,8 +7,8 @@ module Geo
attr_reader :object_type, :object_db_id
DEFAULT_OBJECT_TYPES = %w[attachment avatar file import_export namespace_file personal_file favicon].freeze
DEFAULT_SERVICE_TYPE = 'file'.freeze
DEFAULT_OBJECT_TYPES = %i[attachment avatar file import_export namespace_file personal_file favicon].freeze
DEFAULT_SERVICE_TYPE = :file
def initialize(object_type, object_db_id)
@object_type = object_type.to_sym
......@@ -19,9 +21,17 @@ module Geo
private
def upload?
DEFAULT_OBJECT_TYPES.include?(object_type)
end
def job_artifact?
object_type == :job_artifact
end
def service_klass_name
klass_name =
if DEFAULT_OBJECT_TYPES.include?(object_type.to_s)
if upload?
DEFAULT_SERVICE_TYPE
else
object_type
......@@ -33,7 +43,7 @@ module Geo
def base_log_data(message)
{
class: self.class.name,
object_type: object_type.to_s,
object_type: object_type,
object_db_id: object_db_id,
message: message
}
......
......@@ -10,13 +10,6 @@ module Geo
@job_artifact = job_artifact
end
override :create!
def create!
return unless job_artifact.local_store?
super
end
private
def build_event
......@@ -26,14 +19,8 @@ module Geo
)
end
def local_store_path
Pathname.new(JobArtifactUploader.root)
end
def relative_file_path
return unless job_artifact.file.present?
Pathname.new(job_artifact.file.path).relative_path_from(local_store_path)
job_artifact.file.relative_path if job_artifact.file.present?
end
# This is called by ProjectLogHelpers to build json log with context info
......
......@@ -10,13 +10,6 @@ module Geo
@lfs_object = lfs_object
end
override :create!
def create!
return unless lfs_object.local_store?
super
end
private
def build_event
......@@ -27,14 +20,8 @@ module Geo
)
end
def local_store_path
Pathname.new(LfsObjectUploader.root)
end
def relative_file_path
return unless lfs_object.file.present?
Pathname.new(lfs_object.file.path).relative_path_from(local_store_path)
lfs_object.file.relative_path if lfs_object.file.present?
end
# This is called by ProjectLogHelpers to build json log with context info
......
......@@ -10,13 +10,6 @@ module Geo
@upload = upload
end
override :create!
def create!
return unless upload.local?
super
end
private
def build_event
......
module EE
module ObjectStorage
module Concern
extend ActiveSupport::Concern
included do
after :migrate, :log_geo_deleted_event
end
private
def log_geo_deleted_event(_migrated_file)
upload.log_geo_deleted_event
end
end
end
end
---
title: Use Geo log to remove files when migrated to object storage
merge_request: 5966
author:
type: added
......@@ -6,42 +6,20 @@ module Gitlab
include BaseEvent
def process
return unless file_registry_job_artifacts.any? # avoid race condition
# delete synchronously to ensure consistency
if File.file?(file_path) && !delete_file(file_path)
return # do not delete file from registry if deletion failed
end
log_event
file_registry_job_artifacts.delete_all
# Must always schedule, regardless of shard health
job_id = ::Geo::FileRegistryRemovalWorker.perform_async(:job_artifact, event.job_artifact_id)
log_event(job_id)
end
private
# rubocop: disable CodeReuse/ActiveRecord
def file_registry_job_artifacts
@file_registry_job_artifacts ||= ::Geo::JobArtifactRegistry.where(artifact_id: event.job_artifact_id)
end
# rubocop: enable CodeReuse/ActiveRecord
def file_path
@file_path ||= File.join(::JobArtifactUploader.root, event.file_path)
end
def log_event
def log_event(job_id)
logger.event_info(
created_at,
'Deleted job artifact',
'Delete job artifact scheduled',
file_id: event.job_artifact_id,
file_path: file_path)
end
def delete_file(path)
File.delete(path)
rescue => ex
logger.error("Failed to remove file", exception: ex.class.name, details: ex.message, filename: path)
false
file_path: event.file_path,
job_id: job_id)
end
end
end
......
......@@ -5,28 +5,21 @@ module Gitlab
class LfsObjectDeletedEvent
include BaseEvent
# rubocop: disable CodeReuse/ActiveRecord
def process
# Must always schedule, regardless of shard health
job_id = ::Geo::FileRemovalWorker.perform_async(file_path)
job_id = ::Geo::FileRegistryRemovalWorker.perform_async(:lfs, event.lfs_object_id)
log_event(job_id)
::Geo::FileRegistry.lfs_objects.where(file_id: event.lfs_object_id).delete_all
end
# rubocop: enable CodeReuse/ActiveRecord
private
def file_path
@file_path ||= File.join(LfsObjectUploader.root, event.file_path)
end
def log_event(job_id)
logger.event_info(
created_at,
'Deleted LFS object',
'Delete LFS object scheduled',
oid: event.oid,
file_id: event.lfs_object_id,
file_path: file_path,
file_path: event.file_path,
job_id: job_id)
end
end
......
......@@ -5,24 +5,21 @@ module Gitlab
class UploadDeletedEvent
include BaseEvent
# rubocop: disable CodeReuse/ActiveRecord
def process
log_event
::Geo::FileRegistry.where(file_id: event.upload_id, file_type: event.upload_type).delete_all
job_id = ::Geo::FileRegistryRemovalWorker.perform_async(event.upload_type, event.upload_id)
log_event(job_id)
end
# rubocop: enable CodeReuse/ActiveRecord
private
def log_event
def log_event(job_id)
logger.event_info(
created_at,
'Deleted upload file',
'Delete upload file scheduled',
upload_id: event.upload_id,
upload_type: event.upload_type,
file_path: event.file_path,
model_id: event.model_id,
model_type: event.model_type)
model_type: event.model_type,
job_id: job_id)
end
end
end
......
......@@ -10,7 +10,7 @@ describe Gitlab::Geo::LogCursor::Events::JobArtifactDeletedEvent, :postgresql, :
subject { described_class.new(job_artifact_deleted_event, Time.now, logger) }
around do |example|
Sidekiq::Testing.fake! { example.run }
Sidekiq::Testing.inline! { example.run }
end
describe '#process' do
......@@ -32,11 +32,14 @@ describe Gitlab::Geo::LogCursor::Events::JobArtifactDeletedEvent, :postgresql, :
context 'when the delete fails' do
before do
expect(File).to receive(:delete).with(job_artifact.file.path).and_raise("Cannot delete")
allow(File).to receive(:unlink).and_call_original
allow(File).to receive(:unlink).with(job_artifact.file.path).and_raise(SystemCallError, "Cannot delete")
end
it 'does not remove the tracking database entry' do
expect { subject.process }.not_to change(Geo::JobArtifactRegistry, :count)
expect do
expect { subject.process }.to raise_error(SystemCallError)
end.not_to change(Geo::JobArtifactRegistry, :count)
end
end
end
......
......@@ -10,7 +10,7 @@ describe Gitlab::Geo::LogCursor::Events::LfsObjectDeletedEvent, :postgresql, :cl
subject { described_class.new(lfs_object_deleted_event, Time.now, logger) }
around do |example|
Sidekiq::Testing.fake! { example.run }
Sidekiq::Testing.inline! { example.run }
end
describe '#process' do
......@@ -24,10 +24,8 @@ describe Gitlab::Geo::LogCursor::Events::LfsObjectDeletedEvent, :postgresql, :cl
expect { subject.process }.to change(Geo::FileRegistry.lfs_objects, :count).by(-1)
end
it 'schedules a Geo::FileRemovalWorker job' do
file_path = File.join(LfsObjectUploader.root, lfs_object_deleted_event.file_path)
expect(::Geo::FileRemovalWorker).to receive(:perform_async).with(file_path)
it 'schedules a Geo::FileRegistryRemovalWorker job' do
expect(::Geo::FileRegistryRemovalWorker).to receive(:perform_async).with(:lfs, lfs_object_deleted_event.lfs_object_id)
subject.process
end
......
......@@ -10,7 +10,7 @@ describe Gitlab::Geo::LogCursor::Events::UploadDeletedEvent, :postgresql, :clean
subject { described_class.new(upload_deleted_event, Time.now, logger) }
around do |example|
Sidekiq::Testing.fake! { example.run }
Sidekiq::Testing.inline! { example.run }
end
describe '#process' do
......
require 'spec_helper'
describe Appearance do
include ::EE::GeoHelpers
subject { build(:appearance) }
describe 'validations' do
......@@ -17,4 +19,42 @@ describe Appearance do
it { is_expected.to allow_value(hex).for(:message_font_color) }
it { is_expected.not_to allow_value('000').for(:message_font_color) }
end
context 'object storage with background upload' do
context 'when running in a Geo primary node' do
set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(primary)
stub_uploads_object_storage(AttachmentUploader, background_upload: true)
end
it 'creates a Geo deleted log event for logo' do
Sidekiq::Testing.inline! do
expect do
create(:appearance, :with_logo)
end.to change(Geo::UploadDeletedEvent, :count).by(1)
end
end
it 'creates a Geo deleted log event for header logo' do
Sidekiq::Testing.inline! do
expect do
create(:appearance, :with_header_logo)
end.to change(Geo::UploadDeletedEvent, :count).by(1)
end
end
it 'creates only a Geo deleted log event for the migrated header logo' do
Sidekiq::Testing.inline! do
appearance = create(:appearance, :with_header_logo, :with_logo)
expect do
appearance.update(header_logo: fixture_file_upload('spec/fixtures/rails_sample.jpg'))
end.to change(Geo::UploadDeletedEvent, :count).by(1)
end
end
end
end
end
require 'spec_helper'
describe Note do
include ::EE::GeoHelpers
context 'object storage with background upload' do
before do
stub_uploads_object_storage(AttachmentUploader, background_upload: true)
end
context 'when running in a Geo primary node' do
set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(primary)
end
it 'creates a Geo deleted log event for attachment' do
Sidekiq::Testing.inline! do
expect do
create(:note, :with_attachment)
end.to change(Geo::UploadDeletedEvent, :count).by(1)
end
end
end
end
end
......@@ -129,7 +129,7 @@ describe Geo::FileRegistryRemovalService do
end
end
context 'with file' do # TODO
context 'with file' do
let!(:upload) { create(:user, :with_avatar).avatar.upload }
let!(:file_registry) { create(:geo_file_registry, :avatar, file_id: upload.id) }
let!(:file_path) { upload.build_uploader.file.path }
......
......@@ -18,7 +18,7 @@ describe Geo::HashedStorageAttachmentsEventStore do
TestEnv.clean_test_path
end
describe '#create' do
describe '#create!' do
it_behaves_like 'a Geo event store', Geo::HashedStorageAttachmentsEvent
context 'when running on a primary node' do
......
......@@ -17,7 +17,7 @@ describe Geo::HashedStorageMigratedEventStore do
TestEnv.clean_test_path
end
describe '#create' do
describe '#create!' do
it_behaves_like 'a Geo event store', Geo::HashedStorageMigratedEvent
context 'when running on a primary node' do
......
......@@ -11,21 +11,17 @@ describe Geo::JobArtifactDeletedEventStore do
subject { described_class.new(job_artifact) }
describe '#create' do
it_behaves_like 'a Geo event store', Geo::JobArtifactDeletedEvent
describe '#create!' do
it_behaves_like 'a Geo event store', Geo::JobArtifactDeletedEvent do
let(:file_subject) { job_artifact }
end
context 'when running on a primary node' do
before do
stub_primary_node
end
it 'does not create an event when LFS object is not on a local store' do
allow(job_artifact).to receive(:local_store?).and_return(false)
expect { subject.create! }.not_to change(Geo::JobArtifactDeletedEvent, :count)
end
it 'tracks LFS object attributes' do
it 'tracks artifact attributes' do
subject.create!
expect(Geo::JobArtifactDeletedEvent.last).to have_attributes(
......
......@@ -11,20 +11,16 @@ describe Geo::LfsObjectDeletedEventStore do
subject { described_class.new(lfs_object) }
describe '#create' do
it_behaves_like 'a Geo event store', Geo::LfsObjectDeletedEvent
describe '#create!' do
it_behaves_like 'a Geo event store', Geo::LfsObjectDeletedEvent do
let(:file_subject) { lfs_object }
end
context 'when running on a primary node' do
before do
stub_primary_node
end
it 'does not create an event when LFS object is not on a local store' do
allow(lfs_object).to receive(:local_store?).and_return(false)
expect { subject.create! }.not_to change(Geo::LfsObjectDeletedEvent, :count)
end
it 'tracks LFS object attributes' do
subject.create!
......
......@@ -9,7 +9,7 @@ describe Geo::RepositoriesChangedEventStore do
subject { described_class.new(geo_node) }
describe '#create' do
describe '#create!' do
it_behaves_like 'a Geo event store', Geo::RepositoriesChangedEvent
end
end
......@@ -10,7 +10,7 @@ describe Geo::RepositoryCreatedEventStore do