Commit 8d5f875c authored by Stan Hu's avatar Stan Hu Committed by Mayra Cabrera

Fix project exports clobbering concurrent export paths

When a project export completes, it removes everything in
`Project#import_export_shared.archive_path`, which can erase files
needed for another ongoing project export. This is problematic for
custom templates, which exports an existing project to get the most
recent changes and imports that archive to another project.

To avoid this from happening, we generate a random unique subpath in the
shared temporary directory so that multiple exports can work at the same
time.

Previously the path structure was as follows:

1. Project export files stored in:
   /shared/tmp/project_exports/namespace/project/:random
2. Project export .tar.gz files stored in:
   /shared/tmp/project_exports/namespace/project
3. Project export lock file:
   /shared/tmp/project_exports/namespace/project/.after_export_action

Now:

1. Project export files stored in:
   /shared/tmp/project_exports/namespace/project/:randomA/:randomB
2. Project export .tar.gz files stored in:
   /shared/tmp/project_exports/namespace/project/:randomA
3. Project export lock files stored in:
   /shared/tmp/project_exports/namespace/project/locks

The .tar.gz files are also now cleaned up in the AfterExportStrategy.

Also, ensure import/export path cleanup always happens.

A failure to update the database or object storage shouldn't
block us from cleaning up stale directories. This is especially
important to clear out stale lock file and archive paths.

Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/14716
parent be360f19
......@@ -12,16 +12,20 @@ class ImportExportCleanUpService
def execute
Gitlab::Metrics.measure(:import_export_clean_up) do
clean_up_export_object_files
break unless File.directory?(path)
clean_up_export_files
execute_cleanup
end
end
private
def execute_cleanup
clean_up_export_object_files
ensure
# We don't want a failure in cleaning up object storage from
# blocking us from cleaning up temporary storage.
clean_up_export_files if File.directory?(path)
end
def clean_up_export_files
Gitlab::Popen.popen(%W(find #{path} -not -path #{path} -mmin +#{mmin} -delete))
end
......
---
title: Fix project exports clobbering concurrent export paths
merge_request: 16280
author:
type: fixed
......@@ -30,7 +30,6 @@ module EE
::RepositoryImportWorker.new.perform(export_into_project_id)
ensure
export_file.close if export_file.respond_to?(:close)
project.remove_exports
end
def export_file
......
......@@ -10,11 +10,9 @@ module Gitlab
StrategyError = Class.new(StandardError)
AFTER_EXPORT_LOCK_FILE_NAME = '.after_export_action'
private
attr_reader :project, :current_user
attr_reader :project, :current_user, :lock_file
public
......@@ -29,8 +27,9 @@ module Gitlab
def execute(current_user, project)
@project = project
return unless @project.export_status == :finished
ensure_export_ready!
ensure_lock_files_path!
@lock_file = File.join(lock_files_path, SecureRandom.hex)
@current_user = current_user
if invalid?
......@@ -48,19 +47,32 @@ module Gitlab
false
ensure
delete_after_export_lock
delete_export_file
delete_archive_path
end
def to_json(options = {})
@options.to_h.merge!(klass: self.class.name).to_json
end
def self.lock_file_path(project)
return unless project.export_path || export_file_exists?
def ensure_export_ready!
raise StrategyError unless project.export_file_exists?
end
def ensure_lock_files_path!
FileUtils.mkdir_p(lock_files_path) unless Dir.exist?(lock_files_path)
end
def lock_files_path
project.import_export_shared.lock_files_path
end
lock_path = project.import_export_shared.archive_path
def archive_path
project.import_export_shared.archive_path
end
mkdir_p(lock_path)
File.join(lock_path, AFTER_EXPORT_LOCK_FILE_NAME)
def locks_present?
project.import_export_shared.locks_present?
end
protected
......@@ -69,25 +81,33 @@ module Gitlab
raise NotImplementedError
end
def delete_export?
true
end
private
def delete_export_file
return if locks_present? || !delete_export?
project.remove_exports
end
def delete_archive_path
FileUtils.rm_rf(archive_path) if File.directory?(archive_path)
end
def create_or_update_after_export_lock
FileUtils.touch(self.class.lock_file_path(project))
FileUtils.touch(lock_file)
end
def delete_after_export_lock
lock_file = self.class.lock_file_path(project)
FileUtils.rm(lock_file) if lock_file.present? && File.exist?(lock_file)
end
def log_validation_errors
errors.full_messages.each { |msg| project.import_export_shared.add_error_message(msg) }
end
def export_file_exists?
project.export_file_exists?
end
end
end
end
......
......@@ -4,6 +4,12 @@ module Gitlab
module ImportExport
module AfterExportStrategies
class DownloadNotificationStrategy < BaseAfterExportStrategy
protected
def delete_export?
false
end
private
def strategy_execute
......
......@@ -24,8 +24,6 @@ module Gitlab
def strategy_execute
handle_response_error(send_file)
project.remove_exports
end
def handle_response_error(response)
......
# frozen_string_literal: true
#
# This class encapsulates the directories used by project import/export:
#
# 1. The project export job first generates the project metadata tree
# (e.g. `project.json) and repository bundle (e.g. `project.bundle`)
# inside a temporary `export_path`
# (e.g. /path/to/shared/tmp/project_exports/namespace/project/:randomA/:randomB).
#
# 2. The job then creates a tarball (e.g. `project.tar.gz`) in
# `archive_path` (e.g. /path/to/shared/tmp/project_exports/namespace/project/:randomA).
# CarrierWave moves this tarball files into its permanent location.
#
# 3. Lock files are used to indicate whether a project is in the
# `after_export` state. These are stored in a directory
# (e.g. /path/to/shared/tmp/project_exports/namespace/project/locks. The
# number of lock files present signifies how many concurrent project
# exports are running. Note that this assumes the temporary directory
# is a shared mount:
# https://gitlab.com/gitlab-org/gitlab/issues/32203
#
# NOTE: Stale files should be cleaned up via ImportExportCleanupService.
module Gitlab
module ImportExport
class Shared
attr_reader :errors, :project
LOCKS_DIRECTORY = 'locks'
def initialize(project)
@project = project
@errors = []
......@@ -12,17 +34,27 @@ module Gitlab
end
def active_export_count
Dir[File.join(archive_path, '*')].count { |name| File.directory?(name) }
Dir[File.join(base_path, '*')].count { |name| File.basename(name) != LOCKS_DIRECTORY && File.directory?(name) }
end
# The path where the project metadata and repository bundle is saved
def export_path
@export_path ||= Gitlab::ImportExport.export_path(relative_path: relative_path)
end
# The path where the tarball is saved
def archive_path
@archive_path ||= Gitlab::ImportExport.export_path(relative_path: relative_archive_path)
end
def base_path
@base_path ||= Gitlab::ImportExport.export_path(relative_path: relative_base_path)
end
def lock_files_path
@locks_files_path ||= File.join(base_path, LOCKS_DIRECTORY)
end
def error(error)
log_error(message: error.message, caller: caller[0].dup)
log_debug(backtrace: error.backtrace&.join("\n"))
......@@ -37,16 +69,24 @@ module Gitlab
end
def after_export_in_progress?
File.exist?(after_export_lock_file)
locks_present?
end
def locks_present?
Dir.exist?(lock_files_path) && !Dir.empty?(lock_files_path)
end
private
def relative_path
File.join(relative_archive_path, SecureRandom.hex)
@relative_path ||= File.join(relative_archive_path, SecureRandom.hex)
end
def relative_archive_path
@relative_archive_path ||= File.join(@project.disk_path, SecureRandom.hex)
end
def relative_base_path
@project.disk_path
end
......@@ -70,10 +110,6 @@ module Gitlab
def filtered_error_message(message)
Projects::ImportErrorFilter.filter_message(message)
end
def after_export_lock_file
AfterExportStrategies::BaseAfterExportStrategy.lock_file_path(project)
end
end
end
end
......@@ -49,8 +49,7 @@ describe 'Import/Export - project export integration test', :js do
expect(page).to have_content('Download export')
expect(file_permissions(project.export_path)).to eq(0700)
expect(project.export_status).to eq(:finished)
expect(project.export_file.path).to include('tar.gz')
in_directory_with_expanded_export(project) do |exit_status, tmpdir|
......
......@@ -24,14 +24,22 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do
service.execute(user, project)
expect(lock_path_exist?).to be_truthy
expect(service.locks_present?).to be_truthy
end
context 'when the method succeeds' do
it 'removes the lock file' do
service.execute(user, project)
expect(lock_path_exist?).to be_falsey
expect(service.locks_present?).to be_falsey
end
it 'removes the archive path' do
FileUtils.mkdir_p(shared.archive_path)
service.execute(user, project)
expect(File.exist?(shared.archive_path)).to be_falsey
end
end
......@@ -62,13 +70,21 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do
service.execute(user, project)
end
it 'removes the archive path' do
FileUtils.mkdir_p(shared.archive_path)
service.execute(user, project)
expect(File.exist?(shared.archive_path)).to be_falsey
end
end
context 'when an exception is raised' do
it 'removes the lock' do
expect { service.execute(user, project) }.to raise_error(NotImplementedError)
expect(lock_path_exist?).to be_falsey
expect(service.locks_present?).to be_falsey
end
end
end
......@@ -97,8 +113,4 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do
expect(described_class.new(params).to_json).to eq result
end
end
def lock_path_exist?
File.exist?(described_class.lock_file_path(project))
end
end
......@@ -5,6 +5,35 @@ describe Gitlab::ImportExport::Shared do
let(:project) { build(:project) }
subject { project.import_export_shared }
context 'with a repository on disk' do
let(:project) { create(:project, :repository) }
let(:base_path) { %(/tmp/project_exports/#{project.disk_path}/) }
describe '#archive_path' do
it 'uses a random hash to avoid conflicts' do
expect(subject.archive_path).to match(/#{base_path}\h{32}/)
end
it 'memoizes the path' do
path = subject.archive_path
2.times { expect(subject.archive_path).to eq(path) }
end
end
describe '#export_path' do
it 'uses a random hash relative to project path' do
expect(subject.export_path).to match(/#{base_path}\h{32}\/\h{32}/)
end
it 'memoizes the path' do
path = subject.export_path
2.times { expect(subject.export_path).to eq(path) }
end
end
end
describe '#error' do
let(:error) { StandardError.new('Error importing into /my/folder Permission denied @ unlink_internal - /var/opt/gitlab/gitlab-rails/shared/a/b/c/uploads/file') }
......
......@@ -30,7 +30,7 @@ describe API::ProjectExport do
FileUtils.mkdir_p File.join(project_started.export_path, 'securerandom-hex')
# simulate in after export action
FileUtils.touch Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy.lock_file_path(project_after_export)
FileUtils.touch File.join(project_after_export.import_export_shared.lock_files_path, SecureRandom.hex)
end
after do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment