Commit a5b7f273 authored by Stan Hu's avatar Stan Hu 🔴

Merge branch 'mk-improve-background-migration-specs' into 'master'

Improve background migration specs

See merge request gitlab-org/gitlab-ce!17162
parents 4bc17b01 0c357ac8
Pipeline #17866204 passed with stages
in 48 minutes and 41 seconds
......@@ -5,157 +5,10 @@ module Gitlab
# This class processes a batch of rows in `untracked_files_for_uploads` by
# adding each file to the `uploads` table if it does not exist.
class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength
# This class is responsible for producing the attributes necessary to
# track an uploaded file in the `uploads` table.
class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
self.table_name = 'untracked_files_for_uploads'
# Ends with /:random_hex/:filename
FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
FULL_PATH_CAPTURE = /\A(.+)#{FILE_UPLOADER_PATH}/
# These regex patterns are tested against a relative path, relative to
# the upload directory.
# For convenience, if there exists a capture group in the pattern, then
# it indicates the model_id.
PATH_PATTERNS = [
{
pattern: %r{\A-/system/appearance/logo/(\d+)/},
uploader: 'AttachmentUploader',
model_type: 'Appearance'
},
{
pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
uploader: 'AttachmentUploader',
model_type: 'Appearance'
},
{
pattern: %r{\A-/system/note/attachment/(\d+)/},
uploader: 'AttachmentUploader',
model_type: 'Note'
},
{
pattern: %r{\A-/system/user/avatar/(\d+)/},
uploader: 'AvatarUploader',
model_type: 'User'
},
{
pattern: %r{\A-/system/group/avatar/(\d+)/},
uploader: 'AvatarUploader',
model_type: 'Namespace'
},
{
pattern: %r{\A-/system/project/avatar/(\d+)/},
uploader: 'AvatarUploader',
model_type: 'Project'
},
{
pattern: FILE_UPLOADER_PATH,
uploader: 'FileUploader',
model_type: 'Project'
}
].freeze
def to_h
@upload_hash ||= {
path: upload_path,
uploader: uploader,
model_type: model_type,
model_id: model_id,
size: file_size,
checksum: checksum
}
end
def upload_path
# UntrackedFile#path is absolute, but Upload#path depends on uploader
@upload_path ||=
if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
end
end
def uploader
matching_pattern_map[:uploader]
end
def model_type
matching_pattern_map[:model_type]
end
def model_id
return @model_id if defined?(@model_id)
pattern = matching_pattern_map[:pattern]
matchd = path_relative_to_upload_dir.match(pattern)
# If something is captured (matchd[1] is not nil), it is a model_id
# Only the FileUploader pattern will not match an ID
@model_id = matchd[1] ? matchd[1].to_i : file_uploader_model_id
end
def file_size
File.size(absolute_path)
end
def checksum
Digest::SHA256.file(absolute_path).hexdigest
end
private
def matching_pattern_map
@matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map|
path_relative_to_upload_dir.match(path_pattern_map[:pattern])
end
unless @matching_pattern_map
raise "Unknown upload path pattern \"#{path}\""
end
@matching_pattern_map
end
def file_uploader_model_id
matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)
not_found_msg = <<~MSG
Could not capture project full_path from a FileUploader path:
"#{path_relative_to_upload_dir}"
MSG
raise not_found_msg unless matchd
full_path = matchd[1]
project = Project.find_by_full_path(full_path)
return nil unless project
project.id
end
# Not including a leading slash
def path_relative_to_upload_dir
upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
base = %r{\A#{Regexp.escape(upload_dir)}/}
@path_relative_to_upload_dir ||= path.sub(base, '')
end
def absolute_path
File.join(Gitlab.config.uploads.storage_path, path)
end
end
# This class is used to query the `uploads` table.
class Upload < ActiveRecord::Base
self.table_name = 'uploads'
end
def perform(start_id, end_id)
return unless migrate?
files = UntrackedFile.where(id: start_id..end_id)
files = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.where(id: start_id..end_id)
processed_files = insert_uploads_if_needed(files)
processed_files.delete_all
......@@ -165,7 +18,8 @@ module Gitlab
private
def migrate?
UntrackedFile.table_exists? && Upload.table_exists?
Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.table_exists? &&
Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload.table_exists?
end
def insert_uploads_if_needed(files)
......@@ -197,7 +51,7 @@ module Gitlab
def filter_existing_uploads(files)
paths = files.map(&:upload_path)
existing_paths = Upload.where(path: paths).pluck(:path).to_set
existing_paths = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Upload.where(path: paths).pluck(:path).to_set
files.reject do |file|
existing_paths.include?(file.upload_path)
......@@ -229,7 +83,7 @@ module Gitlab
end
ids.each do |model_type, model_ids|
model_class = Object.const_get(model_type)
model_class = "Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::#{model_type}".constantize
found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
deleted_ids = ids[model_type] - found_ids
ids[model_type] = deleted_ids
......@@ -249,8 +103,8 @@ module Gitlab
end
def drop_temp_table_if_finished
if UntrackedFile.all.empty? && !Rails.env.test? # Dropping a table intermittently breaks test cleanup
UntrackedFile.connection.drop_table(:untracked_files_for_uploads,
if Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.all.empty? && !Rails.env.test? # Dropping a table intermittently breaks test cleanup
Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile.connection.drop_table(:untracked_files_for_uploads,
if_exists: true)
end
end
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
module PopulateUntrackedUploadsDependencies
# This class is responsible for producing the attributes necessary to
# track an uploaded file in the `uploads` table.
class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
self.table_name = 'untracked_files_for_uploads'
# Ends with /:random_hex/:filename
FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
FULL_PATH_CAPTURE = /\A(.+)#{FILE_UPLOADER_PATH}/
# These regex patterns are tested against a relative path, relative to
# the upload directory.
# For convenience, if there exists a capture group in the pattern, then
# it indicates the model_id.
PATH_PATTERNS = [
{
pattern: %r{\A-/system/appearance/logo/(\d+)/},
uploader: 'AttachmentUploader',
model_type: 'Appearance'
},
{
pattern: %r{\A-/system/appearance/header_logo/(\d+)/},
uploader: 'AttachmentUploader',
model_type: 'Appearance'
},
{
pattern: %r{\A-/system/note/attachment/(\d+)/},
uploader: 'AttachmentUploader',
model_type: 'Note'
},
{
pattern: %r{\A-/system/user/avatar/(\d+)/},
uploader: 'AvatarUploader',
model_type: 'User'
},
{
pattern: %r{\A-/system/group/avatar/(\d+)/},
uploader: 'AvatarUploader',
model_type: 'Namespace'
},
{
pattern: %r{\A-/system/project/avatar/(\d+)/},
uploader: 'AvatarUploader',
model_type: 'Project'
},
{
pattern: FILE_UPLOADER_PATH,
uploader: 'FileUploader',
model_type: 'Project'
}
].freeze
def to_h
@upload_hash ||= {
path: upload_path,
uploader: uploader,
model_type: model_type,
model_id: model_id,
size: file_size,
checksum: checksum
}
end
def upload_path
# UntrackedFile#path is absolute, but Upload#path depends on uploader
@upload_path ||=
if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
end
end
def uploader
matching_pattern_map[:uploader]
end
def model_type
matching_pattern_map[:model_type]
end
def model_id
return @model_id if defined?(@model_id)
pattern = matching_pattern_map[:pattern]
matchd = path_relative_to_upload_dir.match(pattern)
# If something is captured (matchd[1] is not nil), it is a model_id
# Only the FileUploader pattern will not match an ID
@model_id = matchd[1] ? matchd[1].to_i : file_uploader_model_id
end
def file_size
File.size(absolute_path)
end
def checksum
Digest::SHA256.file(absolute_path).hexdigest
end
private
def matching_pattern_map
@matching_pattern_map ||= PATH_PATTERNS.find do |path_pattern_map|
path_relative_to_upload_dir.match(path_pattern_map[:pattern])
end
unless @matching_pattern_map
raise "Unknown upload path pattern \"#{path}\""
end
@matching_pattern_map
end
def file_uploader_model_id
matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)
not_found_msg = <<~MSG
Could not capture project full_path from a FileUploader path:
"#{path_relative_to_upload_dir}"
MSG
raise not_found_msg unless matchd
full_path = matchd[1]
project = Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::Project.find_by_full_path(full_path)
return nil unless project
project.id
end
# Not including a leading slash
def path_relative_to_upload_dir
upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
base = %r{\A#{Regexp.escape(upload_dir)}/}
@path_relative_to_upload_dir ||= path.sub(base, '')
end
def absolute_path
File.join(Gitlab.config.uploads.storage_path, path)
end
end
# Avoid using application code
class Upload < ActiveRecord::Base
self.table_name = 'uploads'
end
# Avoid using application code
class Appearance < ActiveRecord::Base
self.table_name = 'appearances'
end
# Avoid using application code
class Namespace < ActiveRecord::Base
self.table_name = 'namespaces'
end
# Avoid using application code
class Note < ActiveRecord::Base
self.table_name = 'notes'
end
# Avoid using application code
class User < ActiveRecord::Base
self.table_name = 'users'
end
# Since project Markdown upload paths don't contain the project ID, we have to find the
# project by its full_path. Due to MySQL/PostgreSQL differences, and historical reasons,
# the logic is somewhat complex, so I've mostly copied it in here.
class Project < ActiveRecord::Base
self.table_name = 'projects'
def self.find_by_full_path(path)
binary = Gitlab::Database.mysql? ? 'BINARY' : ''
order_sql = "(CASE WHEN #{binary} routes.path = #{connection.quote(path)} THEN 0 ELSE 1 END)"
where_full_path_in(path).reorder(order_sql).take
end
def self.where_full_path_in(path)
cast_lower = Gitlab::Database.postgresql?
path = connection.quote(path)
where =
if cast_lower
"(LOWER(routes.path) = LOWER(#{path}))"
else
"(routes.path = #{path})"
end
joins("INNER JOIN routes ON routes.source_id = projects.id AND routes.source_type = 'Project'").where(where)
end
end
end
end
end
require 'spec_helper'
# Rollback DB to 10.5 (later than this was originally written for) because it still needs to work.
describe Gitlab::BackgroundMigration::PopulateUntrackedUploadsDependencies::UntrackedFile, :migration, schema: 20180208183958 do
include MigrationsHelpers::TrackUntrackedUploadsHelpers
let!(:appearances) { table(:appearances) }
let!(:namespaces) { table(:namespaces) }
let!(:projects) { table(:projects) }
let!(:routes) { table(:routes) }
let!(:uploads) { table(:uploads) }
before(:all) do
ensure_temporary_tracking_table_exists
end
describe '#upload_path' do
def assert_upload_path(file_path, expected_upload_path)
untracked_file = create_untracked_file(file_path)
expect(untracked_file.upload_path).to eq(expected_upload_path)
end
context 'for an appearance logo file path' do
it 'returns the file path relative to the CarrierWave root' do
assert_upload_path('/-/system/appearance/logo/1/some_logo.jpg', 'uploads/-/system/appearance/logo/1/some_logo.jpg')
end
end
context 'for an appearance header_logo file path' do
it 'returns the file path relative to the CarrierWave root' do
assert_upload_path('/-/system/appearance/header_logo/1/some_logo.jpg', 'uploads/-/system/appearance/header_logo/1/some_logo.jpg')
end
end
context 'for a pre-Markdown Note attachment file path' do
it 'returns the file path relative to the CarrierWave root' do
assert_upload_path('/-/system/note/attachment/1234/some_attachment.pdf', 'uploads/-/system/note/attachment/1234/some_attachment.pdf')
end
end
context 'for a user avatar file path' do
it 'returns the file path relative to the CarrierWave root' do
assert_upload_path('/-/system/user/avatar/1234/avatar.jpg', 'uploads/-/system/user/avatar/1234/avatar.jpg')
end
end
context 'for a group avatar file path' do
it 'returns the file path relative to the CarrierWave root' do
assert_upload_path('/-/system/group/avatar/1234/avatar.jpg', 'uploads/-/system/group/avatar/1234/avatar.jpg')
end
end
context 'for a project avatar file path' do
it 'returns the file path relative to the CarrierWave root' do
assert_upload_path('/-/system/project/avatar/1234/avatar.jpg', 'uploads/-/system/project/avatar/1234/avatar.jpg')
end
end
context 'for a project Markdown attachment (notes, issues, MR descriptions) file path' do
it 'returns the file path relative to the project directory in uploads' do
project = create_project
random_hex = SecureRandom.hex
assert_upload_path("/#{get_full_path(project)}/#{random_hex}/Some file.jpg", "#{random_hex}/Some file.jpg")
end
end
end
describe '#uploader' do
def assert_uploader(file_path, expected_uploader)
untracked_file = create_untracked_file(file_path)
expect(untracked_file.uploader).to eq(expected_uploader)
end
context 'for an appearance logo file path' do
it 'returns AttachmentUploader as a string' do
assert_uploader('/-/system/appearance/logo/1/some_logo.jpg', 'AttachmentUploader')
end
end
context 'for an appearance header_logo file path' do
it 'returns AttachmentUploader as a string' do
assert_uploader('/-/system/appearance/header_logo/1/some_logo.jpg', 'AttachmentUploader')
end
end
context 'for a pre-Markdown Note attachment file path' do
it 'returns AttachmentUploader as a string' do
assert_uploader('/-/system/note/attachment/1234/some_attachment.pdf', 'AttachmentUploader')
end
end
context 'for a user avatar file path' do
it 'returns AvatarUploader as a string' do
assert_uploader('/-/system/user/avatar/1234/avatar.jpg', 'AvatarUploader')
end
end
context 'for a group avatar file path' do
it 'returns AvatarUploader as a string' do
assert_uploader('/-/system/group/avatar/1234/avatar.jpg', 'AvatarUploader')
end
end
context 'for a project avatar file path' do
it 'returns AvatarUploader as a string' do
assert_uploader('/-/system/project/avatar/1234/avatar.jpg', 'AvatarUploader')
end
end
context 'for a project Markdown attachment (notes, issues, MR descriptions) file path' do
it 'returns FileUploader as a string' do
project = create_project
assert_uploader("/#{get_full_path(project)}/#{SecureRandom.hex}/Some file.jpg", 'FileUploader')
end
end
end
describe '#model_type' do
def assert_model_type(file_path, expected_model_type)
untracked_file = create_untracked_file(file_path)
expect(untracked_file.model_type).to eq(expected_model_type)
end
context 'for an appearance logo file path' do
it 'returns Appearance as a string' do
assert_model_type('/-/system/appearance/logo/1/some_logo.jpg', 'Appearance')
end
end
context 'for an appearance header_logo file path' do
it 'returns Appearance as a string' do
assert_model_type('/-/system/appearance/header_logo/1/some_logo.jpg', 'Appearance')
end
end
context 'for a pre-Markdown Note attachment file path' do
it 'returns Note as a string' do
assert_model_type('/-/system/note/attachment/1234/some_attachment.pdf', 'Note')
end
end
context 'for a user avatar file path' do
it 'returns User as a string' do
assert_model_type('/-/system/user/avatar/1234/avatar.jpg', 'User')
end
end
context 'for a group avatar file path' do
it 'returns Namespace as a string' do
assert_model_type('/-/system/group/avatar/1234/avatar.jpg', 'Namespace')
end
end
context 'for a project avatar file path' do
it 'returns Project as a string' do
assert_model_type('/-/system/project/avatar/1234/avatar.jpg', 'Project')
end
end
context 'for a project Markdown attachment (notes, issues, MR descriptions) file path' do
it 'returns Project as a string' do
project = create_project
assert_model_type("/#{get_full_path(project)}/#{SecureRandom.hex}/Some file.jpg", 'Project')
end
end
end
describe '#model_id' do
def assert_model_id(file_path, expected_model_id)
untracked_file = create_untracked_file(file_path)
expect(untracked_file.model_id).to eq(expected_model_id)
end
context 'for an appearance logo file path' do
it 'returns the ID as a string' do
assert_model_id('/-/system/appearance/logo/1/some_logo.jpg', 1)
end
end
context 'for an appearance header_logo file path' do
it 'returns the ID as a string' do
assert_model_id('/-/system/appearance/header_logo/1/some_logo.jpg', 1)
end
end
context 'for a pre-Markdown Note attachment file path' do
it 'returns the ID as a string' do
assert_model_id('/-/system/note/attachment/1234/some_attachment.pdf', 1234)
end
end
context 'for a user avatar file path' do
it 'returns the ID as a string' do
assert_model_id('/-/system/user/avatar/1234/avatar.jpg', 1234)
end
end
context 'for a group avatar file path' do
it 'returns the ID as a string' do
assert_model_id('/-/system/group/avatar/1234/avatar.jpg', 1234)
end
end
context 'for a project avatar file path' do
it 'returns the ID as a string' do
assert_model_id('/-/system/project/avatar/1234/avatar.jpg', 1234)
end
end
context 'for a project Markdown attachment (notes, issues, MR descriptions) file path' do
it 'returns the ID as a string' do
project = create_project
assert_model_id("/#{get_full_path(project)}/#{SecureRandom.hex}/Some file.jpg", project.id)
end
end
end
describe '#file_size' do
context 'for an appearance logo file path' do
let(:appearance) { create_or_update_appearance(logo: true) }
let(:untracked_file) { described_class.create!(path: get_uploads(appearance, 'Appearance').first.path) }
it 'returns the file size' do
expect(untracked_file.file_size).to eq(1062)
end
end
context 'for a project avatar file path' do
let(:project) { create_project(avatar: true) }
let(:untracked_file) { described_class.create!(path: get_uploads(project, 'Project').first.path) }
it 'returns the file size' do
expect(untracked_file.file_size).to eq(1062)
end
end
context 'for a project Markdown attachment (notes, issues, MR descriptions) file path' do
let(:project) { create_project }
let(:untracked_file) { create_untracked_file("/#{get_full_path(project)}/#{get_uploads(project, 'Project').first.path}") }
before do
add_markdown_attachment(project)
end
it 'returns the file size' do
expect(untracked_file.file_size).to eq(1062)
end
end
end
def create_untracked_file(path_relative_to_upload_dir)
described_class.create!(path: "#{Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR}#{path_relative_to_upload_dir}")
end
end
require 'spec_helper'
describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq, :migration, schema: 20180129193323 do
include TrackUntrackedUploadsHelpers
include MigrationsHelpers
let!(:untracked_files_for_uploads) { described_class::UntrackedFile }
# Rollback DB to 10.5 (later than this was originally written for) because it still needs to work.
describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq, :migration, schema: 20180208183958 do
include MigrationsHelpers::TrackUntrackedUploadsHelpers
let!(:untracked_files_for_uploads) { table(:untracked_files_for_uploads) }
let!(:appearances) { table(:appearances) }