Commit 594e6a0a authored by Micaël Bergeron

Refactor the uploaders

I've demoted the ObjectStoreUploader to a concern that is mixed into
the concrete uploader classes that need to store files in a remote
object store.
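
For illustration, a concrete uploader now mixes the behavior in roughly
like this (mirroring the AttachmentUploader hunk further down in this diff):

    class AttachmentUploader < GitlabUploader
      include RecordsUploads::Concern
      # Remote object store support, demoted from the old ObjectStoreUploader base class
      include ObjectStorage::Concern
      prepend ObjectStorage::Extension::RecordsUploads
      include UploaderHelper

      # Per-uploader storage configuration (replaces the old `storage :file` DSL call)
      storage_options Gitlab.config.uploads
    end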

I've been working on making the local -> remote migration work first,
which has been trivial compared to the remote -> local one.

The current implementation is heavily based on side effects, which
makes the code brittle and hard to reason about.

The current approach is to store the `store` field on the matching
`Upload` model once a migration has been done. To retrieve the field
I use the `has_many :uploads` relationship, querying with every path
that a given file may have: `uploads.where(path: paths).last`. This
has the drawback of adding a database query for every upload lookup,
but I feel that generalizing this behavior is worth it. We should be
able to optimize this down the road quite easily.
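
A minimal sketch of that lookup, mirroring the `Project#avatar_uploader`
hunk below (names such as `avatar_identifier` come from that hunk):

    def avatar_uploader(uploader)
      return uploader unless avatar_identifier

      # Build every path the file could live under (one per store layout), then
      # fetch the matching Upload record to learn which store actually holds it.
      paths = uploader.store_dirs.map { |_store, dir| File.join(dir, avatar_identifier) }
      uploader.upload = uploads.where(uploader: 'AvatarUploader', path: paths).last
      uploader.object_store = uploader.upload&.store
      uploader
    end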
parent bbcaf4ae
......@@ -2,6 +2,7 @@ module UploadsActions
include Gitlab::Utils::StrongMemoize
def create
# TODO why not pass a GitlabUploader instance
link_to_file = UploadService.new(model, params[:file], uploader_class).execute
respond_to do |format|
......@@ -17,34 +18,53 @@ module UploadsActions
end
end
# This should either find the @file and redirect to its URL
def show
return render_404 unless uploader.exists?
# send to the remote URL
redirect_to uploader.url unless uploader.file_storage?
# or send the file
disposition = uploader.image_or_video? ? 'inline' : 'attachment'
expires_in 0.seconds, must_revalidate: true, private: true
send_file uploader.file.path, disposition: disposition
end
private
def uploader_class
uploader.class
end
def upload_mount
mounted_as = params[:mounted_as]
upload_mounts = %w(avatar attachment file logo header_logo)
mounted_as if upload_mounts.include? mounted_as
end
# TODO: this method is too complex
#
def uploader
strong_memoize(:uploader) do
return if show_model.nil?
@uploader ||= if upload_model_class < CarrierWave::Mount::Extension && upload_mount
model.public_send(upload_mount)
elsif upload_model_class == PersonalSnippet
find_upload(PersonalFileUploader)&.build_uploader || PersonalFileUploader.new(model)
else
find_upload(FileUploader)&.build_uploader || FileUploader.new(model)
end
end
file_uploader = FileUploader.new(show_model, params[:secret])
file_uploader.retrieve_from_store!(params[:filename])
def find_upload(uploader_class)
return nil unless params[:secret] && params[:filename]
file_uploader
end
upload_path = uploader_class.upload_path(params[:secret], params[:filename])
Upload.where(uploader: uploader_class.to_s, path: upload_path)&.last
end
def image_or_video?
uploader && uploader.exists? && uploader.image_or_video?
end
def uploader_class
FileUploader
end
end
# Used for out-of-context uploads
# see #upload_model_class
#
class UploadsController < ApplicationController
include UploadsActions
UnknownUploadModelError = Class.new(StandardError)
rescue_from UnknownUploadModelError, with: :render_404
skip_before_action :authenticate_user!
before_action :upload_mount_satisfied?
before_action :find_model
before_action :authorize_access!, only: [:show]
before_action :authorize_create_access!, only: [:create]
private
def find_model
return nil unless params[:id]
return render_404 unless upload_model && upload_mount
@model = upload_model.find(params[:id])
@model = upload_model_class.find(params[:id])
end
def authorize_access!
......@@ -53,8 +56,8 @@ class UploadsController < ApplicationController
end
end
def upload_model
upload_models = {
def upload_model_class
model_classes = {
"user" => User,
"project" => Project,
"note" => Note,
......@@ -63,42 +66,17 @@ class UploadsController < ApplicationController
"personal_snippet" => PersonalSnippet
}
upload_models[params[:model]]
raise UnknownUploadModelError unless cls = model_classes[params[:model]]
cls
end
def upload_mount
return true unless params[:mounted_as]
upload_mounts = %w(avatar attachment file logo header_logo)
if upload_mounts.include?(params[:mounted_as])
params[:mounted_as]
end
end
def uploader
return @uploader if defined?(@uploader)
case model
when nil
@uploader = PersonalFileUploader.new(nil, params[:secret])
@uploader.retrieve_from_store!(params[:filename])
when PersonalSnippet
@uploader = PersonalFileUploader.new(model, params[:secret])
@uploader.retrieve_from_store!(params[:filename])
else
@uploader = @model.public_send(upload_mount) # rubocop:disable GitlabSecurity/PublicSend
redirect_to @uploader.url unless @uploader.file_storage?
end
@uploader
def upload_model_class_has_mounts?
upload_model_class < CarrierWave::Mount::Extension
end
def uploader_class
PersonalFileUploader
def upload_mount_satisfied?
return true unless upload_model_class_has_mounts?
upload_model_class.uploader_options.has_key?(upload_mount)
end
def model
......
......@@ -46,7 +46,7 @@ module Ci
end
scope :with_artifacts_not_expired, ->() { with_artifacts.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) }
scope :with_expired_artifacts, ->() { with_artifacts.where('artifacts_expire_at < ?', Time.now) }
scope :with_artifacts_stored_locally, ->() { with_artifacts.where(artifacts_file_store: [nil, LegacyArtifactUploader::LOCAL_STORE]) }
scope :with_artifacts_stored_locally, ->() { with_artifacts.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL]) }
scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) }
scope :manual_actions, ->() { where(when: :manual, status: COMPLETED_STATUSES + [:manual]) }
scope :ref_protected, -> { where(protected: true) }
......
......@@ -7,7 +7,7 @@ class LfsObject < ActiveRecord::Base
validates :oid, presence: true, uniqueness: true
scope :with_files_stored_locally, ->() { where(file_store: [nil, LfsObjectUploader::LOCAL_STORE]) }
scope :with_files_stored_locally, ->() { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) }
mount_uploader :file, LfsObjectUploader
......
......@@ -90,7 +90,9 @@ class Note < ActiveRecord::Base
end
end
# @deprecated attachments are handled by the MarkdownUploader
mount_uploader :attachment, AttachmentUploader
deprecate :attachment => 'Use the Markdown uploader instead'
# Scopes
scope :searchable, -> { where(system: false) }
......
......@@ -931,6 +931,14 @@ class Project < ActiveRecord::Base
end
end
def avatar_uploader(uploader)
return uploader unless avatar_identifier
paths = uploader.store_dirs.map {|store, path| File.join(path, avatar_identifier) }
uploader.upload = uploads.where(uploader: 'AvatarUploader', path: paths)&.last
uploader.object_store = uploader.upload&.store # TODO: move this to RecordsUploads
end
def avatar_in_git
repository.avatar
end
......
......@@ -17,13 +17,15 @@ class Upload < ActiveRecord::Base
end
def self.record(uploader)
remove_path(uploader.relative_path)
upload = uploader.upload || new
create(
upload.update_attributes(
size: uploader.file.size,
path: uploader.relative_path,
path: uploader.dynamic_path,
model: uploader.model,
uploader: uploader.class.to_s
uploader: uploader.class.to_s,
store: uploader.try(:object_store) || ObjectStorage::Store::LOCAL
)
end
......@@ -49,7 +51,15 @@ class Upload < ActiveRecord::Base
File.exist?(absolute_path)
end
private
def build_uploader(from = nil)
uploader = from || uploader_class.new(model)
uploader.upload = self
uploader.object_store = store
uploader
end
#private
def foreground_checksum?
size <= CHECKSUM_THRESHOLD
......
......@@ -25,7 +25,7 @@ module Geo
end
def local_store_path
Pathname.new(LfsObjectUploader.local_store_path)
Pathname.new(LfsObjectUploader.workhorse_upload_path)
end
def relative_file_path
......
......@@ -16,9 +16,9 @@ module Projects
@old_path = project.full_path
@new_path = project.disk_path
origin = FileUploader.dynamic_path_segment(project)
origin = FileUploader.model_path_segment(project)
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:attachments]
target = FileUploader.dynamic_path_segment(project)
target = FileUploader.model_path_segment(project)
result = move_folder!(origin, target)
project.save!
......
class AttachmentUploader < GitlabUploader
include RecordsUploads
include RecordsUploads::Concern
include ObjectStorage::Concern
prepend ObjectStorage::Extension::RecordsUploads
include UploaderHelper
storage :file
storage_options Gitlab.config.uploads
def store_dir
"#{base_dir}/#{model.class.to_s.underscore}/#{mounted_as}/#{model.id}"
private
def dynamic_segment
File.join(model.class.to_s.underscore, mounted_as.to_s, model.id.to_s)
end
end
class AvatarUploader < GitlabUploader
include RecordsUploads
include UploaderHelper
include RecordsUploads::Concern
include ObjectStorage::Concern
prepend ObjectStorage::Extension::RecordsUploads
storage :file
def store_dir
"#{base_dir}/#{model.class.to_s.underscore}/#{mounted_as}/#{model.id}"
end
storage_options Gitlab.config.uploads
def exists?
model.avatar.file && model.avatar.file.present?
......@@ -22,4 +20,10 @@ class AvatarUploader < GitlabUploader
def move_to_cache
false
end
private
def dynamic_segment
File.join(model.class.to_s.underscore, mounted_as.to_s, model.id.to_s)
end
end
......@@ -21,13 +21,12 @@ class FileMover
end
def update_markdown
updated_text = model.read_attribute(update_field).gsub(temp_file_uploader.to_markdown, uploader.to_markdown)
updated_text = model.read_attribute(update_field)
.gsub(temp_file_uploader.markdown_link, uploader.markdown_link)
model.update_attribute(update_field, updated_text)
true
rescue
revert
false
end
......
# This class breaks the actual CarrierWave concept.
# Every uploader should use a base_dir that is model-agnostic so we can build
# back URLs from base_dir-relative paths saved in the `Upload` model.
#
# As the `.base_dir` is model-dependent and **not** saved in the upload model (see #upload_path),
# there is no way to build back the correct file path without the model, which defies
# the CarrierWave way of storing files.
#
class FileUploader < GitlabUploader
include RecordsUploads
include UploaderHelper
include RecordsUploads::Concern
include ObjectStorage::Concern
prepend ObjectStorage::Extension::RecordsUploads
MARKDOWN_PATTERN = %r{\!?\[.*?\]\(/uploads/(?<secret>[0-9a-f]{32})/(?<file>.*?)\)}
DYNAMIC_PATH_PATTERN = %r{(?<secret>\h{32})/(?<identifier>.*)}
storage :file
attr_accessor :model
attr_reader :secret
# TODO: remove this; FileUploader should not have storage_options. This class
# should be abstract, or even a Concern that simply adds the secret.
#
# Then create a new AdhocUploader that implements the base_dir logic of this class,
# which is wrong anyway.
storage_options Gitlab.config.uploads
def self.absolute_path(upload_record)
def self.root
storage_options&.storage_path
end
def self.absolute_path(upload)
File.join(
self.dynamic_path_segment(upload_record.model),
upload_record.path
root,
base_dir(upload.model),
upload.path # this already contains the dynamic_segment, see #upload_path
)
end
# Not using `GitlabUploader.base_dir` because all project namespaces are in
# the `public/uploads` dir.
#
def self.base_dir
root_dir
def self.base_dir(model)
model_path_segment(model)
end
# Returns the part of `store_dir` that can change based on the model's current
......@@ -29,59 +50,102 @@ class FileUploader < GitlabUploader
# model - Object that responds to `full_path` and `disk_path`
#
# Returns a String without a trailing slash
def self.dynamic_path_segment(model)
def self.model_path_segment(model)
if model.hashed_storage?(:attachments)
dynamic_path_builder(model.disk_path)
model.disk_path
else
dynamic_path_builder(model.full_path)
model.full_path
end
end
# Auxiliary method to build dynamic path segment when not using a project model
#
# Prefer to use the `.dynamic_path_segment` as it includes Hashed Storage specific logic
# Prefer to use the `.model_path_segment` as it includes Hashed Storage specific logic
#
# TODO: review this path?
# TODO: remove me, this makes no sense
def self.dynamic_path_builder(path)
File.join(CarrierWave.root, base_dir, path)
File.join(root, path)
end
attr_accessor :model
attr_reader :secret
def self.upload_path(secret, identifier)
File.join(secret, identifier)
end
def initialize(model, secret = nil)
@model = model
@secret = secret || generate_secret
@secret = secret
end
def store_dir
File.join(dynamic_path_segment, @secret)
def base_dir
self.class.base_dir(@model)
end
def relative_path
self.file.path.sub("#{dynamic_path_segment}/", '')
# We don't need to know the actual path; an uploader instance should be
# able to yield the file content on demand, so we should build the digest
def absolute_path
self.class.absolute_path(@upload)
end
def to_markdown
to_h[:markdown]
def upload_path
self.class.upload_path(dynamic_segment, identifier)
end
def to_h
filename = image_or_video? ? self.file.basename : self.file.filename
escaped_filename = filename.gsub("]", "\\]")
def model_path_segment
self.class.model_path_segment(@model)
end
def store_dir
File.join(base_dir, dynamic_segment)
end
markdown = "[#{escaped_filename}](#{secure_url})"
def markdown_link
markdown = "[#{markdown_name}](#{secure_url})"
markdown.prepend("!") if image_or_video? || dangerous?
markdown
end
def to_h
{
alt: filename,
alt: markdown_name,
url: secure_url,
markdown: markdown
markdown: markdown_link
}
end
def filename
self.file.filename
end
# This is weird: the upload does not hold the secret, but holds the path,
# so we need to extract the secret from the path
def upload=(value)
if matches = DYNAMIC_PATH_PATTERN.match(value.path)
@secret = matches[:secret]
@identifier = matches[:identifier]
retrieve_from_store!(@identifier)
end
super
end
def secret
@secret ||= generate_secret
end
private
def dynamic_path_segment
self.class.dynamic_path_segment(model)
def markdown_name
(image_or_video? ? File.basename(filename, File.extname(filename)) : filename).gsub("]", "\\]")
end
def identifier
@identifier ||= filename
end
def dynamic_segment
secret
end
def generate_secret
......
class GitlabUploader < CarrierWave::Uploader::Base
def self.absolute_path(upload_record)
File.join(CarrierWave.root, upload_record.path)
end
class << self
# DSL setter
def storage_options(options = nil)
@storage_options = options if options
@storage_options
end
def self.root_dir
'uploads'
end
def root
storage_options&.storage_path
end
# When object storage is used, keep the `root_dir` as `base_dir`.
# The files aren't really in folders there, they just have a name.
# The files that contain user input in their name also contain a hash, so
# the names are still unique
#
# This method is overridden in the `FileUploader`
def self.base_dir
return root_dir unless file_storage?
# represent the directory namespacing at the class level
def base_dir
storage_options&.base_dir || ""
end
File.join(root_dir, '-', 'system')
end
def file_storage?
storage == CarrierWave::Storage::File
end
def self.file_storage?
self.storage == CarrierWave::Storage::File
def absolute_path(upload_record)
File.join(CarrierWave.root, upload_record.path)
end
end
delegate :base_dir, :file_storage?, to: :class
......@@ -39,17 +40,6 @@ class GitlabUploader < CarrierWave::Uploader::Base
true
end
# Designed to be overridden by child uploaders that have a dynamic path
# segment -- that is, a path that changes based on mutable attributes of its
# associated model
#
# For example, `FileUploader` builds the storage path based on the associated
# project model's `path_with_namespace` value, which can change when the
# project or its containing namespace is moved or renamed.
def relative_path
self.file.path.sub("#{root}/", '')
end
def exists?
file.present?
end
......@@ -67,6 +57,17 @@ class GitlabUploader < CarrierWave::Uploader::Base
private
# Designed to be overridden by child uploaders that have a dynamic path
# segment -- that is, a path that changes based on mutable attributes of its
# associated model
#
# For example, `FileUploader` builds the storage path based on the associated
# project model's `path_with_namespace` value, which can change when the
# project or its containing namespace is moved or renamed.
def dynamic_segment
raise(NotImplementedError)
end
# To prevent files from moving across filesystems, override the default
# implementation:
# http://github.com/carrierwaveuploader/carrierwave/blob/v1.0.0/lib/carrierwave/uploader/cache.rb#L181-L183
......
class JobArtifactUploader < ObjectStoreUploader
storage_options Gitlab.config.artifacts
def self.local_store_path
Gitlab.config.artifacts.path
end
class JobArtifactUploader < GitlabUploader
extend Workhorse::UploadPath
include ObjectStorage::Concern
def self.artifacts_upload_path
File.join(self.local_store_path, 'tmp/uploads/')
end
storage_options Gitlab.config.artifacts
def size
return super if model.size.nil?
......@@ -17,7 +12,7 @@ class JobArtifactUploader < ObjectStoreUploader
private
def default_path
def dynamic_segment
creation_date = model.created_at.utc.strftime('%Y_%m_%d')
File.join(disk_hash[0..1], disk_hash[2..3], disk_hash,
......
class LegacyArtifactUploader < ObjectStoreUploader
storage_options Gitlab.config.artifacts
def self.local_store_path
Gitlab.config.artifacts.path
end
class LegacyArtifactUploader < GitlabUploader
extend Workhorse::UploadPath
include ObjectStorage::Concern
def self.artifacts_upload_path
File.join(self.local_store_path, 'tmp/uploads/')
end
storage_options Gitlab.config.artifacts
private
def default_path
def dynamic_segment
File.join(model.created_at.utc.strftime('%Y_%m'), model.project_id.to_s, model.id.to_s)
end
end
class LfsObjectUploader < ObjectStoreUploader
storage_options Gitlab.config.lfs
class LfsObjectUploader < GitlabUploader
extend Workhorse::UploadPath
include ObjectStorage::Concern
def self.local_store_path
Gitlab.config.lfs.storage_path
end
storage_options Gitlab.config.lfs
def filename
model.oid[4..-1]
......@@ -11,7 +10,7 @@ class LfsObjectUploader < ObjectStoreUploader
private
def default_path
"#{model.oid[0, 2]}/#{model.oid[2, 2]}"
def dynamic_segment
File.join(model.oid[0, 2], model.oid[2, 2])
end
end
class NamespaceFileUploader < FileUploader
def self.base_dir
File.join(root_dir, '-', 'system', 'namespace')
storage_options Gitlab.config.uploads
def self.base_dir(model)
File.join(storage_options&.base_dir, 'namespace', model_path_segment(model))
end
def self.dynamic_path_segment(model)
dynamic_path_builder(model.id.to_s)
def self.model_path_segment(model)
File.join(model.id.to_s)
end
private
# Re-Override
def store_dir
store_dirs[object_store]
end
def secure_url
File.join('/uploads', @secret, file.filename)