Commit 5b145ec6 authored by Stan Hu

Add Azure Blob Storage support

This uses our custom Azure gem
(https://gitlab.com/gitlab-org/gitlab-fog-azure-rm) to integrate direct
upload support into GitLab.

Because the Azure Put Blob API does not support chunked transfer
encoding, uploads cannot be streamed through a pre-signed URL without
first buffering the entire file to disk. To avoid that, we add an Azure
client directly to Workhorse that uses the Put Block and Put Block List
APIs instead.
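
For illustration, the two-step block upload looks roughly like this. A
minimal Ruby sketch of the REST calls involved, assuming a hypothetical
pre-authorized SAS URL for the destination blob (the real client is Go
code inside Workhorse):

```ruby
require 'net/http'
require 'base64'
require 'cgi'
require 'uri'

# Hypothetical SAS URL; account, container, blob, and token are placeholders.
sas_url = URI('https://myaccount.blob.core.windows.net/container/blob?sv=2019-12-12&sig=TOKEN')
block_ids = []

Net::HTTP.start(sas_url.host, sas_url.port, use_ssl: true) do |http|
  # Put Block: upload each chunk under a unique Base64-encoded block ID.
  ['chunk 1 data', 'chunk 2 data'].each_with_index do |chunk, i|
    block_id = Base64.strict_encode64(format('block-%05d', i))
    block_ids << block_id
    uri = sas_url.dup
    uri.query += "&comp=block&blockid=#{CGI.escape(block_id)}"
    http.send_request('PUT', uri.request_uri, chunk)
  end

  # Put Block List: commit the uploaded blocks, in order, as the final blob.
  block_list = '<?xml version="1.0" encoding="utf-8"?><BlockList>' \
               "#{block_ids.map { |id| "<Latest>#{id}</Latest>" }.join}</BlockList>"
  uri = sas_url.dup
  uri.query += '&comp=blocklist'
  http.send_request('PUT', uri.request_uri, block_list)
end
```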

The Workhorse client is implemented in gitlab-workhorse!555. We use the
Go Cloud Development Kit to identify the destination with a driver URL
(e.g. `azblob://container`), a scheme that can later be extended to
other object storage providers.

Part of #25877
parent 3efc769f
@@ -119,6 +119,7 @@ gem 'fog-local', '~> 0.6'
gem 'fog-openstack', '~> 1.0'
gem 'fog-rackspace', '~> 0.1.1'
gem 'fog-aliyun', '~> 0.3'
gem 'gitlab-fog-azure-rm', '~> 0.7'
# for Google storage
gem 'google-api-client', '~> 0.33'
...
@@ -112,6 +112,15 @@ GEM
aws-sigv4 (~> 1.1)
aws-sigv4 (1.2.1)
aws-eventstream (~> 1, >= 1.0.2)
azure-core (0.1.15)
faraday (~> 0.9)
faraday_middleware (~> 0.10)
nokogiri (~> 1.6)
azure-storage (0.15.0.preview)
azure-core (~> 0.1)
faraday (~> 0.9)
faraday_middleware (~> 0.10)
nokogiri (~> 1.6, >= 1.6.8)
babosa (1.0.2)
base32 (0.3.2)
batch-loader (1.4.0)
@@ -313,6 +322,9 @@ GEM
railties (>= 4.2.0)
faraday (0.17.3)
multipart-post (>= 1.2, < 3)
faraday-cookie_jar (0.0.6)
faraday (>= 0.7.4)
http-cookie (~> 1.0.0)
faraday-http-cache (2.0.0)
faraday (~> 0.8)
faraday_middleware (0.14.0)
@@ -407,6 +419,12 @@ GEM
github-markup (1.7.0)
gitlab-chronic (0.10.5)
numerizer (~> 0.2)
gitlab-fog-azure-rm (0.7.0)
azure-storage (~> 0.15.0.preview)
fog-core (= 2.1.0)
fog-json (~> 1.2.0)
mime-types
ms_rest_azure (~> 0.12.0)
gitlab-labkit (0.12.1)
actionpack (>= 5.0.0, < 6.1.0)
activesupport (>= 5.0.0, < 6.1.0)
@@ -668,6 +686,15 @@ GEM
mini_mime (1.0.2)
mini_portile2 (2.4.0)
minitest (5.11.3)
ms_rest (0.7.6)
concurrent-ruby (~> 1.0)
faraday (>= 0.9, < 2.0.0)
timeliness (~> 0.3.10)
ms_rest_azure (0.12.0)
concurrent-ruby (~> 1.0)
faraday (>= 0.9, < 2.0.0)
faraday-cookie_jar (~> 0.0.6)
ms_rest (~> 0.7.6)
msgpack (1.3.1)
multi_json (1.14.1)
multi_xml (0.6.0)
@@ -1104,6 +1131,7 @@ GEM
thrift (0.11.0.0)
tilt (2.0.10)
timecop (0.9.1)
timeliness (0.3.10)
timfel-krb5-auth (0.8.3)
toml (0.2.0)
parslet (~> 1.8.0)
@@ -1275,6 +1303,7 @@ DEPENDENCIES
gitaly (~> 13.3.0.pre.rc1)
github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5)
gitlab-fog-azure-rm (~> 0.7)
gitlab-labkit (= 0.12.1)
gitlab-license (~> 1.0)
gitlab-mail_room (~> 0.0.6)
...
---
title: Add Azure Blob Storage support
merge_request: 38882
author:
type: added
@@ -4,6 +4,12 @@
# This pulls in https://github.com/carrierwaveuploader/carrierwave/pull/2504 to support
# sending AWS S3 encryption headers when copying objects.
#
# This patch also incorporates
# https://github.com/carrierwaveuploader/carrierwave/pull/2375 to
# provide Azure support. This is already in CarrierWave v2.1.x, but
# upgrading this gem is a significant task:
# https://gitlab.com/gitlab-org/gitlab/-/issues/216067
module CarrierWave
module Storage
class Fog < Abstract
@@ -16,6 +22,31 @@ def copy_to(new_path)
def copy_to_options
acl_header.merge(@uploader.fog_attributes)
end
def authenticated_url(options = {})
if %w[AWS Google Rackspace OpenStack AzureRM].include?(@uploader.fog_credentials[:provider])
# avoid a get by using local references
local_directory = connection.directories.new(key: @uploader.fog_directory)
local_file = local_directory.files.new(key: path)
expire_at = ::Fog::Time.now + @uploader.fog_authenticated_url_expiration
case @uploader.fog_credentials[:provider]
when 'AWS', 'Google'
# Older versions of fog-google do not support options as a parameter
if url_options_supported?(local_file)
local_file.url(expire_at, options)
else
warn "Options hash not supported in #{local_file.class}. You may need to upgrade your Fog provider."
local_file.url(expire_at)
end
when 'Rackspace'
connection.get_object_https_url(@uploader.fog_directory, path, expire_at, options)
when 'OpenStack'
connection.get_object_https_url(@uploader.fog_directory, path, expire_at)
else
local_file.url(expire_at)
end
end
end
end
end
end
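
For example, with an uploader whose `fog_credentials` use the `AzureRM`
provider (as in the spec further down), the patched method can be
exercised as follows. A sketch with placeholder values:

```ruby
# Placeholder object path; account, container, and token come from the
# uploader's (hypothetical) Azure credentials.
file = CarrierWave::Storage::Fog::File.new(uploader, storage, 'path/to/object')
file.authenticated_url
# => "https://<account>.blob.core.windows.net/<container>/path/to/object?<sas-token>"
```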
...
class DirectUploadsValidator
SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS).freeze
SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS AzureRM).freeze
ValidationError = Class.new(StandardError)
@@ -13,22 +13,33 @@ def verify!(uploader_type, object_store)
raise ValidationError, "No provider configured for '#{uploader_type}'. #{supported_provider_text}" if provider.blank?
return if SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider)
return if provider_loaded?(provider)
raise ValidationError, "Object storage provider '#{provider}' is not supported " \
"when 'direct_upload' is used for '#{uploader_type}'. #{supported_provider_text}"
end
private
def provider_loaded?(provider)
return false unless SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider)
require 'fog/azurerm' if provider == 'AzureRM'
true
end
def supported_provider_text
"Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.join(', ')} are supported."
"Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.to_sentence} are supported."
end
end
DirectUploadsValidator.new.tap do |validator|
CONFIGS = {
artifacts: Gitlab.config.artifacts,
uploads: Gitlab.config.uploads,
lfs: Gitlab.config.lfs
lfs: Gitlab.config.lfs,
packages: Gitlab.config.packages,
uploads: Gitlab.config.uploads
}.freeze
CONFIGS.each do |uploader_type, uploader|
...
@@ -58,6 +58,10 @@ def google?
provider == 'Google'
end
def azure?
provider == 'AzureRM'
end
def fog_attributes
@fog_attributes ||= begin
return {} unless enabled? && aws?
...
@@ -62,8 +62,16 @@ def multipart_upload_hash
end
def workhorse_client_hash
return {} unless config.aws?
if config.aws?
workhorse_aws_hash
elsif config.azure?
workhorse_azure_hash
else
{}
end
end
def workhorse_aws_hash
{
UseWorkhorseClient: use_workhorse_s3_client?,
RemoteTempObjectID: object_name,
@@ -82,6 +90,21 @@ def workhorse_client_hash
}
end
def workhorse_azure_hash
{
# Azure requires Workhorse client because direct uploads can't
# use pre-signed URLs without buffering the whole file to disk.
UseWorkhorseClient: true,
RemoteTempObjectID: object_name,
ObjectStorage: {
Provider: 'AzureRM',
GoCloudConfig: {
URL: "azblob://#{bucket_name}"
}
}
}
end
def use_workhorse_s3_client?
return false unless Feature.enabled?(:use_workhorse_s3_client, default_enabled: true)
return false unless config.use_iam_profile? || config.consolidated_settings?
...
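
For reference, `workhorse_azure_hash` above produces a payload like the
following for a hypothetical container named `gitlab-artifacts` and
temporary object `tmp/uploads/1234`:

```ruby
# Illustrative values only; the container and object names are hypothetical.
{
  UseWorkhorseClient: true,
  RemoteTempObjectID: 'tmp/uploads/1234',
  ObjectStorage: {
    Provider: 'AzureRM',
    GoCloudConfig: { URL: 'azblob://gitlab-artifacts' }
  }
}
```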
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe 'CarrierWave::Storage::Fog::File' do
let(:uploader_class) { Class.new(CarrierWave::Uploader::Base) }
let(:uploader) { uploader_class.new }
let(:storage) { CarrierWave::Storage::Fog.new(uploader) }
let(:azure_options) do
{
azure_storage_account_name: 'AZURE_ACCOUNT_NAME',
azure_storage_access_key: 'AZURE_ACCESS_KEY',
provider: 'AzureRM'
}
end
subject { CarrierWave::Storage::Fog::File.new(uploader, storage, 'test') }
before do
require 'fog/azurerm'
allow(uploader).to receive(:fog_credentials).and_return(azure_options)
Fog.mock!
end
describe '#authenticated_url' do
context 'with Azure' do
it 'has an authenticated URL' do
expect(subject.authenticated_url).to eq("https://sa.blob.core.windows.net/test_container/test_blob?token")
end
end
end
end
@@ -8,7 +8,7 @@
end
where(:config_name) do
%w(lfs artifacts uploads)
%w(artifacts lfs packages uploads)
end
with_them do
@@ -52,11 +52,19 @@
end
end
context 'when provider is AzureRM' do
let(:provider) { 'AzureRM' }
it 'succeeds' do
expect { subject }.not_to raise_error
end
end
context 'when connection is empty' do
let(:connection) { nil }
it 'raises an error' do
expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS are supported."
expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS, and AzureRM are supported."
end
end
...
@@ -112,18 +112,20 @@
it_behaves_like 'a valid upload'
it 'sets Workhorse client data' do
expect(subject[:UseWorkhorseClient]).to eq(use_iam_profile)
expect(subject[:RemoteTempObjectID]).to eq(object_name)
object_store_config = subject[:ObjectStorage]
expect(object_store_config[:Provider]).to eq 'AWS'
s3_config = object_store_config[:S3Config]
expect(s3_config[:Bucket]).to eq(bucket_name)
expect(s3_config[:Region]).to eq(region)
expect(s3_config[:PathStyle]).to eq(path_style)
expect(s3_config[:UseIamProfile]).to eq(use_iam_profile)
expect(s3_config.keys).not_to include(%i(ServerSideEncryption SSEKMSKeyID))
aggregate_failures do
expect(subject[:UseWorkhorseClient]).to eq(use_iam_profile)
expect(subject[:RemoteTempObjectID]).to eq(object_name)
object_store_config = subject[:ObjectStorage]
expect(object_store_config[:Provider]).to eq 'AWS'
s3_config = object_store_config[:S3Config]
expect(s3_config[:Bucket]).to eq(bucket_name)
expect(s3_config[:Region]).to eq(region)
expect(s3_config[:PathStyle]).to eq(path_style)
expect(s3_config[:UseIamProfile]).to eq(use_iam_profile)
expect(s3_config.keys).not_to include(%i(ServerSideEncryption SSEKMSKeyID))
end
end
context 'when feature flag is disabled' do
@@ -202,12 +204,44 @@
shared_examples 'a valid upload' do
it "returns valid structure" do
expect(subject).to have_key(:Timeout)
expect(subject[:GetURL]).to start_with(storage_url)
expect(subject[:StoreURL]).to start_with(storage_url)
expect(subject[:DeleteURL]).to start_with(storage_url)
expect(subject[:CustomPutHeaders]).to be_truthy
expect(subject[:PutHeaders]).to eq({})
aggregate_failures do
expect(subject).to have_key(:Timeout)
expect(subject[:GetURL]).to start_with(storage_url)
expect(subject[:StoreURL]).to start_with(storage_url)
expect(subject[:DeleteURL]).to start_with(storage_url)
expect(subject[:CustomPutHeaders]).to be_truthy
expect(subject[:PutHeaders]).to eq({})
end
end
end
shared_examples 'a valid AzureRM upload' do
before do
require 'fog/azurerm'
end
it_behaves_like 'a valid upload'
it 'enables the Workhorse client' do
aggregate_failures do
expect(subject[:UseWorkhorseClient]).to be true
expect(subject[:RemoteTempObjectID]).to eq(object_name)
expect(subject[:ObjectStorage][:Provider]).to eq('AzureRM')
expect(subject[:ObjectStorage][:GoCloudConfig]).to eq({ URL: "azblob://#{bucket_name}" })
end
end
end
shared_examples 'a valid upload' do
it "returns valid structure" do
aggregate_failures do
expect(subject).to have_key(:Timeout)
expect(subject[:GetURL]).to start_with(storage_url)
expect(subject[:StoreURL]).to start_with(storage_url)
expect(subject[:DeleteURL]).to start_with(storage_url)
expect(subject[:CustomPutHeaders]).to be_truthy
expect(subject[:PutHeaders]).to eq({})
end
end
end
@@ -219,14 +253,16 @@
it_behaves_like 'a valid upload'
it "returns valid structure" do
expect(subject).to have_key(:MultipartUpload)
expect(subject[:MultipartUpload]).to have_key(:PartSize)
expect(subject[:MultipartUpload][:PartURLs]).to all(start_with(storage_url))
expect(subject[:MultipartUpload][:PartURLs]).to all(include('uploadId=myUpload'))
expect(subject[:MultipartUpload][:CompleteURL]).to start_with(storage_url)
expect(subject[:MultipartUpload][:CompleteURL]).to include('uploadId=myUpload')
expect(subject[:MultipartUpload][:AbortURL]).to start_with(storage_url)
expect(subject[:MultipartUpload][:AbortURL]).to include('uploadId=myUpload')
aggregate_failures do
expect(subject).to have_key(:MultipartUpload)
expect(subject[:MultipartUpload]).to have_key(:PartSize)
expect(subject[:MultipartUpload][:PartURLs]).to all(start_with(storage_url))
expect(subject[:MultipartUpload][:PartURLs]).to all(include('uploadId=myUpload'))
expect(subject[:MultipartUpload][:CompleteURL]).to start_with(storage_url)
expect(subject[:MultipartUpload][:CompleteURL]).to include('uploadId=myUpload')
expect(subject[:MultipartUpload][:AbortURL]).to start_with(storage_url)
expect(subject[:MultipartUpload][:AbortURL]).to include('uploadId=myUpload')
end
end
it 'uses only strings in query parameters' do
@@ -370,5 +406,31 @@
it_behaves_like 'a valid upload without multipart data'
end
end
context 'when AzureRM is used' do
let(:credentials) do
{
provider: 'AzureRM',
azure_storage_account_name: 'azuretest',
azure_storage_access_key: 'ABCD1234'
}
end
let(:storage_url) { 'https://azuretest.blob.core.windows.net' }
context 'when length is known' do
let(:has_length) { true }
it_behaves_like 'a valid AzureRM upload'
it_behaves_like 'a valid upload without multipart data'
end
context 'when length is unknown' do
let(:has_length) { false }
it_behaves_like 'a valid AzureRM upload'
it_behaves_like 'a valid upload without multipart data'
end
end
end
end