diff --git a/app/views/admin/application_settings/show.html.haml b/app/views/admin/application_settings/show.html.haml index 487dc7c1c55b4ac7c3144b5fae2d73216497b3f9..f2e2cd3e2faba221e56c104a5cce55903f6db2a2 100644 --- a/app/views/admin/application_settings/show.html.haml +++ b/app/views/admin/application_settings/show.html.haml @@ -373,3 +373,5 @@ = _('Geo allows you to replicate your GitLab instance to other geographical locations.') .settings-content = render partial: 'slack' + += render_if_exists 'admin/application_settings/pseudonymizer_settings', expanded: expanded diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 40fed40398db4b91f65159d8c9328bd0c57c2bb5..d8708433ff215ab912928880e2ca1e16ae114fbc 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -144,6 +144,7 @@ - cronjob:ldap_all_groups_sync - cronjob:ldap_sync - cronjob:update_all_mirrors +- cronjob:pseudonymizer - geo:geo_scheduler_scheduler - geo:geo_scheduler_primary_scheduler diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index fc067eb96c33657a2819de19261df5000592c511..c61bb52c694f3b13c807cb5c59220470d74c15d0 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -311,6 +311,10 @@ production: &base geo_migrated_local_files_clean_up_worker: cron: "15 */6 * * *" + # Export pseudonymized data in CSV format for analysis + pseudonymizer_worker: + cron: "0 * * * *" + registry: # enabled: true # host: registry.example.com @@ -726,6 +730,20 @@ production: &base # # Specifies Amazon S3 storage class to use for backups, this is optional # # storage_class: 'STANDARD' + ## Pseudonymizer exporter + pseudonymizer: + # Tables manifest that specifies the fields to extract and pseudonymize. + manifest: config/pseudonymizer.yml + upload: + # remote_directory: 'gitlab-elt' + # Fog storage connection settings, see http://fog.io/storage/ . 
+ connection: + # provider: AWS + # region: eu-west-1 + # aws_access_key_id: AKIAKIAKI + # aws_secret_access_key: 'secret123' + # # The remote 'directory' to store the CSV files. For S3, this would be the bucket name. + ## GitLab Shell settings gitlab_shell: path: /home/git/gitlab-shell/ @@ -876,6 +894,17 @@ test: token: secret backup: path: tmp/tests/backups + pseudonymizer: + manifest: config/pseudonymizer.yml + upload: + # The remote 'directory' to store the CSV files. For S3, this would be the bucket name. + remote_directory: gitlab-elt.test + # Fog storage connection settings, see http://fog.io/storage/ + connection: + provider: AWS # Only AWS supported at the moment + aws_access_key_id: AWS_ACCESS_KEY_ID + aws_secret_access_key: AWS_SECRET_ACCESS_KEY + region: us-east-1 gitlab_shell: path: tmp/tests/gitlab-shell/ hooks_path: tmp/tests/gitlab-shell/hooks/ diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 088c060964377c66a85d9f385985693cfe99fa1c..7f230c89241043208c828786c66d83191df2fd30 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -370,6 +370,10 @@ Settings.cron_jobs['gitlab_usage_ping_worker']['cron'] ||= Settings.__send__(:cron_for_usage_ping) Settings.cron_jobs['gitlab_usage_ping_worker']['job_class'] = 'GitlabUsagePingWorker' +Settings.cron_jobs['pseudonymizer_worker'] ||= Settingslogic.new({}) +Settings.cron_jobs['pseudonymizer_worker']['cron'] ||= '0 23 * * *' +Settings.cron_jobs['pseudonymizer_worker']['job_class'] ||= 'PseudonymizerWorker' + Settings.cron_jobs['schedule_update_user_activity_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['schedule_update_user_activity_worker']['cron'] ||= '30 0 * * *' Settings.cron_jobs['schedule_update_user_activity_worker']['job_class'] = 'ScheduleUpdateUserActivityWorker' @@ -470,6 +474,14 @@ Settings.backup['upload']['encryption'] ||= nil Settings.backup['upload']['storage_class'] ||= nil +# +# Pseudonymizer +# 
+Settings['pseudonymizer'] ||= Settingslogic.new({}) +Settings.pseudonymizer['manifest'] = Settings.absolute(Settings.pseudonymizer['manifest'] || Rails.root.join("config/pseudonymizer.yml")) +Settings.pseudonymizer['upload'] ||= Settingslogic.new({ 'remote_directory' => nil, 'connection' => nil }) +# Settings.pseudonymizer['upload']['multipart_chunk_size'] ||= 104857600 + # # Git # diff --git a/config/pseudonymizer.yml b/config/pseudonymizer.yml new file mode 100644 index 0000000000000000000000000000000000000000..56458f5c790bf3380c7a726457e32c33e3171a31 --- /dev/null +++ b/config/pseudonymizer.yml @@ -0,0 +1,787 @@ +tables: + approvals: + whitelist: + - id + - merge_request_id + - user_id + - created_at + - updated_at + pseudo: + - id + - merge_request_id + - user_id + approver_groups: + whitelist: + - id + - target_type + - group_id + - created_at + - updated_at + pseudo: + - id + - group_id + board_assignees: + whitelist: + - id + - board_id + - assignee_id + pseudo: + - id + - board_id + - assignee_id + board_labels: + whitelist: + - id + - board_id + - label_id + pseudo: + - id + - board_id + - label_id + boards: + whitelist: + - id + - project_id + - created_at + - updated_at + - milestone_id + - group_id + - weight + pseudo: + - id + - project_id + - milestone_id + - group_id + epic_issues: + whitelist: + - id + - epic_id + - issue_id + - relative_position + pseudo: + - id + - epic_id + - issue_id + epic_metrics: + whitelist: + - id + - epic_id + - created_at + - updated_at + pseudo: + - id + - epic_id + epics: + whitelist: + - id + - milestone_id + - group_id + - author_id + - assignee_id + - iid + - updated_by_id + - last_edited_by_id + - lock_version + - start_date + - end_date + - last_edited_at + - created_at + - updated_at + - title + - description + pseudo: + - id + - milestone_id + - group_id + - author_id + - assignee_id + - iid + - updated_by_id + - last_edited_by_id + - title + - description + issue_assignees: + whitelist: + - user_id + - issue_id 
+ pseudo: + - user_id + - issue_id + issue_links: + whitelist: + - id + - source_id + - target_id + - created_at + - updated_at + pseudo: + - id + - source_id + - target_id + issue_metrics: + whitelist: + - id + - issue_id + - first_mentioned_in_commit_at + - first_associated_with_milestone_at + - first_added_to_board_at + - created_at + - updated_at + pseudo: + - id + - issue_id + issues: + whitelist: + - id + - title + - author_id + - project_id + - created_at + - updated_at + - description + - milestone_id + - state + - updated_by_id + - weight + - due_date + - moved_to_id + - lock_version + - time_estimate + - last_edited_at + - last_edited_by_id + - discussion_locked + - closed_at + pseudo: + - id + - title + - author_id + - project_id + - description + - milestone_id + - updated_by_id + - moved_to_id + - discussion_locked + label_links: + whitelist: + - id + - label_id + - target_id + - target_type + - created_at + - updated_at + pseudo: + - id + - label_id + - target_id + label_priorities: + whitelist: + - id + - project_id + - label_id + - priority + - created_at + - updated_at + pseudo: + - id + - project_id + - label_id + labels: + whitelist: + - id + - title + - color + - project_id + - created_at + - updated_at + - template + - type + - group_id + pseudo: + - id + - title + - color + - project_id + - template + - type + - group_id + licenses: + whitelist: + - id + - created_at + - updated_at + pseudo: + - id + merge_request_diffs: + whitelist: + - id + - state + - merge_request_id + - created_at + - updated_at + - base_commit_sha + - real_size + - head_commit_sha + - start_commit_sha + - commits_count + pseudo: + - id + - merge_request_id + - base_commit_sha + - head_commit_sha + - start_commit_sha + merge_request_metrics: + whitelist: + - id + - merge_request_id + - latest_build_started_at + - latest_build_finished_at + - first_deployed_to_production_at + - merged_at + - created_at + - updated_at + - pipeline_id + - merged_by_id + - latest_closed_by_id 
+ - latest_closed_at + pseudo: + - id + - merge_request_id + - pipeline_id + - merged_by_id + - latest_closed_by_id + merge_requests: + whitelist: + - id + - target_branch + - source_branch + - source_project_id + - author_id + - assignee_id + - created_at + - updated_at + - milestone_id + - state + - merge_status + - target_project_id + - updated_by_id + - merge_error + - merge_params + - merge_when_pipeline_succeeds + - merge_user_id + - approvals_before_merge + - lock_version + - time_estimate + - squash + - last_edited_at + - last_edited_by_id + - head_pipeline_id + - discussion_locked + - latest_merge_request_diff_id + - allow_maintainer_to_push + pseudo: + - id + - target_branch + - source_branch + - source_project_id + - author_id + - assignee_id + - milestone_id + - target_project_id + - updated_by_id + - merge_user_id + - last_edited_by_id + - head_pipeline_id + - latest_merge_request_diff_id + merge_requests_closing_issues: + whitelist: + - id + - merge_request_id + - issue_id + - created_at + - updated_at + pseudo: + - id + - merge_request_id + - issue_id + milestones: + whitelist: + - id + - project_id + - due_date + - created_at + - updated_at + - state + - start_date + - group_id + pseudo: + - id + - project_id + - group_id + namespace_statistics: + whitelist: + - id + - namespace_id + - shared_runners_seconds + - shared_runners_seconds_last_reset + pseudo: + - id + - namespace_id + namespaces: + whitelist: + - id + - name + - path + - owner_id + - created_at + - updated_at + - type + - avatar + - membership_lock + - share_with_group_lock + - visibility_level + - request_access_enabled + - ldap_sync_status + - ldap_sync_error + - ldap_sync_last_update_at + - ldap_sync_last_successful_update_at + - ldap_sync_last_sync_at + - lfs_enabled + - parent_id + - shared_runners_minutes_limit + - repository_size_limit + - require_two_factor_authentication + - two_factor_grace_period + - plan_id + - project_creation_level + pseudo: + - id + - name + - path + - 
owner_id + - type + - avatar + - membership_lock + - share_with_group_lock + - visibility_level + - request_access_enabled + - ldap_sync_status + - ldap_sync_error + - parent_id + - shared_runners_minutes_limit + - repository_size_limit + - require_two_factor_authentication + - two_factor_grace_period + - plan_id + - project_creation_level + notes: + whitelist: + - id + - note + - noteable_type + - author_id + - created_at + - updated_at + - project_id + - line_code + - commit_id + - noteable_id + - system + - updated_by_id + - type + - position + - original_position + - resolved_at + - resolved_by_id + - discussion_id + - change_position + - resolved_by_push + pseudo: + - id + - note + - author_id + - project_id + - commit_id + - noteable_id + - updated_by_id + - resolved_by_id + - discussion_id + notification_settings: + whitelist: + - id + - user_id + - source_id + - source_type + - level + - created_at + - updated_at + - new_note + - new_issue + - reopen_issue + - close_issue + - reassign_issue + - new_merge_request + - reopen_merge_request + - close_merge_request + - reassign_merge_request + - merge_merge_request + - failed_pipeline + - success_pipeline + pseudo: + - id + - user_id + - source_id + - source_type + - level + - new_note + - new_issue + - reopen_issue + - close_issue + - reassign_issue + - new_merge_request + - reopen_merge_request + - close_merge_request + - reassign_merge_request + - merge_merge_request + - failed_pipeline + - success_pipeline + project_authorizations: + whitelist: + - user_id + - project_id + - access_level + pseudo: + - user_id + - project_id + - access_level + project_auto_devops: + whitelist: + - id + - project_id + - created_at + - updated_at + - enabled + - domain + pseudo: + - id + - project_id + - enabled + - domain + project_custom_attributes: + whitelist: + - id + - created_at + - updated_at + - project_id + - key + - value + pseudo: + - id + - project_id + - key + - value + project_features: + whitelist: + - id + - 
project_id + - merge_requests_access_level + - issues_access_level + - wiki_access_level + - snippets_access_level + - builds_access_level + - created_at + - updated_at + - repository_access_level + pseudo: + - id + - project_id + - merge_requests_access_level + - issues_access_level + - wiki_access_level + - snippets_access_level + - builds_access_level + - repository_access_level + project_group_links: + whitelist: + - id + - project_id + - group_id + - created_at + - updated_at + - group_access + - expires_at + pseudo: + - id + - project_id + - group_id + - group_access + project_import_data: + whitelist: + - id + - project_id + pseudo: + - id + - project_id + project_mirror_data: + whitelist: + - id + - project_id + - retry_count + - last_update_started_at + - last_update_scheduled_at + - next_execution_timestamp + pseudo: + - id + - project_id + project_repository_states: + whitelist: + - id + - project_id + - repository_verification_checksum + - wiki_verification_checksum + - last_repository_verification_failure + - last_wiki_verification_failure + pseudo: + - id + - project_id + - repository_verification_checksum + - wiki_verification_checksum + - last_repository_verification_failure + - last_wiki_verification_failure + project_statistics: + whitelist: + - id + - project_id + - namespace_id + - commit_count + - storage_size + - repository_size + - lfs_objects_size + - build_artifacts_size + - shared_runners_seconds + - shared_runners_seconds_last_reset + pseudo: + - id + - project_id + - namespace_id + - commit_count + - storage_size + - repository_size + - lfs_objects_size + - build_artifacts_size + - shared_runners_seconds + - shared_runners_seconds_last_reset + projects: + whitelist: + - id + - name + - path + - description + - created_at + - updated_at + - creator_id + - namespace_id + - last_activity_at + - import_url + - visibility_level + - archived + - avatar + - import_status + - merge_requests_template + - star_count + - 
merge_requests_rebase_enabled + - import_type + - import_source + - approvals_before_merge + - reset_approvals_on_push + - merge_requests_ff_only_enabled + - issues_template + - mirror + - mirror_last_update_at + - mirror_last_successful_update_at + - mirror_user_id + - import_error + - ci_id + - shared_runners_enabled + - build_coverage_regex + - build_allow_git_fetch + - build_timeout + - mirror_trigger_builds + - pending_delete + - public_builds + - last_repository_check_failed + - last_repository_check_at + - container_registry_enabled + - only_allow_merge_if_pipeline_succeeds + - has_external_issue_tracker + - repository_storage + - repository_read_only + - request_access_enabled + - has_external_wiki + - ci_config_path + - lfs_enabled + - only_allow_merge_if_all_discussions_are_resolved + - repository_size_limit + - printing_merge_request_link_enabled + - auto_cancel_pending_pipelines + - service_desk_enabled + - import_jid + - delete_error + - last_repository_updated_at + - disable_overriding_approvers_per_merge_request + - storage_version + - resolve_outdated_diff_discussions + - remote_mirror_available_overridden + - only_mirror_protected_branches + - pull_mirror_available_overridden + - mirror_overwrites_diverged_branches + - external_authorization_classification_label + pseudo: + - id + - name + - path + - description + - creator_id + - namespace_id + - import_url + - visibility_level + - archived + - avatar + - import_status + - merge_requests_template + - star_count + - import_type + - import_source + - approvals_before_merge + - reset_approvals_on_push + - issues_template + - mirror + - mirror_last_successful_update_at + - mirror_user_id + - import_error + - ci_id + - shared_runners_enabled + - build_coverage_regex + - build_allow_git_fetch + - build_timeout + - mirror_trigger_builds + - pending_delete + - public_builds + - last_repository_check_failed + - only_allow_merge_if_pipeline_succeeds + - repository_storage + - repository_read_only + - 
ci_config_path + - only_allow_merge_if_all_discussions_are_resolved + - repository_size_limit + - auto_cancel_pending_pipelines + - import_jid + - delete_error + - last_repository_updated_at + - disable_overriding_approvers_per_merge_request + - storage_version + - resolve_outdated_diff_discussions + - remote_mirror_available_overridden + - pull_mirror_available_overridden + - mirror_overwrites_diverged_branches + - external_authorization_classification_label + subscriptions: + whitelist: + - id + - user_id + - subscribable_id + - subscribable_type + - subscribed + - created_at + - updated_at + - project_id + pseudo: + - id + - user_id + - subscribable_id + - project_id + users: + whitelist: + - id + - email + - remember_created_at + - sign_in_count + - current_sign_in_at + - last_sign_in_at + - current_sign_in_ip + - last_sign_in_ip + - created_at + - updated_at + - name + - admin + - projects_limit + - skype + - linkedin + - twitter + - bio + - failed_attempts + - locked_at + - username + - can_create_group + - can_create_team + - state + - color_scheme_id + - password_expires_at + - created_by_id + - last_credential_check_at + - avatar + - confirmed_at + - confirmation_sent_at + - unconfirmed_email + - hide_no_ssh_key + - website_url + - admin_email_unsubscribed_at + - notification_email + - hide_no_password + - password_automatically_set + - location + - public_email + - dashboard + - project_view + - consumed_timestep + - layout + - hide_project_limit + - note + - otp_grace_period_started_at + - external + - organization + - auditor + - require_two_factor_authentication_from_group + - two_factor_grace_period + - ghost + - last_activity_on + - notified_of_own_activity + - support_bot + - preferred_language + - theme_id + pseudo: + - id + - email + - current_sign_in_ip + - last_sign_in_ip + - name + - admin + - skype + - linkedin + - twitter + - username + - created_by_id + - avatar + - unconfirmed_email + - hide_no_ssh_key + - website_url + - notification_email 
+ - location + - public_email + - consumed_timestep + - hide_project_limit + - note + - external + - organization + - auditor + - two_factor_grace_period + - theme_id diff --git a/db/schema.rb b/db/schema.rb index d1955f910ee1a408ab4b1a13b4843834ffe97437..b153f7baad52ce1a61ceeb71e4333b5d51cd4f5a 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -206,6 +206,7 @@ t.string "encrypted_external_auth_client_key_pass_iv" t.string "email_additional_text" t.boolean "enforce_terms", default: false + t.boolean "pseudonymizer_enabled", default: false, null: false end create_table "approvals", force: :cascade do |t| diff --git a/doc/administration/index.md b/doc/administration/index.md index 56bf666c5254697f79f7076a9ed1d3663a64a850..ab00548d9438851c06ded51aa360469fa5767adc 100644 --- a/doc/administration/index.md +++ b/doc/administration/index.md @@ -167,6 +167,10 @@ created in snippets, wikis, and repos. - [Request Profiling](monitoring/performance/request_profiling.md): Get a detailed profile on slow requests. - [Performance Bar](monitoring/performance/performance_bar.md): Get performance information for the current page. +## Analytics + +- [Pseudonymizer](pseudonymizer.md): Export data from GitLab's database to CSV files in a secure way. + ## Troubleshooting - [Debugging tips](troubleshooting/debug.md): Tips to debug problems when things go wrong diff --git a/doc/administration/pseudonymizer.md b/doc/administration/pseudonymizer.md new file mode 100644 index 0000000000000000000000000000000000000000..df08a945358266f8f43b589e5e8e0b37ba8df5aa --- /dev/null +++ b/doc/administration/pseudonymizer.md @@ -0,0 +1,103 @@ +# Pseudonymizer + +> [Introduced](https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/5532) in [GitLab Ultimate][ee] 11.1. + +As GitLab's database hosts sensitive information, using it unfiltered for analytics +implies high security requirements. To help alleviate this constraint, the Pseudonymizer +service is used to export GitLab's data in a pseudonymized way. 
+ +CAUTION: **Warning:** +This process is not impervious. If the source data is available, it's possible for +a user to correlate data to the pseudonymized version. + +The Pseudonymizer currently uses `HMAC(SHA256)` to mutate fields that shouldn't +be textually exported. This ensures that: + +- the end-user of the data source cannot infer/revert the pseudonymized fields +- the referential integrity is maintained + +## Configuration + +To configure the pseudonymizer, you need to: + +- Provide a manifest file that describes which fields should be included or + pseudonymized ([example `manifest.yml` file](https://gitlab.com/gitlab-org/gitlab-ee/tree/master/config/pseudonymizer.yml)). + A default manifest is provided with the GitLab installation. Using a relative file path will be resolved from the Rails root. + Alternatively, you can use an absolute file path. +- Use an object storage and specify the connection parameters in the `pseudonymizer.upload.connection` configuration option. + +**For Omnibus installations:** + +1. Edit `/etc/gitlab/gitlab.rb` and add the following lines by replacing with + the values you want: + + ```ruby + gitlab_rails['pseudonymizer_manifest'] = 'config/pseudonymizer.yml' + gitlab_rails['pseudonymizer_upload_remote_directory'] = 'gitlab-elt' + gitlab_rails['pseudonymizer_upload_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'aws_access_key_id' => 'AWS_ACCESS_KEY_ID', + 'aws_secret_access_key' => 'AWS_SECRET_ACCESS_KEY' + } + ``` + + NOTE: **Note:** + If you are using AWS IAM profiles, be sure to omit the AWS access key and secret access key/value pairs. + + ```ruby + gitlab_rails['pseudonymizer_upload_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'use_iam_profile' => true + } + ``` + +1. Save the file and [reconfigure GitLab](restart_gitlab.md#omnibus-gitlab-reconfigure) + for the changes to take effect. + +--- + +**For installations from source:** + +1. 
Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following + lines: + + ```yaml + pseudonymizer: + manifest: config/pseudonymizer.yml + upload: + remote_directory: 'gitlab-elt' # The bucket name + connection: + provider: AWS # Only AWS supported at the moment + aws_access_key_id: AWS_ACCESS_KEY_ID + aws_secret_access_key: AWS_SECRET_ACCESS_KEY + region: eu-central-1 + ``` + +1. Save the file and [restart GitLab](restart_gitlab.md#installations-from-source) + for the changes to take effect. + +## Usage + +You can optionally run the pseudonymizer using the following environment variables: + +- `PSEUDONYMIZER_OUTPUT_DIR` - where to store the output CSV files (defaults to `/tmp`) +- `PSEUDONYMIZER_BATCH` - the batch size when querying the DB (defaults to `100000`) + +```bash +## Omnibus +sudo gitlab-rake gitlab:db:pseudonymizer + +## Source +sudo -u git -H bundle exec rake gitlab:db:pseudonymizer RAILS_ENV=production +``` + +This will produce some CSV files that might be very large, so make sure the +`PSEUDONYMIZER_OUTPUT_DIR` has sufficient space. As a rule of thumb, at least +10% of the database size is recommended. + +After the pseudonymizer has run, the output CSV files should be uploaded to the +configured object storage and deleted from the local disk. 
+ +[ee]: https://about.gitlab.com/pricing/ diff --git a/ee/app/controllers/ee/admin/application_settings_controller.rb b/ee/app/controllers/ee/admin/application_settings_controller.rb index 62ca0cbc8c172d097a64987a6bb75bffa525918e..f3a461d743629b08b0df14d6d66e264252bcbeac 100644 --- a/ee/app/controllers/ee/admin/application_settings_controller.rb +++ b/ee/app/controllers/ee/admin/application_settings_controller.rb @@ -20,6 +20,10 @@ def visible_application_setting_attributes attrs << :email_additional_text end + if License.feature_available?(:pseudonymizer) + attrs << :pseudonymizer_enabled + end + attrs end end diff --git a/ee/app/helpers/ee/application_settings_helper.rb b/ee/app/helpers/ee/application_settings_helper.rb index 86052c233a3bca94ee1b55417745726470a3a3d1..006e3c738d3942dbd35f0df363f4c898f58e2bb1 100644 --- a/ee/app/helpers/ee/application_settings_helper.rb +++ b/ee/app/helpers/ee/application_settings_helper.rb @@ -35,6 +35,18 @@ def external_authorization_client_pass_help_text "and the value is encrypted at rest.") end + def pseudonymizer_enabled_help_text + _("Enable Pseudonymizer data collection") + end + + def pseudonymizer_description_text + _("GitLab will run a background job that will produce pseudonymized CSVs of the GitLab database that will be uploaded to your configured object storage directory.") + end + + def pseudonymizer_disabled_description_text + _("The pseudonymizer data collection is disabled. 
When enabled, GitLab will run a background job that will produce pseudonymized CSVs of the GitLab database that will be uploaded to your configured object storage directory.") + end + override :visible_attributes def visible_attributes super + [ @@ -55,7 +67,8 @@ def visible_attributes :slack_app_id, :slack_app_secret, :slack_app_verification_token, - :allow_group_owners_to_manage_ldap + :allow_group_owners_to_manage_ldap, + :pseudonymizer_enabled ] end diff --git a/ee/app/models/ee/application_setting.rb b/ee/app/models/ee/application_setting.rb index ed4cd6dbab136d62384a541cc17f23caa5c99a58..7a3cc80a5f2492bd77c8e3414e0828e92a9b1879 100644 --- a/ee/app/models/ee/application_setting.rb +++ b/ee/app/models/ee/application_setting.rb @@ -100,11 +100,20 @@ def defaults slack_app_enabled: false, slack_app_id: nil, slack_app_secret: nil, - slack_app_verification_token: nil + slack_app_verification_token: nil, + pseudonymizer_enabled: false ) end end + def pseudonymizer_available? + License.feature_available?(:pseudonymizer) + end + + def pseudonymizer_enabled? + pseudonymizer_available? && super + end + def should_check_namespace_plan? check_namespace_plan? && (Rails.env.test? || ::Gitlab.dev_env_or_com?) 
end diff --git a/ee/app/models/license.rb b/ee/app/models/license.rb index 2572e12c6cfbe372e4791c9171df7ab9c595ddb0..8eb9609604053306cdcd7745dad6a201ede2815d 100644 --- a/ee/app/models/license.rb +++ b/ee/app/models/license.rb @@ -73,6 +73,7 @@ class License < ActiveRecord::Base ide chatops pod_logs + pseudonymizer ].freeze # List all features available for early adopters, diff --git a/ee/app/views/admin/application_settings/_pseudonymizer.html.haml b/ee/app/views/admin/application_settings/_pseudonymizer.html.haml new file mode 100644 index 0000000000000000000000000000000000000000..483ecb7707cddf77adf3f85fda165d5b33229fe7 --- /dev/null +++ b/ee/app/views/admin/application_settings/_pseudonymizer.html.haml @@ -0,0 +1,18 @@ += form_for @application_setting, url: admin_application_settings_path, html: { class: 'fieldset-form' } do |f| + = form_errors(@application_setting) + + %fieldset + .form-group.row + .offset-sm-2.col-sm-10 + - is_enabled = @application_setting.pseudonymizer_enabled? + .form-check + = f.label :pseudonymizer_enabled do + = f.check_box :pseudonymizer_enabled + = pseudonymizer_enabled_help_text + .form-text.text-muted + - if is_enabled + = pseudonymizer_description_text + - else + = pseudonymizer_disabled_description_text + + = f.submit 'Save changes', class: "btn btn-success" diff --git a/ee/app/views/admin/application_settings/_pseudonymizer_settings.html.haml b/ee/app/views/admin/application_settings/_pseudonymizer_settings.html.haml new file mode 100644 index 0000000000000000000000000000000000000000..f65acf10ff86a33b7bc990fb5ef1c5cc137e7956 --- /dev/null +++ b/ee/app/views/admin/application_settings/_pseudonymizer_settings.html.haml @@ -0,0 +1,11 @@ +- if Gitlab::CurrentSettings.pseudonymizer_available? 
# Cron-queue worker that runs one pseudonymized export: dump the tables listed
# in the manifest to CSV, upload the result, then clean up.
class PseudonymizerWorker
  include ApplicationWorker
  include CronjobQueue

  # Runs a single export cycle.
  #
  # Returns immediately when `Gitlab::CurrentSettings.pseudonymizer_enabled?`
  # is false. The manifest path comes from `Gitlab.config.pseudonymizer.manifest`
  # and the output directory from the PSEUDONYMIZER_OUTPUT_DIR environment
  # variable (nil lets Pseudonymizer::Options pick its default).
  def perform
    return unless Gitlab::CurrentSettings.pseudonymizer_enabled?

    options = Pseudonymizer::Options.new(
      config: YAML.load_file(Gitlab.config.pseudonymizer.manifest),
      output_dir: ENV['PSEUDONYMIZER_OUTPUT_DIR']
    )

    # Block form: the null-device handle used to discard upload progress output
    # is closed when the export finishes or raises, instead of leaking one file
    # descriptor per run.
    File.open(File::NULL, "w") do |null_io|
      export(options, progress_output: null_io)
    end
  end

  private

  # Dumps and uploads using the given options.
  #
  # options         - Pseudonymizer::Options for this run.
  # progress_output - IO handed to the uploader for its progress output.
  #
  # Logs an error and returns without dumping when the uploader reports that it
  # is not available. `uploader.cleanup` always runs once dumping has started,
  # even if the dump or the upload raises.
  def export(options, progress_output:)
    dumper = Pseudonymizer::Dumper.new(options)
    uploader = Pseudonymizer::Uploader.new(options, progress_output: progress_output)

    unless uploader.available?
      Rails.logger.error("The pseudonymizer object storage must be configured.")
      return
    end

    begin
      dumper.tables_to_csv
      uploader.upload
    ensure
      uploader.cleanup
    end
  end
end
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.

# Adds the `pseudonymizer_enabled` boolean column to `application_settings`.
class AddPseudonymizerEnabledToApplicationSettings < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  # This migration is declared as not requiring downtime.
  DOWNTIME = false

  # The new column is NOT NULL and defaults to false.
  def change
    add_column :application_settings, :pseudonymizer_enabled, :boolean, null: false, default: false
  end
end
require 'csv'
require 'digest'
require 'fileutils'
require 'json'
require 'openssl'
require 'time'
require 'tmpdir'
require 'yaml'
require 'zlib'

# ee/lib/pseudonymizer/dumper.rb
module Pseudonymizer
  # Writes every table listed in the manifest to a gzipped CSV file in
  # `output_dir`, together with a `schema.yml` (column name => data type per
  # table) and a `file_list.json` (basenames of the CSV files produced).
  class Dumper
    attr_accessor :config, :output_dir

    # options - object responding to `config` (the manifest hash; its keys are
    #           deep-symbolized here), `output_dir` and `start_at`.
    def initialize(options)
      @config = options.config.deep_symbolize_keys
      @output_dir = options.output_dir
      @start_at = options.start_at

      reset!
    end

    # Forgets the schema and file list collected by a previous run.
    def reset!
      # The block default gives each table its own column hash on first access.
      @schema = Hash.new { |h, k| h[k] = {} }
      @output_files = []
    end

    # Exports all tables under the manifest's `tables` key.
    #
    # Returns the paths of the CSV files written; tables without rows produce no
    # file. The result is memoized: once files have been produced, later calls
    # return the same list until `reset!` is called.
    #
    # An error raised while exporting a table is logged and re-raised, which
    # aborts the whole run.
    def tables_to_csv
      return @output_files unless @output_files.empty?

      # mkdir_p is a no-op for an existing directory, so no existence check.
      FileUtils.mkdir_p(output_dir)

      @output_files = config[:tables].map do |table, table_config|
        table_to_csv(table, table_config[:whitelist], table_config[:pseudo])
      end.compact

      schema_to_yml
      file_list_to_json

      @output_files
    end

    private

    # Path of an output file inside `output_dir`.
    def output_filename(basename = nil, ext = "csv.gz")
      File.join(output_dir, "#{basename}.#{ext}")
    end

    # Writes the collected schema to `schema.yml`.
    def schema_to_yml
      file_path = output_filename("schema", "yml")
      File.write(file_path, @schema.to_yaml)
    end

    # Writes the basenames of the produced CSV files to `file_list.json`.
    def file_list_to_json
      file_path = output_filename("file_list", "json")
      relative_files = @output_files.map(&File.method(:basename))
      File.write(file_path, relative_files.to_json)
    end

    # Records the table's schema, then writes its rows to a CSV file.
    #
    # Returns the CSV path, or nil when the table has no rows.
    def table_to_csv(table, whitelist_columns, pseudonymity_columns)
      table_to_schema(table)
      write_to_csv_file(
        table,
        table_page_results(table, whitelist_columns, pseudonymity_columns)
      )
    rescue => e
      Rails.logger.error("Failed to export #{table}: #{e}")
      # Bare raise re-raises the exception currently being handled.
      raise
    end

    # Returns a lazy enumerator over every row of the table, fetched page by
    # page and passed through the pseudonymizing Filter.
    def table_page_results(table, whitelist_columns, pseudonymity_columns)
      filter = Filter.new(table, whitelist_columns, pseudonymity_columns)
      pager = Pager.new(table, whitelist_columns)

      Enumerator.new do |yielder|
        pager.pages do |page|
          filter.anonymize(page).each do |result|
            yielder << result
          end
        end
      end.lazy
    end

    # Records the data type of each whitelisted column of the table.
    # Pseudonymized columns are recorded as "character varying" because their
    # exported value is a hex digest string, whatever the database type is.
    def table_to_schema(table)
      table_config = @config.dig(:tables, table)
      # Array() keeps a manifest entry without `whitelist:` or `pseudo:` from
      # failing with NoMethodError on nil.
      whitelist = Array(table_config[:whitelist])
      pseudo = Array(table_config[:pseudo])

      whitelisted_columns = ActiveRecord::Base.connection.columns(table).select do |column|
        whitelist.include?(column.name)
      end

      type_results = whitelisted_columns.map do |column|
        data_type = pseudo.include?(column.name) ? "character varying" : column.sql_type

        { name: column.name, data_type: data_type }
      end

      set_schema_column_types(table, type_results)
    end

    # Stores the column types under the table name and, when the table has an
    # "id" column, marks it as the mapping key.
    def set_schema_column_types(table, type_results)
      has_id = type_results.any? { |c| c[:name] == "id" }

      type_results.each do |type_result|
        @schema[table.to_s][type_result[:name]] = type_result[:data_type]
      end

      if has_id
        # if there is an ID, it is the mapping_key
        @schema[table.to_s]["gl_mapping_key"] = "id"
      end
    end

    # Writes the rows to `<table>.csv.gz`, using the keys of the first row as
    # the header line.
    #
    # contents - enumerator of row hashes (must support `peek`).
    #
    # Returns the file path, or nil (writing nothing) when there are no rows.
    def write_to_csv_file(table, contents)
      file_path = output_filename(table)
      headers = first_row_keys(contents)

      unless headers
        Rails.logger.info "#{self.class.name} table #{table} is empty."
        return
      end

      Rails.logger.info "#{self.class.name} writing #{table} to #{file_path}."
      Zlib::GzipWriter.open(file_path) do |io|
        csv = CSV.new(io, headers: headers, write_headers: true)
        contents.each { |row| csv << row.values }
      end

      file_path
    end

    # Keys of the first row, or nil when the enumerator is empty. StopIteration
    # is rescued only around `peek`, so one raised later while rows are being
    # written is not mistaken for an empty table.
    def first_row_keys(contents)
      contents.peek.keys
    rescue StopIteration
      nil
    end
  end
end

# ee/lib/pseudonymizer/filter.rb
module Pseudonymizer
  # Replaces the values of a table's pseudonymized fields with a keyed digest.
  class Filter
    # table                - table name, used in log messages only.
    # whitelisted_fields   - names of the exported fields (nil is treated as empty).
    # pseudonymized_fields - names of the fields to digest (nil is treated as
    #                        empty); names that are not whitelisted are ignored
    #                        with a warning.
    def initialize(table, whitelisted_fields, pseudonymized_fields)
      @table = table
      @pseudo_fields = pseudo_fields(Array(whitelisted_fields), Array(pseudonymized_fields))
    end

    # Returns an Enumerator over `results` in which every non-nil pseudonymized
    # field has been replaced by the hex HMAC-SHA256 of its string form, keyed
    # with the application's `secret_key_base`.
    #
    # Rows are modified in place as they are enumerated; nil values stay nil.
    def anonymize(results)
      key = Rails.application.secrets[:secret_key_base]
      digest = OpenSSL::Digest.new('sha256')

      Enumerator.new do |yielder|
        results.each do |result|
          @pseudo_fields.each do |field|
            next if result[field].nil?

            result[field] = OpenSSL::HMAC.hexdigest(digest, key, String(result[field]))
          end
          yielder << result
        end
      end
    end

    private

    # Returns the pseudonymized fields that are also whitelisted, after logging
    # a warning for each one that is not.
    def pseudo_fields(whitelisted, pseudonymized)
      pseudo_extra_fields = pseudonymized - whitelisted
      pseudo_extra_fields.each do |field|
        Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.")
      end

      pseudonymized & whitelisted
    end
  end
end

# ee/lib/pseudonymizer/options.rb
module Pseudonymizer
  # Settings of one export run: the manifest, the start time and the
  # per-run output directory.
  class Options
    attr_reader :config
    attr_reader :start_at
    attr_reader :output_dir

    # config     - manifest hash (defaults to an empty hash).
    # output_dir - base directory for the export; when nil,
    #              `<Dir.tmpdir>/gitlab-pseudonymizer` is used. A sub-directory
    #              named after the start time is always appended.
    def initialize(config: {}, output_dir: nil)
      @config = config
      @start_at = Time.now.utc

      base_dir = output_dir || File.join(Dir.tmpdir, 'gitlab-pseudonymizer')
      @output_dir = File.join(base_dir, batch_dir)
    end

    # Name of the directory for this run: the same name as the last component
    # of `output_dir`.
    def upload_dir
      batch_dir
    end

    private

    # ISO 8601 form of the run's UTC start time.
    def batch_dir
      start_at.iso8601
    end
  end
end
index 0000000000000000000000000000000000000000..4c1b9f2b1e464a0b4d858ef779f3ec23e49eaef6
--- /dev/null
+++ b/ee/lib/pseudonymizer/pager.rb
@@ -0,0 +1,70 @@
+module Pseudonymizer
+  # Reads a table page by page and yields each page of rows to the caller.
+  class Pager
+    # ENV values are Strings, so coerce: PAGE_SIZE is compared with row counts
+    # and added to the offset below, both of which need an Integer.
+    PAGE_SIZE = ENV.fetch('PSEUDONYMIZER_BATCH', 100_000).to_i
+
+    def initialize(table, columns)
+      @table = table
+      @columns = columns
+    end
+
+    # Yields successive pages of at most PAGE_SIZE rows, using id-based
+    # pagination when "id" is among the columns and LIMIT/OFFSET otherwise.
+    def pages(&block)
+      if @columns.include?("id")
+        # optimize the pagination using WHERE id > ?
+        pages_per_id(&block)
+      else
+        # fallback to `LIMIT ? OFFSET ?` when "id" is unavailable
+        pages_per_offset(&block)
+      end
+    end
+
+    def pages_per_id(&block)
+      id_offset = 0
+
+      loop do
+        # a page of results
+        results = ActiveRecord::Base.connection.exec_query(<<-SQL.squish)
+          SELECT #{@columns.join(",")}
+          FROM #{@table}
+          WHERE id > #{id_offset}
+          ORDER BY id
+          LIMIT #{PAGE_SIZE}
+        SQL
+        Rails.logger.debug("#{self.class.name} fetch ids [#{id_offset}..+#{PAGE_SIZE}]")
+        break if results.empty?
+
+        id_offset = results.last["id"].to_i
+        yield results
+
+        # a short page means the table is exhausted
+        break if results.count < PAGE_SIZE
+      end
+    end
+
+    def pages_per_offset(&block)
+      offset = 0
+
+      loop do
+        # a page of results
+        results = ActiveRecord::Base.connection.exec_query(<<-SQL.squish)
+          SELECT #{@columns.join(",")}
+          FROM #{@table}
+          ORDER BY #{@columns.join(",")}
+          LIMIT #{PAGE_SIZE} OFFSET #{offset}
+        SQL
+        Rails.logger.debug("#{self.class.name} fetching offset [#{offset}..#{offset + PAGE_SIZE}]")
+        break if results.empty?
+
+        offset += PAGE_SIZE
+        yield results
+
+        # a short page means the table is exhausted
+        break if results.count < PAGE_SIZE
+      end
+    end
+  end
+end
diff --git a/ee/lib/pseudonymizer/uploader.rb b/ee/lib/pseudonymizer/uploader.rb
new file mode 100644
index 0000000000000000000000000000000000000000..65c69ade374a3b4cc1f74deb7a1684c7dcb364d8
--- /dev/null
+++ b/ee/lib/pseudonymizer/uploader.rb
@@ -0,0 +1,102 @@
+module Pseudonymizer
+  ObjectStorageUnavailableError = Class.new(StandardError)
+
+  # Uploads the files of an export directory to the object storage configured
+  # under pseudonymizer.upload and can delete the local directory afterwards.
+  class Uploader
+    include Gitlab::Utils::StrongMemoize
+
+    RemoteStorageUnavailableError = Class.new(StandardError)
+
+    # Our settings use string keys, but Fog expects symbols
+    def self.object_store_credentials
+      Gitlab.config.pseudonymizer.upload.connection.to_hash.deep_symbolize_keys
+    end
+
+    def self.remote_directory
+      Gitlab.config.pseudonymizer.upload.remote_directory
+    end
+
+    # options         - responds to config, output_dir and upload_dir.
+    # progress_output - receives progress messages; defaults to $stdout.
+    def initialize(options, progress_output: nil)
+      @progress_output = progress_output || $stdout
+      @config = options.config
+      @output_dir = options.output_dir
+      @upload_dir = options.upload_dir
+      @remote_dir = self.class.remote_directory
+      @connection_params = self.class.object_store_credentials
+    end
+
+    # Returns true when connecting yields a remote directory, false when it
+    # yields nil or when no connection settings are configured.
+    def available?
+      !connect_to_remote_directory.nil?
+    rescue ObjectStorageUnavailableError
+      false
+    end
+
+    # Uploads every file of the output directory. Aborts the process when no
+    # connection settings are configured.
+    def upload
+      progress_output.puts "Uploading output files to remote storage #{remote_directory}:"
+
+      file_list.each do |file|
+        upload_file(file, remote_directory)
+      end
+    rescue ObjectStorageUnavailableError
+      abort "Cannot upload files, make sure the `pseudonymizer.upload.connection` is set properly"
+    end
+
+    # Deletes the local output directory if it exists.
+    def cleanup
+      return unless File.exist?(@output_dir)
+
+      progress_output.print "Deleting tmp directory #{@output_dir} ... "
+      FileUtils.rm_rf(@output_dir)
+      progress_output.puts "done"
+    rescue
+      progress_output.puts "failed"
+    end
+
+    private
+
+    attr_reader :progress_output
+
+    def upload_file(file, directory)
+      progress_output.print "\t#{file} ... "
+
+      # Block form closes the file handle once the upload call returns.
+      uploaded = File.open(file) do |body|
+        directory.files.create(key: File.join(@upload_dir, File.basename(file)),
+                               body: body,
+                               public: false)
+      end
+
+      progress_output.puts(uploaded ? "done" : "failed")
+    end
+
+    def remote_directory
+      strong_memoize(:remote_directory) { connect_to_remote_directory }
+    end
+
+    def connect_to_remote_directory
+      raise ObjectStorageUnavailableError if @connection_params.blank?
+
+      connection = ::Fog::Storage.new(@connection_params)
+
+      # We only attempt to create the directory for local backups. For AWS
+      # and other cloud providers, we cannot guarantee the user will have
+      # permission to create the bucket.
+      if connection.service == ::Fog::Storage::Local
+        connection.directories.create(key: @remote_dir)
+      else
+        connection.directories.get(@remote_dir)
+      end
+    end
+
+    def file_list
+      Dir[File.join(@output_dir, "*")]
+    end
+  end
+end
diff --git a/ee/lib/tasks/gitlab/db.rake b/ee/lib/tasks/gitlab/db.rake
new file mode 100644
index 0000000000000000000000000000000000000000..f458212115272f32da4e1b649c11036a29577107
--- /dev/null
+++ b/ee/lib/tasks/gitlab/db.rake
@@ -0,0 +1,26 @@
+namespace :gitlab do
+  namespace :db do
+    desc 'Output pseudonymity dump of selected tables'
+    task pseudonymizer: :environment do
+      abort "The pseudonymizer is not available with this license." unless License.feature_available?(:pseudonymizer)
+      abort "The pseudonymizer is disabled." unless Gitlab::CurrentSettings.pseudonymizer_enabled?
+
+      options = Pseudonymizer::Options.new(
+        config: YAML.load_file(Gitlab.config.pseudonymizer.manifest),
+        output_dir: ENV['PSEUDONYMIZER_OUTPUT_DIR']
+      )
+
+      dumper = Pseudonymizer::Dumper.new(options)
+      uploader = Pseudonymizer::Uploader.new(options)
+
+      abort "There is an error in the pseudonymizer object store configuration." unless uploader.available?
+
+      begin
+        dumper.tables_to_csv
+        uploader.upload
+      ensure
+        uploader.cleanup
+      end
+    end
+  end
+end
diff --git a/ee/spec/lib/pseudonymizer/dumper_spec.rb b/ee/spec/lib/pseudonymizer/dumper_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..b3d54208d678f58e2e1d1e33b088cfbdebbe54bc
--- /dev/null
+++ b/ee/spec/lib/pseudonymizer/dumper_spec.rb
@@ -0,0 +1,97 @@
+require 'spec_helper'
+
+describe Pseudonymizer::Dumper do
+  let!(:project) { create(:project) }
+  let(:base_dir) { Dir.mktmpdir }
+  let(:options) do
+    Pseudonymizer::Options.new(
+      config: YAML.load_file(Gitlab.config.pseudonymizer.manifest)
+    )
+  end
+  subject(:pseudo) { described_class.new(options) }
+
+  before do
+    allow(options).to receive(:output_dir).and_return(base_dir)
+  end
+
+  after do
+    FileUtils.rm_rf(base_dir)
+  end
+
+  describe 'Pseudo tables' do
+    it 'outputs project tables to csv' do
+      column_names = %w(id name path description)
+      pseudo.config[:tables] = {
+        projects: {
+          whitelist: column_names,
+          pseudo: %w(id)
+        }
+      }
+
+      expect(pseudo.output_dir).to eq(base_dir)
+
+      # grab the first table it outputs. There would only be 1.
+      project_table_file = pseudo.tables_to_csv[0]
+      expect(project_table_file).to end_with("projects.csv.gz")
+
+      columns = []
+      project_data = []
+      Zlib::GzipReader.open(project_table_file) do |gz|
+        csv = CSV.new(gz, headers: true)
+        # csv.shift # read the header row
+        project_data = csv.gets
+        columns = csv.headers
+      end
+
+      # check if CSV columns are correct
+      expect(columns).to include(*column_names)
+
+      # is it pseudonymous
+      # sha 256 is 64 chars in length
+      expect(project_data["id"].length).to eq(64)
+    end
+
+    it "warns when pseudonymized fields are extraneous" do
+      column_names = %w(id name path description)
+      pseudo.config[:tables] = {
+        projects: {
+          whitelist: column_names,
+          pseudo: %w(id extraneous)
+        }
+      }
+
+      expect(Rails.logger).to receive(:warn).with(/extraneous/)
+
+      pseudo.tables_to_csv
+    end
+  end
+
+  describe "manifest is valid" do
+    it "all tables exist" do
+      existing_tables = ActiveRecord::Base.connection.tables
+      tables = options.config['tables'].keys
+
+      expect(existing_tables).to include(*tables)
+    end
+
+    it "all whitelisted attributes exist" do
+      options.config['tables'].each do |table, table_def|
+        whitelisted = table_def['whitelist']
+        existing_columns = ActiveRecord::Base.connection.columns(table.to_sym).map(&:name)
+        diff = whitelisted - existing_columns
+
+        expect(diff).to be_empty, "#{table} should define columns #{whitelisted.inspect}: missing #{diff.inspect}"
+      end
+    end
+
+    it "all pseudonymized attributes are whitelisted" do
+      options.config['tables'].each do |table, table_def|
+        whitelisted = table_def['whitelist']
+        pseudonymized = table_def['pseudo']
+        diff = pseudonymized - whitelisted
+
+        expect(diff).to be_empty, "#{table} should whitelist columns #{pseudonymized.inspect}: missing #{diff.inspect}"
+      end
+    end
+  end
+end
diff --git a/ee/spec/lib/pseudonymizer/pager_spec.rb b/ee/spec/lib/pseudonymizer/pager_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..8b20a381f923c8044d8d172881631539b4658f51
--- /dev/null
+++ b/ee/spec/lib/pseudonymizer/pager_spec.rb
@@ -0,0 +1,55 @@
+require 'spec_helper'
+
+describe Pseudonymizer::Pager do
+  let(:page_size) { 1 }
+  let!(:projects) { create_list(:project, 10) }
+  subject { described_class.new("projects", whitelisted_columns) }
+
+  before do
+    stub_const("Pseudonymizer::Pager::PAGE_SIZE", page_size)
+  end
+
+  shared_examples "yield results in page" do
+    it do
+      page_count = 0
+      result_count = 0
+
+      subject.pages do |page|
+        result_count += page.count
+        page_count += 1
+      end
+
+      expect(result_count).to eq(projects.count)
+      # round up: a trailing partial page is still yielded as a page
+      expect(page_count).to eq((projects.count.to_f / page_size).ceil)
+    end
+  end
+
+  context "`id` column is present" do
+    let(:whitelisted_columns) { %w(id name) }
+
+    describe "#pages" do
+      it "delegates to #pages_per_id" do
+        expect(subject).to receive(:pages_per_id)
+
+        subject.pages { |_page| nil }
+      end
+
+      include_examples "yield results in page"
+    end
+  end
+
+  context "`id` column is missing" do
+    let(:whitelisted_columns) { %w(name) }
+
+    describe "#pages" do
+      it "delegates to #pages_per_offset" do
+        expect(subject).to receive(:pages_per_offset)
+
+        subject.pages { |_page| nil }
+      end
+
+      include_examples "yield results in page"
+    end
+  end
+end
diff --git a/ee/spec/lib/pseudonymizer/uploader_spec.rb b/ee/spec/lib/pseudonymizer/uploader_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..6da78267868c3e54f5175a7fc05e7750131d0412
--- /dev/null
+++ b/ee/spec/lib/pseudonymizer/uploader_spec.rb
@@ -0,0 +1,45 @@
+require 'spec_helper'
+
+describe Pseudonymizer::Uploader do
+  let(:base_dir) { Dir.mktmpdir }
+  let(:options) do
+    Pseudonymizer::Options.new(
+      config: YAML.load_file(Gitlab.config.pseudonymizer.manifest)
+    )
+  end
+  let(:remote_directory) { subject.send(:remote_directory) }
+  subject { described_class.new(options) }
+
+  def mock_file(file_name)
+    FileUtils.touch(File.join(base_dir, file_name))
+  end
+
+  before do
+    allow(options).to receive(:output_dir).and_return(base_dir)
+    stub_object_storage_pseudonymizer
+
+    10.times {|i| mock_file("file_#{i}.test")}
+    mock_file("schema.yml")
+    mock_file("file_list.json")
+  end
+
+  after do
+    FileUtils.rm_rf(base_dir)
+  end
+
+  describe "#upload" do
+    it "upload all file in the directory" do
+      subject.upload
+
+      expect(remote_directory.files.all.count).to eq(12)
+    end
+  end
+
+  describe "#cleanup" do
+    it "cleans the directory" do
+      subject.cleanup
+
+      expect(Dir[File.join(base_dir, "*")].length).to eq(0)
+    end
+  end
+end
diff --git a/spec/support/helpers/stub_object_storage.rb b/spec/support/helpers/stub_object_storage.rb index bceaf8277ee627ca5ac97236d2c39624bba2516d..b1e2b5365b73c3e380604705d41439adaeb0f329 100644 --- a/spec/support/helpers/stub_object_storage.rb +++ b/spec/support/helpers/stub_object_storage.rb @@ -15,9 +15,14 @@ def stub_object_storage_uploader( return unless enabled + stub_object_storage(connection_params: uploader.object_store_credentials, + remote_directory: remote_directory) + end + + def stub_object_storage(connection_params:, remote_directory:) Fog.mock! - ::Fog::Storage.new(uploader.object_store_credentials).tap do |connection| + ::Fog::Storage.new(connection_params).tap do |connection| begin connection.directories.create(key: remote_directory) rescue Excon::Error::Conflict @@ -57,4 +62,9 @@ def stub_object_storage_multipart_init(endpoint, upload_id = "upload_id") EOS end + + def stub_object_storage_pseudonymizer + stub_object_storage(connection_params: Pseudonymizer::Uploader.object_store_credentials, + remote_directory: Pseudonymizer::Uploader.remote_directory) + end end