diff --git a/db/docs/batched_background_migrations/backfill_identifier_names_of_vulnerability_reads.yml b/db/docs/batched_background_migrations/backfill_identifier_names_of_vulnerability_reads.yml new file mode 100644 index 0000000000000000000000000000000000000000..cc99a01a68aebf473a2b926a38d19ab504ef9987 --- /dev/null +++ b/db/docs/batched_background_migrations/backfill_identifier_names_of_vulnerability_reads.yml @@ -0,0 +1,8 @@ +--- +migration_job_name: BackfillIdentifierNamesOfVulnerabilityReads +description: Backfills identifier_names column for vulnerability_reads table. +feature_category: vulnerability_management +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/163088 +milestone: '17.5' +queued_migration_version: 20241007115637 +finalize_after: '2024-10-20' diff --git a/db/post_migrate/20241007115637_queue_backfill_identifier_names_of_vulnerability_reads.rb b/db/post_migrate/20241007115637_queue_backfill_identifier_names_of_vulnerability_reads.rb new file mode 100644 index 0000000000000000000000000000000000000000..1d3a8f50cae8774c70390e42810261a7465159e9 --- /dev/null +++ b/db/post_migrate/20241007115637_queue_backfill_identifier_names_of_vulnerability_reads.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +class QueueBackfillIdentifierNamesOfVulnerabilityReads < Gitlab::Database::Migration[2.2] + milestone '17.5' + + restrict_gitlab_migration gitlab_schema: :gitlab_sec + + MIGRATION = "BackfillIdentifierNamesOfVulnerabilityReads" + DELAY_INTERVAL = 160.seconds + BATCH_SIZE = 12000 + SUB_BATCH_SIZE = 40 # Total number of sub-batches: 375 + + def up + queue_batched_background_migration( + MIGRATION, + :vulnerability_reads, + :id, + job_interval: DELAY_INTERVAL, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + end + + def down + delete_batched_background_migration(MIGRATION, :vulnerability_reads, :id, []) + end +end diff --git a/db/schema_migrations/20241007115637 b/db/schema_migrations/20241007115637 new file mode 100644 index 0000000000000000000000000000000000000000..d33cc8f534241ea78c729d5bf0e6303bd0a9ce18 --- /dev/null +++ b/db/schema_migrations/20241007115637 @@ -0,0 +1 @@ +edcd4fe555d70c81956eabc52e6207e8a81f81620e64869cdfdd72e55113be4c \ No newline at end of file diff --git a/lib/gitlab/background_migration/backfill_identifier_names_of_vulnerability_reads.rb b/lib/gitlab/background_migration/backfill_identifier_names_of_vulnerability_reads.rb new file mode 100644 index 0000000000000000000000000000000000000000..bc97429b9eabab0e25c9fcd0f764327b59a352cc --- /dev/null +++ b/lib/gitlab/background_migration/backfill_identifier_names_of_vulnerability_reads.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + class BackfillIdentifierNamesOfVulnerabilityReads < BatchedMigrationJob + operation_name :backfill_identifier_names + feature_category :vulnerability_management + + UPDATE_SQL = <<~SQL + UPDATE vulnerability_reads AS vr + SET identifier_names = selected_ids.names + FROM (?) AS selected_ids + WHERE vr.id = selected_ids.id + SQL + + class VulnerabilitiesRead < ::Gitlab::Database::SecApplicationRecord + self.table_name = 'vulnerability_reads' + end + + def perform + each_sub_batch do |sub_batch| + cte = Gitlab::SQL::CTE.new(:batched_relation, sub_batch.limit(40)) + + filtered_results = cte + .apply_to(VulnerabilitiesRead.all) + .joins( + 'INNER JOIN vulnerability_occurrences vo ' \ + 'ON vulnerability_reads.vulnerability_id = vo.vulnerability_id' + ) + .joins('INNER JOIN vulnerability_occurrence_identifiers voi ON vo.id = voi.occurrence_id') + .joins('INNER JOIN vulnerability_identifiers vi ON voi.identifier_id = vi.id') + .group("vulnerability_reads.id") + .select( + 'vulnerability_reads.id AS id', + 'ARRAY_AGG(vi.name ORDER BY vi.name) AS names' + ) + + update_query = VulnerabilitiesRead.sanitize_sql([UPDATE_SQL, filtered_results]) + + connection.execute(update_query) + end + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/backfill_identifier_names_of_vulnerability_reads_spec.rb b/spec/lib/gitlab/background_migration/backfill_identifier_names_of_vulnerability_reads_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..4cdcc8688a45b708242707a80311060f51b989e8 --- /dev/null +++ b/spec/lib/gitlab/background_migration/backfill_identifier_names_of_vulnerability_reads_spec.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::BackfillIdentifierNamesOfVulnerabilityReads, feature_category: :vulnerability_management do + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + let(:users) { table(:users) } + let(:scanners) { table(:vulnerability_scanners) } + let(:vulnerabilities) { table(:vulnerabilities) } + let(:vulnerability_reads) { table(:vulnerability_reads) } + let(:vulnerability_findings) { table(:vulnerability_occurrences) } + let(:vulnerability_occurrence_identifiers) { table(:vulnerability_occurrence_identifiers) } + let(:vulnerability_identifiers) { table(:vulnerability_identifiers) } + + let(:namespace) { namespaces.create!(name: 'user', path: 'user') } + let(:project) { projects.create!(namespace_id: namespace.id, project_namespace_id: namespace.id) } + let(:user) { users.create!(username: 'john_doe', email: 'johndoe@gitlab.com', projects_limit: 10) } + let(:scanner) { scanners.create!(project_id: project.id, external_id: 'external_id', name: 'Test Scanner') } + + shared_context 'with vulnerability data' do + let(:identifier_1) do + create_identifier(external_id: 'A03:2021', external_type: 'owasp', name: 'A03:2021 - Injection') + end + + let(:identifier_2) { create_identifier(external_id: 'CVE-2021-1234', external_type: 'cve', name: 'CVE-2021-1234') } + let(:identifier_3) { create_identifier(external_id: '79', external_type: 'cwe', name: 'CWE-79') } + + let(:finding_1) { create_finding(primary_identifier_id: identifier_1.id) } + let(:finding_2) { create_finding(primary_identifier_id: identifier_2.id) } + let(:finding_3) { create_finding(primary_identifier_id: identifier_3.id) } + + let(:vulnerability_1) { create_vulnerability(title: 'vulnerability 1', finding_id: finding_1.id) } + let(:vulnerability_2) { create_vulnerability(title: 'vulnerability 2', finding_id: finding_2.id) } + let(:vulnerability_3) { create_vulnerability(title: 'vulnerability 3', finding_id: finding_3.id) } + + let!(:vulnerability_read_1) { create_vulnerability_read(vulnerability_id: vulnerability_1.id) } + let!(:vulnerability_read_2) { create_vulnerability_read(vulnerability_id: vulnerability_2.id) } + let!(:vulnerability_read_3) { create_vulnerability_read(vulnerability_id: vulnerability_3.id) } + + before do + create_vulnerability_occurrence_identifier(occurrence_id: finding_1.id, identifier_id: identifier_1.id) + create_vulnerability_occurrence_identifier(occurrence_id: finding_2.id, identifier_id: identifier_2.id) + create_vulnerability_occurrence_identifier(occurrence_id: finding_3.id, identifier_id: identifier_3.id) + + finding_1.update!(vulnerability_id: vulnerability_1.id) + finding_2.update!(vulnerability_id: vulnerability_2.id) + finding_3.update!(vulnerability_id: vulnerability_3.id) + end + end + + describe '#perform' do + subject(:perform_migration) do + described_class.new( + start_id: vulnerability_reads.first.id, + end_id: vulnerability_reads.last.id, + batch_table: :vulnerability_reads, + batch_column: :id, + sub_batch_size: vulnerability_reads.count, + pause_ms: 0, + connection: ActiveRecord::Base.connection + ).perform + end + + context 'with vulnerability data' do + include_context 'with vulnerability data' + + it 'updates identifier_names for vulnerability_reads' do + expect { perform_migration } + .to change { vulnerability_read_1.reload.identifier_names } + .from([]).to(array_including(identifier_1.name)) + .and change { vulnerability_read_2.reload.identifier_names } + .from([]).to(array_including(identifier_2.name)) + .and change { vulnerability_read_3.reload.identifier_names } + .from([]).to(array_including(identifier_3.name)) + end + + it 'updates identifier_names with correct aggregation' do + create_vulnerability_occurrence_identifier(occurrence_id: finding_1.id, identifier_id: identifier_2.id) + create_vulnerability_occurrence_identifier(occurrence_id: finding_2.id, identifier_id: identifier_3.id) + + perform_migration + + expect(vulnerability_read_1.reload.identifier_names).to contain_exactly(identifier_1.name, + identifier_2.name) + expect(vulnerability_read_2.reload.identifier_names).to contain_exactly(identifier_2.name, + identifier_3.name) + expect(vulnerability_read_3.reload.identifier_names).to contain_exactly(identifier_3.name) + end + + it 'sorts identifier_names' do + create_vulnerability_occurrence_identifier(occurrence_id: finding_1.id, identifier_id: identifier_3.id) + create_vulnerability_occurrence_identifier(occurrence_id: finding_1.id, identifier_id: identifier_2.id) + + perform_migration + + expect(vulnerability_read_1.reload.identifier_names).to eq([identifier_1.name, + identifier_2.name, identifier_3.name]) + end + end + + context 'with no matching identifiers' do + include_context 'with vulnerability data' do + before do + vulnerability_occurrence_identifiers.delete_all + end + end + + it 'does not update identifier_names' do + perform_migration + + expect(vulnerability_reads.where.not(identifier_names: []).count).to eq(0) + end + end + end + + private + + def create_vulnerability(overrides = {}) + vulnerabilities.create!({ + project_id: project.id, + author_id: user.id, + title: 'test', + severity: 1, + confidence: 1, + report_type: 1 + }.merge(overrides)) + end + + def create_vulnerability_read(overrides = {}) + vulnerability_reads.create!({ + project_id: project.id, + vulnerability_id: 1, + scanner_id: scanner.id, + severity: 1, + report_type: 1, + state: 1, + uuid: SecureRandom.uuid + }.merge(overrides)) + end + + def create_finding(overrides = {}) + vulnerability_findings.create!({ + project_id: project.id, + scanner_id: scanner.id, + severity: 5, # medium + confidence: 2, # unknown, + report_type: 99, # generic + primary_identifier_id: create_identifier.id, + project_fingerprint: SecureRandom.hex(20), + location_fingerprint: SecureRandom.hex(20), + uuid: SecureRandom.uuid, + name: "CVE-2018-1234", + raw_metadata: "{}", + metadata_version: "test:1.0" + }.merge(overrides)) + end + + def create_identifier(overrides = {}) + vulnerability_identifiers.create!({ + project_id: project.id, + external_id: "CVE-2018-1234", + external_type: "CVE", + name: "CVE-2018-1234", + fingerprint: SecureRandom.hex(20) + }.merge(overrides)) + end + + def create_vulnerability_occurrence_identifier(overrides = {}) + vulnerability_occurrence_identifiers.create!({ + created_at: Time.now.utc, + updated_at: Time.now.utc, + occurrence_id: nil, + identifier_id: nil + }.merge(overrides)) + end +end diff --git a/spec/migrations/20241007115637_queue_backfill_identifier_names_of_vulnerability_reads_spec.rb b/spec/migrations/20241007115637_queue_backfill_identifier_names_of_vulnerability_reads_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..3a57400c104ccb577315ed614744cce42fe0a2fc --- /dev/null +++ b/spec/migrations/20241007115637_queue_backfill_identifier_names_of_vulnerability_reads_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe QueueBackfillIdentifierNamesOfVulnerabilityReads, + feature_category: :vulnerability_management do + let!(:batched_migration) { described_class::MIGRATION } + + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } + + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + table_name: :vulnerability_reads, + column_name: :id, + interval: described_class::DELAY_INTERVAL, + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + } + end + end +end