Skip to content
Snippets Groups Projects
Verified Commit 92fbabce authored by Bala Kumar's avatar Bala Kumar :two: Committed by GitLab
Browse files

Backfill migration for vulnerability_reads identifier_names

Changelog: other
parent 027417a6
No related branches found
No related tags found
2 merge requests!170053Security patch upgrade alert: Only expose to admins 17-4,!163088Backfill identifier_names to vulnerability_reads
---
migration_job_name: BackfillIdentifierNamesOfVulnerabilityReads
description: Backfills identifier_names column for vulnerability_reads table.
feature_category: vulnerability_management
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/163088
milestone: '17.5'
queued_migration_version: 20241007115637
finalize_after: '2024-10-20'
# frozen_string_literal: true
class QueueBackfillIdentifierNamesOfVulnerabilityReads < Gitlab::Database::Migration[2.2]
milestone '17.5'
restrict_gitlab_migration gitlab_schema: :gitlab_sec
MIGRATION = "BackfillIdentifierNamesOfVulnerabilityReads"
DELAY_INTERVAL = 160.seconds
BATCH_SIZE = 12000
SUB_BATCH_SIZE = 40 # Total number of sub-batches: 375
def up
queue_batched_background_migration(
MIGRATION,
:vulnerability_reads,
:id,
job_interval: DELAY_INTERVAL,
batch_size: BATCH_SIZE,
sub_batch_size: SUB_BATCH_SIZE
)
end
def down
delete_batched_background_migration(MIGRATION, :vulnerability_reads, :id, [])
end
end
edcd4fe555d70c81956eabc52e6207e8a81f81620e64869cdfdd72e55113be4c
\ No newline at end of file
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
class BackfillIdentifierNamesOfVulnerabilityReads < BatchedMigrationJob
operation_name :backfill_identifier_names
feature_category :vulnerability_management
UPDATE_SQL = <<~SQL
UPDATE vulnerability_reads AS vr
SET identifier_names = selected_ids.names
FROM (?) AS selected_ids
WHERE vr.id = selected_ids.id
SQL
class VulnerabilitiesRead < ::Gitlab::Database::SecApplicationRecord
self.table_name = 'vulnerability_reads'
end
def perform
each_sub_batch do |sub_batch|
cte = Gitlab::SQL::CTE.new(:batched_relation, sub_batch.limit(40))
filtered_results = cte
.apply_to(VulnerabilitiesRead.all)
.joins(
'INNER JOIN vulnerability_occurrences vo ' \
'ON vulnerability_reads.vulnerability_id = vo.vulnerability_id'
)
.joins('INNER JOIN vulnerability_occurrence_identifiers voi ON vo.id = voi.occurrence_id')
.joins('INNER JOIN vulnerability_identifiers vi ON voi.identifier_id = vi.id')
.group("vulnerability_reads.id")
.select(
'vulnerability_reads.id AS id',
'ARRAY_AGG(vi.name ORDER BY vi.name) AS names'
)
update_query = VulnerabilitiesRead.sanitize_sql([UPDATE_SQL, filtered_results])
connection.execute(update_query)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BackfillIdentifierNamesOfVulnerabilityReads, feature_category: :vulnerability_management do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:users) { table(:users) }
let(:scanners) { table(:vulnerability_scanners) }
let(:vulnerabilities) { table(:vulnerabilities) }
let(:vulnerability_reads) { table(:vulnerability_reads) }
let(:vulnerability_findings) { table(:vulnerability_occurrences) }
let(:vulnerability_occurrence_identifiers) { table(:vulnerability_occurrence_identifiers) }
let(:vulnerability_identifiers) { table(:vulnerability_identifiers) }
let(:namespace) { namespaces.create!(name: 'user', path: 'user') }
let(:project) { projects.create!(namespace_id: namespace.id, project_namespace_id: namespace.id) }
let(:user) { users.create!(username: 'john_doe', email: 'johndoe@gitlab.com', projects_limit: 10) }
let(:scanner) { scanners.create!(project_id: project.id, external_id: 'external_id', name: 'Test Scanner') }
shared_context 'with vulnerability data' do
let(:identifier_1) do
create_identifier(external_id: 'A03:2021', external_type: 'owasp', name: 'A03:2021 - Injection')
end
let(:identifier_2) { create_identifier(external_id: 'CVE-2021-1234', external_type: 'cve', name: 'CVE-2021-1234') }
let(:identifier_3) { create_identifier(external_id: '79', external_type: 'cwe', name: 'CWE-79') }
let(:finding_1) { create_finding(primary_identifier_id: identifier_1.id) }
let(:finding_2) { create_finding(primary_identifier_id: identifier_2.id) }
let(:finding_3) { create_finding(primary_identifier_id: identifier_3.id) }
let(:vulnerability_1) { create_vulnerability(title: 'vulnerability 1', finding_id: finding_1.id) }
let(:vulnerability_2) { create_vulnerability(title: 'vulnerability 2', finding_id: finding_2.id) }
let(:vulnerability_3) { create_vulnerability(title: 'vulnerability 3', finding_id: finding_3.id) }
let!(:vulnerability_read_1) { create_vulnerability_read(vulnerability_id: vulnerability_1.id) }
let!(:vulnerability_read_2) { create_vulnerability_read(vulnerability_id: vulnerability_2.id) }
let!(:vulnerability_read_3) { create_vulnerability_read(vulnerability_id: vulnerability_3.id) }
before do
create_vulnerability_occurrence_identifier(occurrence_id: finding_1.id, identifier_id: identifier_1.id)
create_vulnerability_occurrence_identifier(occurrence_id: finding_2.id, identifier_id: identifier_2.id)
create_vulnerability_occurrence_identifier(occurrence_id: finding_3.id, identifier_id: identifier_3.id)
finding_1.update!(vulnerability_id: vulnerability_1.id)
finding_2.update!(vulnerability_id: vulnerability_2.id)
finding_3.update!(vulnerability_id: vulnerability_3.id)
end
end
describe '#perform' do
subject(:perform_migration) do
described_class.new(
start_id: vulnerability_reads.first.id,
end_id: vulnerability_reads.last.id,
batch_table: :vulnerability_reads,
batch_column: :id,
sub_batch_size: vulnerability_reads.count,
pause_ms: 0,
connection: ActiveRecord::Base.connection
).perform
end
context 'with vulnerability data' do
include_context 'with vulnerability data'
it 'updates identifier_names for vulnerability_reads' do
expect { perform_migration }
.to change { vulnerability_read_1.reload.identifier_names }
.from([]).to(array_including(identifier_1.name))
.and change { vulnerability_read_2.reload.identifier_names }
.from([]).to(array_including(identifier_2.name))
.and change { vulnerability_read_3.reload.identifier_names }
.from([]).to(array_including(identifier_3.name))
end
it 'updates identifier_names with correct aggregation' do
create_vulnerability_occurrence_identifier(occurrence_id: finding_1.id, identifier_id: identifier_2.id)
create_vulnerability_occurrence_identifier(occurrence_id: finding_2.id, identifier_id: identifier_3.id)
perform_migration
expect(vulnerability_read_1.reload.identifier_names).to contain_exactly(identifier_1.name,
identifier_2.name)
expect(vulnerability_read_2.reload.identifier_names).to contain_exactly(identifier_2.name,
identifier_3.name)
expect(vulnerability_read_3.reload.identifier_names).to contain_exactly(identifier_3.name)
end
it 'sorts identifier_names' do
create_vulnerability_occurrence_identifier(occurrence_id: finding_1.id, identifier_id: identifier_3.id)
create_vulnerability_occurrence_identifier(occurrence_id: finding_1.id, identifier_id: identifier_2.id)
perform_migration
expect(vulnerability_read_1.reload.identifier_names).to eq([identifier_1.name,
identifier_2.name, identifier_3.name])
end
end
context 'with no matching identifiers' do
include_context 'with vulnerability data' do
before do
vulnerability_occurrence_identifiers.delete_all
end
end
it 'does not update identifier_names' do
perform_migration
expect(vulnerability_reads.where.not(identifier_names: []).count).to eq(0)
end
end
end
private
def create_vulnerability(overrides = {})
vulnerabilities.create!({
project_id: project.id,
author_id: user.id,
title: 'test',
severity: 1,
confidence: 1,
report_type: 1
}.merge(overrides))
end
def create_vulnerability_read(overrides = {})
vulnerability_reads.create!({
project_id: project.id,
vulnerability_id: 1,
scanner_id: scanner.id,
severity: 1,
report_type: 1,
state: 1,
uuid: SecureRandom.uuid
}.merge(overrides))
end
def create_finding(overrides = {})
vulnerability_findings.create!({
project_id: project.id,
scanner_id: scanner.id,
severity: 5, # medium
confidence: 2, # unknown,
report_type: 99, # generic
primary_identifier_id: create_identifier.id,
project_fingerprint: SecureRandom.hex(20),
location_fingerprint: SecureRandom.hex(20),
uuid: SecureRandom.uuid,
name: "CVE-2018-1234",
raw_metadata: "{}",
metadata_version: "test:1.0"
}.merge(overrides))
end
def create_identifier(overrides = {})
vulnerability_identifiers.create!({
project_id: project.id,
external_id: "CVE-2018-1234",
external_type: "CVE",
name: "CVE-2018-1234",
fingerprint: SecureRandom.hex(20)
}.merge(overrides))
end
def create_vulnerability_occurrence_identifier(overrides = {})
vulnerability_occurrence_identifiers.create!({
created_at: Time.now.utc,
updated_at: Time.now.utc,
occurrence_id: nil,
identifier_id: nil
}.merge(overrides))
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe QueueBackfillIdentifierNamesOfVulnerabilityReads,
feature_category: :vulnerability_management do
let!(:batched_migration) { described_class::MIGRATION }
it 'schedules a new batched migration' do
reversible_migration do |migration|
migration.before -> {
expect(batched_migration).not_to have_scheduled_batched_migration
}
migration.after -> {
expect(batched_migration).to have_scheduled_batched_migration(
table_name: :vulnerability_reads,
column_name: :id,
interval: described_class::DELAY_INTERVAL,
batch_size: described_class::BATCH_SIZE,
sub_batch_size: described_class::SUB_BATCH_SIZE
)
}
end
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment