Skip to content

Migrate OS SBOM occurrences to fixed components

What does this MR do and why?

This migration corrects the components, and their occurrences that were left over from the '20240425205205' migration. It does two things:

  1. It updates the "sbom_occurrences".component_name so that it no longer uses the OS prefix e.g. 'alpine/curl' becomes 'curl'.
  2. It updates the "sbom_occurrences".component_id so that it points to the correct parent component.

This ensures that we can navigate to the correct parent component using active record calls in the future, and have the correct name in the denormalized column. This intentionally does not migrate any vulnerability data! The reason for this is three-fold:

  1. It reduces the complexity of the migration which means the migration is less of a risk.
  2. We put less burden on the database by writing to fewer tables.
  3. The findings will be corrected in future scans, and the previous findings that don't match up will be marked as no longer found. These then can be remediated in mass, and is much simpler to do versus writing a batched background migration.

Approximated rows touched:

-- sbom_components that have occurrences to migrate
gitlabhq_dblab=# select count(*) from sbom_components where purl_type between 8 and 13 and name like '%/%';
 count
-------
 19203
(1 row)
-- occurrences to update
SELECT
    COUNT(*)
FROM
    sbom_occurrences AS a
    INNER JOIN sbom_components AS b ON a.component_id = b.id
WHERE
    b.purl_type IN (9, 10, 11, 12, 13)
    AND b.name LIKE '%/%';

 count
--------
 657161
(1 row)
-- max component versions expected to iterate
gitlabhq_dblab=# select count(component_id), component_id FROM sbom_component_versions where component_id IN (select id from sbom_components where purl_type between 8 and 13 and name like '%/%') group by 2 order by 1 desc limit 3;
 count | component_id
-------+--------------
   129 |   2153901748
    82 |   2153906853
    78 |   2160866920
(3 rows)
-- mean component versions expected to iterate
gitlabhq_dblab=# with cnts as (select count(component_id) as cnt, component_id FROM sbom_component_versions where component_id IN (select id from sbom_components where purl_type between 8 and 13 and name like '%/%') group by 2 order by 1 desc) select floor(avg(cnt)) from cnts;
 floor
-------
     3
(1 row)
-- median component versions expected to iterate
gitlabhq_dblab=# with cnts as (select count(component_id) as cnt, component_id FROM sbom_component_versions where component_id IN (select id from sbom_components where purl_type between 8 and 13 and name like '%/%') group by 2 order by 1 desc) select percentile_cont(0.5) within group (order by cnt) from cnts;
 percentile_cont
-----------------
               2
(1 row)

db:migrate

❯ bin/rails db:migrate
main: == [advisory_lock_connection] object_id: 127800, pg_backend_pid: 29253
main: == 20240507152320 QueueMigrateOsSbomOccurrencesToComponentsWithoutPrefix: migrating
main: == 20240507152320 QueueMigrateOsSbomOccurrencesToComponentsWithoutPrefix: migrated (0.0650s)

main: == [advisory_lock_connection] object_id: 127800, pg_backend_pid: 29253
ci: == [advisory_lock_connection] object_id: 128240, pg_backend_pid: 29255
ci: == 20240507152320 QueueMigrateOsSbomOccurrencesToComponentsWithoutPrefix: migrating
ci: -- The migration is skipped since it modifies the schemas: [:gitlab_main].
ci: -- This database can only apply migrations in one of the following schemas: [:gitlab_ci, :gitlab_internal, :gitlab_shared].
ci: == 20240507152320 QueueMigrateOsSbomOccurrencesToComponentsWithoutPrefix: migrated (0.0096s)

ci: == [advisory_lock_connection] object_id: 128240, pg_backend_pid: 29255

❯ bin/rails db:migrate:down:main VERSION=20240507152320
main: == [advisory_lock_connection] object_id: 127340, pg_backend_pid: 8614
main: == 20240507152320 QueueMigrateOsSbomOccurrencesToComponentsWithoutPrefix: reverting
main: == 20240507152320 QueueMigrateOsSbomOccurrencesToComponentsWithoutPrefix: reverted (0.0743s)

main: == [advisory_lock_connection] object_id: 127340, pg_backend_pid: 8614

Query plans

SELECT "sbom_components"."id" FROM "sbom_components" WHERE "sbom_components"."id" BETWEEN 1 AND 6 AND "sbom_components"."component_type" = 0 AND "sbom_components"."purl_type" IN (9, 10, 11, 12, 13) AND "sbom_components"."id" >= 1 ORDER BY "sbom_components"."id" ASC LIMIT 1

query plan

SELECT "sbom_components"."id" FROM "sbom_components" WHERE "sbom_components"."id" BETWEEN 1 AND 6 AND "sbom_components"."component_type" = 0 AND "sbom_components"."purl_type" IN (9, 10, 11, 12, 13) AND "sbom_components"."id" >= 1 ORDER BY "sbom_components"."id" ASC LIMIT 1 OFFSET 200

query plan

INSERT INTO "sbom_component_versions" ("component_id", "version", "source_package_name", "created_at", "updated_at")
    VALUES
    (1993018515, '3.0.3-r0', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.0.4-r0', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.0.5-r2', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.1.0-r0', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.1.0-r3', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.1.2-r0', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r15', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r16', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r18', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r20', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r22', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r23', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r3', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r6', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r7', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.2.0-r8', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.4.0-r0', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.4.3-r1', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.4.3-r2', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.4.6-r0', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.5.3-r1', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP),
    (1993018515, '3.6.0-r0', NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
ON CONFLICT ("component_id", "version")
    DO UPDATE SET
        updated_at = (
            CASE WHEN ("sbom_component_versions"."source_package_name" IS NOT DISTINCT FROM excluded."source_package_name") THEN
                "sbom_component_versions".updated_at
            ELSE
                CURRENT_TIMESTAMP
            END), "source_package_name" = excluded."source_package_name"
    RETURNING
        "id", "version"

query plan

Click to expand UPDATE query
WITH updated_values (
    id,
    component_version_id,
    component_id,
    component_name
) AS (
VALUES (2439081552, 2293368233, 1993018517, 'alpine-keys'),
    (2481586696, 2293368233, 1993018517, 'alpine-keys'),
    (2482187974, 2293368233, 1993018517, 'alpine-keys'),
    (2482488198, 2293368233, 1993018517, 'alpine-keys'),
    (2482645002, 2293368233, 1993018517, 'alpine-keys'),
    (2483493814, 2293368233, 1993018517, 'alpine-keys'),
    (2484281277, 2293368233, 1993018517, 'alpine-keys'),
    (2484416636, 2293368233, 1993018517, 'alpine-keys'),
    (2484671504, 2293368233, 1993018517, 'alpine-keys'),
    (2484799930, 2293368233, 1993018517, 'alpine-keys'),
    (2484909192, 2293368233, 1993018517, 'alpine-keys'),
    (2485029500, 2293368233, 1993018517, 'alpine-keys'),
    (2485029744, 2293368233, 1993018517, 'alpine-keys'),
    (2485031927, 2293368233, 1993018517, 'alpine-keys'),
    (2485040072, 2293368233, 1993018517, 'alpine-keys'),
    (2485399559, 2293368233, 1993018517, 'alpine-keys'),
    (2485620617, 2293368233, 1993018517, 'alpine-keys'),
    (2485784384, 2293368233, 1993018517, 'alpine-keys'),
    (2485868868, 2293368233, 1993018517, 'alpine-keys'),
    (2485913099, 2293368233, 1993018517, 'alpine-keys'),
    (2485920538, 2293368233, 1993018517, 'alpine-keys'),
    (2486003226, 2293368233, 1993018517, 'alpine-keys'),
    (2486005277, 2293368233, 1993018517, 'alpine-keys'),
    (2486012523, 2293368233, 1993018517, 'alpine-keys'),
    (2486018201, 2293368233, 1993018517, 'alpine-keys'),
    (2486021006, 2293368233, 1993018517, 'alpine-keys'),
    (2486024432, 2293368233, 1993018517, 'alpine-keys'),
    (2486029513, 2293368233, 1993018517, 'alpine-keys'),
    (2486044270, 2293368233, 1993018517, 'alpine-keys'),
    (2486103797, 2293368233, 1993018517, 'alpine-keys'),
    (2486103837, 2293368233, 1993018517, 'alpine-keys'),
    (2486140073, 2293368233, 1993018517, 'alpine-keys'),
    (2486590719, 2293368233, 1993018517, 'alpine-keys'),
    (2486630334, 2293368233, 1993018517, 'alpine-keys'),
    (2486652441, 2293368233, 1993018517, 'alpine-keys'),
    (2486776649, 2293368233, 1993018517, 'alpine-keys'),
    (2486872547, 2293368233, 1993018517, 'alpine-keys'),
    (2487192517, 2293368233, 1993018517, 'alpine-keys'),
    (2490142630, 2293368233, 1993018517, 'alpine-keys'),
    (2490155964, 2293368233, 1993018517, 'alpine-keys'),
    (2490280837, 2293368233, 1993018517, 'alpine-keys'),
    (2490295798, 2293368233, 1993018517, 'alpine-keys'),
    (2490916494, 2293368233, 1993018517, 'alpine-keys'),
    (2491026259, 2293368233, 1993018517, 'alpine-keys'),
    (2494078067, 2293368233, 1993018517, 'alpine-keys'),
    (2494441901, 2293368233, 1993018517, 'alpine-keys'),
    (2497684410, 2293368233, 1993018517, 'alpine-keys'),
    (2504678466, 2293368233, 1993018517, 'alpine-keys'),
    (2505399164, 2293368233, 1993018517, 'alpine-keys'),
    (2505911616, 2293368233, 1993018517, 'alpine-keys'),
    (2508051899, 2293368233, 1993018517, 'alpine-keys'),
    (2515893346, 2293368233, 1993018517, 'alpine-keys'),
    (2516164283, 2293368233, 1993018517, 'alpine-keys'),
    (2516350864, 2293368233, 1993018517, 'alpine-keys'),
    (2516351144, 2293368233, 1993018517, 'alpine-keys'),
    (2517127381, 2293368233, 1993018517, 'alpine-keys'),
    (2517434141, 2293368233, 1993018517, 'alpine-keys'),
    (2517434156, 2293368233, 1993018517, 'alpine-keys'),
    (2517437835, 2293368233, 1993018517, 'alpine-keys'),
    (2517447079, 2293368233, 1993018517, 'alpine-keys'),
    (2517454890, 2293368233, 1993018517, 'alpine-keys'),
    (2517456302, 2293368233, 1993018517, 'alpine-keys'),
    (2517460380, 2293368233, 1993018517, 'alpine-keys'),
    (2517466687, 2293368233, 1993018517, 'alpine-keys'),
    (2517467070, 2293368233, 1993018517, 'alpine-keys'),
    (2517477061, 2293368233, 1993018517, 'alpine-keys'),
    (2517909485, 2293368233, 1993018517, 'alpine-keys'),
    (2517913863, 2293368233, 1993018517, 'alpine-keys'),
    (2518358000, 2293368233, 1993018517, 'alpine-keys'),
    (2518558722, 2293368233, 1993018517, 'alpine-keys'),
    (2518560441, 2293368233, 1993018517, 'alpine-keys'),
    (2518727960, 2293368233, 1993018517, 'alpine-keys'),
    (2519340437, 2293368233, 1993018517, 'alpine-keys'),
    (2519851984, 2293368233, 1993018517, 'alpine-keys'),
    (2520175969, 2293368233, 1993018517, 'alpine-keys'),
    (2521050829, 2293368233, 1993018517, 'alpine-keys'),
    (2521146765, 2293368233, 1993018517, 'alpine-keys'),
    (2522290506, 2293368233, 1993018517, 'alpine-keys'),
    (2522401075, 2293368233, 1993018517, 'alpine-keys'),
    (2523617167, 2293368233, 1993018517, 'alpine-keys'),
    (2524437222, 2293368233, 1993018517, 'alpine-keys'),
    (2525281568, 2293368233, 1993018517, 'alpine-keys'),
    (2525859578, 2293368233, 1993018517, 'alpine-keys'),
    (2527483832, 2293368233, 1993018517, 'alpine-keys'),
    (2528158042, 2293368233, 1993018517, 'alpine-keys'),
    (2529143237, 2293368233, 1993018517, 'alpine-keys'),
    (2532339090, 2293368233, 1993018517, 'alpine-keys'),
    (2534500990, 2293368233, 1993018517, 'alpine-keys'),
    (2535264709, 2293368233, 1993018517, 'alpine-keys'),
    (2536181669, 2293368233, 1993018517, 'alpine-keys'),
    (2540438695, 2293368233, 1993018517, 'alpine-keys'),
    (2541855865, 2293368233, 1993018517, 'alpine-keys'),
    (2542819974, 2293368233, 1993018517, 'alpine-keys'),
    (2542822364, 2293368233, 1993018517, 'alpine-keys'),
    (2543599172, 2293368233, 1993018517, 'alpine-keys'),
    (2543968737, 2293368233, 1993018517, 'alpine-keys'),
    (2544541146, 2293368233, 1993018517, 'alpine-keys'),
    (2545663497, 2293368233, 1993018517, 'alpine-keys'),
    (2551475653, 2293368233, 1993018517, 'alpine-keys'),
    (2551475934, 2293368233, 1993018517, 'alpine-keys')
)
UPDATE
    sbom_occurrences
SET
    updated_at = CURRENT_TIMESTAMP,
    component_version_id = updated_values.component_version_id,
    component_id = updated_values.component_id,
    component_name = updated_values.component_name
FROM
    updated_values
WHERE
    sbom_occurrences.id = updated_values.id;

query plan

Edited by Oscar Tovar

Merge request reports