Add UpdateTokenStatusWorker to add detected token statuses to secret detection findings
What does this MR do and why?
This MR implements token status tracking for GitLab Personal Access Tokens (PATs) discovered during Secret Detection scans.
It adds a new worker, Security::SecretDetection::UpdateTokenStatusWorker, which:
- Is triggered after
::Security::Ingestion::IngestReportsServicehas completed - Processes Secret Detection findings in batches of 100
- Checks if discovered tokens match existing GitLab PATs
- Assigns each finding a token status (active, inactive, or unknown)
Database Queries
Query list when Secret Detection finds more than 200 secrets (note: the shas included in this MR are from tokens on my GDK)
-- Query 1: Initial Finding Query
SELECT
"vulnerability_occurrences"."id"
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
ORDER BY
"vulnerability_occurrences"."id" ASC
LIMIT 1
-- Query 2: Batch Boundary Query
SELECT
"vulnerability_occurrences"."id"
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 541
ORDER BY
"vulnerability_occurrences"."id" ASC
LIMIT 1 OFFSET 100
-- Query 3: Batch Existence Check
SELECT 1 AS one
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 541
AND "vulnerability_occurrences"."id" < 797
LIMIT 1
-- Query 4: Fetch Findings
SELECT
"vulnerability_occurrences".*
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 541
AND "vulnerability_occurrences"."id" < 797
-- Query 5: Personal Access Token Lookup (First Batch)
SELECT
"personal_access_tokens".*
FROM
"personal_access_tokens"
WHERE
"personal_access_tokens"."token_digest" IN (
'cAoh9G5i5NxQ3fRDMXR4L9A1ZRkPZJRB9fBjwARD6NA=',
'q+dnwB3deeOAZ5GzAVUQmeLCkevM9U4VhvgzIiFy1KU=',
'MWxlTYKX4WBUFLPrUq4GPS4Q+ECdxoVFpeBzRWibPv8=',
'j90woIP/bLhumbvOG6oFou6thyt4WuLlF8fhPw2HA7A=',
-- Many more token digests (truncated for readability)
'fI9AvUch3NRXA9Ahyis9DurHVQZPvJVgk6909l4AKPE=',
'enX9ZJp+XM1knFcY/V9QPna/27icoAeyiFQLWu3mj/w='
-- More token digests omitted
)
-- Query 6: Finding Token Status Bulk Upsert (First Batch)
INSERT INTO
"secret_detection_token_statuses" (
"vulnerability_occurrence_id",
"project_id",
"status",
"created_at",
"updated_at"
)
VALUES
(704, 20, 0, '2025-03-25 06:56:36.410917', '2025-03-25 06:56:36.410917'),
(707, 20, 0, '2025-03-25 06:56:36.410917', '2025-03-25 06:56:36.410917'),
(708, 20, 0, '2025-03-25 06:56:36.410917', '2025-03-25 06:56:36.410917'),
(709, 20, 0, '2025-03-25 06:56:36.410917', '2025-03-25 06:56:36.410917'),
-- Many more values (truncated for readability)
(543, 20, 0, '2025-03-25 06:56:36.410917', '2025-03-25 06:56:36.410917')
ON CONFLICT ("vulnerability_occurrence_id") DO UPDATE
SET
updated_at = (
CASE
WHEN ("secret_detection_token_statuses"."status" IS NOT DISTINCT FROM excluded."status")
THEN "secret_detection_token_statuses".updated_at
ELSE CURRENT_TIMESTAMP
END
),
"status" = excluded."status"
RETURNING "vulnerability_occurrence_id";
-- Query 7: Next Batch Check
SELECT
"vulnerability_occurrences"."id"
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 797
ORDER BY
"vulnerability_occurrences"."id" ASC
LIMIT 1 OFFSET 100
-- Query 8: Next Batch Existence Check
SELECT 1 AS one
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 797
AND "vulnerability_occurrences"."id" < 899
LIMIT 1
-- Query 9: Fetch Next Batch Findings
SELECT
"vulnerability_occurrences".*
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 797
AND "vulnerability_occurrences"."id" < 899
-- Query 10: Personal Access Token Lookup (Second Batch)
SELECT
"personal_access_tokens".*
FROM
"personal_access_tokens"
WHERE
"personal_access_tokens"."token_digest" IN (
'pu9LmAATUepMJ63COVyWq+TMW0KU8nwBpWJKYaxejIw=',
'3atqgL3HuIiXFCPhJkrmxqhmenxNnUpqldp+2Al4Y2M=',
'mb20LfoYmKcTPGuXz4sntHTtlYn0mARquMXptVF9eyA=',
-- More token digests (truncated for readability)
'GqJBPnEjAFB79/hvazLDDcXrz/LqrAFOJ0fuHTdxpyk='
)
-- Query 11: Finding Token Status Bulk Upsert (Second Batch)
INSERT INTO
"secret_detection_token_statuses" (
"vulnerability_occurrence_id",
"project_id",
"status",
"created_at",
"updated_at"
)
VALUES
(797, 20, 0, '2025-03-25 06:56:36.463501', '2025-03-25 06:56:36.463501'),
(798, 20, 0, '2025-03-25 06:56:36.463501', '2025-03-25 06:56:36.463501'),
(800, 20, 0, '2025-03-25 06:56:36.463501', '2025-03-25 06:56:36.463501'),
-- More values (truncated for readability)
(898, 20, 0, '2025-03-25 06:56:36.463501', '2025-03-25 06:56:36.463501')
ON CONFLICT ("vulnerability_occurrence_id") DO UPDATE
SET
updated_at = (
CASE
WHEN ("secret_detection_token_statuses"."status" IS NOT DISTINCT FROM excluded."status")
THEN "secret_detection_token_statuses".updated_at
ELSE CURRENT_TIMESTAMP
END
),
"status" = excluded."status"
RETURNING "vulnerability_occurrence_id";
-- Query 12: Check for More Batches
SELECT
"vulnerability_occurrences"."id"
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 899
ORDER BY
"vulnerability_occurrences"."id" ASC
LIMIT 1 OFFSET 100
-- Query 13: Final Batch Existence Check
SELECT 1 AS one
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 899
LIMIT 1
-- Query 14: Fetch Final Batch Findings
SELECT
"vulnerability_occurrences".*
FROM
"vulnerability_occurrences"
WHERE
"vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 604
AND "vulnerability_occurrences"."id" >= 899
Query Plans
Findings lookup Queries
Initial batch setup
SELECT "vulnerability_occurrences"."id"
FROM "vulnerability_occurrences"
WHERE "vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 1751759489
ORDER BY "vulnerability_occurrences"."id" ASC
LIMIT 1
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/38137/commands/116804
Batch boundary check
SELECT "vulnerability_occurrences"."id"
FROM "vulnerability_occurrences"
WHERE "vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 1751759489
AND "vulnerability_occurrences"."id" >= 156622814
ORDER BY "vulnerability_occurrences"."id" ASC
LIMIT 1 OFFSET 100
- 0 rows found, this is expected as there are less than 100 vulnerabilities
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/38137/commands/116806
Empty check
SELECT 1 AS one
FROM "vulnerability_occurrences"
WHERE "vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 1751759489
AND "vulnerability_occurrences"."id" >= 156622814
LIMIT 1
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/38137/commands/116827
Data retrieval
SELECT "vulnerability_occurrences".*
FROM "vulnerability_occurrences"
WHERE "vulnerability_occurrences"."report_type" = 4
AND "vulnerability_occurrences"."latest_pipeline_id" = 1751759489
AND "vulnerability_occurrences"."id" >= 156622814
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/38137/commands/116828
Note to reviewer: These query filters on both report_type and latest_pipeline_id. Do you think we need a compound index on these columns, and if so, would you be happy for it to be added in a follow-up since this is behind a feature flag?
Personal Access Token Lookup
SELECT "personal_access_tokens".*
FROM "personal_access_tokens"
WHERE "personal_access_tokens"."token_digest" IN (...)
https://explain.depesz.com/s/sn5hG
Upsert Finding Token Statuses
INSERT INTO "secret_detection_token_statuses" (
"vulnerability_occurrence_id",
"project_id",
"status",
"created_at",
"updated_at"
)
VALUES
(704, 278964, 0, '2025-03-25 06:56:36.410917', '2025-03-25 06:56:36.410917'),
(707, 278964, 0, '2025-03-25 06:56:36.410917', '2025-03-25 06:56:36.410917'),
(708, 278964, 0, '2025-03-25 06:56:36.410917', '2025-03-25 06:56:36.410917')
-- records truncated for brevity
ON CONFLICT ("vulnerability_occurrence_id") DO UPDATE
SET
updated_at = (
CASE
WHEN ("secret_detection_token_statuses"."status" IS NOT DISTINCT FROM excluded."status")
THEN "secret_detection_token_statuses".updated_at
ELSE CURRENT_TIMESTAMP
END
),
"status" = excluded."status"
RETURNING "vulnerability_occurrence_id";
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/38016/commands/116348
References
Screenshots or screen recordings
| Before | After |
|---|---|
How to set up and validate locally
-
In rails console enable the feature flag
Feature.enable(: validity_checks) -
Create a project
-
Create a personal access token and make a note of the token
-
Create a second personal access token, make a note of the token and disable the token
-
Enable secret detection in the
.gitlab-ci.ymlusing
include:
- template: Jobs/Secret-Detection.gitlab-ci.yml
- Add the tokens created in 3 and 4 to the README
- Add the token
glpat-00000000000000000000to the README - Wait for the Secret Detection pipeline to complete
- In the Rails Console run
Vulnerabilities::FindingTokenStatus.all - You should see three records.
MR acceptance checklist
Evaluate this MR against the MR acceptance checklist. It helps you analyze changes to reduce risks in quality, performance, reliability, security, and maintainability.