Add VulnerabilityReadsFinder to speed up API responses
What does this MR do and why?
Addresses #335503 (closed)
We have been getting time-out errors while loading the project and group vulnerability report page which have a large number of vulnerabilities. This is because we store a lot of information(eg: title, description html) in the vulnerabilities table which makes the filtering time consuming. As a part of &6903 (closed), we created a vulnerability_reads table with denormalized data to improve the performance of filtering vulnerabilities.
This MR introduces a new finder that uses vulnerability_reads table to query vulnerability data. vulnerability_reads table has the required indices to speed up the filtering of vulnerability data. This will replace the Security::VulnerabilitiesFinder in few places where reading from vulnerability_reads will improve the performance.
The finder is added behind feature flag: vulnerability_reads_table
⚠ Deviations from existing finder:
The new finder does not support these scenarios:
-
Filtering by
cluster_id: We store the cluster_id of a kubernetes cluster which is integrated to GitLab through certificate, it was deprecated in %14.5. Since the new table (vulnerability_reads) is introduced %14.8, we decided not to handle the filtering in the new table: !76220 (comment 859085751) -
Sorting by
stateandreport_type: We are planning to remove the sorting by state and report_type functionality as it is not being used much: !76220 (comment 862464790) Follow-up issues: #354504 (closed) and #354503 (closed)
Graphql Query
In order to test this MR, enabling vulnerability_reads_table feature flag on project and executing these 2 graphql queries would work:
Project.vulnerabilities
query
query {
project(fullPath:"gitlab-org/gitlab") {
vulnerabilities(reportType:[SAST,SECRET_DETECTION,CONTAINER_SCANNING,DAST,DEPENDENCY_SCANNING,COVERAGE_FUZZING,API_FUZZING], state:[DETECTED,CONFIRMED], sort:severity_desc){
nodes {
id
severity
}
}
}
}
Query analysis for gitlab-org/gitlab project
vulnerability_reads table
EXPLAIN SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "report_type" IN (2, 3, 1, 0, 4, 5, 6) AND "state" IN (1, 4) ORDER BY "severity" DESC, "vulnerability_id" DESC LIMIT 20
console.postgres.ai: 33.381 ms
vulnerabilities table
EXPLAIN SELECT * FROM "vulnerabilities" WHERE "project_id" = 278964 AND "report_type" IN (2, 3, 1, 0, 4, 5, 6) AND "state" IN (1, 4) ORDER BY "severity" DESC, "id" DESC LIMIT 20
console.postgres.ai: 29.852 ms
Group.vulnerabilities
Query
query {
group(fullPath:"gitlab-org") {
vulnerabilities(reportType:[SAST,SECRET_DETECTION,CONTAINER_SCANNING,DAST,DEPENDENCY_SCANNING,COVERAGE_FUZZING,API_FUZZING], state:[DETECTED,CONFIRMED], sort:severity_desc){
nodes {
id
severity
}
}
}
}
Query analysis for gitlab-org namespace
vulnerability_reads table
EXPLAIN SELECT * FROM "vulnerability_reads" WHERE "project_id" IN (SELECT "projects"."id" FROM "projects" WHERE "projects"."namespace_id" IN (SELECT "namespaces"."id" FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND (traversal_ids @> ('{9970}'))) AND "projects"."archived" = FALSE AND "projects"."pending_delete" = FALSE) AND "report_type" IN (2, 3, 1, 0, 4, 5, 6) AND "state" IN (1, 4) ORDER BY "severity" DESC, "vulnerability_id" DESC LIMIT 20
console.postgres.ai: 305.076 ms
vulnerabilities table
EXPLAIN SELECT * FROM "vulnerabilities" WHERE "project_id" IN (SELECT "projects"."id" FROM "projects" WHERE "projects"."namespace_id" IN (SELECT "namespaces"."id" FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND (traversal_ids @> ('{9970}'))) AND "projects"."archived" = FALSE AND "projects"."pending_delete" = FALSE) AND "report_type" IN (2, 3, 1, 0, 4, 5, 6) AND "state" IN (1, 4) ORDER BY "severity" DESC, "id" DESC LIMIT 20
console.postgres.ai: 759.789 ms
Project.vulnerabilitySeveritiesCount
Query
query {
project(fullPath:"gitlab-org/gitlab") {
vulnerabilitySeveritiesCount(reportType:[SAST,SECRET_DETECTION,CONTAINER_SCANNING,DAST,DEPENDENCY_SCANNING,COVERAGE_FUZZING,API_FUZZING], state:[DETECTED,CONFIRMED]){
high
medium
low
critical
}
}
}
Query analysis for gitlab-org/gitlab project
vulnerability_reads table
EXPLAIN SELECT COUNT(*) AS count_all, "severity" AS vulnerabilities_severity FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "report_type" IN (2, 3, 1, 0, 4, 5, 6) AND "state" IN (1, 4) GROUP BY "severity" ORDER BY "severity" DESC
console.postgres.ai: 20.582 ms
vulnerabilities table
EXPLAIN SELECT COUNT(*) AS count_all, "severity" AS vulnerabilities_severity FROM "vulnerabilities" WHERE "project_id" = 278964 AND "report_type" IN (2, 3, 1, 0, 4, 5, 6) AND "state" IN (1, 4) GROUP BY "severity" ORDER BY "severity" DESC
console.postgres.ai: 30.324 ms
Group.vulnerabilitySeveritiesCount
Query
query {
group(fullPath:"gitlab-org") {
vulnerabilitySeveritiesCount(reportType:[SAST,SECRET_DETECTION,CONTAINER_SCANNING,DAST,DEPENDENCY_SCANNING,COVERAGE_FUZZING,API_FUZZING], state:[DETECTED,CONFIRMED]){
high
medium
low
critical
}
}
}
Query analysis for gitlab-org namespace
vulnerability_reads table
EXPLAIN SELECT COUNT(*) AS count_all, "severity" AS vulnerabilities_severity FROM "vulnerability_reads" WHERE "project_id" IN (SELECT "projects"."id" FROM "projects" WHERE "projects"."namespace_id" IN (SELECT "namespaces"."id" FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND (traversal_ids @> ('{9970}'))) AND "projects"."archived" = FALSE AND "projects"."pending_delete" = FALSE) AND "report_type" IN (2, 3, 1, 0, 4, 5, 6) AND "state" IN (1, 4) GROUP BY "severity" ORDER BY "severity" DESC
console.postgres.ai: 315.843 ms
vulnerabilities table
EXPLAIN SELECT COUNT(*) AS count_all, "severity" AS vulnerabilities_severity FROM "vulnerabilities" WHERE "project_id" IN (SELECT "projects"."id" FROM "projects" WHERE "projects"."namespace_id" IN (SELECT "namespaces"."id" FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND (traversal_ids @> ('{9970}'))) AND "projects"."archived" = FALSE AND "projects"."pending_delete" = FALSE) AND "report_type" IN (2, 3, 1, 0, 4, 5, 6) AND "state" IN (1, 4) GROUP BY "severity" ORDER BY "severity" DESC
console.postgres.ai: 600.092 ms
Database
Scopes
Scopes
order_severity_asc:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 ORDER BY "severity" ASC, "vulnerability_id" DESC LIMIT 20
order_severity_desc:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 ORDER BY "severity" DESC, "vulnerability_id" DESC LIMIT 20
order_detected_at_asc:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 ORDER BY "vulnerability_id" ASC LIMIT 20
order_detected_at_desc:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 ORDER BY "vulnerability_id" DESC LIMIT 20
by_scanner_ids:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "scanner_id"=703 LIMIT 20
for_projects:
SELECT * FROM "vulnerability_reads" WHERE "project_id" IN (278964, 2009901) LIMIT 20
grouped_by_severity:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 GROUP BY "severity" ORDER BY "severity" DESC LIMIT 20
with_report_types:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "report_type" IN (0,1,2,3,4,5) LIMIT 20
with_severities:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "severity" IN (6,7) LIMIT 20
with_states:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "state" IN (1, 4) LIMIT 20
with_container_image:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "location_image" IN ('nginx:latest', 'alpine:latest') LIMIT 20
with_cluster_agent_ids:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "cluster_agent_id" IN ('1234', '5678') LIMIT 20
with_resolution:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "resolved_on_default_branch" = TRUE LIMIT 20
with_issues:
SELECT * FROM "vulnerability_reads" WHERE "project_id" = 278964 AND "has_issues" = TRUE LIMIT 20
with_scanner_external_ids:
SELECT "vulnerability_reads".* FROM "vulnerability_reads" INNER JOIN "vulnerability_scanners" ON "vulnerability_scanners"."id" = "vulnerability_reads"."scanner_id" WHERE "vulnerability_reads"."project_id" = 278964 AND "vulnerability_scanners"."external_id" IN ('1', '2', '3') LIMIT 20)
MR acceptance checklist
This checklist encourages us to confirm any changes have been analyzed to reduce risks in quality, performance, reliability, security, and maintainability.
-
I have evaluated the MR acceptance checklist for this MR.