Improve the performance of `vulnerability_reads` queries
requested to merge minac_461446_improve_group-level_vulnerability_reads_queries_for_sorting_by_detected_at into master
What does this MR do and why?
This MR tries to improve the performance of the vulnerability_reads
queries when the query is sorted by detected_at
(vulnerability_id
).
Database review
This MR introduces a new query which consists of two parts;
- Loose index scan to get all the
traversal_ids
. - Use those
traversal_ids
to query thevulnerability_reads
table.
The new Query
SELECT
"vulnerability_reads".*
FROM
"vulnerability_reads"
WHERE
"vulnerability_reads"."vulnerability_id" IN (
SELECT
"vulnerability_reads"."vulnerability_id"
FROM ( WITH RECURSIVE "loose_index_scan_cte" AS (
(
SELECT
"vulnerability_reads"."traversal_ids"
FROM
"vulnerability_reads"
WHERE
"vulnerability_reads"."traversal_ids" >= '{9970}'
AND "vulnerability_reads"."traversal_ids" < '{9971}'
AND "vulnerability_reads"."archived" = FALSE
ORDER BY
"vulnerability_reads"."traversal_ids" ASC
LIMIT 1)
UNION (
SELECT
(
SELECT
"vulnerability_reads"."traversal_ids"
FROM
"vulnerability_reads"
WHERE
"vulnerability_reads"."traversal_ids" >= '{9970}'
AND "vulnerability_reads"."traversal_ids" < '{9971}'
AND "vulnerability_reads"."archived" = FALSE
AND "vulnerability_reads"."traversal_ids" > "loose_index_scan_cte"."traversal_ids"
ORDER BY
"vulnerability_reads"."traversal_ids" ASC
LIMIT 1) AS traversal_ids
FROM
"loose_index_scan_cte"))
SELECT
"traversal_ids"
FROM
"loose_index_scan_cte" AS "vulnerability_reads"
WHERE
"vulnerability_reads"."traversal_ids" IS NOT NULL) AS "traversal_ids" ("traversal_ids"),
unnest('{1,2,4,5,6,7}'::smallint[]) AS "severities" ("severity"),
unnest('{1,4}'::smallint[]) AS "states" ("state"),
LATERAL (
SELECT
"vulnerability_reads"."traversal_ids",
"vulnerability_reads"."archived",
"vulnerability_reads"."report_type",
"vulnerability_reads"."severity",
"vulnerability_reads"."state",
"vulnerability_reads"."vulnerability_id"
FROM
"vulnerability_reads"
WHERE
"vulnerability_reads"."archived" = FALSE
AND "vulnerability_reads"."report_type" = 0
AND (vulnerability_reads."traversal_ids" = "traversal_ids"."traversal_ids")
AND (vulnerability_reads."severity" = "severities"."severity")
AND (vulnerability_reads."state" = "states"."state")
ORDER BY
"vulnerability_reads"."vulnerability_id" ASC
LIMIT 21) AS vulnerability_reads
ORDER BY
"vulnerability_reads"."vulnerability_id" ASC
LIMIT 21)
ORDER BY
"vulnerability_reads"."vulnerability_id" ASC
LIMIT 21
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/28530/commands/89055
The old query
SELECT
"vulnerability_reads".*
FROM
"vulnerability_reads"
INNER JOIN "projects" ON "projects"."id" = "vulnerability_reads"."project_id"
WHERE
"vulnerability_reads"."namespace_id" IN (
SELECT
"namespaces"."id"
FROM
"namespaces"
WHERE
"namespaces"."type" = 'Group'
AND (traversal_ids @> ('{9970}')))
AND "projects"."archived" = FALSE
AND "vulnerability_reads"."report_type" = 0
AND "vulnerability_reads"."state" IN (1, 4)
ORDER BY
"vulnerability_reads"."vulnerability_id" ASC
LIMIT 21
https://console.postgres.ai/gitlab/gitlab-production-main/sessions/28530/commands/89058
Note: The new query would also perform badly if the user select too many states and report types but I couldn't find anything to address that problem.
Edited by Mehmet Emin INAC