Skip to content

Improve the performance of `vulnerability_reads` queries

What does this MR do and why?

This MR tries to improve the performance of the vulnerability_reads queries when the query is sorted by detected_at(vulnerability_id).

Database review

This MR introduces a new query which consists of two parts;

  1. Loose index scan to get all the traversal_ids.
  2. Use those traversal_ids to query the vulnerability_reads table.
The new Query
SELECT
    "vulnerability_reads".*
FROM
    "vulnerability_reads"
WHERE
    "vulnerability_reads"."vulnerability_id" IN (
        SELECT
            "vulnerability_reads"."vulnerability_id"
        FROM ( WITH RECURSIVE "loose_index_scan_cte" AS (
(
                    SELECT
                        "vulnerability_reads"."traversal_ids"
                    FROM
                        "vulnerability_reads"
                    WHERE
                        "vulnerability_reads"."traversal_ids" >= '{9970}'
                        AND "vulnerability_reads"."traversal_ids" < '{9971}'
                        AND "vulnerability_reads"."archived" = FALSE
                    ORDER BY
                        "vulnerability_reads"."traversal_ids" ASC
                    LIMIT 1)
            UNION (
                SELECT
                    (
                        SELECT
                            "vulnerability_reads"."traversal_ids"
                        FROM
                            "vulnerability_reads"
                        WHERE
                            "vulnerability_reads"."traversal_ids" >= '{9970}'
                            AND "vulnerability_reads"."traversal_ids" < '{9971}'
                            AND "vulnerability_reads"."archived" = FALSE
                            AND "vulnerability_reads"."traversal_ids" > "loose_index_scan_cte"."traversal_ids"
                        ORDER BY
                            "vulnerability_reads"."traversal_ids" ASC
                        LIMIT 1) AS traversal_ids
            FROM
                "loose_index_scan_cte"))
        SELECT
            "traversal_ids"
        FROM
            "loose_index_scan_cte" AS "vulnerability_reads"
        WHERE
            "vulnerability_reads"."traversal_ids" IS NOT NULL) AS "traversal_ids" ("traversal_ids"),
        unnest('{1,2,4,5,6,7}'::smallint[]) AS "severities" ("severity"),
        unnest('{1,4}'::smallint[]) AS "states" ("state"),
        LATERAL (
            SELECT
                "vulnerability_reads"."traversal_ids",
                "vulnerability_reads"."archived",
                "vulnerability_reads"."report_type",
                "vulnerability_reads"."severity",
                "vulnerability_reads"."state",
                "vulnerability_reads"."vulnerability_id"
            FROM
                "vulnerability_reads"
            WHERE
                "vulnerability_reads"."archived" = FALSE
                AND "vulnerability_reads"."report_type" = 0
                AND (vulnerability_reads."traversal_ids" = "traversal_ids"."traversal_ids")
                AND (vulnerability_reads."severity" = "severities"."severity")
                AND (vulnerability_reads."state" = "states"."state")
            ORDER BY
                "vulnerability_reads"."vulnerability_id" ASC
            LIMIT 21) AS vulnerability_reads
        ORDER BY
            "vulnerability_reads"."vulnerability_id" ASC
        LIMIT 21)
    ORDER BY
        "vulnerability_reads"."vulnerability_id" ASC
    LIMIT 21

https://console.postgres.ai/gitlab/gitlab-production-main/sessions/28530/commands/89055

The old query
SELECT
    "vulnerability_reads".*
FROM
    "vulnerability_reads"
    INNER JOIN "projects" ON "projects"."id" = "vulnerability_reads"."project_id"
WHERE
    "vulnerability_reads"."namespace_id" IN (
        SELECT
            "namespaces"."id"
        FROM
            "namespaces"
        WHERE
            "namespaces"."type" = 'Group'
            AND (traversal_ids @> ('{9970}')))
    AND "projects"."archived" = FALSE
    AND "vulnerability_reads"."report_type" = 0
    AND "vulnerability_reads"."state" IN (1, 4)
ORDER BY
    "vulnerability_reads"."vulnerability_id" ASC
LIMIT 21

https://console.postgres.ai/gitlab/gitlab-production-main/sessions/28530/commands/89058

Note: The new query would also perform badly if the user select too many states and report types but I couldn't find anything to address that problem.

Edited by Mehmet Emin INAC

Merge request reports