Skip to content

Improve the performance of bulk scheduling worker

What does this MR do and why?

Trying to load all the project_ids under a group can cause timeout issues, therefore, we've decided to iterate over all the subgroups and then load the project_ids in batches while bulk scheduling the UpdateNamespaceIdsOfVulnerabilityReadsWorker worker.

Database review

Getting all the group IDs
SELECT
    current_id,
    depth,
    ids,
    count,
    INDEX
FROM ( WITH RECURSIVE "result" AS (
(
            SELECT
                9970 AS current_id,
                ARRAY[9970]::int[] AS depth,
                ARRAY[9970]::int[] AS ids,
                1::bigint AS count,
                0::bigint AS index
            FROM (
                VALUES (1)) AS initializer_row
            WHERE (EXISTS (
                    SELECT
                        1
                    FROM
                        "namespaces"
                    WHERE
                        "namespaces"."id" = 9970)))
        UNION ALL ( WITH "cte" AS MATERIALIZED (
                SELECT
                    result.*
                FROM
                    result
)
                SELECT
                    current_id,
                    depth,
                    ids,
                    count,
                    INDEX
                FROM (
                    (
                        SELECT
                            "namespaces"."id" AS current_id,
                            cte.depth || "namespaces".id AS depth,
                            cte.ids || "namespaces".id AS ids,
                            cte.count + 1 AS count,
                            1::bigint AS index
                        FROM
                            cte,
                            LATERAL (
                                SELECT
                                    "namespaces"."id"
                                FROM
                                    "namespaces"
                                WHERE
                                    "namespaces"."type" = 'Group'
                                    AND (
                                        parent_id = cte.current_id
)
                                ORDER BY
                                    "namespaces"."id" ASC
                                LIMIT 1
) "namespaces"
)
                    UNION ALL (
                        SELECT
                            "namespaces"."id" AS current_id,
                            cte.depth[:array_length(
                                cte.depth, 1
) - 1] || "namespaces".id AS depth,
                            cte.ids || "namespaces".id AS ids,
                            cte.count + 1 AS count,
                            2::bigint AS index
                        FROM
                            cte,
                            LATERAL (
                                SELECT
                                    "namespaces"."id"
                                FROM
                                    "namespaces"
                                WHERE
                                    "namespaces"."type" = 'Group'
                                    AND (
                                        "namespaces".parent_id = cte.depth[array_length(
                                            cte.depth, 1
) - 1]
)
                                    AND (
                                        "namespaces".id > cte.depth[array_length(
                                            cte.depth, 1
)]
)
                                ORDER BY
                                    "namespaces"."id" ASC
                                LIMIT 1
) "namespaces"
)
                    UNION ALL (
                        SELECT
                            cte.current_id AS current_id,
                            cte.depth[:array_length(
                                cte.depth, 1
) - 1] AS depth,
                            cte.ids AS ids,
                            cte.count + 1 AS count,
                            3::bigint AS index
                        FROM
                            cte
                        WHERE (
                            cte.depth <> ARRAY[]::int[]
)
                    LIMIT 1
)
) namespaces
        ORDER BY
            "namespaces"."index" ASC
        LIMIT 1
))
SELECT
    current_id,
    depth,
    ids,
    count,
    INDEX
FROM
    "result" AS "namespaces"
LIMIT 101) namespaces
ORDER BY
    "count" DESC
LIMIT 1

https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/26658/commands/83394

Iterating over projects for a group
SELECT
    "projects"."id"
FROM
    "projects"
WHERE
    "projects"."namespace_id" = 60472140
    AND "projects"."id" >= 41167378
ORDER BY
    "projects"."id" ASC
LIMIT 1 OFFSET 100

https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/26658/commands/83395

Querying `project_settings` table
SELECT
    "project_settings"."project_id"
FROM
    "project_settings"
WHERE
    "project_settings"."project_id" IN (
        SELECT
            "projects"."id"
        FROM
            "projects"
        WHERE
            "projects"."namespace_id" = 60472140
            AND "projects"."id" >= 41167378)
    AND (has_vulnerabilities IS TRUE)

https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/26658/commands/83396

Edited by Mehmet Emin INAC

Merge request reports