Improve the performance of bulk scheduling worker
What does this MR do and why?
Trying to load all the project_ids under a group can cause timeout issues, therefore, we've decided to iterate over all the subgroups and then load the project_ids in batches while bulk scheduling the UpdateNamespaceIdsOfVulnerabilityReadsWorker
worker.
Database review
Getting all the group IDs
SELECT
current_id,
depth,
ids,
count,
INDEX
FROM ( WITH RECURSIVE "result" AS (
(
SELECT
9970 AS current_id,
ARRAY[9970]::int[] AS depth,
ARRAY[9970]::int[] AS ids,
1::bigint AS count,
0::bigint AS index
FROM (
VALUES (1)) AS initializer_row
WHERE (EXISTS (
SELECT
1
FROM
"namespaces"
WHERE
"namespaces"."id" = 9970)))
UNION ALL ( WITH "cte" AS MATERIALIZED (
SELECT
result.*
FROM
result
)
SELECT
current_id,
depth,
ids,
count,
INDEX
FROM (
(
SELECT
"namespaces"."id" AS current_id,
cte.depth || "namespaces".id AS depth,
cte.ids || "namespaces".id AS ids,
cte.count + 1 AS count,
1::bigint AS index
FROM
cte,
LATERAL (
SELECT
"namespaces"."id"
FROM
"namespaces"
WHERE
"namespaces"."type" = 'Group'
AND (
parent_id = cte.current_id
)
ORDER BY
"namespaces"."id" ASC
LIMIT 1
) "namespaces"
)
UNION ALL (
SELECT
"namespaces"."id" AS current_id,
cte.depth[:array_length(
cte.depth, 1
) - 1] || "namespaces".id AS depth,
cte.ids || "namespaces".id AS ids,
cte.count + 1 AS count,
2::bigint AS index
FROM
cte,
LATERAL (
SELECT
"namespaces"."id"
FROM
"namespaces"
WHERE
"namespaces"."type" = 'Group'
AND (
"namespaces".parent_id = cte.depth[array_length(
cte.depth, 1
) - 1]
)
AND (
"namespaces".id > cte.depth[array_length(
cte.depth, 1
)]
)
ORDER BY
"namespaces"."id" ASC
LIMIT 1
) "namespaces"
)
UNION ALL (
SELECT
cte.current_id AS current_id,
cte.depth[:array_length(
cte.depth, 1
) - 1] AS depth,
cte.ids AS ids,
cte.count + 1 AS count,
3::bigint AS index
FROM
cte
WHERE (
cte.depth <> ARRAY[]::int[]
)
LIMIT 1
)
) namespaces
ORDER BY
"namespaces"."index" ASC
LIMIT 1
))
SELECT
current_id,
depth,
ids,
count,
INDEX
FROM
"result" AS "namespaces"
LIMIT 101) namespaces
ORDER BY
"count" DESC
LIMIT 1
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/26658/commands/83394
Iterating over projects for a group
SELECT
"projects"."id"
FROM
"projects"
WHERE
"projects"."namespace_id" = 60472140
AND "projects"."id" >= 41167378
ORDER BY
"projects"."id" ASC
LIMIT 1 OFFSET 100
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/26658/commands/83395
Querying `project_settings` table
SELECT
"project_settings"."project_id"
FROM
"project_settings"
WHERE
"project_settings"."project_id" IN (
SELECT
"projects"."id"
FROM
"projects"
WHERE
"projects"."namespace_id" = 60472140
AND "projects"."id" >= 41167378)
AND (has_vulnerabilities IS TRUE)
https://console.postgres.ai/gitlab/gitlab-production-tunnel-pg12/sessions/26658/commands/83396
Edited by Mehmet Emin INAC