Requests to `POST /api/:version/groups/:id/members` for large groups are causing database degradation
Summary
In multiple incidents, e.g.:
we noticed that at an exactly the same time, an SQL query is dominating the execution time on Postgres: gitlab-com/gl-infra/production#7025 (comment 944151804) . This has been traced down to POST /api/:version/groups/:id/members : https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/15710#note_945929007.
An example query is :
example query
/*application:sidekiq,correlation_id:24bc156eb29a1997a07df2bdc12a408c,jid:59a5673c561f4d154f79a7ec,endpoint_id:AuthorizedProjectUpdate::UserRefreshFromReplicaWorker,db_config_name:main_replica*/
WITH recursive "namespaces_cte"
AS
(
(
SELECT "namespaces"."id",
"members"."access_level"
FROM "namespaces"
INNER JOIN "members"
ON "namespaces"."id" = "members"."source_id"
WHERE "members"."type" = $1
AND "members"."source_type" = $2
AND "namespaces"."type" = $3
AND "members"."user_id" = $4
AND "members"."requested_at" IS NULL
AND (
access_level >= $5)
AND "members"."state" = $6)
UNION
(
SELECT "namespaces"."id",
least("members"."access_level", "group_group_links"."group_access") AS access_level
FROM "namespaces"
INNER JOIN "group_group_links"
ON "group_group_links"."shared_group_id" = "namespaces"."id"
INNER JOIN "members"
ON "group_group_links"."shared_with_group_id" = "members"."source_id"
AND "members"."source_type" = $7
AND "members"."requested_at" IS NULL
AND "members"."user_id" = $8
AND "members"."state" = $9
AND "members"."access_level" > $10
WHERE "namespaces"."type" = $11)
UNION
(
SELECT "namespaces"."id",
greatest("members"."access_level", "namespaces_cte"."access_level") AS access_level
FROM "namespaces"
INNER JOIN "namespaces_cte"
ON "namespaces_cte"."id" = "namespaces"."parent_id"
LEFT OUTER JOIN "members"
ON "members"."source_id" = "namespaces"."id"
AND "members"."source_type" = $12
AND "members"."requested_at" IS NULL
AND "members"."user_id" = $13
AND "members"."state" = $14
AND "members"."access_level" > $15
WHERE "namespaces"."type" = $16))
SELECT "project_authorizations"."project_id",
max(access_level) AS access_level
FROM (
(
SELECT projects.id AS project_id,
"members"."access_level"
FROM "projects"
INNER JOIN "members"
ON "projects"."id" = "members"."source_id"
WHERE "members"."type" IN ($17,
$18)
AND "members"."source_type" = $19
AND "members"."user_id" = $20
AND "members"."requested_at" IS NULL
AND "members"."state" = $21)
UNION
(
SELECT projects.id AS project_id,
$22 AS access_level
FROM "projects"
INNER JOIN "namespaces"
ON "projects"."namespace_id" = "namespaces"."id"
WHERE "namespaces"."owner_id" = $23
AND "namespaces"."type" = $24)
UNION
(
SELECT "projects"."id" AS project_id,
"namespaces"."access_level"
FROM "namespaces_cte" "namespaces"
INNER JOIN "projects"
ON "projects"."namespace_id" = "namespaces"."id")
UNION
(
SELECT "project_group_links"."project_id",
least("namespaces"."access_level", "project_group_links"."group_access") AS access_level
FROM "namespaces_cte" "namespaces"
INNER JOIN project_group_links
ON project_group_links.group_id = namespaces.id
INNER JOIN projects
ON projects.id = project_group_links.project_id
INNER JOIN namespaces p_ns
ON p_ns.id = projects.namespace_id
WHERE (
p_ns.share_with_group_lock IS FALSE))) project_authorizations
GROUP BY "project_authorizations"."project_id"
The purpose of this issue is to optimize the relevant query to prevent spikes in the database.
Recommendation
optimize the relevant query to prevent spikes in the database
Edited by Christina Lohr