Use UNION instead of OR for milestone queries
What does this MR do?
This MR optimizes some of the queries on the Milestone index page. I noticed that the planner is using a sequential scan which produces rather slow queries.
The change is behind a feature flag: optimized_timebox_queries
Old query structure:
SELECT ...
FROM milestones
WHERE project_id in (X) OR group_id = Y
New query structure:
SELECT ...
FROM (SELECT * FROM milestones WHERE project_id in (X)) UNION (SELECT * FROM milestones WHERE group_id = Y)
Milestone counts
Old Query
Uses OR - 370ms
SELECT Count(*) AS count_all,
"milestones"."state" AS milestones_state
FROM "milestones"
WHERE (
"milestones"."project_id" IN
(
SELECT "projects"."id"
FROM (
(
SELECT "projects".*
FROM "projects"
WHERE "projects"."namespace_id" IN (WITH recursive "base_and_descendants" AS (
(
SELECT "namespaces".*
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND "namespaces"."id" = 9970)
UNION
(
SELECT "namespaces".*
FROM "namespaces",
"base_and_descendants"
WHERE "namespaces"."type" = 'Group'
AND "namespaces"."parent_id" = "base_and_descendants"."id"))SELECT "id"
FROM "base_and_descendants" AS "namespaces")
AND (
EXISTS
(
SELECT 1
FROM "project_authorizations"
WHERE "project_authorizations"."user_id" = 4156052
AND (
project_authorizations.project_id = projects.id))
OR projects.visibility_level IN (10,20)))
UNION
(
SELECT "projects".*
FROM "projects"
INNER JOIN "project_group_links"
ON "projects"."id" = "project_group_links"."project_id"
WHERE "project_group_links"."group_id" = 9970
AND (
EXISTS
(
SELECT 1
FROM "project_authorizations"
WHERE "project_authorizations"."user_id" = 4156052
AND (
project_authorizations.project_id = projects.id))
OR projects.visibility_level IN (10,20)))) projects
LEFT JOIN project_features
ON projects.id = project_features.project_id
WHERE ((
"project_features"."issues_access_level" IS NULL
OR "project_features"."issues_access_level" IN (20,30)
OR (
"project_features"."issues_access_level" = 10
AND EXISTS
(
SELECT 1
FROM "project_authorizations"
WHERE "project_authorizations"."user_id" = 4156052
AND (
project_authorizations.project_id = projects.id)
AND (
project_authorizations.access_level >= 10))))
OR (
"project_features"."merge_requests_access_level" IS NULL
OR "project_features"."merge_requests_access_level" IN (20,30)
OR (
"project_features"."merge_requests_access_level" = 10
AND EXISTS
(
SELECT 1
FROM "project_authorizations"
WHERE "project_authorizations"."user_id" = 4156052
AND (
project_authorizations.project_id = projects.id)
AND (
project_authorizations.access_level >= 20)))))
ORDER BY "projects"."id" DESC)
OR "milestones"."group_id" = 9970)
GROUP BY "milestones"."state"
Seq Scan :(
New Query
Uses UNION instead of OR - 31ms
SELECT Count(*) AS count_all,
"milestones"."state" AS milestones_state
FROM (
(
SELECT "milestones".*
FROM "milestones"
WHERE "milestones"."project_id" IN
(
SELECT "projects"."id"
FROM (
(
SELECT "projects".*
FROM "projects"
WHERE "projects"."namespace_id" IN (WITH recursive "base_and_descendants" AS (
(
SELECT "namespaces".*
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND "namespaces"."id" = 9970)
UNION
(
SELECT "namespaces".*
FROM "namespaces",
"base_and_descendants"
WHERE "namespaces"."type" = 'Group'
AND "namespaces"."parent_id" = "base_and_descendants"."id"))SELECT "id"
FROM "base_and_descendants" AS "namespaces")
AND (
EXISTS
(
SELECT 1
FROM "project_authorizations"
WHERE "project_authorizations"."user_id" = 4156052
AND (
project_authorizations.project_id = projects.id))
OR projects.visibility_level IN (10,20)))
UNION
(
SELECT "projects".*
FROM "projects"
INNER JOIN "project_group_links"
ON "projects"."id" = "project_group_links"."project_id"
WHERE "project_group_links"."group_id" = 9970
AND (
EXISTS
(
SELECT 1
FROM "project_authorizations"
WHERE "project_authorizations"."user_id" = 4156052
AND (
project_authorizations.project_id = projects.id))
OR projects.visibility_level IN (10,20)))) projects
LEFT JOIN project_features
ON projects.id = project_features.project_id
WHERE ((
"project_features"."issues_access_level" IS NULL
OR "project_features"."issues_access_level" IN (20,30)
OR (
"project_features"."issues_access_level" = 10
AND EXISTS
(
SELECT 1
FROM "project_authorizations"
WHERE "project_authorizations"."user_id" = 4156052
AND (
project_authorizations.project_id = projects.id)
AND (
project_authorizations.access_level >= 10))))
OR (
"project_features"."merge_requests_access_level" IS NULL
OR "project_features"."merge_requests_access_level" IN (20,30)
OR (
"project_features"."merge_requests_access_level" = 10
AND EXISTS
(
SELECT 1
FROM "project_authorizations"
WHERE "project_authorizations"."user_id" = 4156052
AND (
project_authorizations.project_id = projects.id)
AND (
project_authorizations.access_level >= 20)))))))
UNION
(
SELECT "milestones".*
FROM "milestones"
WHERE "milestones"."group_id" = 9970)) milestones
GROUP BY "milestones"."state"
Active milestones
Old Query
Uses OR - 328ms
SELECT "milestones".*
FROM "milestones"
WHERE (
1=0
OR "milestones"."group_id" IN (WITH recursive "base_and_descendants" AS (
(
SELECT "namespaces".*
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND "namespaces"."id" = 9970)
UNION
(
SELECT "namespaces".*
FROM "namespaces",
"base_and_descendants"
WHERE "namespaces"."type" = 'Group'
AND "namespaces"."parent_id" = "base_and_descendants"."id"))SELECT "id"
FROM "base_and_descendants" AS "namespaces"
WHERE (
"namespaces"."visibility_level" IN (10,
20)
OR EXISTS
(
SELECT 1
FROM (
SELECT "namespaces".*
FROM (
(
SELECT "namespaces".*
FROM "namespaces"
INNER JOIN "members"
ON "namespaces"."id" = "members"."source_id"
WHERE "members"."type" = 'GroupMember'
AND "members"."source_type" = 'Namespace'
AND "namespaces"."type" = 'Group'
AND "members"."user_id" = 4156052
AND "members"."requested_at" IS NULL)
UNION
(
SELECT namespaces.*
FROM "projects"
INNER JOIN "project_authorizations"
ON "projects"."id" = "project_authorizations"."project_id"
INNER JOIN "namespaces"
ON "namespaces"."id" = "projects"."namespace_id"
WHERE "project_authorizations"."user_id" = 4156052)) namespaces
WHERE "namespaces"."type" = 'Group') authorized
WHERE authorized."id" = "namespaces"."id"))))
AND (
"milestones"."state" IN ('active'))
ORDER BY due_date ASC nulls last,
title ASC
New Query
Uses UNION instead of OR - 42ms
SELECT "milestones".*
FROM (
(
SELECT "milestones".*
FROM "milestones"
WHERE 1=0)
UNION ALL
(
SELECT "milestones".*
FROM "milestones"
WHERE "milestones"."group_id" IN (WITH recursive "base_and_descendants" AS (
(
SELECT "namespaces".*
FROM "namespaces"
WHERE "namespaces"."type" = 'Group'
AND "namespaces"."id" = 9970)
UNION
(
SELECT "namespaces".*
FROM "namespaces",
"base_and_descendants"
WHERE "namespaces"."type" = 'Group'
AND "namespaces"."parent_id" = "base_and_descendants"."id"))SELECT "id"
FROM "base_and_descendants" AS "namespaces"
WHERE (
"namespaces"."visibility_level" IN (10,
20)
OR EXISTS
(
SELECT 1
FROM (
SELECT "namespaces".*
FROM (
(
SELECT "namespaces".*
FROM "namespaces"
INNER JOIN "members"
ON "namespaces"."id" = "members"."source_id"
WHERE "members"."type" = 'GroupMember'
AND "members"."source_type" = 'Namespace'
AND "namespaces"."type" = 'Group'
AND "members"."user_id" = 4156052
AND "members"."requested_at" IS NULL)
UNION
(
SELECT namespaces.*
FROM "projects"
INNER JOIN "project_authorizations"
ON "projects"."id" = "project_authorizations"."project_id"
INNER JOIN "namespaces"
ON "namespaces"."id" = "projects"."namespace_id"
WHERE "project_authorizations"."user_id" = 4156052)) namespaces
WHERE "namespaces"."type" = 'Group') authorized
WHERE authorized."id" = "namespaces"."id"))))) milestones
WHERE (
"milestones"."state" IN ('active'))
ORDER BY due_date ASC nulls last,
title ASC
Note: We can optimize the queries further (and win a few ms) if we'd SELECT
less columns in the unions. See HashAggregate
entries in the plan. UNION will eliminate the duplicated rows, the more columns we have in the SELECT
the more work the DB needs to do to dedup the rows. To fix this, we'd need to rework the scope/finder which is quite a bit of work.
Does this MR meet the acceptance criteria?
Conformity
-
Changelog entry -
Documentation (if required) -
Code review guidelines -
Merge request performance guidelines -
Style guides -
Database guides -
Separation of EE specific content
Availability and Testing
-
Review and add/update tests for this feature/bug. Consider all test levels. See the Test Planning Process. -
Tested in all supported browsers -
Informed Infrastructure department of a default or new setting change, if applicable per definition of done
Security
If this MR contains changes to processing or storing of credentials or tokens, authorization and authentication methods and other items described in the security review guidelines:
-
Label as security and @ mention @gitlab-com/gl-security/appsec
-
The MR includes necessary changes to maintain consistency between UI, API, email, or other methods -
Security reports checked/validated by a reviewer from the AppSec team