Optimize indexes used for denormalized data when using big queuing query
Description
We recently started to denormalize some data to our ci_pending_builds
table.
Here are the columns we introduced:
column | index |
---|---|
instance_runners_enabled |
|
minutes_exceeded |
This issue is about determining our query plan strategy and investigating what indexes are required when queuing build for shared runners.
- Should we have one partial index for each column?
- Should we leverage partial index?
Proposal
Some previous exploration done when attempting to optimize our query with 1M
pending builds:
MR with all the context required to work on this issue.
gitlabhq_dblab=# \d ci_pending_builds
Table "public.ci_pending_builds"
Column | Type | Collation | Nullable | Default
--------------------------+--------------------------+-----------+----------+-----------------------------------------------
id | bigint | | not null | nextval('ci_pending_builds_id_seq'::regclass)
build_id | bigint | | not null |
project_id | bigint | | not null |
created_at | timestamp with time zone | | not null | now()
protected | boolean | | not null | false
instance_runners_enabled | boolean | | not null | false
namespace_id | bigint | | |
minutes_exceeded | boolean | | not null | false
tag_ids | integer[] | | | '{}'::integer[]
Indexes:
"ci_pending_builds_pkey" PRIMARY KEY, btree (id)
"index_ci_pending_builds_id_on_protected_partial" btree (id) WHERE protected = true
"index_ci_pending_builds_on_build_id" UNIQUE, btree (build_id)
"index_ci_pending_builds_on_namespace_id" btree (namespace_id)
"index_ci_pending_builds_on_project_id" btree (project_id)
"index_ci_pending_builds_on_tag_ids" btree (tag_ids) WHERE cardinality(tag_ids) > 0
Foreign-key constraints:
"fk_fdc0137e4a" FOREIGN KEY (namespace_id) REFERENCES namespaces(id) ON DELETE CASCADE
"fk_rails_480669c3b3" FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
"fk_rails_725a2644a3" FOREIGN KEY (build_id) REFERENCES ci_builds(id) ON DELETE CASCADE
gitlabhq_dblab=# DROP INDEX index_ci_pending_builds_on_build_id;
DROP INDEX
gitlabhq_dblab=# ALTER TABLE ci_pending_builds DROP CONSTRAINT ci_pending_builds_pkey;
ALTER TABLE
gitlabhq_dblab=# CREATE INDEX ci_pending_builds_id ON ci_pending_builds (id);
CREATE INDEX
gitlabhq_dblab=# CREATE INDEX index_ci_pending_builds_on_build_id ON ci_pending_builds (build_id);
CREATE INDEX
gitlabhq_dblab=# \d ci_pending_builds;
Table "public.ci_pending_builds"
Column | Type | Collation | Nullable | Default
--------------------------+--------------------------+-----------+----------+-----------------------------------------------
id | bigint | | not null | nextval('ci_pending_builds_id_seq'::regclass)
build_id | bigint | | not null |
project_id | bigint | | not null |
created_at | timestamp with time zone | | not null | now()
protected | boolean | | not null | false
instance_runners_enabled | boolean | | not null | false
namespace_id | bigint | | |
minutes_exceeded | boolean | | not null | false
tag_ids | integer[] | | | '{}'::integer[]
Indexes:
"ci_pending_builds_id" btree (id)
"index_ci_pending_builds_id_on_protected_partial" btree (id) WHERE protected = true
"index_ci_pending_builds_on_build_id" btree (build_id)
"index_ci_pending_builds_on_namespace_id" btree (namespace_id)
"index_ci_pending_builds_on_project_id" btree (project_id)
"index_ci_pending_builds_on_tag_ids" btree (tag_ids) WHERE cardinality(tag_ids) > 0
Foreign-key constraints:
"fk_fdc0137e4a" FOREIGN KEY (namespace_id) REFERENCES namespaces(id) ON DELETE CASCADE
"fk_rails_480669c3b3" FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
"fk_rails_725a2644a3" FOREIGN KEY (build_id) REFERENCES ci_builds(id) ON DELETE CASCADE
gitlabhq_dblab=# INSERT INTO ci_pending_builds (SELECT * FROM ci_pending_builds);
INSERT 0 8664
gitlabhq_dblab=# INSERT INTO ci_pending_builds (SELECT * FROM ci_pending_builds);
INSERT 0 17328
gitlabhq_dblab=# INSERT INTO ci_pending_builds (SELECT * FROM ci_pending_builds);
INSERT 0 34656
gitlabhq_dblab=# INSERT INTO ci_pending_builds (SELECT * FROM ci_pending_builds);
INSERT 0 69312
gitlabhq_dblab=# INSERT INTO ci_pending_builds (SELECT * FROM ci_pending_builds);
INSERT 0 138624
gitlabhq_dblab=# INSERT INTO ci_pending_builds (SELECT * FROM ci_pending_builds);
INSERT 0 277248
gitlabhq_dblab=# INSERT INTO ci_pending_builds (SELECT * FROM ci_pending_builds);
INSERT 0 554496
gitlabhq_dblab=# SELECT COUNT(*) FROM ci_pending_builds;
count
---------
1108992
(1 row)
gitlabhq_dblab=# SELECT id FROM tags WHERE name = 'docker';
id
------
4060
(1 row)
gitlabhq_dblab=# SELECT COUNT(*) FROM ci_pending_builds WHERE tag_ids && ARRAY[4060]::int[];
count
-------
52480
(1 row)
gitlabhq_dblab=# EXPLAIN (analyze, buffers, timing) WITH "project_builds" AS MATERIALIZED
(SELECT "ci_running_builds"."project_id", COUNT(*) AS running_builds
FROM "ci_running_builds"
WHERE "ci_running_builds"."runner_type" = 1
GROUP BY "ci_running_builds"."project_id")
SELECT "ci_pending_builds"."build_id"
FROM "ci_pending_builds"
LEFT JOIN project_builds ON ci_pending_builds.project_id = project_builds.project_id
WHERE "ci_pending_builds"."instance_runners_enabled" = TRUE
AND "ci_pending_builds"."minutes_exceeded" = FALSE
AND (tag_ids <@ ARRAY[4060]::int[])
ORDER BY COALESCE(project_builds.running_builds, 0) ASC, ci_pending_builds.build_id ASC;
query plan
-------
https://explain.depesz.com/s/uqyp
(1 row)
gitlabhq_dblab=# CREATE INDEX index_ci_pending_builds_on_runners_and_ci_minutes_partial ON ci_pending_builds USING btree (project_id) INCLUDE (id, build_id) WHERE ((instance_runners_enabled = true) AND (minutes_exceeded = false) AND (tag_ids = '{}'::integer[]));
CREATE INDEX
gitlabhq_dblab=# VACUUM ANALYZE ci_pending_builds;
VACUUM
gitlabhq_dblab=# EXPLAIN (analyze, buffers, timing) WITH "project_builds" AS MATERIALIZED
(SELECT "ci_running_builds"."project_id", COUNT(*) AS running_builds
FROM "ci_running_builds"
WHERE "ci_running_builds"."runner_type" = 1
GROUP BY "ci_running_builds"."project_id")
SELECT "ci_pending_builds"."build_id"
FROM "ci_pending_builds"
LEFT JOIN project_builds ON ci_pending_builds.project_id = project_builds.project_id
WHERE "ci_pending_builds"."instance_runners_enabled" = TRUE
AND "ci_pending_builds"."minutes_exceeded" = FALSE
AND (tag_ids <@ ARRAY[4060]::int[])
ORDER BY COALESCE(project_builds.running_builds, 0) ASC, ci_pending_builds.build_id ASC;
query plan
-------
https://explain.depesz.com/s/7qGM
(1 row)
Before index: https://explain.depesz.com/s/uqyp
Planning time | : | 0.906 ms |
---|---|---|
Execution time | : | 1,028.670 ms |
After index https://explain.depesz.com/s/VdxR
Planning time | : | 0.857 ms |
---|---|---|
Execution time | : | 8.436 ms |
cc @grzesiek
Edited by Max Orefice