Slow runners admin page when there millions of jobs on the instance
While recording a debug video for the client I noticed on my dev instance that loading the admin page takes a while because of this query:
SELECT "ci_runners"."id",
Count(*) AS count
FROM "ci_runners"
JOIN lateral
(
select 1
FROM "ci_builds"
WHERE "ci_builds"."type" = 'Ci::Build'
AND "ci_builds"."runner_id" = "ci_runners"."id" limit 1001) builds_with_limit
ON true
WHERE "ci_runners"."id" IN (10016,
10015,
10014,
10013,
10012,
10011,
10010,
10009,
10008,
10007,
10006,
10005,
10004,
10003,
10002,
10001,
10000,
9999,
9998,
9997)
GROUP BY "ci_runners"."id"
/*application:web,correlation_id:01HARYTZSGY74ES5AK78WX0ZMY,endpoint_id:GraphqlController#execute,db_config_name:ci,line:/app/graphql/resolvers/ci/runner_job_count_resolver.rb:44:in `calculate_job_count_per_runner'*/
Stacktrace:
[
"lib/peek/views/active_record.rb:55:in `block in setup_subscribers'",
"app/graphql/resolvers/ci/runner_job_count_resolver.rb:44:in `calculate_job_count_per_runner'",
"app/graphql/resolvers/ci/runner_job_count_resolver.rb:22:in `block in resolve'",
"lib/gitlab/graphql/tracers/timer_tracer.rb:20:in `trace'",
"lib/gitlab/graphql/tracers/logger_tracer.rb:14:in `trace'",
"lib/gitlab/graphql/tracers/metrics_tracer.rb:13:in `trace'",
"lib/gitlab/graphql/tracers/application_context_tracer.rb:23:in `trace'",
"lib/gitlab/graphql/tracers/timer_tracer.rb:20:in `trace'",
"lib/gitlab/graphql/tracers/logger_tracer.rb:14:in `trace'",
"lib/gitlab/graphql/tracers/metrics_tracer.rb:13:in `trace'",
"lib/gitlab/graphql/tracers/application_context_tracer.rb:23:in `trace'",
"lib/gitlab/graphql/tracers/timer_tracer.rb:20:in `trace'",
"lib/gitlab/graphql/tracers/logger_tracer.rb:14:in `trace'",
"lib/gitlab/graphql/tracers/metrics_tracer.rb:13:in `trace'",
"lib/gitlab/graphql/tracers/application_context_tracer.rb:23:in `trace'",
"app/graphql/gitlab_schema.rb:48:in `multiplex'",
"app/controllers/graphql_controller.rb:212:in `execute_query'",
"app/controllers/graphql_controller.rb:67:in `execute'",
"ee/lib/gitlab/ip_address_state.rb:10:in `with'",
"ee/app/controllers/ee/application_controller.rb:45:in `set_current_ip_address'",
"app/controllers/application_controller.rb:520:in `set_current_admin'",
"lib/gitlab/session.rb:11:in `with_session'",
"app/controllers/application_controller.rb:511:in `set_session_storage'",
"app/controllers/application_controller.rb:502:in `set_locale'",
"app/controllers/application_controller.rb:495:in `set_current_context'",
"lib/gitlab/jira/middleware.rb:19:in `call'",
"lib/gitlab/database/query_analyzer.rb:37:in `within'"
]
Explain:
EXPLAIN ANALYZE SELECT "ci_runners"."id", COUNT(*) AS count FROM "ci_runners" JOIN LATERAL (SELECT 1 FROM "ci_builds" WHERE "ci_builds"."type" = 'Ci::Build' AND "ci_builds"."runner_id" = "ci_runners"."id" LIMIT 1001) builds_with_limit ON true WHERE "ci_runners"."id" IN (10016, 10015, 10014, 10013, 10012, 10011, 10010, 10009, 10008, 10007, 10006, 10005, 10004, 10003, 10002, 10001, 10000, 9999, 9998, 9997) GROUP BY "ci_runners"."id" /*application:web,correlation_id:01HARZKDMKQ3XKXWVDG8GBH3RT,endpoint_id:GraphqlController#execute,db_config_name:ci,line:/app/graphql/resolvers/ci/runner_job_count_resolver.rb:44:in `calculate_job_count_per_runner'*/;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=0.84..2942.10 rows=20 width=12) (actual time=1992.455..1992.456 rows=0 loops=1)
Group Key: ci_runners.id
-> Nested Loop (cost=0.84..2841.80 rows=20020 width=4) (actual time=1992.454..1992.455 rows=0 loops=1)
-> Index Only Scan using ci_runners_pkey on ci_runners (cost=0.29..24.12 rows=20 width=4) (actual time=0.054..0.176 rows=20 loops=1)
Index Cond: (id = ANY ('{10016,10015,10014,10013,10012,10011,10010,10009,10008,10007,10006,10005,10004,10003,10002,10001,10000,9999,9998,9997}'::integer[]))
Heap Fetches: 20
-> Limit (cost=0.56..120.86 rows=1001 width=4) (actual time=99.613..99.613 rows=0 loops=20)
-> Index Only Scan using index_ci_builds_on_status_and_type_and_runner_id on ci_builds (cost=0.56..51742.19 rows=430503 width=4) (actual time=99.611..99.611 rows=0 loops=20)
Index Cond: ((type = 'Ci::Build'::text) AND (runner_id = ci_runners.id))
Heap Fetches: 0
Planning Time: 7.985 ms
Execution Time: 1992.486 ms
(12 rows)
If I understand correctly this query is responsible for these numbers:
Getting this jobs count
for every runner is about 100ms
in my case, and we have 20 of them on the page, hence 2s
time
Edited by Vladimir Shushlin