Optimize notes counters in usage data
Optimization
```sql
CREATE INDEX index_notes_on_author_id_and_created_at ON public.notes USING btree (author_id, created_at)
```

The query has been executed. Duration: 39.040 min

```sql
DROP INDEX index_notes_on_author_id
```

The query has been executed. Duration: 35.500 min (in the database-lab)
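
In the application the same index swap ships as a regular migration (see the Migration Output section below). A minimal sketch of what such a migration looks like with GitLab's concurrent index helpers, with the class name and columns taken from that output; this is an illustration, not the exact file:

```ruby
# frozen_string_literal: true

# Illustrative sketch only: replaces index_notes_on_author_id with a composite
# (author_id, created_at) index using GitLab's concurrent index helpers.
class AddIndexOnAuthorIdAndIdAndCreatedAtToNotes < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  def up
    add_concurrent_index :notes, [:author_id, :created_at]
    remove_concurrent_index :notes, [:author_id]
  end

  def down
    add_concurrent_index :notes, [:author_id]
    remove_concurrent_index :notes, [:author_id, :created_at]
  end
end
```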
Locations

https://gitlab.com/gitlab-org/gitlab/-/blob/master/ee/lib/ee/gitlab/usage_data.rb
```ruby
# All-time distinct note authors
time_period = { }
distinct_count(::Note.where(time_period), :author_id),

# Distinct note authors in the last 28 days
time_period = { created_at: 28.days.ago..Time.current }
distinct_count(::Note.where(time_period), :author_id),
```
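
The MIN, MAX, and COUNT(DISTINCT ...) statements shown below come from the batched way the distinct count walks the author_id range. A minimal sketch of that idea, assuming a fixed batch size of 1250; the method name and batching details are illustrative, not the actual helper:

```ruby
# Minimal illustration of a batched distinct count over author_id.
# Batches are disjoint id ranges, so each distinct author is counted exactly once.
def batched_distinct_count(relation, column, batch_size: 1250)
  start_id  = relation.minimum(column) # SELECT MIN("notes"."author_id") ...
  finish_id = relation.maximum(column) # SELECT MAX("notes"."author_id") ...
  return 0 unless start_id && finish_id

  total = 0
  (start_id..finish_id).step(batch_size) do |batch_start|
    batch_end = batch_start + batch_size - 1
    # SELECT COUNT(DISTINCT "notes"."author_id") ... AND "notes"."author_id" BETWEEN ...
    total += relation.where(column => batch_start..batch_end).distinct.count(column)
  end
  total
end

# Usage, mirroring the call site above:
# batched_distinct_count(::Note.where(created_at: 28.days.ago..Time.current), :author_id)
```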
```sql
SELECT MIN("notes"."author_id") FROM "notes" WHERE "notes"."created_at" BETWEEN '2020-02-13 18:23:37.245832' AND '2020-03-12 18:23:37.245906'
```

Before: https://explain.depesz.com/s/LXh8
After: https://explain.depesz.com/s/gjdNa

```sql
SELECT MAX("notes"."author_id") FROM "notes" WHERE "notes"."created_at" BETWEEN '2020-02-13 18:23:37.245832' AND '2020-03-12 18:23:37.245906'
```

Before: https://explain.depesz.com/s/1rHj
After: https://explain.depesz.com/s/NSZH

```sql
SELECT COUNT(DISTINCT "notes"."author_id") FROM "notes" WHERE "notes"."created_at" BETWEEN '2020-02-13 18:23:37.245832' AND '2020-03-12 18:23:37.245906' AND "notes"."author_id" BETWEEN 2000000 AND 2001250
```

Before: https://explain.depesz.com/s/LWRP
After: https://explain.depesz.com/s/3bvW (>450 msecs cold on the database lab)

Timing: 5.5 million users / 1250 per batch * 450 msecs => roughly 33 minutes. Note that notes has 230 million rows, which makes this counter challenging.
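
As a quick sanity check of that estimate, using only the numbers quoted above (5.5 million users, batches of 1250 author ids, ~450 msecs per cold batch):

```ruby
batches = 5_500_000 / 1250   # => 4400 author_id batches
seconds = batches * 0.450    # => 1980.0 seconds at ~450 msecs per cold batch
minutes = seconds / 60       # => 33.0 minutes
```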
Migration Output
```
$ VERBOSE=true bundle exec rake db:migrate:up VERSION=20200310133822
== 20200310133822 AddIndexOnAuthorIdAndIdAndCreatedAtToNotes: migrating =======
-- transaction_open?()
-> 0.0000s
-- index_exists?(:notes, [:author_id, :created_at], {:algorithm=>:concurrently})
-> 0.0044s
-- execute("SET statement_timeout TO 0")
-> 0.0003s
-- add_index(:notes, [:author_id, :created_at], {:algorithm=>:concurrently})
-> 0.0267s
-- execute("RESET ALL")
-> 0.0012s
-- transaction_open?()
-> 0.0000s
-- index_exists?(:notes, [:author_id], {:algorithm=>:concurrently})
-> 0.0084s
-- execute("SET statement_timeout TO 0")
-> 0.0003s
-- remove_index(:notes, {:algorithm=>:concurrently, :column=>[:author_id]})
-> 0.0066s
-- execute("RESET ALL")
-> 0.0003s
== 20200310133822 AddIndexOnAuthorIdAndIdAndCreatedAtToNotes: migrated (0.0489s)
$ VERBOSE=true bundle exec rake db:migrate:down VERSION=20200310133822
== 20200310133822 AddIndexOnAuthorIdAndIdAndCreatedAtToNotes: reverting =======
-- transaction_open?()
-> 0.0000s
-- index_exists?(:notes, [:author_id], {:algorithm=>:concurrently})
-> 0.0044s
-- execute("SET statement_timeout TO 0")
-> 0.0003s
-- add_index(:notes, [:author_id], {:algorithm=>:concurrently})
-> 0.0285s
-- execute("RESET ALL")
-> 0.0012s
-- transaction_open?()
-> 0.0000s
-- index_exists?(:notes, [:author_id, :created_at], {:algorithm=>:concurrently})
-> 0.0136s
-- execute("SET statement_timeout TO 0")
-> 0.0011s
-- remove_index(:notes, {:algorithm=>:concurrently, :column=>[:author_id, :created_at]})
-> 0.0218s
-- execute("RESET ALL")
-> 0.0012s
== 20200310133822 AddIndexOnAuthorIdAndIdAndCreatedAtToNotes: reverted (0.0728s)
```
Main issue #208890 (closed)