Skip to content
Snippets Groups Projects
Verified Commit 501a6f1f authored by Ahmed Hemdan's avatar Ahmed Hemdan :two:
Browse files

Introduce database metric batch average operation

Changelog: added
parent 6ab6b87a
No related branches found
No related tags found
1 merge request: !89913 Introduce database metric batch average operation
# frozen_string_literal: true
# For large tables, PostgreSQL can take a long time to count rows due to MVCC.
# Implements a distinct and ordinary batch counter
# Implements:
# - distinct batch counter
# - ordinary batch counter
# - sum batch counter
# - average batch counter
# Needs indexes on the column below to calculate max, min and range queries
# For larger tables, just use a higher batch_size with index optimization
#
......@@ -22,6 +26,8 @@
# batch_distinct_count(Project.group(:visibility_level), :creator_id)
# batch_sum(User, :sign_in_count)
# batch_sum(Issue.group(:state_id), :weight)
# batch_average(Ci::Pipeline, :duration)
# batch_average(MergeTrain.group(:status), :duration)
module Gitlab
module Database
module BatchCount
......@@ -37,6 +43,10 @@ def batch_sum(relation, column, batch_size: nil, start: nil, finish: nil)
BatchCounter.new(relation, column: nil, operation: :sum, operation_args: [column]).count(batch_size: batch_size, start: start, finish: finish)
end
# Runs BatchCounter's :average operation for `column` over `relation`.
#
# relation   - the relation to aggregate, e.g. Ci::Pipeline or
#              MergeTrain.group(:status)
# column     - the column to average; handed to BatchCounter as the single
#              operation argument
# batch_size - optional batch size, forwarded unchanged (nil when omitted)
# start      - optional lower bound, forwarded unchanged (nil when omitted)
# finish     - optional upper bound, forwarded unchanged (nil when omitted)
#
# Returns whatever BatchCounter#count returns for this configuration.
def batch_average(relation, column, batch_size: nil, start: nil, finish: nil)
  average_counter = BatchCounter.new(
    relation,
    column: nil,
    operation: :average,
    operation_args: [column]
  )

  average_counter.count(batch_size: batch_size, start: start, finish: finish)
end
# Include the module into its own singleton class so the batch_* helpers can
# also be called directly on the module (e.g. BatchCount.batch_average).
class << self
include BatchCount
end
......
......@@ -6,6 +6,7 @@ class BatchCounter
# NOTE(review): presumably the value reported when a batch operation cannot be
# completed; its use is not visible in this excerpt, so confirm against callers.
FALLBACK = -1
# :count batch sizes at or below this value are rejected by unwanted_configuration?.
MIN_REQUIRED_BATCH_SIZE = 1_250
# Default batch size for :sum; unwanted_configuration? rejects smaller :sum batch sizes.
DEFAULT_SUM_BATCH_SIZE = 1_000
# Default batch size for :average; unwanted_configuration? rejects smaller :average batch sizes.
DEFAULT_AVERAGE_BATCH_SIZE = 1_000
# unwanted_configuration? rejects a run once (finish - start) / batch_size reaches this value.
MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
# NOTE(review): presumably the accepted values for the `mode` argument; verify where it is checked.
ALLOWED_MODES = [:itself, :distinct].freeze
......@@ -26,6 +27,7 @@ def initialize(relation, column: nil, operation: :count, operation_args: nil)
# Tells whether the requested batching parameters must be rejected.
#
# finish     - upper bound of the range being processed
# batch_size - number of ids covered by a single batch
# start      - lower bound of the range being processed
#
# Returns true when any of the following holds, checked in this order:
# - a :count operation with batch_size at or below MIN_REQUIRED_BATCH_SIZE
# - a :sum operation with batch_size below DEFAULT_SUM_BATCH_SIZE
# - an :average operation with batch_size below DEFAULT_AVERAGE_BATCH_SIZE
# - (finish - start) / batch_size reaches MAX_ALLOWED_LOOPS
# - start is not strictly below finish
# Returns false otherwise.
def unwanted_configuration?(finish, batch_size, start)
  return true if @operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE
  return true if @operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE
  return true if @operation == :average && batch_size < DEFAULT_AVERAGE_BATCH_SIZE

  # The loop estimate is only computed once the per-operation checks passed.
  estimated_loops = (finish - start) / batch_size
  return true if estimated_loops >= MAX_ALLOWED_LOOPS

  start >= finish
end
......@@ -92,6 +94,7 @@ def build_relation_batch(start, finish, mode)
# Chooses the default batch size for the given mode/operation pair.
#
# mode      - batching mode; only :distinct is treated specially
# operation - the operation being run; :sum and :average have dedicated
#             defaults that apply regardless of mode
#
# Returns DEFAULT_SUM_BATCH_SIZE for :sum, DEFAULT_AVERAGE_BATCH_SIZE for
# :average, otherwise DEFAULT_DISTINCT_BATCH_SIZE in :distinct mode and
# DEFAULT_BATCH_SIZE for every other mode.
def batch_size_for_mode_and_operation(mode, operation)
  case operation
  when :sum
    DEFAULT_SUM_BATCH_SIZE
  when :average
    DEFAULT_AVERAGE_BATCH_SIZE
  else
    mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
  end
end
......
......@@ -18,7 +18,7 @@ class DatabaseMetric < BaseMetric
UnimplementedOperationError = Class.new(StandardError) # rubocop:disable UsageData/InstrumentationSuperclass
class << self
IMPLEMENTED_OPERATIONS = %i(count distinct_count estimate_batch_distinct_count sum).freeze
IMPLEMENTED_OPERATIONS = %i(count distinct_count estimate_batch_distinct_count sum average).freeze
private_constant :IMPLEMENTED_OPERATIONS
......
......@@ -104,6 +104,15 @@ def sum(relation, column, batch_size: nil, start: nil, finish: nil)
end
end
# Computes a batched average through Gitlab::Database::BatchCount while
# running inside with_duration.
#
# relation   - relation to aggregate
# column     - column to average
# batch_size - optional batch size, forwarded to batch_average
# start      - optional lower bound, forwarded to batch_average
# finish     - optional upper bound, forwarded to batch_average
#
# Returns the result of with_duration (presumably the value of its block,
# defined outside this excerpt). When the query raises
# ActiveRecord::StatementInvalid, the error is handed to
# Gitlab::ErrorTracking.track_and_raise_for_dev_exception and the block
# evaluates to FALLBACK instead.
def average(relation, column, batch_size: nil, start: nil, finish: nil)
  batch_options = { batch_size: batch_size, start: start, finish: finish }

  with_duration do
    Gitlab::Database::BatchCount.batch_average(relation, column, **batch_options)
  rescue ActiveRecord::StatementInvalid => e
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
    FALLBACK
  end
end
# We don't support batching with histograms.
# Please avoid using this method on large tables.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/323949.
......
......@@ -384,4 +384,58 @@ def calculate_batch_size(batch_size)
subject { described_class.method(:batch_sum) }
end
end
# Specs for batch_average. The `before` hook gives every Issue row a weight of
# 2, so each value-returning example expects an average of exactly 2.
describe '#batch_average' do
let(:model) { Issue }
let(:column) { :weight }
# Use one uniform weight so the expected average is a fixed, known value.
before do
Issue.update_all(weight: 2)
end
it 'returns the average of values in the given column' do
expect(described_class.batch_average(model, column)).to eq(2)
end
# The column may also be passed as an Arel attribute instead of a symbol.
it 'works when given an Arel column' do
expect(described_class.batch_average(model, model.arel_table[column])).to eq(2)
end
it 'works with a batch size of 50K' do
expect(described_class.batch_average(model, column, batch_size: 50_000)).to eq(2)
end
# Explicit bounds taken from the table's current minimum and maximum id.
it 'works with start and finish provided' do
expect(described_class.batch_average(model, column, start: model.minimum(:id), finish: model.maximum(:id))).to eq(2)
end
# Stubs the model.select(...).public_send(...) chain to return a relation
# double, then expects its `where` to be called with the id range
# min_id..batch_end_id, where batch_end_id is derived from
# DEFAULT_AVERAGE_BATCH_SIZE through calculate_batch_size (a helper defined
# elsewhere in this spec file).
it "defaults the batch size to #{Gitlab::Database::BatchCounter::DEFAULT_AVERAGE_BATCH_SIZE}" do
min_id = model.minimum(:id)
relation = instance_double(ActiveRecord::Relation)
allow(model).to receive_message_chain(:select, public_send: relation)
batch_end_id = min_id + calculate_batch_size(Gitlab::Database::BatchCounter::DEFAULT_AVERAGE_BATCH_SIZE)
expect(relation).to receive(:where).with("id" => min_id..batch_end_id).and_return(double(send: 1))
described_class.batch_average(model, column)
end
# The shared example groups below are defined outside this excerpt; the
# subject/let declarations supply the inputs they use.
it_behaves_like 'when a transaction is open' do
subject { described_class.batch_average(model, column) }
end
it_behaves_like 'disallowed configurations', :batch_average do
let(:args) { [model, column] }
let(:default_batch_size) { Gitlab::Database::BatchCounter::DEFAULT_AVERAGE_BATCH_SIZE }
# One below the default, i.e. a size that unwanted_configuration? rejects for :average.
let(:small_batch_size) { Gitlab::Database::BatchCounter::DEFAULT_AVERAGE_BATCH_SIZE - 1 }
end
it_behaves_like 'when batch fetch query is canceled' do
let(:mode) { :itself }
let(:operation) { :average }
let(:operation_args) { [column] }
# Passes the Method object itself rather than a call result.
subject { described_class.method(:batch_average) }
end
end
end
......@@ -259,6 +259,37 @@
end
end
# Specs for the `average` helper, which wraps
# Gitlab::Database::BatchCount.batch_average. The relation is a plain double
# and batch_average is stubbed, so these examples run no queries.
describe '#average' do
let(:relation) { double(:relation) }
# Checks that all arguments are forwarded unchanged and the stubbed result is returned.
it 'returns the average when operation succeeds' do
allow(Gitlab::Database::BatchCount)
.to receive(:batch_average)
.with(relation, :column, batch_size: 100, start: 2, finish: 3)
.and_return(1)
expect(described_class.average(relation, :column, batch_size: 100, start: 2, finish: 3)).to eq(1)
end
# with_duration is replaced by a message expectation with no and_yield or
# and_call_original, so this example only verifies that it is invoked.
it 'records duration' do
expect(described_class).to receive(:with_duration)
allow(Gitlab::Database::BatchCount).to receive(:batch_average).and_return(1)
described_class.average(relation, :column)
end
# Inputs for the 'failing hardening method' shared examples, which are
# defined outside this excerpt.
context 'when operation fails' do
subject { described_class.average(relation, :column) }
let(:fallback) { 15 }
let(:failing_class) { Gitlab::Database::BatchCount }
let(:failing_method) { :batch_average }
it_behaves_like 'failing hardening method'
end
end
describe '#histogram' do
let_it_be(:projects) { create_list(:project, 3) }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment