Skip to content
Snippets Groups Projects
Commit a8ae13bb authored by Krasimir Angelov's avatar Krasimir Angelov :two:
Browse files

Adjust BBM job efficiency when only part of the batch was affected

When the data distribution is uneven, e.g. a migration iterates over the
whole table but updates only the rows matching a filter, the batch optimizer
can get confused and scale the batch size up to an unsafe level.

This adjusts the job efficiency for such batches in order to avoid batch
size changes.

#351786
parent 1e2dcb5f
No related branches found
No related tags found
1 merge request!85152Adjust BBM job efficiency when only part of the batch was affected
......@@ -8,6 +8,7 @@ module BackgroundMigration
class BatchedJob < SharedModel
include EachBatch
include FromUnion
include Gitlab::Utils::StrongMemoize
self.table_name = :batched_background_migration_jobs
......@@ -103,11 +104,24 @@ class BatchedJob < SharedModel
# Ratio of the job's run time to the migration interval, adjusted for the
# share of the batch that was actually modified.
#
# Returns nil unless the job succeeded, has both timestamps, and has
# affected-rows information. When the number of affected rows differs from
# the batch size, the raw ratio is scaled by batch_size / affected_rows and
# capped at the upper bound of BatchOptimizer::TARGET_EFFICIENCY.
def time_efficiency
  return unless succeeded?
  return unless finished_at && started_at
  return unless affected_rows

  duration = finished_at - started_at

  # TODO: Switch to individual job interval (prereq: https://gitlab.com/gitlab-org/gitlab/-/issues/328801)
  efficiency = duration.to_f / batched_migration.interval

  # The whole batch was affected, nothing to adjust
  return efficiency if batch_size == affected_rows

  # Cap so that a job with a higher adjusted efficiency
  # does not result in decreasing the batch size
  efficiency_cap = BatchOptimizer::TARGET_EFFICIENCY.max

  # No rows were touched: the scaling factor would be Infinity, and
  # 0.0 * Infinity is NaN, so answer with the cap directly.
  return efficiency_cap if affected_rows.zero?

  # Only part of the work was done, adjust efficiency
  adjustment = batch_size.to_f / affected_rows

  [efficiency_cap, efficiency * adjustment].min
end
def can_split?(exception)
......@@ -159,6 +173,16 @@ def split_and_retry!
end
end
end
private
# Total of all row counts stored under metrics['affected_rows'], memoized
# via strong_memoize. Yields nil when the metrics have no such entry.
def affected_rows
  strong_memoize(:affected_rows) do
    counts_by_key = metrics['affected_rows']
    next unless counts_by_key

    # Each value is a list of per-update row counts; add them all up
    counts_by_key.each_value.inject(0) { |total, counts| total + counts.sum }
  end
end
end
end
end
......
......@@ -301,7 +301,7 @@
end
describe '#time_efficiency' do
subject { job.time_efficiency }
subject(:efficiency) { job.time_efficiency }
let(:migration) { build(:batched_background_migration, interval: 120.seconds) }
let(:job) { build(:batched_background_migration_job, :succeeded, batched_migration: migration) }
......@@ -310,7 +310,7 @@
let(:job) { build(:batched_background_migration_job, :running) }
it 'returns nil' do
expect(subject).to be_nil
expect(efficiency).to be_nil
end
end
......@@ -318,7 +318,7 @@
it 'returns nil' do
job.started_at = Time.zone.now
expect(subject).to be_nil
expect(efficiency).to be_nil
end
end
......@@ -326,26 +326,76 @@
it 'returns nil' do
job.finished_at = Time.zone.now
expect(subject).to be_nil
expect(efficiency).to be_nil
end
end
context 'when job has finished' do
it 'returns ratio of duration to interval, here: 0.5' do
freeze_time do
job.started_at = Time.zone.now - migration.interval / 2
job.finished_at = Time.zone.now
let(:now) { Time.zone.now }
let(:batch_size) { 12 }
before do
job.batch_size = batch_size
job.finished_at = now
end
expect(subject).to eq(0.5)
# No 'affected_rows' metrics are assigned in this context
# (presumably the factory leaves them unset — TODO confirm),
# so time_efficiency has nothing to adjust against and bails out.
context 'when there is no information for affected records' do
it 'returns nil' do
freeze_time do
# finished_at and batch_size are already assigned by the enclosing `before`
job.started_at = Time.zone.now - migration.interval
expect(efficiency).to be_nil
end
end
end
it 'returns ratio of duration to interval, here: 1' do
freeze_time do
context 'when the whole batch is updated' do
  before do
    # A single update that touched every row of the batch
    job.metrics = { 'affected_rows' => { 'update_all' => [batch_size] } }
  end

  it 'returns ratio of duration to interval, adjusted for work done (here 0.5)' do
    job.started_at = now - migration.interval / 2

    # all the work done for 50% of the interval time, efficiency is 0.5
    expect(efficiency).to be_within(0.001).of(0.5)
  end

  it 'returns ratio of duration to interval, adjusted for work done (here 1.0)' do
    job.started_at = now - migration.interval

    # all the work done for 100% of the interval time, efficiency is 1.0
    expect(efficiency).to be_within(0.001).of(1.0)
  end
end
context 'when part of the batch is updated' do
  before do
    # Two updates of a quarter of the batch each: half of the batch is affected
    tuples = batch_size / 4
    job.metrics = { 'affected_rows' => { 'update_all' => [tuples, tuples] } }
  end

  it 'returns the adjusted ratio capped at 0.95 when the job takes half the interval' do
    job.started_at = now - migration.interval / 2

    # half the work done for 50% of the interval time, efficiency is capped to 0.95
    expect(efficiency).to be_within(0.001).of(0.95)
  end

  it 'returns ratio of duration to interval, adjusted for work done (here 0.5)' do
    job.started_at = now - migration.interval / 4

    # half the work done for 25% of the interval time, efficiency is 0.5
    expect(efficiency).to be_within(0.001).of(0.5)
  end

  it 'returns the adjusted ratio capped at 0.95 when the job takes the whole interval' do
    job.started_at = now - migration.interval

    # half the work done for 100% of the interval time, efficiency is capped to 0.95
    expect(efficiency).to be_within(0.001).of(0.95)
  end
end
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment