Add and backfill runner_type and sharding_key_id columns to ci_runner_machines

In order to have the automated trigger in the partitioned table copy valid data into new records, we need the source table to already be populated with the sharding key. We also need runner_type since the partitioned table will be partitioned by it.

Context: !166308 (comment 2124543673)

Proposed plan

!168657 (merged): Update the app to populate runner_type and sharding_key_id for new runners

Add and backfill the sharding_key_id column with a BBM in %17.5

NOTE: The first attempt caused an S2 incident which can be reproduced and fixed with the following patch:

Patch

diff --git a/app/models/ci/runner.rb b/app/models/ci/runner.rb
index 750748217118..90aed72eb560 100644
--- a/app/models/ci/runner.rb
+++ b/app/models/ci/runner.rb
@@ -515,15 +515,17 @@ def compute_token_expiration
       end
     end
 
-    def ensure_manager(system_xid, &blk)
+    def ensure_manager(system_xid)
       # rubocop: disable Performance/ActiveRecordSubtransactionMethods -- This is used only in API endpoints outside of transactions
-      RunnerManager.safe_find_or_create_by!(
-        runner_id: id,
-        runner_type: runner_type,
-        sharding_key_id: sharding_key_id,
-        system_xid: system_xid.to_s,
-        &blk)
+      runner_manager = RunnerManager.safe_find_or_create_by!(runner_id: id, system_xid: system_xid.to_s) do |m|
+        m.runner_type = runner_type
+        m.sharding_key_id = sharding_key_id
+      end
       # rubocop: enable Performance/ActiveRecordSubtransactionMethods
+
+      return runner_manager if runner_manager.runner_type && runner_manager.sharding_key_id
+
+      runner_manager.tap { |m| m.update!(runner_type: runner_type, sharding_key_id: sharding_key_id) }
     end
 
     def registration_available?
diff --git a/spec/lib/api/ci/helpers/runner_spec.rb b/spec/lib/api/ci/helpers/runner_spec.rb
index 8a3afbfac630..fd48ed6ea9f0 100644
--- a/spec/lib/api/ci/helpers/runner_spec.rb
+++ b/spec/lib/api/ci/helpers/runner_spec.rb
@@ -89,20 +89,38 @@
     let_it_be(:group) { create(:group) }
 
     let(:runner) { create(:ci_runner, :group, token: 'foo', groups: [group]) }
-    let(:runner_manager) { create(:ci_runner_machine, runner: runner, system_xid: 'bar', contacted_at: 1.hour.ago) }
 
     subject(:current_runner_manager) { helper.current_runner_manager }
 
     context 'when runner manager already exists' do
+      let!(:existing_runner_manager) do
+        create(:ci_runner_machine, runner: runner, system_xid: 'bar', contacted_at: 1.hour.ago)
+      end
+
       before do
-        allow(helper).to receive(:params).and_return(token: runner.token, system_id: runner_manager.system_xid)
+        allow(helper).to receive(:params).and_return(token: runner.token, system_id: existing_runner_manager.system_xid)
       end
 
-      it { is_expected.to eq(runner_manager) }
+      it { is_expected.to eq(existing_runner_manager) }
 
       it 'does not update the contacted_at field' do
         expect(current_runner_manager.contacted_at).to eq 1.hour.ago
       end
+
+      context 'when a runner manager with nil runner_type and sharding_key_id already exists' do
+        before do
+          existing_runner_manager.update_columns(runner_type: nil, sharding_key_id: nil)
+        end
+
+        it 'reuses and updates existing runner manager', :aggregate_failures do
+          expect { current_runner_manager }
+            .to change { existing_runner_manager.reload.runner_type }.from(nil).to(runner.runner_type)
+            .and change { existing_runner_manager.reload.sharding_key_id }.from(nil).to(runner.sharding_key_id)
+
+          expect(current_runner_manager).not_to be_nil
+          expect(current_runner_manager.contacted_at).to eq 1.hour.ago
+        end
+      end
     end
 
     context 'when runner manager cannot be found' do
@@ -112,11 +130,17 @@
         expect { current_runner_manager }.to change { Ci::RunnerManager.count }.by(1)
 
         expect(current_runner_manager).not_to be_nil
+        current_runner_manager.reload
+
         expect(current_runner_manager.system_xid).to eq('new_system_id')
         expect(current_runner_manager.contacted_at).to be_nil
         expect(current_runner_manager.runner).to eq(runner)
         expect(current_runner_manager.runner_type).to eq(runner.runner_type)
         expect(current_runner_manager.sharding_key_id).to eq(runner.sharding_key_id)
+
+        # Verify that a second call doesn't raise an error
+        expect { helper.current_runner_manager }.not_to raise_error
+        expect(Ci::RunnerManager.count).to eq(1)
       end
 
       it 'creates a new <legacy> runner manager if system_id is not specified', :aggregate_failures do

Finalize BBM in the milestone (%17.6) following the next required stop (%17.5)

Edited Oct 21, 2024 by Pedro Pombeiro