Add retry attempts to issues backfilling
What does this MR do and why?
Updating the issues table results in multiple failed batches. To prevent that as much as possible, we retry the update of the same sub-batch, up to 3 attempts in total.
Screenshots or screen recordings
Sample extract from logs with some debugging:
Click to expand
update from update_with_retry!
(2376.2ms) UPDATE issues
SET namespace_id = projects.project_namespace_id
FROM (SELECT issues.id AS issue_id, projects.project_namespace_id FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241610 AND "issues"."id" < 92241620) AS projects(issue_id, project_namespace_id)
WHERE issues.id = issue_id
/*application:console,db_config_name:main,line:/lib/gitlab/background_migration/backfill_project_namespace_on_issues.rb:49:in `update_batch'*/
retrying with attempt: 2!!!
update from update_with_retry!
(983.4ms) UPDATE issues
SET namespace_id = projects.project_namespace_id
FROM (SELECT issues.id AS issue_id, projects.project_namespace_id FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241610 AND "issues"."id" < 92241620) AS projects(issue_id, project_namespace_id)
WHERE issues.id = issue_id
/*application:console,db_config_name:main,line:/lib/gitlab/background_migration/backfill_project_namespace_on_issues.rb:49:in `update_batch'*/
Load (274.5ms) SELECT "issues"."id" FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241620 ORDER BY "issues"."id" ASC LIMIT 1 OFFSET 10 /*application:console,db_config_name:main,line:/app/models/concerns/each_batch.rb:81:in `block in each_batch'*/
update from update_with_retry!
(2192.6ms) UPDATE issues
SET namespace_id = projects.project_namespace_id
FROM (SELECT issues.id AS issue_id, projects.project_namespace_id FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241620 AND "issues"."id" < 92241632) AS projects(issue_id, project_namespace_id)
WHERE issues.id = issue_id
/*application:console,db_config_name:main,line:/lib/gitlab/background_migration/backfill_project_namespace_on_issues.rb:49:in `update_batch'*/
Load (249.4ms) SELECT "issues"."id" FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241632 ORDER BY "issues"."id" ASC LIMIT 1 OFFSET 10 /*application:console,db_config_name:main,line:/app/models/concerns/each_batch.rb:81:in `block in each_batch'*/
update from update_with_retry!
(1011.6ms) UPDATE issues
SET namespace_id = projects.project_namespace_id
FROM (SELECT issues.id AS issue_id, projects.project_namespace_id FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241632 AND "issues"."id" < 92241642) AS projects(issue_id, project_namespace_id)
WHERE issues.id = issue_id
/*application:console,db_config_name:main,line:/lib/gitlab/background_migration/backfill_project_namespace_on_issues.rb:49:in `update_batch'*/
Load (198.5ms) SELECT "issues"."id" FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241642 ORDER BY "issues"."id" ASC LIMIT 1 OFFSET 10 /*application:console,db_config_name:main,line:/app/models/concerns/each_batch.rb:81:in `block in each_batch'*/
update from update_with_retry!
(2371.1ms) UPDATE issues
SET namespace_id = projects.project_namespace_id
FROM (SELECT issues.id AS issue_id, projects.project_namespace_id FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241642 AND "issues"."id" < 92241652) AS projects(issue_id, project_namespace_id)
WHERE issues.id = issue_id
/*application:console,db_config_name:main,line:/lib/gitlab/background_migration/backfill_project_namespace_on_issues.rb:49:in `update_batch'*/
retrying with attempt: 2!!!
update from update_with_retry!
(568.1ms) UPDATE issues
SET namespace_id = projects.project_namespace_id
FROM (SELECT issues.id AS issue_id, projects.project_namespace_id FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241642 AND "issues"."id" < 92241652) AS projects(issue_id, project_namespace_id)
WHERE issues.id = issue_id
/*application:console,db_config_name:main,line:/lib/gitlab/background_migration/backfill_project_namespace_on_issues.rb:49:in `update_batch'*/
Load (208.2ms) SELECT "issues"."id" FROM "issues" INNER JOIN projects ON projects.id = issues.project_id WHERE "issues"."id" BETWEEN 92241610 AND 92246241 AND "issues"."namespace_id" IS NULL AND "issues"."id" >= 92241652 ORDER BY "issues"."id" ASC LIMIT 1 OFFSET 10 /*application:console,db_config_name:main,line:/app/models/concerns/each_batch.rb:81:in `block in each_batch'*/
A bit more context: I assume that around 2022-10-13 the GIN index on the issues title was modified to use fastupdate=false, which seems to have resulted in fewer failed batched jobs:
gitlabhq_dblab=# select date(created_at), count(*), avg(batch_size) from batched_background_migration_jobs where created_at > '2022-10-01' AND batched_background_migration_id = 223 and status = 2 group by date(created_at);
date | count | avg
------------+-------+-----------------------
2022-10-01 | 21 | 4239.8571428571428571
2022-10-02 | 18 | 4201.9444444444444444
2022-10-03 | 95 | 3787.0000000000000000
2022-10-04 | 74 | 3870.1351351351351351
2022-10-05 | 126 | 3681.7063492063492063
2022-10-06 | 96 | 3214.0625000000000000
2022-10-07 | 22 | 6010.5454545454545455
2022-10-08 | 28 | 3762.9285714285714286
2022-10-09 | 21 | 3680.4761904761904762
2022-10-10 | 73 | 3332.1506849315068493
2022-10-11 | 109 | 4020.4036697247706422
2022-10-12 | 92 | 3097.5217391304347826
2022-10-13 | 11 | 3206.2727272727272727
2022-10-14 | 8 | 3089.3750000000000000
2022-10-15 | 10 | 4187.4000000000000000
2022-10-16 | 4 | 5499.2500000000000000
2022-10-17 | 12 | 3615.3333333333333333
2022-10-18 | 14 | 3037.4285714285714286
2022-10-19 | 8 | 3009.8750000000000000
2022-10-20 | 1 | 4043.0000000000000000
(20 rows)
How to set up and validate locally
Numbered steps to set up and validate the change are strongly suggested.
MR acceptance checklist
This checklist encourages us to confirm any changes have been analyzed to reduce risks in quality, performance, reliability, security, and maintainability.
- I have evaluated the MR acceptance checklist for this MR.
Edited by Alexandru Croitor