Batched background migration NullifyOrphanRunnerIdOnCiBuilds is failing to complete
Summary
We have a customer who upgraded along the following upgrade path:
14.4.1 => 14.9.5 => 14.10.5 => 15.0.5 => 15.4.6 => 15.11.4
They initially had some issues with the background migrations, as I believe they might not have waited for them to complete before jumping to a specific GitLab version.
We managed to fix most of them, but they are still encountering an issue with `NullifyOrphanRunnerIdOnCiBuilds`. Looking at the logs, it appears to be failing with:
"exception.class": "ActiveRecord::StatementInvalid",
"exception.message": "PG::SyntaxError: ERROR: zero-length delimited identifier at or near \"\"\"\"\nLINE 1: ...COALESCE(\"lock_version\", 0) + 1 WHERE \"ci_builds\".\"\" IN (SEL...\n ^\n",
"exception.sql": "/*application:sidekiq,correlation_id:3b68fce763778907c11d7d5a2ae03b2a,jid:3e40fe790c7b6c42f317a1ce,endpoint_id:Database::BatchedBackgroundMigration::MainExecutionWorker,db_config_name:main*/ UPDATE \"ci_builds\" SET \"runner_id\" = NULL, \"lock_version\" = COALESCE(\"lock_version\", 0) + 1 WHERE \"ci_builds\".\"\" IN (SELECT \"ci_builds\".\"\" FROM \"ci_builds\" LEFT OUTER JOIN ci_runners ON ci_runners.id = ci_builds.runner_id WHERE \"ci_builds\".\"id\" BETWEEN 50245 AND 50634 AND \"ci_builds\".\"id\" >= 50245 AND (ci_builds.runner_id IS NOT NULL AND ci_runners.id IS NULL))",
Steps to reproduce
Example Project
What is the current bug behavior?
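`NullifyOrphanRunnerIdOnCiBuilds` is failing due to an invalid SQL statement: the generated `UPDATE` contains an empty column identifier (`"ci_builds".""`), which PostgreSQL rejects with `zero-length delimited identifier`.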
What is the expected correct behavior?
`NullifyOrphanRunnerIdOnCiBuilds` should complete successfully.
Relevant logs and/or screenshots
{
"severity": "ERROR",
"time": "2023-06-14T01:15:06.420Z",
"correlation_id": "3b68fce763778907c11d7d5a2ae03b2a",
"exception.class": "ActiveRecord::StatementInvalid",
"exception.message": "PG::SyntaxError: ERROR: zero-length delimited identifier at or near \"\"\"\"\nLINE 1: ...COALESCE(\"lock_version\", 0) + 1 WHERE \"ci_builds\".\"\" IN (SEL...\n ^\n",
"exception.backtrace": [
"lib/gitlab/database/load_balancing/connection_proxy.rb:121:in `public_send'",
"lib/gitlab/database/load_balancing/connection_proxy.rb:121:in `block in write_using_load_balancer'",
"lib/gitlab/database/load_balancing/load_balancer.rb:127:in `block in read_write'",
"lib/gitlab/database/load_balancing/load_balancer.rb:198:in `retry_with_backoff'",
"lib/gitlab/database/load_balancing/load_balancer.rb:116:in `read_write'",
"lib/gitlab/database/load_balancing/connection_proxy.rb:120:in `write_using_load_balancer'",
"lib/gitlab/database/load_balancing/connection_proxy.rb:62:in `block (2 levels) in <class:ConnectionProxy>'",
"lib/gitlab/background_migration/nullify_orphan_runner_id_on_ci_builds.rb:15:in `block (2 levels) in perform'",
"lib/gitlab/database/background_migration/batch_metrics.rb:22:in `instrument_operation'",
"lib/gitlab/database/background_migration/batch_metrics.rb:16:in `time_operation'",
"lib/gitlab/background_migration/nullify_orphan_runner_id_on_ci_builds.rb:14:in `block in perform'",
"app/models/concerns/each_batch.rb:99:in `block (2 levels) in each_batch'",
"app/models/concerns/each_batch.rb:99:in `block in each_batch'",
"app/models/concerns/each_batch.rb:69:in `step'",
"app/models/concerns/each_batch.rb:69:in `each_batch'",
"lib/gitlab/background_migration/nullify_orphan_runner_id_on_ci_builds.rb:13:in `perform'",
"lib/gitlab/database/background_migration/batched_migration_wrapper.rb:89:in `execute_legacy_job'",
"lib/gitlab/database/background_migration/batched_migration_wrapper.rb:65:in `execute_job'",
"lib/gitlab/database/background_migration/batched_migration_wrapper.rb:50:in `execute_batch'",
"lib/gitlab/database/background_migration/batched_migration_wrapper.rb:25:in `perform'",
"lib/gitlab/database/background_migration/batched_migration_runner.rb:30:in `run_migration_job'",
"app/workers/database/batched_background_migration/execution_worker.rb:103:in `run_migration_job'",
"app/workers/database/batched_background_migration/execution_worker.rb:56:in `block (2 levels) in perform_work'",
"app/services/concerns/exclusive_lease_guard.rb:29:in `try_obtain_lease'",
"app/workers/database/batched_background_migration/execution_worker.rb:55:in `block in perform_work'",
"lib/gitlab/database/shared_model.rb:34:in `using_connection'",
"app/workers/database/batched_background_migration/execution_worker.rb:50:in `perform_work'",
"app/workers/concerns/limited_capacity/worker.rb:94:in `perform_registered'",
"app/workers/concerns/limited_capacity/worker.rb:65:in `perform'",
"lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb:26:in `call'",
"lib/gitlab/sidekiq_middleware/duplicate_jobs/strategies/none.rb:14:in `perform'",
"lib/gitlab/sidekiq_middleware/duplicate_jobs/duplicate_job.rb:44:in `perform'",
"lib/gitlab/sidekiq_middleware/duplicate_jobs/server.rb:8:in `call'",
"lib/gitlab/sidekiq_middleware/worker_context.rb:9:in `wrap_in_optional_context'",
"lib/gitlab/sidekiq_middleware/worker_context/server.rb:19:in `block in call'",
"lib/gitlab/application_context.rb:118:in `block in use'",
"lib/gitlab/application_context.rb:118:in `use'",
"lib/gitlab/application_context.rb:57:in `with_context'",
"lib/gitlab/sidekiq_middleware/worker_context/server.rb:17:in `call'",
"lib/gitlab/sidekiq_status/server_middleware.rb:7:in `call'",
"lib/gitlab/sidekiq_versioning/middleware.rb:9:in `call'",
"lib/gitlab/sidekiq_middleware/query_analyzer.rb:7:in `block in call'",
"lib/gitlab/database/query_analyzer.rb:37:in `within'",
"lib/gitlab/sidekiq_middleware/query_analyzer.rb:7:in `call'",
"lib/gitlab/sidekiq_middleware/admin_mode/server.rb:14:in `call'",
"lib/gitlab/sidekiq_middleware/instrumentation_logger.rb:9:in `call'",
"lib/gitlab/sidekiq_middleware/batch_loader.rb:7:in `call'",
"lib/gitlab/sidekiq_middleware/extra_done_log_metadata.rb:7:in `call'",
"lib/gitlab/sidekiq_middleware/request_store_middleware.rb:10:in `block in call'",
"lib/gitlab/with_request_store.rb:17:in `enabling_request_store'",
"lib/gitlab/with_request_store.rb:10:in `with_request_store'",
"lib/gitlab/sidekiq_middleware/request_store_middleware.rb:9:in `call'",
"lib/gitlab/sidekiq_middleware/server_metrics.rb:76:in `block in call'",
"lib/gitlab/sidekiq_middleware/server_metrics.rb:103:in `block in instrument'",
"lib/gitlab/metrics/background_transaction.rb:33:in `run'",
"lib/gitlab/sidekiq_middleware/server_metrics.rb:103:in `instrument'",
"lib/gitlab/sidekiq_middleware/server_metrics.rb:75:in `call'",
"lib/gitlab/sidekiq_middleware/monitor.rb:10:in `block in call'",
"lib/gitlab/sidekiq_daemon/monitor.rb:46:in `within_job'",
"lib/gitlab/sidekiq_middleware/monitor.rb:9:in `call'",
"lib/gitlab/sidekiq_middleware/size_limiter/server.rb:13:in `call'",
"lib/gitlab/sidekiq_logging/structured_logger.rb:21:in `call'"
],
"exception.cause_class": "PG::SyntaxError",
"exception.sql": "/*application:sidekiq,correlation_id:3b68fce763778907c11d7d5a2ae03b2a,jid:3e40fe790c7b6c42f317a1ce,endpoint_id:Database::BatchedBackgroundMigration::MainExecutionWorker,db_config_name:main*/ UPDATE \"ci_builds\" SET \"runner_id\" = NULL, \"lock_version\" = COALESCE(\"lock_version\", 0) + 1 WHERE \"ci_builds\".\"\" IN (SELECT \"ci_builds\".\"\" FROM \"ci_builds\" LEFT OUTER JOIN ci_runners ON ci_runners.id = ci_builds.runner_id WHERE \"ci_builds\".\"id\" BETWEEN 50245 AND 50634 AND \"ci_builds\".\"id\" >= 50245 AND (ci_builds.runner_id IS NOT NULL AND ci_runners.id IS NULL))",
"user.username": null,
"tags.program": "sidekiq",
"tags.locale": "en",
"tags.feature_category": null,
"tags.correlation_id": "3b68fce763778907c11d7d5a2ae03b2a",
"extra.sidekiq": {
"retry": 0,
"queue": "batched_background_migrations:database_batched_background_migration_main_execution",
"version": 0,
"status_expiration": 1800,
"queue_namespace": "batched_background_migrations",
"class": "Database::BatchedBackgroundMigration::MainExecutionWorker",
"args": [
"[FILTERED]",
"21"
],
"jid": "3e40fe790c7b6c42f317a1ce",
"created_at": 1686705306.069346,
"meta.caller_id": "Database::BatchedBackgroundMigrationWorker",
"correlation_id": "3b68fce763778907c11d7d5a2ae03b2a",
"meta.root_caller_id": "Cronjob",
"meta.feature_category": "database",
"meta.client_id": "ip/",
"worker_data_consistency": "always",
"size_limiter": "validated",
"enqueued_at": 1686705306.0721405
},
"extra.batched_job_id": 376,
"extra.job_class_name": "NullifyOrphanRunnerIdOnCiBuilds",
"extra.job_arguments": []
}
Output of checks
Results of GitLab environment info
Expand for output related to GitLab environment info
(For installations with omnibus-gitlab package run and paste the output of: `sudo gitlab-rake gitlab:env:info`) (For installations from source run and paste the output of: `sudo -u git -H bundle exec rake gitlab:env:info RAILS_ENV=production`)
Results of GitLab application Check
Expand for output related to the GitLab application check
(For installations with omnibus-gitlab package run and paste the output of:
sudo gitlab-rake gitlab:check SANITIZE=true
)(For installations from source run and paste the output of:
sudo -u git -H bundle exec rake gitlab:check RAILS_ENV=production SANITIZE=true
)(we will only investigate if the tests are passing)