Fix deduplication of skipped jobs when scheduling
What does this MR do and why?
This MR fixes the deduplication logic for skipped jobs when they are scheduled jobs (`perform_at`/`perform_in`).
See #435602 (comment 1839458356)
MR acceptance checklist
Please evaluate this MR against the MR acceptance checklist. It helps you analyze changes to reduce risks in quality, performance, reliability, security, and maintainability.
Screenshots or screen recordings
Screenshots are required for UI changes, and strongly recommended for all other merge requests.
Before | After |
---|---|
How to set up and validate locally
- Apply the diff below and run `gdk restart`.
diff --git a/app/workers/chaos/cpu_spin_worker.rb b/app/workers/chaos/cpu_spin_worker.rb
index ff468d598068..f5d51acbe810 100644
--- a/app/workers/chaos/cpu_spin_worker.rb
+++ b/app/workers/chaos/cpu_spin_worker.rb
@@ -6,6 +6,7 @@ class CpuSpinWorker # rubocop:disable Scalability/IdempotentWorker
data_consistency :always
+ deduplicate :until_executed, including_scheduled: true
sidekiq_options retry: 3
include ChaosQueue
diff --git a/lib/gitlab/sidekiq_middleware/skip_jobs.rb b/lib/gitlab/sidekiq_middleware/skip_jobs.rb
index 1af8ddf7837c..cfc00ca61801 100644
--- a/lib/gitlab/sidekiq_middleware/skip_jobs.rb
+++ b/lib/gitlab/sidekiq_middleware/skip_jobs.rb
@@ -3,7 +3,7 @@
module Gitlab
module SidekiqMiddleware
class SkipJobs
- DELAY = ENV.fetch("SIDEKIQ_DEFER_JOBS_DELAY", 5.minutes)
+ DELAY = ENV.fetch("SIDEKIQ_DEFER_JOBS_DELAY", 15.seconds)
RUN_FEATURE_FLAG_PREFIX = "run_sidekiq_jobs"
DROP_FEATURE_FLAG_PREFIX = "drop_sidekiq_jobs"
- Start up a Rails console and defer the worker's jobs by disabling its run feature flag.
Feature.disable(:"run_sidekiq_jobs_Chaos::CpuSpinWorker")
- Push a job
Chaos::CpuSpinWorker.perform_async(1)
- Check the logs. Look out for `"class":"Chaos::CpuSpinWorker"` and the keys of interest (`deferred_count` should increase, and `"job_deferred_by":"feature_flag"` is present):
{"severity":"INFO","time":"2024-04-02T09:43:06.230Z","retry":3,"queue":"default","backtrace":true,"version":0,"store":null,"queue_namespace":"chaos","deferred":true,"deferred_count":2,"deferred_by":"feature_flag","args":["11"],"class":"Chaos::CpuSpinWorker","jid":"61f85fe9de7d0027eb52b7a4","created_at":"2024-04-02T09:42:48.214Z","meta.sidekiq_destination_shard_redis":"main","correlation_id":"e5312bf9e200fac816233c78aab95559","meta.caller_id":"Chaos::CpuSpinWorker","meta.feature_category":"not_owned","meta.
- Re-enable the feature flag using `Feature.enable(:"run_sidekiq_jobs_Chaos::CpuSpinWorker")` and observe that the `Chaos::CpuSpinWorker` job gets processed after ~1 minute (feature flag in-memory TTL). Look out for `job_status: done`.
{"severity":"INFO","time":"2024-04-04T01:05:33.717Z","retry":3,"queue":"default","backtrace":true,"version":0,"store":null,"queue_namespace":"chaos","deferred":true,"deferred_count":4,"deferred_by":"feature_flag","args":["1"],"class":"Chaos::CpuSpinWorker","jid":"450750943089a67138f51b77","created_at":"2024-04-04T01:05:13.316Z","meta.sidekiq_destination_shard_redis":"main","correlation_id":"12ad1b114c20a153f0a2a9ddab5481ad","meta.caller_id":"Chaos::CpuSpinWorker","meta.feature_category":"not_owned","meta.root_caller_id":"Chaos::CpuSpinWorker","worker_data_consistency":"always","size_limiter":"validated","scheduled_at":"2024-04-04T01:05:28.316Z","idempotency_key":"resque:gitlab:duplicate:default:22a5e8e9d30eac0fa7a7ff37a2624dcb6b65842c19798591fa0cc57d26805043","enqueued_at":"2024-04-04T01:05:32.699Z","job_size_bytes":3,"pid":10135,"message":"Chaos::CpuSpinWorker JID-450750943089a67138f51b77: done: 1.015638 sec","job_status":"done","scheduling_latency_s":0.003252,"enqueue_latency_s":4.382967,"redis_calls":5,"redis_duration_s":0.001268,"redis_read_bytes":411,"redis_write_bytes":385,"redis_feature_flag_calls":2,"redis_feature_flag_duration_s":0.000263,"redis_feature_flag_read_bytes":408,"redis_feature_flag_write_bytes":145,"redis_queues_metadata_calls":2,"redis_queues_metadata_duration_s":0.00087,"redis_queues_metadata_read_bytes":2,"redis_queues_metadata_write_bytes":172,"redis_shared_state_calls":1,"redis_shared_state_duration_s":0.000135,"redis_shared_state_read_bytes":1,"redis_shared_state_write_bytes":68,"db_count":0,"db_write_count":0,"db_cached_count":0,"db_txn_count":0,"db_replica_txn_count":0,"db_primary_txn_count":0,"db_main_txn_count":0,"db_ci_txn_count":0,"db_main_replica_txn_count":0,"db_ci_replica_txn_count":0,"db_replica_count":0,"db_primary_count":0,"db_main_count":0,"db_ci_count":0,"db_main_replica_count":0,"db_ci_replica_count":0,"db_replica_cached_count":0,"db_primary_cached_count":0,"db_main_cached_count":0,"db_ci_cached_count":0,"db_main_replica_cac
hed_count":0,"db_ci_replica_cached_count":0,"db_replica_wal_count":0,"db_primary_wal_count":0,"db_main_wal_count":0,"db_ci_wal_count":0,"db_main_replica_wal_count":0,"db_ci_replica_wal_count":0,"db_replica_wal_cached_count":0,"db_primary_wal_cached_count":0,"db_main_wal_cached_count":0,"db_ci_wal_cached_count":0,"db_main_replica_wal_cached_count":0,"db_ci_replica_wal_cached_count":0,"db_replica_txn_max_duration_s":0.0,"db_primary_txn_max_duration_s":0.0,"db_main_txn_max_duration_s":0.0,"db_ci_txn_max_duration_s":0.0,"db_main_replica_txn_max_duration_s":0.0,"db_ci_replica_txn_max_duration_s":0.0,"db_replica_txn_duration_s":0.0,"db_primary_txn_duration_s":0.0,"db_main_txn_duration_s":0.0,"db_ci_txn_duration_s":0.0,"db_main_replica_txn_duration_s":0.0,"db_ci_replica_txn_duration_s":0.0,"db_replica_duration_s":0.0,"db_primary_duration_s":0.0,"db_main_duration_s":0.0,"db_ci_duration_s":0.0,"db_main_replica_duration_s":0.0,"db_ci_replica_duration_s":0.0,"cpu_s":1.005494,"worker_id":"sidekiq_0","rate_limiting_gates":[],"duration_s":1.015638,"completed_at":"2024-04-04T01:05:33.717Z","load_balancing_strategy":"primary","db_duration_s":0.0,"urgency":"low","target_duration_s":300,"target_scheduling_latency_s":60}
Edited by Sylvester Chin