Skip to content
Snippets Groups Projects
Commit 25d6eb5f authored by Adam Hegyi's avatar Adam Hegyi
Browse files

Automatically enable group hierarchy optimization

This change adds a cron worker that automatically enables the group hierarchy
optimization for large groups when the `periodical_namespace_descendants_cache_worker` feature flag is enabled.

Changelog: added
parent 49a513ff
No related branches found
No related tags found
3 merge requests!181325Fix ambiguous `created_at` in project.rb,!179611Draft: Rebase CR approach for zoekt assignments,!176648Automatically enable group hierarchy optimization
......@@ -660,6 +660,15 @@
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:namespaces_enable_descendants_cache_cron
:worker_name: Namespaces::EnableDescendantsCacheCronWorker
:feature_category: :groups_and_projects
:has_external_dependencies: false
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:namespaces_process_outdated_namespace_descendants_cron
:worker_name: Namespaces::ProcessOutdatedNamespaceDescendantsCronWorker
:feature_category: :groups_and_projects
......
# frozen_string_literal: true
module Namespaces
  # Cron worker that scans groups and creates a Namespaces::Descendants record
  # for every group whose namespace hierarchy reaches CACHE_THRESHOLD entries.
  #
  # The scan is time-boxed by MAX_RUNTIME. When the limit is hit, the id of the
  # last fully processed group is stored in Redis under CURSOR_KEY and the next
  # run continues after that id. When the scan reaches the end of the groups,
  # the cursor is deleted so the following run starts from the beginning again.
  #
  # The worker returns immediately while the
  # `periodical_namespace_descendants_cache_worker` feature flag is disabled.
  class EnableDescendantsCacheCronWorker
    include ApplicationWorker

    # rubocop:disable Scalability/CronWorkerContext -- This worker does not perform work scoped to a context
    include CronjobQueue

    # Number of groups loaded per outer batch.
    GROUP_BATCH_SIZE = 5000
    # Batch size used while counting the namespaces of a single group hierarchy.
    NAMESPACE_BATCH_SIZE = 500
    # Number of rows written per upsert statement.
    PERSIST_SLICE_SIZE = 100
    # Covers the top 3000 namespaces on .com
    CACHE_THRESHOLD = 700
    # Redis key holding the id of the last fully processed group.
    CURSOR_KEY = 'enable_namespace_descendants_cron_worker'
    # Upper bound for the runtime of a single execution.
    MAX_RUNTIME = 45.seconds

    data_consistency :sticky
    # rubocop:enable Scalability/CronWorkerContext

    feature_category :groups_and_projects
    idempotent!

    # Runs one time-boxed pass over the groups, persists the ids that reached the
    # threshold and stores (or clears) the cursor.
    #
    # @return [nil] when the feature flag is disabled; otherwise whatever
    #   `log_extra_metadata_on_done` returns for the
    #   `{ over_time:, last_id:, cache_count: }` result hash
    # rubocop: disable CodeReuse/ActiveRecord -- Batching over groups.
    def perform
      # rubocop: disable Gitlab/FeatureFlagWithoutActor -- This is a global worker.
      return if Feature.disabled?(:periodical_namespace_descendants_cache_worker)
      # rubocop: enable Gitlab/FeatureFlagWithoutActor

      limiter = Gitlab::Metrics::RuntimeLimiter.new(MAX_RUNTIME)
      ids_to_cache = Set.new
      last_id = get_last_id

      # 1. Iterate over groups.
      # 2. For each group, start counting the descendants.
      # 3. When CACHE_THRESHOLD count is reached, stop the counting.
      Group.where('id > ?', last_id || 0).each_batch(of: GROUP_BATCH_SIZE) do |relation|
        relation.select(:id).each do |group|
          ids_to_cache << group.id if reaches_cache_threshold?(group.id, limiter)

          # The current group was interrupted by the time limit, so the cursor
          # must stay on the previous group for this one to be retried.
          break if limiter.was_over_time?

          last_id = group.id
        end

        break if limiter.was_over_time?
      end

      # A complete pass clears the cursor; the next run restarts from the first group.
      last_id = nil unless limiter.was_over_time?

      persist(ids_to_cache)
      set_last_id(last_id)

      log_extra_metadata_on_done(:result,
        { over_time: limiter.was_over_time?, last_id: last_id, cache_count: ids_to_cache.size })
    end
    # rubocop: enable CodeReuse/ActiveRecord

    # Upserts one Namespaces::Descendants row per given namespace id, in slices
    # of PERSIST_SLICE_SIZE.
    #
    # @param ids_to_cache [Enumerable<Integer>] group ids that reached the threshold
    def persist(ids_to_cache)
      ids_to_cache.each_slice(PERSIST_SLICE_SIZE) do |slice|
        Namespaces::Descendants.upsert_all(slice.map { |id| { namespace_id: id } })
      end
    end

    # Reads the cursor from Redis.
    #
    # @return [Integer, nil] id of the last fully processed group, nil when no cursor is stored
    def get_last_id
      value = Gitlab::Redis::SharedState.with { |redis| redis.get(CURSOR_KEY) }
      return if value.nil?

      Integer(value)
    end

    # Stores the cursor with a one day expiry, or deletes it when last_id is nil.
    #
    # @param last_id [Integer, nil] id of the last fully processed group
    def set_last_id(last_id)
      if last_id.nil?
        Gitlab::Redis::SharedState.with { |redis| redis.del(CURSOR_KEY) }
      else
        Gitlab::Redis::SharedState.with { |redis| redis.set(CURSOR_KEY, last_id, ex: 1.day) }
      end
    end

    private

    # Counts the namespaces yielded by NamespaceEachBatch starting at the given
    # group and stops as soon as CACHE_THRESHOLD is reached or the limiter
    # reports that the runtime budget is used up.
    #
    # The time check is skipped for the batch that reaches the threshold
    # (short-circuit), so a group that qualifies is never reported as interrupted.
    #
    # @param group_id [Integer] id of the group whose hierarchy is counted
    # @param limiter [Gitlab::Metrics::RuntimeLimiter] shared runtime limiter of this run
    # @return [Boolean] true when the counted namespaces reached CACHE_THRESHOLD
    def reaches_cache_threshold?(group_id, limiter)
      cursor = { current_id: group_id, depth: [group_id] }
      iterator = Gitlab::Database::NamespaceEachBatch.new(namespace_class: Namespace, cursor: cursor)

      count = 0
      iterator.each_batch(of: NAMESPACE_BATCH_SIZE) do |ids|
        count += ids.size

        break if count >= CACHE_THRESHOLD || limiter.over_time?
      end

      count >= CACHE_THRESHOLD
    end
  end
end
---
# Gates Namespaces::EnableDescendantsCacheCronWorker#perform: while this flag is
# disabled the worker returns immediately without scanning groups.
name: periodical_namespace_descendants_cache_worker
feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/509554
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/176648
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/510967
milestone: '17.8'
group: group::optimize
type: beta
default_enabled: false
......@@ -971,6 +971,9 @@
Settings.cron_jobs['ai_conversation_cleanup_cron_worker'] ||= {}
Settings.cron_jobs['ai_conversation_cleanup_cron_worker']['cron'] ||= '30 2 * * * *'
Settings.cron_jobs['ai_conversation_cleanup_cron_worker']['job_class'] = 'Ai::Conversation::CleanupCronWorker'
# Schedules Namespaces::EnableDescendantsCacheCronWorker. The default expression fires at
# every 11th minute of the hour (0, 11, 22, 33, 44, 55). `||=` keeps a cron value that is
# already configured, while job_class is always overwritten.
Settings.cron_jobs['namespaces_enable_descendants_cache_cron_worker'] ||= {}
Settings.cron_jobs['namespaces_enable_descendants_cache_cron_worker']['cron'] ||= '*/11 * * * *'
Settings.cron_jobs['namespaces_enable_descendants_cache_cron_worker']['job_class'] = 'Namespaces::EnableDescendantsCacheCronWorker'
Gitlab.com do
Settings.cron_jobs['disable_legacy_open_source_license_for_inactive_projects'] ||= {}
......
# frozen_string_literal: true
require 'spec_helper'
# The feature category matches the one declared by the worker itself.
RSpec.describe Namespaces::EnableDescendantsCacheCronWorker, '#perform', :clean_gitlab_redis_shared_state,
  feature_category: :groups_and_projects do
  # Hierarchy used by the examples:
  #
  #   group
  #   ├── subgroup
  #   │   └── subsubgroup
  #   │       ├── project1
  #   │       └── project2
  #   └── other_project
  #   other_group
  let_it_be(:group) { create(:group) }
  let_it_be(:subgroup) { create(:group, parent: group) }
  let_it_be(:subsubgroup) { create(:group, parent: subgroup) }
  let_it_be(:project1) { create(:project, group: subsubgroup) }
  let_it_be(:project2) { create(:project, group: subsubgroup) }
  let_it_be(:other_group) { create(:group) }
  let_it_be(:other_project) { create(:project, group: group) }

  subject(:worker) { described_class.new }

  context 'when periodical_namespace_descendants_cache_worker feature is enabled' do
    before do
      stub_feature_flags(periodical_namespace_descendants_cache_worker: true)
      # Small limits so that the tiny fixture hierarchy crosses the threshold
      # and every namespace is counted in its own batch.
      stub_const("#{described_class}::CACHE_THRESHOLD", 4)
      stub_const("#{described_class}::GROUP_BATCH_SIZE", 1)
      stub_const("#{described_class}::NAMESPACE_BATCH_SIZE", 1)
    end

    it 'creates the cache record for the top level group and the subgroup' do
      metadata = worker.perform

      ids = Namespaces::Descendants.pluck(:namespace_id)
      expect(ids).to match_array([group.id, subgroup.id])
      expect(metadata).to eq({ over_time: false, last_id: nil, cache_count: 2 })
    end

    context 'when cached record already exist' do
      it 'does not fail' do
        create(:namespace_descendants, namespace: group)

        worker.perform

        ids = Namespaces::Descendants.pluck(:namespace_id)
        expect(ids).to match_array([group.id, subgroup.id])
      end
    end

    context 'when time limit is reached' do
      it 'stores the last processed group id as the cursor' do
        # Reach the limit after finishing counting the first group's descendants:
        # group, subgroup, subsubgroup, project1
        allow_next_instance_of(Gitlab::Metrics::RuntimeLimiter) do |limiter|
          call_count = 0
          allow(limiter).to receive(:over_time?).and_wrap_original do |_, _name|
            # on the 4th call, we reach over time
            call_count += 1
            if call_count >= 4
              # Make was_over_time? report the interruption as well.
              limiter.instance_variable_set(:@last_check, true)
              true
            else
              false
            end
          end
        end

        metadata = worker.perform

        ids = Namespaces::Descendants.pluck(:namespace_id)
        expect(ids).to match_array([group.id])

        value = Gitlab::Redis::SharedState.with { |redis| redis.get(described_class::CURSOR_KEY) }
        expect(Integer(value)).to eq(group.id)
        expect(metadata).to eq({ over_time: true, last_id: group.id, cache_count: 1 })
      end
    end

    context 'when cursor is present' do
      it 'continues processing from the cursor' do
        # Assume that the first group was already processed
        Gitlab::Redis::SharedState.with { |redis| redis.set(described_class::CURSOR_KEY, group.id) }

        worker.perform

        ids = Namespaces::Descendants.pluck(:namespace_id)
        expect(ids).to match_array([subgroup.id])
      end
    end

    context 'when reaching the end of the table' do
      it 'clears the cursor' do
        Gitlab::Redis::SharedState.with { |redis| redis.set(described_class::CURSOR_KEY, group.id) }

        metadata = worker.perform

        value = Gitlab::Redis::SharedState.with { |redis| redis.get(described_class::CURSOR_KEY) }
        expect(value).to be_nil
        expect(metadata).to eq({ over_time: false, last_id: nil, cache_count: 1 })
      end
    end
  end

  context 'when periodical_namespace_descendants_cache_worker feature is disabled' do
    before do
      stub_feature_flags(periodical_namespace_descendants_cache_worker: false)
      # With the lowered threshold the fixture groups would qualify, so an empty
      # table can only be explained by the feature flag guard.
      stub_const("#{described_class}::CACHE_THRESHOLD", 4)
    end

    it 'does not create cache records' do
      expect(worker.perform).to be_nil
      expect(Namespaces::Descendants.count).to eq(0)
    end
  end

  it_behaves_like 'an idempotent worker' do
    it 'does nothing' do
      expect(Namespaces::Descendants.count).to eq(0)
    end
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment