Skip to content
Snippets Groups Projects
Commit f95a5c98 authored by Dylan Griffith's avatar Dylan Griffith Committed by Harsha Muralidhar
Browse files

Automatically index projects in Zoekt when namespace is enabled

Zoekt is a new code search database being gradually rolled out on
GitLab.com. It is intended to replace Elasticsearch for code search.

We use the `::Zoekt::IndexedNamespace` model as a way to keep track of
namespaces that are enabled as part of our rollout of using Zoekt for
code search. When we implemented this we did not automate the indexing
of the projects when this was enabled and we had this as another manual
step as part of the rollout. Now that we're building APIs to enable a
namespace in
!116650 we want
this to also automatically index all the projects in the namespace when
it is enabled.

This MR just adds an after create hook on the model to trigger a Sidekiq
worker which then loops through all projects in the namespace and
triggers another Sidekiq worker to do the indexing. This is largely a
copy of the way we do it with the `ElasticNamespaceIndexerWorker` and as
such we have added a 2nd argument to the worker which will be used in
the future to handle deletes.

This change also required removing `sidekiq_inline` and tweaking some
specs a little to avoid the indexing requests being sent to Zoekt
when creating a `::Zoekt::IndexedNamespace`.

Changelog: added
EE: true
MR: !117634
parent 77df645e
No related branches found
No related tags found
1 merge request!117114Draft: Add reporting jobs to QA tests on GDK
......@@ -503,6 +503,8 @@
- 1
- - search_project_index_integrity
- 1
- - search_zoekt_namespace_indexer
- 1
- - security_auto_fix
- 1
- - security_orchestration_policy_rule_schedule_namespace
......
......@@ -14,6 +14,8 @@ def self.table_name_prefix
# Newest records first (descending primary key).
scope :recent, -> { order(id: :desc) }
# Caps the result set at `maximum` rows.
scope :with_limit, ->(maximum) { limit(maximum) }
# Calls #index once the transaction that created the record has committed;
# the callback is restricted to creation and does not fire on update or destroy.
after_commit :index, on: :create
# Looks up the single record that links the given shard to the given namespace.
#
# @param shard the shard the record must belong to
# @param namespace the namespace the record must belong to
# @return the matching record
# @raise [ActiveRecord::RecordNotFound] when no such record exists (raised by `find_by!`)
def self.for_shard_and_namespace!(shard:, namespace:)
  lookup_criteria = { shard: shard, namespace: namespace }

  find_by!(lookup_criteria)
end
......@@ -30,10 +32,16 @@ def self.enabled_for_namespace?(namespace)
where(namespace: namespace.root_ancestor).exists?
end
private
# Validation helper: a namespace that has a parent is not a root namespace,
# so a base error is recorded for it. Root namespaces add no error.
def only_root_namespaces_can_be_indexed
  errors.add(:base, 'Only root namespaces can be indexed') if namespace.parent_id.present?
end
# Callback target of `after_commit :index, on: :create`: enqueues a
# Search::Zoekt::NamespaceIndexerWorker job for this record's namespace.
# The operation is passed as the symbol :index; the worker compares
# `operation.to_s` against 'index', so it matches whether the argument
# arrives as a symbol or as a string.
def index
::Search::Zoekt::NamespaceIndexerWorker.perform_async(namespace_id, :index)
end
end
end
......@@ -1623,6 +1623,15 @@
:weight: 1
:idempotent: true
:tags: []
- :name: search_zoekt_namespace_indexer
:worker_name: Search::Zoekt::NamespaceIndexerWorker
:feature_category: :global_search
:has_external_dependencies: false
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: security_auto_fix
:worker_name: Security::AutoFixWorker
:feature_category: :software_composition_analysis
......
# frozen_string_literal: true
module Search
  module Zoekt
    # Sidekiq worker that fans out Zoekt indexing across a namespace: it schedules one
    # ::Zoekt::IndexerWorker job per project found in `namespace.all_projects`.
    class NamespaceIndexerWorker
      include ApplicationWorker

      # Must be always otherwise we risk race condition where it does not think that indexing is enabled yet for the
      # namespace.
      data_consistency :always # rubocop:disable SidekiqLoadBalancing/WorkerDataConsistency
      feature_category :global_search
      idempotent!

      # Schedules indexing for every project of the namespace.
      #
      # The job is a no-op when the `index_code_with_zoekt` feature flag is off, when the
      # namespace no longer exists, when the namespace does not use Zoekt, or when
      # `operation` is anything other than `index`.
      #
      # @param namespace_id [Integer] ID of the namespace whose projects should be indexed
      # @param operation [String, Symbol] requested operation; only `index` is acted upon
      # @return [void]
      def perform(namespace_id, operation)
        return unless ::Feature.enabled?(:index_code_with_zoekt)

        # The namespace can be deleted between enqueueing and execution. In that case there
        # is nothing to index, and raising RecordNotFound would only make Sidekiq retry a
        # job that can never succeed.
        namespace = Namespace.find_by(id: namespace_id)
        return unless namespace&.use_zoekt?

        # Symbols convert to string when queuing in Sidekiq
        index_projects(namespace) if operation.to_s == 'index'
      end

      private

      # Walks the namespace's projects in batches and bulk-enqueues one
      # ::Zoekt::IndexerWorker job per project, with the project's ID as the job argument
      # and the project itself as the job context.
      def index_projects(namespace)
        namespace.all_projects.find_in_batches do |batch|
          ::Zoekt::IndexerWorker.bulk_perform_async_with_contexts(
            batch,
            arguments_proc: ->(project) { project.id },
            context_proc: ->(project) { { project: project } }
          )
        end
      end
    end
  end
end
......@@ -15,7 +15,7 @@
zoekt_ensure_project_indexed!(project_2)
end
describe 'blobs', :sidekiq_inline do
describe 'blobs' do
before do
zoekt_ensure_project_indexed!(project_1)
end
......
......@@ -76,22 +76,11 @@ def search_for(term)
end
end
describe '#async_update_zoekt_index', :sidekiq_inline do
describe '#async_update_zoekt_index' do
it 'makes updates available via ::Zoekt::IndexerWorker' do
expect(::Zoekt::IndexerWorker).to receive(:perform_async).with(project.id).and_call_original
project.repository.create_file(
user,
'anothernewsearchablefile.txt',
'some content',
message: 'added test file',
branch_name: project.default_branch)
expect(search_for('anothernewsearchablefile.txt')).to be_empty
expect(::Zoekt::IndexerWorker).to receive(:perform_async).with(project.id)
repository.async_update_zoekt_index
expect(search_for('anothernewsearchablefile.txt')).to match_array(['anothernewsearchablefile.txt'])
end
end
end
......@@ -57,4 +57,15 @@
expect(described_class.enabled_for_project?(indexed_project_of_child_namespace)).to eq(true)
end
end
# Persisting a new record must enqueue the namespace indexer for that namespace.
describe '#create!' do
  let(:namespace_to_index) { create(:namespace) }

  it 'triggers indexing for the namespace' do
    expect(::Search::Zoekt::NamespaceIndexerWorker)
      .to receive(:perform_async)
      .with(namespace_to_index.id, :index)

    described_class.create!(shard: shard, namespace: namespace_to_index)
  end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the namespace-level fan-out worker: it must bulk-enqueue one
# ::Zoekt::IndexerWorker job per project of an indexed namespace, and do nothing
# when the feature flag is off, the namespace is not indexed, or the operation is
# not `index`.
RSpec.describe ::Search::Zoekt::NamespaceIndexerWorker, :zoekt, feature_category: :global_search do
  let_it_be(:namespace) { create(:namespace) }
  let_it_be(:unindexed_namespace) { create(:namespace) }
  # A project under the namespace that is never marked as indexed, so the
  # "not enabled for the namespace" example has something that could be indexed.
  let_it_be(:unindexed_project) { create(:project, namespace: unindexed_namespace) }

  before do
    zoekt_ensure_namespace_indexed!(namespace)
  end

  describe '#perform' do
    subject { described_class.new.perform(namespace.id, 'index') }

    let_it_be(:projects) { create_list :project, 3, namespace: namespace }

    it 'indexes all projects belonging to the namespace' do
      expected_job_arguments = projects.map { |project| [project.id] }

      expect(::Zoekt::IndexerWorker).to receive(:bulk_perform_async)
        .with(a_collection_containing_exactly(*expected_job_arguments))

      subject
    end

    context 'when zoekt indexing is disabled' do
      before do
        stub_feature_flags(index_code_with_zoekt: false)
      end

      it 'does nothing' do
        expect(::Zoekt::IndexerWorker).not_to receive(:bulk_perform_async)

        subject
      end
    end

    context 'when zoekt indexing is not enabled for the namespace' do
      subject { described_class.new.perform(unindexed_namespace.id, 'index') }

      it 'does nothing' do
        expect(::Zoekt::IndexerWorker).not_to receive(:bulk_perform_async)

        subject
      end
    end

    # The worker only acts when the operation stringifies to 'index'.
    context 'when the operation is not index' do
      subject { described_class.new.perform(namespace.id, 'unknown_operation') }

      it 'does nothing' do
        expect(::Zoekt::IndexerWorker).not_to receive(:bulk_perform_async)

        subject
      end
    end
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment