Skip to content
Snippets Groups Projects
Commit 5cf8c3e1 authored by Zhiyuan Lu's avatar Zhiyuan Lu
Browse files

Update WIKI_SCHEMA_VERSION to 2504

parent 1561cecb
No related branches found
No related tags found
3 merge requests: !181325 Fix ambiguous `created_at` in project.rb, !179611 Draft: Rebase CR approach for zoekt assignments, !177223 Enhance search capability for Chinese in the Wiki
# frozen_string_literal: true
# Advanced Search migration that schedules ElasticWikiIndexerWorker jobs for
# every wiki `rid` that still has documents whose `schema_version` is lower
# than SCHEMA_VERSION.
#
# NOTE(review): this class body looks like a flattened diff view, so
# declarations from both sides of the change may be interleaved (two
# `throttle_delay` calls, `batch_size 9_000` next to `def batch_size`,
# SCHEMA_VERSION next to NEW_SCHEMA_VERSION). Every statement is kept, in its
# original order -- confirm which of them belong in the final file.
class ReindexWikiToUpdateAnalyzerForContent < Elastic::Migration
  include Elastic::MigrationHelper
  include Search::Elastic::MigrationReindexBasedOnSchemaVersion

  batched!
  throttle_delay 5.minutes
  retry_on_failure
  batch_size 9_000
  throttle_delay 1.minute

  # Not referenced anywhere else in this class body as shown.
  ELASTIC_TIMEOUT = '5m'
  # Upper bound applied to the computed default in #batch_size.
  MAX_BATCH_SIZE = 50
  # Documents with a lower `schema_version` are treated as not yet migrated.
  SCHEMA_VERSION = 25_04

  # Runs one batch: logs and returns early when nothing is left, otherwise
  # stores the batch size in the migration state (only when it is blank) and
  # enqueues one forced ElasticWikiIndexerWorker job per remaining rid.
  def migrate
    if completed?
      log('Migration Completed', total_remaining: 0)
      return
    end

    batch_size_unset = migration_state[:batch_size].blank?
    set_migration_state(batch_size: batch_size) if batch_size_unset

    remaining_rids_to_reindex.each do |rid|
      # A rid carries the container type and id, e.g. "wiki_project_42".
      parts = rid.match(/wiki_(?<type>project|group)_(?<id>\d+)/)
      # Each job gets a random delay below throttle_delay.
      delay = rand(throttle_delay).seconds

      ElasticWikiIndexerWorker.perform_in(delay, parts[:id], parts[:type].capitalize, force: true)
    end
  end

  # Records the number of outdated documents in the migration state, logs it,
  # and returns true only when that number is zero.
  def completed?
    outdated_docs = remaining_documents_count

    set_migration_state(documents_remaining: outdated_docs)
    log('Checking if migration is finished', total_remaining: outdated_docs)

    outdated_docs == 0
  end

  # Returns the batch size stored in the migration state when present;
  # otherwise the index's shard count, capped at MAX_BATCH_SIZE.
  def batch_size
    persisted = migration_state[:batch_size].presence
    return persisted if persisted

    [get_number_of_shards(index_name: index_name), MAX_BATCH_SIZE].min
  end

  private

  # Returns the `rid` bucket keys (at most `batch_size` buckets are requested)
  # among documents matching #query_with_old_schema_version. Returns an empty
  # array when the response carries no buckets.
  def remaining_rids_to_reindex
    response = client.search(
      index: index_name,
      body: {
        size: 0,
        query: query_with_old_schema_version,
        aggs: {
          rids: { terms: { size: batch_size, field: 'rid' } }
        }
      }
    )

    buckets = response.dig('aggregations', 'rids', 'buckets') || []
    buckets.map { |bucket| bucket['key'] }
  end

  # Refreshes the index, then counts the documents that still match
  # #query_with_old_schema_version.
  def remaining_documents_count
    helper.refresh_index(index_name: index_name)

    response = client.count(index: index_name, body: { query: query_with_old_schema_version })
    response['count']
  end

  # Range query selecting documents whose `schema_version` is below
  # SCHEMA_VERSION.
  def query_with_old_schema_version
    version_range = { lt: SCHEMA_VERSION }

    { range: { schema_version: version_range } }
  end

  # Name of the index this migration reads from.
  def index_name
    Elastic::Latest::WikiConfig.index_name
  end

  DOCUMENT_TYPE = Wiki
  NEW_SCHEMA_VERSION = 2504
end
......@@ -13,7 +13,7 @@ class Indexer
Error = Class.new(StandardError)
BLOB_SCHEMA_VERSION = 23_08
COMMIT_SCHEMA_VERSION = 23_06
WIKI_SCHEMA_VERSION = 24_02
WIKI_SCHEMA_VERSION = 25_04
class << self
def indexer_version
......
......@@ -3,6 +3,157 @@
require 'spec_helper'
require File.expand_path('ee/elastic/migrate/20250113152652_reindex_wiki_to_update_analyzer_for_content.rb')
RSpec.describe ReindexWikiToUpdateAnalyzerForContent, feature_category: :global_search do
it_behaves_like 'a deprecated Advanced Search migration', 20250113152652
# Spec for ReindexWikiToUpdateAnalyzerForContent. Each example runs against a
# wiki index populated from three project wikis and three group wikis, with
# two pages created in each wiki.
RSpec.describe ReindexWikiToUpdateAnalyzerForContent, :elastic_clean, :sidekiq_inline, feature_category: :global_search do
  let(:version) { 20250113152652 }
  let(:migration) { described_class.new(version) }
  let(:helper) { Gitlab::Elastic::Helper.new }
  let(:client) { ::Gitlab::Search::Client.new }
  let(:index_name) { Elastic::Latest::WikiConfig.index_name }

  let_it_be(:project) { create(:project, :wiki_repo) }
  let_it_be(:project2) { create(:project, :wiki_repo) }
  let_it_be(:project3) { create(:project, :wiki_repo) }
  let_it_be(:group) { create(:group) }
  let_it_be(:group2) { create(:group) }
  let_it_be(:group3) { create(:group) }
  let_it_be(:group_wiki) { create(:group_wiki, group: group) }
  let_it_be(:group_wiki2) { create(:group_wiki, group: group2) }
  let_it_be(:group_wiki3) { create(:group_wiki, group: group3) }
  let_it_be(:project_wiki) { create(:project_wiki, project: project) }
  let_it_be(:project_wiki2) { create(:project_wiki, project: project2) }
  let_it_be(:project_wiki3) { create(:project_wiki, project: project3) }

  before do
    stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
    allow(::Gitlab::CurrentSettings).to receive(:elasticsearch_indexes_project?).with(anything).and_return true
    allow(::Gitlab::CurrentSettings).to receive(:elasticsearch_indexes_namespace?).with(anything).and_return true
    # Make the migration use the same helper and client as the spec helpers below.
    allow(migration).to receive_messages(helper: helper, client: client)
    set_elasticsearch_migration_to :reindex_wiki_to_update_analyzer_for_content, including: false

    [project_wiki, project_wiki2, project_wiki3, group_wiki, group_wiki2, group_wiki3].each do |wiki|
      wiki.create_page('index_page', 'Bla bla term')
      wiki.create_page('index_page2', 'Bla bla term')
      wiki.index_wiki_blobs
    end

    ensure_elasticsearch_index! # ensure objects are indexed
  end

  describe 'migration_options' do
    before do
      set_old_schema_version_in_all_documents!
    end

    it 'has migration options set', :aggregate_failures do
      expected_batch_size =
        [migration.get_number_of_shards(index_name: index_name), described_class::MAX_BATCH_SIZE].min

      expect(migration).to be_batched
      expect(migration.batch_size).to eq expected_batch_size
      expect(migration.throttle_delay).to eq(5.minutes)
      expect(migration).to be_retry_on_failure
    end
  end

  # `migrate` is an instance method, hence the `#` prefix.
  describe '#migrate' do
    context 'if migration is completed' do
      it 'performs logging and does not call ElasticWikiIndexerWorker' do
        expect(migration).to receive(:log).with("Setting migration_state to #{{ documents_remaining: 0 }.to_json}").once
        expect(migration).to receive(:log).with('Checking if migration is finished', { total_remaining: 0 }).once
        expect(migration).to receive(:log).with('Migration Completed', { total_remaining: 0 }).once
        expect(ElasticWikiIndexerWorker).not_to receive(:perform_in)

        migration.migrate
      end
    end

    context 'if migration is not completed' do
      let(:batch_size) { migration.batch_size }

      before do
        set_old_schema_version_in_all_documents!
      end

      it 'performs logging and calls ElasticWikiIndexerWorker' do
        # NOTE(review): the factor 3 presumably is the number of indexed
        # documents per rid -- confirm.
        expect(migration).to receive(:log).with(
          "Setting migration_state to #{{ documents_remaining: 3 * total_rids }.to_json}"
        ).once
        expect(migration).to receive(:log).with("Setting migration_state to #{{ batch_size: batch_size }.to_json}").once
        expect(migration).to receive(:log).with(
          'Checking if migration is finished',
          { total_remaining: 3 * total_rids }
        ).once

        delay = a_value_between(0, migration.throttle_delay.seconds)
        expect(ElasticWikiIndexerWorker).to receive(:perform_in)
          .exactly(batch_size).times
          .with(delay, anything, anything, force: true)

        migration.migrate
      end
    end
  end

  describe 'integration test' do
    let(:batch_size) { 2 }

    before do
      set_old_schema_version_in_all_documents!
      allow(migration).to receive(:batch_size).and_return(batch_size)

      # Remove elasticsearch for project2 and group2
      allow(::Gitlab::CurrentSettings).to receive(:elasticsearch_indexes_project?).with(project2).and_return false
      allow(::Gitlab::CurrentSettings).to receive(:elasticsearch_indexes_namespace?).with(group2).and_return false

      # Delete project3 and group3
      project3.delete
      group3.delete
    end

    it 'completes the migration and deletes docs of containers that do not use elasticsearch or were deleted' do
      initial_rids_to_reindex = total_rids

      expect(remaining_rids_to_reindex).to eq initial_rids_to_reindex
      expect(migration).not_to be_completed

      migration.migrate

      expect(migration).not_to be_completed
      expect(remaining_rids_to_reindex).to eq initial_rids_to_reindex - batch_size

      # Keep running batches until the migration reports completion; capped at
      # 10 rounds so a stuck migration fails the example instead of hanging.
      10.times do
        break if migration.completed?

        migration.migrate
        sleep 0.01
      end

      expect(migration).to be_completed
      # Less project3(deleted), group3(deleted), project2(not used elasticsearch), group2(not used elasticsearch)
      expect(total_rids).to eq initial_rids_to_reindex - 4
    end
  end

  # `completed?` is an instance method, hence the `#` prefix.
  describe '#completed?' do
    subject { migration.completed? }

    context 'when all the documents have the new schema_version(2504)' do
      # With the 4.7.0 GITLAB_ELASTICSEARCH_INDEXER_VERSION all the new wikis will have schema_version 2504
      it 'returns true' do
        is_expected.to be true
      end
    end

    context 'when some items are missing new schema_version' do
      before do
        set_old_schema_version_in_all_documents!
      end

      it 'returns false' do
        is_expected.to be false
      end
    end
  end

  # Rewrites `schema_version` to 2402 on every document in the wiki index so
  # that the documents look outdated to the migration.
  def set_old_schema_version_in_all_documents!
    client.update_by_query(index: index_name, refresh: true, conflicts: 'proceed',
      body: { script: { lang: 'painless', source: 'ctx._source.schema_version = 2402' } }
    )
  end

  # Number of `rid` buckets in the wiki index after a refresh. No `size` is
  # passed to the terms aggregation, so the server-side default bucket limit
  # applies.
  def total_rids
    helper.refresh_index(index_name: index_name)
    client.search(
      index: index_name, body: { size: 0, aggs: { rids: { terms: { field: 'rid' } } } }
    ).dig('aggregations', 'rids', 'buckets').size
  end

  # Number of `rid` buckets that still contain documents with a
  # `schema_version` below described_class::NEW_SCHEMA_VERSION.
  def remaining_rids_to_reindex
    helper.refresh_index(index_name: index_name)
    client.search(index: index_name,
      body: { size: 0, query: { range: { schema_version: { lt: described_class::NEW_SCHEMA_VERSION } } },
              aggs: { rids: { terms: { field: 'rid' } } } }).dig('aggregations', 'rids', 'buckets').size
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment