Skip to content
Snippets Groups Projects
Commit 9bd405f4 authored by Ravi Kumar's avatar Ravi Kumar Committed by Dmitry Gruzd
Browse files

Initiate the process of group wiki indexing

This MR indexes newly created group wikis, and updates to existing
group wikis, in a separate wiki index.

Changelog: added
MR: !120171
EE: true
parent e0a3414f
No related branches found
No related tags found
1 merge request!120171Indexing for newly created GroupWiki
Showing
with 227 additions and 49 deletions
......@@ -21,6 +21,7 @@ Search/NamespacedClass:
- 'ee/app/graphql/types/iteration_searchable_field_enum.rb'
- 'ee/app/helpers/ee/search_helper.rb'
- 'ee/app/models/concerns/elastic/application_versioned_search.rb'
- 'ee/app/models/concerns/elastic/maintain_elasticsearch_on_group_update.rb'
- 'ee/app/models/concerns/elastic/namespace_update.rb'
- 'ee/app/models/concerns/elastic/projects_search.rb'
- 'ee/app/models/concerns/elastic/repositories_search.rb'
......
......@@ -539,6 +539,8 @@
- 1
- - search_project_index_integrity
- 1
- - search_wiki_elastic_delete_group_wiki
- 1
- - search_zoekt_namespace_indexer
- 1
- - security_auto_fix
......
# frozen_string_literal: true
module Elastic
  # Concern that keeps the Elasticsearch group wiki documents in step with the
  # lifecycle of the including record.
  #
  # It registers three commit callbacks, each guarded by
  # +should_index_group_wiki?+:
  # * after create  - enqueue a forced wiki indexing job
  # * after update  - enqueue a forced wiki indexing job, but only when the
  #                   visibility level was changed by the update
  # * after destroy - enqueue a job that deletes the group wiki documents
  module MaintainElasticsearchOnGroupUpdate
    extend ActiveSupport::Concern

    included do
      after_create_commit :sync_group_wiki_in_elastic, if: :should_index_group_wiki?
      after_update_commit :maintain_group_wiki_permissions_in_elastic, if: :should_index_group_wiki?
      after_destroy_commit :remove_group_wiki_in_elastic, if: :should_index_group_wiki?
    end

    private

    # Guard shared by every callback above. All three conditions must hold:
    # the :maintain_group_wiki_index feature flag is enabled for this record,
    # the record uses Elasticsearch, and wikis live in their own index.
    def should_index_group_wiki?
      Feature.enabled?(:maintain_group_wiki_index, self) &&
        use_elasticsearch? &&
        ::Wiki.use_separate_indices?
    end

    # Enqueues a forced wiki indexing job for this record.
    def sync_group_wiki_in_elastic
      ElasticWikiIndexerWorker.perform_async(id, self.class.name, force: true)
    end

    # Re-syncs the wiki documents only when the committed update touched the
    # visibility level; any other update is ignored.
    def maintain_group_wiki_permissions_in_elastic
      return unless visibility_level_previously_changed?

      sync_group_wiki_in_elastic
    end

    # Enqueues the job that deletes this record's wiki documents.
    def remove_group_wiki_in_elastic
      ::Search::Wiki::ElasticDeleteGroupWikiWorker.perform_async(id)
    end
  end
end
......@@ -9,7 +9,7 @@ module WikiRepositoriesSearch
delegate(:delete_index_for_commits_and_blobs, :elastic_search, to: :__elasticsearch__)
def index_wiki_blobs
ElasticWikiIndexerWorker.perform_async(project.id, project.class.name)
ElasticWikiIndexerWorker.perform_async(container.id, container.class.name)
end
end
end
......@@ -17,6 +17,7 @@ module Group
include CanMoveRepositoryStorage
include ReactiveCaching
include IssueParent
include Elastic::MaintainElasticsearchOnGroupUpdate
ALLOWED_ACTIONS_TO_USE_FILTERING_OPTIMIZATION = [:read_epic, :read_confidential_epic].freeze
EPIC_BATCH_SIZE = 500
......@@ -43,7 +44,7 @@ module Group
accepts_nested_attributes_for :value_stream_dashboard_aggregation, update_only: true
has_one :analytics_dashboards_configuration_project, through: :analytics_dashboards_pointer, source: :target_project
has_one :scim_oauth_access_token
has_one :index_status, class_name: 'Elastic::GroupIndexStatus', foreign_key: :namespace_id
has_one :index_status, class_name: 'Elastic::GroupIndexStatus', foreign_key: :namespace_id, dependent: :destroy
has_many :external_audit_event_destinations, class_name: "AuditEvents::ExternalAuditEventDestination", foreign_key: 'namespace_id'
has_many :google_cloud_logging_configurations, class_name: "AuditEvents::GoogleCloudLoggingConfiguration",
foreign_key: 'namespace_id',
......@@ -86,7 +87,7 @@ module Group
delegate :ai_settings_allowed?, to: :namespace_settings
delegate :wiki_access_level=, to: :group_feature, allow_nil: true
delegate :wiki_access_level, :wiki_access_level=, to: :group_feature, allow_nil: true
# Use +checked_file_template_project+ instead, which implements important
# visibility checks
......
......@@ -12,6 +12,11 @@ module FeatureSetting
attribute :wiki_access_level, default: -> { Featurable::ENABLED }
after_update_commit :maintain_group_wiki_permissions_in_elastic, if: -> {
group.use_elasticsearch? && ::Wiki.use_separate_indices? &&
::Feature.enabled?(:maintain_group_wiki_index, group)
}
def wiki_access_level=(value)
value = ::Groups::FeatureSetting.access_level_from_str(value) if %w[disabled private enabled].include?(value)
raise ArgumentError, "Invalid wiki_access_level \"#{value}\"" unless %w[0 10 20].include?(value.to_s)
......@@ -19,6 +24,14 @@ def wiki_access_level=(value)
write_attribute(:wiki_access_level, value)
end
end
private
# Enqueues a forced wiki indexing job for the owning group, but only when the
# just-committed update changed +wiki_access_level+. Returns nil otherwise.
def maintain_group_wiki_permissions_in_elastic
  if wiki_access_level_previously_changed?
    ElasticWikiIndexerWorker.perform_async(group.id, group.class.name, force: true)
  end
end
end
end
end
......@@ -5,12 +5,6 @@ module ProjectWiki
extend ActiveSupport::Concern
extend ::Gitlab::Utils::Override
prepended do
# TODO: Move this into EE::Wiki once we implement ES support for group wikis.
# https://gitlab.com/gitlab-org/gitlab/-/issues/207889
include Elastic::WikiRepositoriesSearch
end
override :after_wiki_activity
def after_wiki_activity
super
......
......@@ -107,6 +107,10 @@ def update_root_ref(remote_url, authorization)
nil
end
# Returns the group of this object's container.
#
# The lookup goes through +try+, so the result is nil (rather than an error)
# when the container is nil or does not respond to +group+.
def group
container.try(:group)
end
private
def diverged?(branch_name, remote_ref)
......
......@@ -3,6 +3,9 @@
module EE
module Wiki
extend ActiveSupport::Concern
prepended do
include Elastic::WikiRepositoriesSearch
end
# No need to have a Kerberos Web url. Kerberos URL will be used only to
# clone
......
......@@ -55,4 +55,9 @@ def after_post_receive
# Returns the GroupWikis::GitGarbageCollectWorker class.
# NOTE(review): judging by the method name, callers use this as the worker
# class for git garbage collection - confirm against the call sites.
def git_garbage_collect_worker_klass
GroupWikis::GitGarbageCollectWorker
end
# Overrides the inherited +use_elasticsearch?+ and answers with the group's
# own +use_elasticsearch?+. Because of the safe navigation operator the
# result is nil (falsy) when +group+ is nil.
override :use_elasticsearch?
def use_elasticsearch?
group&.use_elasticsearch?
end
end
......@@ -9,11 +9,12 @@ module WikiPushService
def execute
super
# TODO: Support Elasticsearch indexing for group wikis
# https://gitlab.com/gitlab-org/gitlab/-/issues/207889
return unless wiki.is_a?(::ProjectWiki)
return unless wiki.container.use_elasticsearch?
return unless default_branch_changes.any?
if (::Feature.disabled?(:maintain_group_wiki_index, wiki.container) || !::Wiki.use_separate_indices?) &&
wiki.is_a?(::GroupWiki)
return
end
return unless wiki.container.use_elasticsearch? && default_branch_changes.any?
wiki.index_wiki_blobs
end
......
......@@ -47,6 +47,13 @@ def update_elasticsearch_hooks
project.invalidate_elasticsearch_indexes_cache! if ::Gitlab::CurrentSettings.elasticsearch_limit_indexing?
::Elastic::ProcessInitialBookkeepingService.backfill_projects!(project) if project.maintaining_elasticsearch?
end
return unless ::Wiki.use_separate_indices? && ::Feature.enabled?(:maintain_group_wiki_index, group)
group.self_and_descendants.find_each.with_index do |grp, idx|
interval = idx % ElasticWikiIndexerWorker::MAX_JOBS_PER_HOUR
ElasticWikiIndexerWorker.perform_in(interval, grp.id, grp.class.name, { force: true })
end
end
end
end
......
......@@ -1695,6 +1695,15 @@
:weight: 1
:idempotent: true
:tags: []
- :name: search_wiki_elastic_delete_group_wiki
:worker_name: Search::Wiki::ElasticDeleteGroupWikiWorker
:feature_category: :global_search
:has_external_dependencies: false
:urgency: :throttled
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: search_zoekt_namespace_indexer
:worker_name: Search::Zoekt::NamespaceIndexerWorker
:feature_category: :global_search
......
......@@ -3,7 +3,8 @@
# Concern for pausing/unpausing elasticsearch indexing workers
module Elastic
module IndexingControl
WORKERS = [ElasticCommitIndexerWorker, ElasticDeleteProjectWorker, ElasticWikiIndexerWorker].freeze
WORKERS = [ElasticCommitIndexerWorker, ElasticDeleteProjectWorker, ElasticWikiIndexerWorker,
Search::Wiki::ElasticDeleteGroupWikiWorker].freeze
def perform(*args)
if Elastic::IndexingControl.non_cached_pause_indexing? && WORKERS.include?(self.class)
......
......@@ -19,8 +19,10 @@ def perform(namespace_id, operation)
case operation.to_s
when /index/
index_projects(namespace)
index_group_wikis(namespace) if should_maintain_group_wiki_index?(namespace)
when /delete/
delete_from_index(namespace)
delete_group_wikis(namespace) if should_maintain_group_wiki_index?(namespace)
end
end
......@@ -32,10 +34,28 @@ def index_projects(namespace)
end
end
# Schedules a forced wiki indexing job for +namespace+ and every one of its
# descendants.
#
# Each job is scheduled with a delay equal to its position in the iteration
# modulo ElasticWikiIndexerWorker::MAX_JOBS_PER_HOUR, so the delays cycle
# through 0...MAX_JOBS_PER_HOUR instead of all jobs firing at once.
def index_group_wikis(namespace)
  namespace.self_and_descendants.find_each.each_with_index do |group, position|
    ElasticWikiIndexerWorker.perform_in(
      position % ElasticWikiIndexerWorker::MAX_JOBS_PER_HOUR,
      group.id,
      group.class.name,
      { force: true }
    )
  end
end
# Enqueues project deletion jobs for every project of +namespace+, one bulk
# enqueue per batch. Each job receives the pair [project id, project es_id].
def delete_from_index(namespace)
  namespace.all_projects.find_in_batches do |projects|
    ElasticDeleteProjectWorker.bulk_perform_async( # rubocop:disable Scalability/BulkPerformWithContext
      projects.map { |project| [project.id, project.es_id] }
    )
  end
end
# Schedules a group wiki deletion job for +namespace+ and every one of its
# descendants.
#
# The delay given to each job is its iteration position modulo
# Search::Wiki::ElasticDeleteGroupWikiWorker::MAX_JOBS_PER_HOUR, so the
# delays cycle through 0...MAX_JOBS_PER_HOUR.
def delete_group_wikis(namespace)
  namespace.self_and_descendants.find_each.each_with_index do |group, position|
    Search::Wiki::ElasticDeleteGroupWikiWorker.perform_in(
      position % Search::Wiki::ElasticDeleteGroupWikiWorker::MAX_JOBS_PER_HOUR,
      group.id
    )
  end
end
# Tells whether group wiki documents should be maintained for +namespace+.
#
# All three checks must pass, evaluated in this order with short-circuiting:
# the namespace is a group namespace, wikis use separate indices, and the
# :maintain_group_wiki_index feature flag is enabled for the namespace.
def should_maintain_group_wiki_index?(namespace)
  namespace.group_namespace? &&
    Wiki.use_separate_indices? &&
    Feature.enabled?(:maintain_group_wiki_index, namespace)
end
end
# frozen_string_literal: true
class ElasticWikiIndexerWorker
MAX_JOBS_PER_HOUR = 3600
include ApplicationWorker
data_consistency :delayed
......@@ -27,19 +28,17 @@ def perform(container_id, container_type, options = {})
end
container_class = container_type.safe_constantize
unless container_class == Project
logger.error(message: 'ElasticWikiIndexerWorker only accepts Project',
unless container_class == Project || container_class == Group
logger.error(message: 'ElasticWikiIndexerWorker only accepts Project and Group',
container_id: container_id, container_type: container_type)
return true
end
return true unless Gitlab::CurrentSettings.elasticsearch_indexing?
container = container_class.find_by_id(container_id)
unless container&.use_elasticsearch?
es_id = Gitlab::Elastic::Helper.build_es_id(es_type: container_class.es_type, target_id: container_id)
ElasticDeleteProjectWorker.perform_async(container_id, es_id)
container = container_class.find(container_id)
unless container.use_elasticsearch?
cleanup_container_elastic_documents(container_id, container_type)
return true
end
......@@ -61,6 +60,8 @@ def perform(container_id, container_type, options = {})
when 'Project'
project_id = container_id
group_id = container.group&.id
when 'Group'
group_id = container_id
end
logger.info(
project_id: project_id,
......@@ -73,5 +74,23 @@ def perform(container_id, container_type, options = {})
end
@ret
rescue ActiveRecord::RecordNotFound
logger.warn(message: 'Container record not found', container_type: container_type, container_id: container_id)
cleanup_container_elastic_documents(container_id, container_type)
true
end
private
# Enqueues the job that removes the container's documents from Elasticsearch.
#
# A 'Project' container type goes to ElasticDeleteProjectWorker together with
# the es_id built for it; every other container type goes to the group wiki
# deletion worker, which only needs the container id.
def cleanup_container_elastic_documents(container_id, container_type)
  return Search::Wiki::ElasticDeleteGroupWikiWorker.perform_async(container_id) unless container_type == 'Project'

  ElasticDeleteProjectWorker.perform_async(container_id, es_id(container_id, container_type))
end
# Builds the Elasticsearch id for a container via
# Gitlab::Elastic::Helper.build_es_id.
#
# The es_type is read from the class named by +container_type+; it is nil
# when that name does not resolve to a class.
def es_id(container_id, container_type)
  container_es_type = container_type.safe_constantize&.es_type

  Gitlab::Elastic::Helper.build_es_id(es_type: container_es_type, target_id: container_id)
end
end
# frozen_string_literal: true
# Support bulk delete
module Search
  module Wiki
    # Worker that deletes every document of one group's wiki from the wiki
    # index with a single delete-by-query request.
    class ElasticDeleteGroupWikiWorker
      # Callers that schedule many of these jobs use this value as the
      # modulus for the per-job delay passed to +perform_in+.
      MAX_JOBS_PER_HOUR = 3600

      include ApplicationWorker

      data_consistency :delayed
      prepend Elastic::IndexingControl
      feature_category :global_search
      urgency :throttled
      idempotent!

      # @param group_id [Integer] id of the group whose wiki documents are removed
      def perform(group_id)
        remove_group_wiki_documents(group_id)
      end

      private

      # Issues the delete-by-query against the wiki index. Documents are
      # matched on rid "wiki_group_<id>" and the request is routed with
      # "group_<id>"; version conflicts do not abort it (conflicts: 'proceed').
      def remove_group_wiki_documents(group_id)
        rid_filter = { term: { rid: "wiki_group_#{group_id}" } }
        request = {
          index: Elastic::Latest::WikiConfig.index_name,
          routing: "group_#{group_id}",
          conflicts: 'proceed',
          body: { query: { bool: { filter: rid_filter } } }
        }

        Gitlab::Elastic::Helper.default.client.delete_by_query(request)
      end
    end
  end
end
---
name: maintain_group_wiki_index
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/120171
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/414526
milestone: '16.1'
type: development
group: group::global search
default_enabled: false
......@@ -12,7 +12,7 @@ def methods_for_all_write_targets
end
def es_parent
"project_#{project_id}"
project_id ? "project_#{project_id}" : "group_#{group_id}"
end
def elastic_search(query, type: 'all', page: 1, per: 20, options: {})
......@@ -32,41 +32,31 @@ def blob_aggregations(query, options)
self.class.blob_aggregations(query, repository_specific_options(options))
end
# When wiki is true and the migrate_wikis_to_separate_index migration has finished:
#   - the index is the separate wiki index (#{env}-wikis)
#   - the rid is wiki_project_#{id} for a ProjectWiki and wiki_group_#{id} for a GroupWiki
# If the add_suffix_project_in_wiki_rid migration has not finished, documents may still carry the old rid
# without the project/group prefix, so delete_query_by_rid is also run with the rid 'wiki_#{project_id}'.
def delete_index_for_commits_and_blobs(wiki: false)
types = wiki ? %w[wiki_blob] : %w[commit blob]
if (wiki && ::Elastic::DataMigrationService.migration_has_finished?(:migrate_wikis_to_separate_index)) || types.include?('commit')
index, rid = if wiki
output = [::Elastic::Latest::WikiConfig.index_name]
output << if ::Elastic::DataMigrationService.migration_has_finished?(:add_suffix_project_in_wiki_rid)
"wiki_project_#{project_id}"
else
"wiki_#{project_id}"
end
[::Elastic::Latest::WikiConfig.index_name, "wiki_#{es_parent}"]
else
[::Elastic::Latest::CommitConfig.index_name, project_id]
end
response = client.delete_by_query(
index: index,
routing: es_parent,
conflicts: 'proceed',
body: {
query: {
bool: {
filter: [
{
term: {
rid: rid
}
}
]
}
}
}
)
return response if wiki
response = delete_query_by_rid(index, rid)
# Consider to delete wikis by older rid(without suffix _project) as well
if wiki && project_id && !::Elastic::DataMigrationService.migration_has_finished?(:add_suffix_project_in_wiki_rid)
response = delete_query_by_rid(index, "wiki_#{project_id}")
end
return response if wiki # if condition can be removed once the blob gets migrated to the separate index
end
# This delete_by_query can be removed completely once the blob gets migrated to the separate index
client.delete_by_query(
index: index_name,
routing: es_parent,
......@@ -110,6 +100,27 @@ def repository_specific_options(options)
options
end
# Runs a delete-by-query on +index+ that removes every document whose +rid+
# field equals the given +rid+.
#
# The request is routed with +es_parent+ and uses conflicts: 'proceed' so
# version conflicts do not abort it. Returns the client's response.
def delete_query_by_rid(index, rid)
  rid_term = { term: { rid: rid } }
  request_body = { query: { bool: { filter: [rid_term] } } }

  client.delete_by_query(index: index, routing: es_parent, conflicts: 'proceed', body: request_body)
end
end
end
end
......@@ -6,7 +6,9 @@ class RepositoryInstanceProxy < ApplicationInstanceProxy
include GitInstanceProxy
delegate :project, to: :target
delegate :id, to: :project, prefix: true
delegate :group, to: :target
delegate :id, to: :project, prefix: true, allow_nil: true
delegate :id, to: :group, prefix: true, allow_nil: true
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {}, preload_method: nil)
response = elastic_search(query, type: 'commit', options: options, page: page, per: per_page)[:commits][:results]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment