Skip to content
Snippets Groups Projects

Add migration for backfilling traversal_ids in blobs and wiki blobs

Merged Siddharth Dungarwal requested to merge 351381-backfill-blobs-and-wiki-blobs into master
1 unresolved thread
Compare and Show latest version
2 files
+ 112
10
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -3,23 +3,24 @@
class BackfillTraversalIdsToBlobsAndWikiBlobs < Elastic::Migration
# Mixes in Elastic::MigrationHelper (presumably the source of the `client`,
# `helper` and `log` calls used by the methods below — TODO confirm).
include Elastic::MigrationHelper
# NOTE(review): BATCH_SIZE is assigned again a few lines further down; this
# looks like the removed and added sides of an inline diff shown together —
# confirm against the real file, which should define it only once.
BATCH_SIZE = 1000
# Passed as the `timeout` option of the update-by-query request.
ELASTIC_TIMEOUT = '5m'
# Document types matched by the `terms` filters of this migration's queries.
BLOB_AND_WIKI_BLOB = %w[blob wiki_blob].freeze
# Migration DSL declarations; their exact semantics are defined by
# Elastic::Migration, which is not visible here.
batched!
throttle_delay 1.minute
retry_on_failure
BATCH_SIZE = 1000
# Not referenced anywhere in the visible part of this file.
ITERATIONS_PER_RUN = 10
# Runs one pass of the migration: returns early when `completed?` is true,
# otherwise looks up the projects that still have documents without
# traversal_ids and processes each of them.
#
# NOTE(review): several statements below come in near-duplicate pairs (two
# completion log lines, two `project_ids` assignments, `len(...)` vs `.size`,
# an Indexer run next to `update_by_query`). This looks like the removed and
# added lines of an inline diff displayed together — confirm which side is
# current before relying on this listing.
def migrate
if completed?
log "There are no projects left to add traversal_ids"
log "Migration Completed: There are no projects left to add traversal_ids"
return
end
log "Searching for the projects with missing traversal_ids"
project_ids = projects_with_missing_traversal_ids_for_blobs_or_wiki_blobs
# NOTE(review): `len` is not defined in the visible code; the `.size` variant
# two lines below is presumably its replacement — TODO confirm.
log "Found #{len(project_ids)} projects with missing traversal_ids"
project_ids = projects_with_missing_traversal_ids
log "Found #{project_ids.size} projects with missing traversal_ids"
project_ids.each do |project_id|
# Runs the indexer for the project with wiki: false and force: true.
Gitlab::Elastic::Indexer.new(Project.find(project_id), wiki: false, force: true).run
# Writes traversal_ids onto the project's blob and wiki_blob documents
# through an update-by-query request.
update_by_query(Project.find(project_id))
end
end
@@ -28,13 +29,36 @@ def completed?
log "Running the count_items_missing_traversal_ids query"
total_remaining = count_items_missing_traversal_ids
log "Checking to see if migration is completed based on index counts remaining:#{total_remaining}"
log "Checking to see if migration is completed based on index counts remaining: #{total_remaining}"
total_remaining == 0
end
private
# Sets `traversal_ids` on every blob and wiki_blob document that belongs to
# the given project, using a single update-by-query request against
# `helper.target_name`.
#
# The request waits for completion, uses ELASTIC_TIMEOUT as its timeout and
# proceeds past version conflicts instead of aborting.
#
# @param project [#id, #namespace_ancestry] project whose documents are updated
# @return the response of `client.update_by_query`
def update_by_query(project)
  client.update_by_query(
    index: helper.target_name,
    body: {
      query: {
        bool: {
          filter: [
            { term: { project_id: project.id.to_s } },
            { terms: { type: BLOB_AND_WIKI_BLOB } }
          ]
        }
      },
      script: {
        lang: "painless",
        # The value is passed as a script parameter rather than interpolated
        # into the source: the source stays identical for every project, so
        # it is compiled once, and a quote in the value cannot break or alter
        # the script. `to_s` keeps the string coercion that interpolation
        # performed.
        source: "ctx._source.traversal_ids = params.traversal_ids",
        params: { traversal_ids: project.namespace_ancestry.to_s }
      }
    },
    wait_for_completion: true,
    timeout: ELASTIC_TIMEOUT,
    conflicts: 'proceed'
  )
end
def count_items_missing_traversal_ids
client.count(
index: helper.target_name,
@@ -42,14 +66,14 @@ def count_items_missing_traversal_ids
query: {
bool: {
must_not: { exists: { field: "traversal_ids" } },
must: { terms: { type: %w[blob wiki_blob] } }
must: { terms: { type: BLOB_AND_WIKI_BLOB } }
}
}
}
)['count']
end
def projects_with_missing_traversal_ids_for_blobs_or_wiki_blobs
def projects_with_missing_traversal_ids
results = client.search(
index: helper.target_name,
body: {
@@ -57,7 +81,7 @@ def projects_with_missing_traversal_ids_for_blobs_or_wiki_blobs
query: {
bool: {
must_not: { exists: { field: "traversal_ids" } },
must: { terms: { type: %w[blob wiki_blob] } }
must: { terms: { type: BLOB_AND_WIKI_BLOB } }
}
},
aggs: {
Loading