Skip to content
Snippets Groups Projects

Add migration for backfilling traversal_ids in blobs and wiki blobs

Merged Siddharth Dungarwal requested to merge 351381-backfill-blobs-and-wiki-blobs into master
Compare and
2 files
+ 177
0
Compare changes
  • Side-by-side
  • Inline
Files
2
# frozen_string_literal: true
# Backfills the `traversal_ids` field on `blob` and `wiki_blob` documents.
#
# Each run looks for documents of those types that have no `traversal_ids`
# field, groups them by `project_id`, and issues one update_by_query per
# project that writes the project's `namespace_ancestry` into the field.
# The migration reports itself complete once no such documents remain.
class BackfillTraversalIdsToBlobsAndWikiBlobs < Elastic::Migration
  include Elastic::MigrationHelper

  # Maximum number of project buckets requested from the terms aggregation
  # in a single run.
  BATCH_SIZE = 1000
  # Sent as the `timeout` option of every update_by_query request.
  ELASTIC_TIMEOUT = '5m'
  # Document types this migration operates on.
  BLOB_AND_WIKI_BLOB = %w[blob wiki_blob].freeze
  # The script source is constant and the value is supplied through `params`,
  # so Elasticsearch compiles the script once instead of once per project.
  # Interpolating the value into the source would produce a distinct script
  # for every project and can hit the script compilation rate limit.
  UPDATE_SCRIPT = 'ctx._source.traversal_ids = params.traversal_ids'

  batched!
  throttle_delay 1.minute
  retry_on_failure

  # Processes one batch: finds the projects that still have documents without
  # `traversal_ids` and updates the documents of each of them.
  def migrate
    if completed?
      log "Migration Completed: There are no projects left to add traversal_ids"
      return
    end

    log "Searching for the projects with missing traversal_ids"
    project_ids = projects_with_missing_traversal_ids
    log "Found #{project_ids.size} projects with missing traversal_ids"

    project_ids.each do |project_id|
      # The index may still hold documents for a project that no longer exists
      # in the database. `Project.find` would raise and abort the whole batch
      # (on every retry), so look the record up without raising and skip it.
      # NOTE(review): documents of such projects keep counting as "missing"
      # in `completed?` until they are removed from the index by other means.
      project = Project.find_by_id(project_id)

      unless project
        log "Project not found: #{project_id}. Skipping traversal_ids backfill for its documents"
        next
      end

      update_by_query(project)
    end
  end

  # @return [Boolean] true when no blob / wiki_blob document lacks
  #   `traversal_ids`. The index is refreshed first so the count reflects
  #   updates written by earlier batches.
  def completed?
    helper.refresh_index(index_name: helper.target_name)

    log "Running the count_items_missing_traversal_ids query"
    total_remaining = count_items_missing_traversal_ids
    log "Checking to see if migration is completed based on index counts remaining: #{total_remaining}"

    total_remaining == 0
  end

  private

  # Sets `traversal_ids` on every blob / wiki_blob document of +project+.
  #
  # @param project [Project] project whose `namespace_ancestry` is written
  # @return the response of the update_by_query request
  def update_by_query(project)
    client.update_by_query(
      index: helper.target_name,
      body: {
        query: {
          bool: {
            filter: [
              { term: { project_id: project.id.to_s } },
              { terms: { type: BLOB_AND_WIKI_BLOB } }
            ]
          }
        },
        script: {
          lang: "painless",
          source: UPDATE_SCRIPT,
          params: { traversal_ids: project.namespace_ancestry }
        }
      },
      wait_for_completion: true,
      timeout: ELASTIC_TIMEOUT,
      # Version conflicts on individual documents do not abort the request.
      conflicts: 'proceed'
    )
  end

  # @return [Integer] number of blob / wiki_blob documents that have no
  #   `traversal_ids` field
  def count_items_missing_traversal_ids
    client.count(
      index: helper.target_name,
      body: {
        query: {
          bool: {
            must_not: { exists: { field: "traversal_ids" } },
            must: { terms: { type: BLOB_AND_WIKI_BLOB } }
          }
        }
      }
    )['count']
  end

  # Runs a terms aggregation on `project_id` over the documents that lack
  # `traversal_ids`.
  #
  # @return [Array] bucket keys (project ids), at most BATCH_SIZE entries;
  #   empty when the response carries no buckets
  def projects_with_missing_traversal_ids
    results = client.search(
      index: helper.target_name,
      body: {
        # Only the aggregation is needed, not the matching documents.
        size: 0,
        query: {
          bool: {
            must_not: { exists: { field: "traversal_ids" } },
            must: { terms: { type: BLOB_AND_WIKI_BLOB } }
          }
        },
        aggs: {
          project_ids: {
            terms: { size: BATCH_SIZE, field: "project_id" }
          }
        }
      }
    )

    buckets = results.dig('aggregations', 'project_ids', 'buckets') || []
    buckets.map { |bucket| bucket['key'] }
  end
end
Loading