Skip to content
Snippets Groups Projects
Commit d0512a9c authored by Siddharth Dungarwal :two:
Browse files

Merge branch '351381-backfill-blobs-and-wiki-blobs' into 'master'

Add migration for backfilling traversal_ids in blobs and wiki blobs

See merge request !107730



Merged-by: Siddharth Dungarwal <sdungarwal@gitlab.com>
Approved-by: Ian Anderson <ianderson@gitlab.com>
Reviewed-by: Terri Chu <tchu@gitlab.com>
Reviewed-by: Ian Anderson <ianderson@gitlab.com>
Co-authored-by: Terri Chu <tchu@gitlab.com>
Co-authored-by: Siddharth Dungarwal <sd5869@gmail.com>
Co-authored-by: Ian Anderson <ianderson@gitlab.com>
parents e97eae05 fac98710
No related branches found
No related tags found
No related merge requests found
Pipeline #742839518 canceled
Pipeline: GitLab

#742846188

    # frozen_string_literal: true
    # Backfills the `traversal_ids` field on `blob` and `wiki_blob` documents
    # in the target index.
    #
    # Each run looks up to BATCH_SIZE project ids that still own documents
    # without `traversal_ids` and issues one update-by-query per project. The
    # migration reports itself complete once no such document remains.
    class BackfillTraversalIdsToBlobsAndWikiBlobs < Elastic::Migration
      include Elastic::MigrationHelper

      # Maximum number of distinct project ids handled per `migrate` call.
      BATCH_SIZE = 1000
      # Value passed as the `timeout` option of each update-by-query request.
      ELASTIC_TIMEOUT = '5m'
      # Document types this migration touches.
      BLOB_AND_WIKI_BLOB = %w[blob wiki_blob].freeze

      batched!
      throttle_delay 1.minute
      retry_on_failure

      # Processes one batch: finds projects whose blobs/wiki blobs still lack
      # `traversal_ids` and updates their documents.
      #
      # @return [void]
      def migrate
        if completed?
          log "Migration Completed: There are no projects left to add traversal_ids"
          return
        end

        log "Searching for the projects with missing traversal_ids"
        project_ids = projects_with_missing_traversal_ids
        log "Found #{project_ids.size} projects with missing traversal_ids"

        project_ids.each do |project_id|
          # The index can still hold documents of a project that has since been
          # deleted from the database. `Project.find` would raise and abort the
          # whole batch, blocking every remaining project, so skip instead.
          project = Project.find_by_id(project_id)

          if project.nil?
            # NOTE(review): the orphaned documents keep lacking traversal_ids,
            # so `completed?` stays false until they are removed from the
            # index - confirm which cleanup path should handle them.
            log "Project not found: #{project_id}. Skipping traversal_ids update for its documents"
            next
          end

          update_by_query(project)
        end
      end

      # Refreshes the target index and checks whether any blob/wiki blob
      # document is still missing `traversal_ids`.
      #
      # @return [Boolean] true when no such document remains
      def completed?
        helper.refresh_index(index_name: helper.target_name)

        log "Running the count_items_missing_traversal_ids query"
        total_remaining = count_items_missing_traversal_ids

        log "Checking to see if migration is completed based on index counts remaining: #{total_remaining}"
        total_remaining == 0
      end

      private

      # Sets `traversal_ids` on every blob/wiki blob document of +project+.
      #
      # The value is handed to the script through `params` rather than being
      # interpolated into the script source: the source stays identical for
      # every project, so Elasticsearch can compile it once and reuse it, and
      # the value can never be parsed as script code.
      #
      # @param project [Project] provides `id` and `namespace_ancestry`
      # @return the client's update-by-query response
      def update_by_query(project)
        client.update_by_query(
          index: helper.target_name,
          body: {
            query: {
              bool: {
                filter: [
                  { term: { project_id: project.id.to_s } },
                  { terms: { type: BLOB_AND_WIKI_BLOB } }
                ]
              }
            },
            script: {
              lang: "painless",
              source: "ctx._source.traversal_ids = params.traversal_ids",
              params: { traversal_ids: project.namespace_ancestry }
            }
          },
          wait_for_completion: true,
          timeout: ELASTIC_TIMEOUT,
          conflicts: 'proceed'
        )
      end

      # Counts blob/wiki blob documents that have no `traversal_ids` field.
      #
      # @return [Integer] the 'count' value of the count response
      def count_items_missing_traversal_ids
        client.count(
          index: helper.target_name,
          body: { query: missing_traversal_ids_query }
        )['count']
      end

      # Collects ids of projects owning documents without `traversal_ids`,
      # using a terms aggregation on `project_id` capped at BATCH_SIZE buckets.
      #
      # @return [Array] bucket keys (project ids); empty when there are none
      def projects_with_missing_traversal_ids
        results = client.search(
          index: helper.target_name,
          body: {
            size: 0, # only the aggregation is needed, not the hits
            query: missing_traversal_ids_query,
            aggs: {
              project_ids: {
                terms: { size: BATCH_SIZE, field: "project_id" }
              }
            }
          }
        )

        buckets = results.dig('aggregations', 'project_ids', 'buckets') || []
        buckets.map { |bucket| bucket['key'] }
      end

      # Query matching blob/wiki blob documents that lack `traversal_ids`.
      # Shared by the count and the aggregation so both always agree.
      #
      # @return [Hash]
      def missing_traversal_ids_query
        {
          bool: {
            must_not: { exists: { field: "traversal_ids" } },
            must: { terms: { type: BLOB_AND_WIKI_BLOB } }
          }
        }
      end
    end
    # frozen_string_literal: true
    require 'spec_helper'
    require_relative 'migration_shared_examples'
    require File.expand_path('ee/elastic/migrate/20221221110300_backfill_traversal_ids_to_blobs_and_wiki_blobs.rb')
    # Integration spec for the traversal_ids backfill migration: blobs are
    # indexed while the index is pinned to a migration version older than the
    # one under test, then the migration is run and its progress is checked via
    # `completed?` and its log output.
    RSpec.describe BackfillTraversalIdsToBlobsAndWikiBlobs, :elastic, :sidekiq_inline, feature_category: :global_search do
      let(:version) { 20221221110300 }
      let(:old_version_without_traversal_ids) { 20221213090600 }
      let(:helper) { Gitlab::Elastic::Helper.new }
      let(:index_name) { Project.__elasticsearch__.index_name }

      subject(:migration) { described_class.new(version) }

      before do
        stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
        allow(migration).to receive(:helper).and_return(helper)
        ensure_elasticsearch_index!
        helper.delete_migration_record(migration)
      end

      describe 'integration test' do
        let(:projects) { create_list(:project, 3, :repository) }
        let(:migration_completed_message) { 'Migration Completed: There are no projects left to add traversal_ids' }

        before do
          set_elasticsearch_migration_to(old_version_without_traversal_ids, including: false)
          ensure_elasticsearch_index!

          projects.each do |project|
            project.repository.index_commits_and_blobs # ensure objects are indexed
          end

          set_elasticsearch_migration_to(version, including: false)
          ensure_elasticsearch_index!
        end

        it 'backfills traversal_ids for all the remaining documents in a single iteration' do
          expect(migration.completed?).to be_falsey

          migration.migrate

          expect(migration.completed?).to be_truthy

          # A further run must only perform the completion check and log the
          # completion message, hence each message exactly once.
          expect(migration).to receive(:log).with(/Running the count_items_missing_traversal_ids query/).once
          expect(migration).to receive(:log).with(/Checking to see if migration is completed/).once
          expect(migration).to receive(:log).with(/#{migration_completed_message}/).once

          migration.migrate
        end

        context 'with more than one batch' do
          before do
            # Three projects with a batch size of two need two migrate runs.
            stub_const("#{described_class.name}::BATCH_SIZE", 2)
          end

          it 'backfills traversal_ids for all projects in two iterations' do
            expect(migration.completed?).to be_falsey

            # First batch
            # The completion-check messages are logged twice: once inside
            # `migrate` and once by the explicit `completed?` call below. The
            # last pattern matches both the "Searching for ..." and the
            # "Found ..." messages.
            expect(migration).to receive(:log).with(/Running the count_items_missing_traversal_ids query/).twice
            expect(migration).to receive(:log).with(/Checking to see if migration is completed/).twice
            expect(migration).to receive(:log).with(/projects with missing traversal_ids/).twice

            migration.migrate

            expect(migration.completed?).to be_falsey

            # Second batch
            expect(migration).to receive(:log).with(/Running the count_items_missing_traversal_ids query/).twice
            expect(migration).to receive(:log).with(/Checking to see if migration is completed/).twice
            expect(migration).to receive(:log).with(/projects with missing traversal_ids/).twice

            migration.migrate

            expect(migration.completed?).to be_truthy
          end
        end
      end
    end
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Finish editing this message first!
    Please register or sign in to comment