Skip to content

Improve performance of searching by BuildNames

The following discussion from !151465 (merged) should be addressed:

Ci::BuildName.include(EachBatch)

# Building a custom order definition that is keyset pagination aware
order = Gitlab::Pagination::Keyset::Order.build(
  [
    Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
      attribute_name: 'build_id',
      order_expression: Ci::BuildName.arel_table[:build_id].desc
    ),
    Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
      attribute_name: 'partition_id',
      order_expression: Ci::BuildName.arel_table[:partition_id].desc
    )
  ])

# Base scope, search in a given project
scope = Ci::BuildName.where(project_id: 278964).order(order)

# Distinct search of build names
array_scope = Ci::BuildName.where(project_id: 278964).loose_index_scan(column: :name).select(:name).where("LOWER(name) like 'gdk%'")

array_mapping_scope = -> (name_expression) { Ci::BuildName.where(Ci::BuildName.arel_table[:name].eq(name_expression)) }

build_name_scope = Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder.new(
  scope: scope,
  array_scope: array_scope,
  array_mapping_scope: array_mapping_scope
).execute

# Subquery for loading 1 ci_builds row for 1 build_names row
ci_builds_query = Ci::Build
  .where("status NOT IN ('created')")
  .where("id = p_ci_build_names.build_id and partition_id = p_ci_build_names.partition_id")
  .limit(1)

# Select from ci_builds model since we want ci_builds records
builds = ci_builds_query = Ci::Build
  # by using lateral, we ensure that the sort order is not changing
  # The build_name_scope query is already sorted, here we just load the associated CI builds query (N+1)
  .from("(#{build_name_scope.to_sql}) p_ci_build_names, LATERAL (#{ci_builds_query.to_sql}) p_ci_builds")
  .limit(100)

puts builds.to_a

!151465 (comment 1923039706)