feat: opensearch indexer support

Description

This adds support for the OpenSearch indexer in Chunk mode. The existing Elasticsearch indexer is 100% compatible with OpenSearch, so I'm making it a bit more generic but keeping it under the elasticsearch package.

Test

Run OpenSearch locally with OpenSearch Dashboards:

Reference:

Set up OpenSearch

PUT gitlab_active_context_code_0
{
  "settings": {
    "index.knn": true,
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "content": { "type": "text" },
      "embeddings_v1": {
        "type": "knn_vector",
        "dimension": 768,
        "method": {
          "name": "hnsw",
          "space_type": "cosinesimil",
          "engine": "lucene",
          "parameters": {
            "m": 16,
            "ef_construction": 100
          }
        }
      },
      "id": { "type": "keyword" },
      "language": { "type": "keyword" },
      "name": { "type": "text" },
      "path": { "type": "keyword" },
      "project_id": { "type": "long" },
      "reindexing": { "type": "boolean" },
      "source": { "type": "keyword" },
      "type": { "type": "short" }
    }
  }
}
PUT gitlab_active_context_code_1
{
  "settings": {
    "index.knn": true,
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "content": { "type": "text" },
      "embeddings_v1": {
        "type": "knn_vector",
        "dimension": 768,
        "method": {
          "name": "hnsw",
          "space_type": "cosinesimil",
          "engine": "lucene",
          "parameters": {
            "m": 16,
            "ef_construction": 100
          }
        }
      },
      "id": { "type": "keyword" },
      "language": { "type": "keyword" },
      "name": { "type": "text" },
      "path": { "type": "keyword" },
      "project_id": { "type": "long" },
      "reindexing": { "type": "boolean" },
      "source": { "type": "keyword" },
      "type": { "type": "short" }
    }
  }
}
POST _aliases
{
  "actions": [
    { "add": { "index": "gitlab_active_context_code_0", "alias": "gitlab_active_context_code" } },
    { "add": { "index": "gitlab_active_context_code_1", "alias": "gitlab_active_context_code" } }
  ]
}

Run Indexer

Note

The command below uses port 9202 and the "opensearch" adapter.

$ make && GITLAB_INDEXER_MODE=chunk GITLAB_INDEXER_DEBUG_LOGGING=1 ./bin/gitlab-elasticsearch-indexer -adapter "opensearch" -connection '{"url": ["http://localhost:9202"]}' -options '{
  "timeout": "30m",
  "chunk_size": 1000,
  "gitaly_batch_size": 1000,
  "from_sha": "",
  "to_sha": "",
  "project_id": 1,
  "partition_name": "gitlab_active_context_code",
  "partition_number": 0,
  "gitaly_config": {
    "address": "unix:/Users/arturo/projects/gdk/praefect.socket",
    "storage": "default",
    "relative_path": "@hashed/d4/73/d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35.git",
    "project_path": "gitlab-org/gitlab-test"
  }
}'

Verification

GET gitlab_active_context_code/_count
GET gitlab_active_context_code/_search

Related to gitlab#545483 (closed)

Edited by Arturo Herrero

Merge request reports

Loading