Skip to content

Advanced Search: Use reverse filename index

What does this MR do and why?

This feature is behind a feature flag called elastic_file_name_reverse_optimization.

Disclaimer: You should only use this feature flag with a newly created or reindexed index. The current plan is to reindex gitlab-production after this MR is merged; see gitlab-com/gl-infra/production#6116 (closed).

This MR adds a new multi-field to our main index mappings in order to improve the efficiency of the Advanced Search extension filter. Currently it uses a wildcard search on path (for example, *.rb), which is really expensive. With the new multi-field, file_name is also indexed in reverse, so the extension filter can use a prefix query on the reversed value instead of a leading wildcard (for example, extension:js becomes a prefix match for sj. on blob.file_name.reverse), which improves query performance.

Click to see old query
{
  "query": {
    "bool": {
      "must": {
        "simple_query_string": {
          "_name": "blob:match:search_terms",
          "fields": [
            "blob.content",
            "blob.file_name",
            "blob.path"
          ],
          "query": "*",
          "default_operator": "and"
        }
      },
      "must_not": [],
      "should": [],
      "filter": [
        {
          "has_parent": {
            "_name": "blob:authorized:project",
            "parent_type": "project",
            "query": {
              "bool": {
                "should": [
                  {
                    "bool": {
                      "filter": [
                        {
                          "term": {
                            "visibility_level": {
                              "_name": "blob:authorized:project:any",
                              "value": 0
                            }
                          }
                        },
                        {
                          "terms": {
                            "_name": "blob:authorized:project:repository:enabled_or_private",
                            "repository_access_level": [
                              20,
                              10
                            ]
                          }
                        }
                      ]
                    }
                  },
                  {
                    "bool": {
                      "_name": "blob:authorized:project:visibility:10:repository:access_level",
                      "filter": [
                        {
                          "term": {
                            "visibility_level": {
                              "_name": "blob:authorized:project:visibility:10",
                              "value": 10
                            }
                          }
                        },
                        {
                          "terms": {
                            "_name": "blob:authorized:project:visibility:10:repository:access_level:enabled_or_private",
                            "repository_access_level": [
                              20,
                              10
                            ]
                          }
                        }
                      ]
                    }
                  },
                  {
                    "bool": {
                      "_name": "blob:authorized:project:visibility:20:repository:access_level",
                      "filter": [
                        {
                          "term": {
                            "visibility_level": {
                              "_name": "blob:authorized:project:visibility:20",
                              "value": 20
                            }
                          }
                        },
                        {
                          "terms": {
                            "_name": "blob:authorized:project:visibility:20:repository:access_level:enabled_or_private",
                            "repository_access_level": [
                              20,
                              10
                            ]
                          }
                        }
                      ]
                    }
                  }
                ]
              }
            }
          }
        },
        {
          "term": {
            "type": {
              "_name": "doc:is_a:blob",
              "value": "blob"
            }
          }
        },
        {
          "wildcard": {
            "blob.path": "*.js"
          }
        }
      ]
    }
  },
  "size": 20,
  "from": 0,
  "sort": [
    "_score"
  ],
  "highlight": {
    "pre_tags": [
      "gitlabelasticsearch→"
    ],
    "post_tags": [
      "←gitlabelasticsearch"
    ],
    "number_of_fragments": 0,
    "fields": {
      "blob.content": {},
      "blob.file_name": {}
    }
  }
}
Click to see new query
{
  "query": {
    "bool": {
      "must": {
        "simple_query_string": {
          "_name": "blob:match:search_terms",
          "fields": [
            "blob.content",
            "blob.file_name",
            "blob.path"
          ],
          "query": "*",
          "default_operator": "and"
        }
      },
      "must_not": [],
      "should": [],
      "filter": [
        {
          "has_parent": {
            "_name": "blob:authorized:project",
            "parent_type": "project",
            "query": {
              "bool": {
                "should": [
                  {
                    "bool": {
                      "filter": [
                        {
                          "term": {
                            "visibility_level": {
                              "_name": "blob:authorized:project:any",
                              "value": 0
                            }
                          }
                        },
                        {
                          "terms": {
                            "_name": "blob:authorized:project:repository:enabled_or_private",
                            "repository_access_level": [
                              20,
                              10
                            ]
                          }
                        }
                      ]
                    }
                  },
                  {
                    "bool": {
                      "_name": "blob:authorized:project:visibility:10:repository:access_level",
                      "filter": [
                        {
                          "term": {
                            "visibility_level": {
                              "_name": "blob:authorized:project:visibility:10",
                              "value": 10
                            }
                          }
                        },
                        {
                          "terms": {
                            "_name": "blob:authorized:project:visibility:10:repository:access_level:enabled_or_private",
                            "repository_access_level": [
                              20,
                              10
                            ]
                          }
                        }
                      ]
                    }
                  },
                  {
                    "bool": {
                      "_name": "blob:authorized:project:visibility:20:repository:access_level",
                      "filter": [
                        {
                          "term": {
                            "visibility_level": {
                              "_name": "blob:authorized:project:visibility:20",
                              "value": 20
                            }
                          }
                        },
                        {
                          "terms": {
                            "_name": "blob:authorized:project:visibility:20:repository:access_level:enabled_or_private",
                            "repository_access_level": [
                              20,
                              10
                            ]
                          }
                        }
                      ]
                    }
                  }
                ]
              }
            }
          }
        },
        {
          "term": {
            "type": {
              "_name": "doc:is_a:blob",
              "value": "blob"
            }
          }
        },
        {
          "prefix": {
            "blob.file_name.reverse": "sj."
          }
        }
      ]
    }
  },
  "size": 20,
  "from": 0,
  "sort": [
    "_score"
  ],
  "highlight": {
    "pre_tags": [
      "gitlabelasticsearch→"
    ],
    "post_tags": [
      "←gitlabelasticsearch"
    ],
    "number_of_fragments": 0,
    "fields": {
      "blob.content": {},
      "blob.file_name": {}
    }
  }
}
Click to see query diff
diff --git a/query.json b/query.json
index 1e31b3e..e8ef073 100644
--- a/query.json
+++ b/query.json
@@ -108,8 +108,8 @@
           }
         },
         {
-          "wildcard": {
-            "blob.path": "*.js"
+          "prefix": {
+            "blob.file_name.reverse": "sj."
           }
         }
       ]

#349099 (closed)

Useful links

Screenshots or screen recordings

These are strongly recommended to assist reviewers and reduce the time to merge your change.

How to set up and validate locally

  1. Checkout the branch of this MR
  2. Enable Advanced Search if it's not configured
  3. If your index was created before you checked out this branch, you'll need to reindex everything using Elasticsearch zero-downtime reindexing to apply the new mappings. Please ensure this step is finished before proceeding to the next one. You can run this command to force faster reindexing:
    while true; ElasticClusterReindexingCronWorker.new.perform; sleep 1; end
  4. Search for extension:js using the Code tab (http://localhost:3000/search?repository_ref=&scope=blobs&search=extension%3Ajs&snippets=)
  5. Ensure that it returns results with *.js files
  6. Enable the feature flag
    Feature.enable(:elastic_file_name_reverse_optimization)
  7. Perform the search again and ensure that the results are the same

MR acceptance checklist

This checklist encourages us to confirm any changes have been analyzed to reduce risks in quality, performance, reliability, security, and maintainability.

Related to #349099 (closed)

Edited by Dmitry Gruzd

Merge request reports