Make language aggregations a separate Elasticsearch call
What does this MR do and why?
Related to &6853 (closed) and #366581 (closed)
This MR moves the language aggregation calculation out of the search request and into its own Elasticsearch call. The specs were updated so that populating blob_aggregations no longer requires calling search first (as was previously required).
Global search
Before Elasticsearch query (search + aggregation combined)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "1.11.0",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:any",
"value": 0
}
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
},
{
"bool": {
"_name": "blob:authorized:project:visibility:10:repository:access_level",
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:visibility:10",
"value": 10
}
}
},
{
"terms": {
"_name": "blob:authorized:project:visibility:10:repository:access_level:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
},
{
"bool": {
"_name": "blob:authorized:project:visibility:20:repository:access_level",
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:visibility:20",
"value": 20
}
}
},
{
"terms": {
"_name": "blob:authorized:project:visibility:20:repository:access_level:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
}
]
}
},
"size": 20,
"from": 0,
"sort": [
"_score"
],
"highlight": {
"pre_tags": [
"gitlabelasticsearch→"
],
"post_tags": [
"←gitlabelasticsearch"
],
"number_of_fragments": 0,
"fields": {
"blob.content": {},
"blob.file_name": {}
}
}
}
After Elasticsearch query (search separate)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "1.11.0",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:any",
"value": 0
}
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
},
{
"bool": {
"_name": "blob:authorized:project:visibility:10:repository:access_level",
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:visibility:10",
"value": 10
}
}
},
{
"terms": {
"_name": "blob:authorized:project:visibility:10:repository:access_level:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
},
{
"bool": {
"_name": "blob:authorized:project:visibility:20:repository:access_level",
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:visibility:20",
"value": 20
}
}
},
{
"terms": {
"_name": "blob:authorized:project:visibility:20:repository:access_level:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
}
]
}
},
"size": 20,
"from": 0,
"sort": [
"_score"
],
"highlight": {
"pre_tags": [
"gitlabelasticsearch→"
],
"post_tags": [
"←gitlabelasticsearch"
],
"number_of_fragments": 0,
"fields": {
"blob.content": {},
"blob.file_name": {}
}
}
}
After Elasticsearch query (aggregation separate)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "1.11.0",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:any",
"value": 0
}
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
},
{
"bool": {
"_name": "blob:authorized:project:visibility:10:repository:access_level",
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:visibility:10",
"value": 10
}
}
},
{
"terms": {
"_name": "blob:authorized:project:visibility:10:repository:access_level:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
},
{
"bool": {
"_name": "blob:authorized:project:visibility:20:repository:access_level",
"filter": [
{
"term": {
"visibility_level": {
"_name": "blob:authorized:project:visibility:20",
"value": 20
}
}
},
{
"terms": {
"_name": "blob:authorized:project:visibility:20:repository:access_level:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
}
]
}
},
"size": 0,
"aggs": {
"language": {
"composite": {
"sources": [
{
"language": {
"terms": {
"field": "blob.language"
}
}
}
]
}
}
}
}
Group search
Before Elasticsearch query (search + aggregation combined)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "1.11.0",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"terms": {
"_name": "blob:authorized:project:membership:id",
"id": [
7,
4
]
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
}
]
}
},
"size": 20,
"from": 0,
"sort": [
"_score"
],
"highlight": {
"pre_tags": [
"gitlabelasticsearch→"
],
"post_tags": [
"←gitlabelasticsearch"
],
"number_of_fragments": 0,
"fields": {
"blob.content": {},
"blob.file_name": {}
}
}
}
After Elasticsearch query (search separate)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "0.11.1",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"terms": {
"_name": "blob:authorized:project:membership:id",
"id": [
7,
4
]
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
}
]
}
},
"size": 20,
"from": 0,
"sort": [
"_score"
],
"highlight": {
"pre_tags": [
"gitlabelasticsearch→"
],
"post_tags": [
"←gitlabelasticsearch"
],
"number_of_fragments": 0,
"fields": {
"blob.content": {},
"blob.file_name": {}
}
}
}
After Elasticsearch query (aggregation separate)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "0.11.1",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"terms": {
"_name": "blob:authorized:project:membership:id",
"id": [
7,
4
]
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
}
]
}
},
"size": 0,
"aggs": {
"language": {
"composite": {
"sources": [
{
"language": {
"terms": {
"field": "blob.language"
}
}
}
]
}
}
}
}
Project search
Before Elasticsearch query (search + aggregation combined)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "test",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"terms": {
"_name": "blob:authorized:project:membership:id",
"id": [
3
]
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
},
{
"terms": {
"_name": "blob:related:repositories",
"blob.rid": [
3
]
}
},
{
"terms": {
"_name": "blob:match:languages",
"blob.language": [
"Markdown"
]
}
}
]
}
},
"size": 20,
"from": 0,
"sort": [
"_score"
],
"highlight": {
"pre_tags": [
"gitlabelasticsearch→"
],
"post_tags": [
"←gitlabelasticsearch"
],
"number_of_fragments": 0,
"fields": {
"blob.content": {},
"blob.file_name": {}
}
},
"aggs": {
"language": {
"composite": {
"sources": [
{
"language": {
"terms": {
"field": "blob.language"
}
}
}
]
}
}
}
}
After Elasticsearch queries (search separate)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "0.11.1",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"terms": {
"_name": "blob:authorized:project:membership:id",
"id": [
7
]
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
},
{
"terms": {
"_name": "blob:related:repositories",
"blob.rid": [
7
]
}
}
]
}
},
"size": 20,
"from": 0,
"sort": [
"_score"
],
"highlight": {
"pre_tags": [
"gitlabelasticsearch→"
],
"post_tags": [
"←gitlabelasticsearch"
],
"number_of_fragments": 0,
"fields": {
"blob.content": {},
"blob.file_name": {}
}
}
}
After Elasticsearch queries (aggregation separate)
{
"query": {
"bool": {
"must": {
"simple_query_string": {
"_name": "blob:match:search_terms",
"fields": [
"blob.content",
"blob.file_name",
"blob.path"
],
"query": "0.11.1",
"default_operator": "and"
}
},
"must_not": [],
"should": [],
"filter": [
{
"has_parent": {
"_name": "blob:authorized:project",
"parent_type": "project",
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"terms": {
"_name": "blob:authorized:project:membership:id",
"id": [
7
]
}
},
{
"terms": {
"_name": "blob:authorized:project:repository:enabled_or_private",
"repository_access_level": [
20,
10
]
}
}
]
}
}
]
}
}
}
},
{
"term": {
"type": {
"_name": "doc:is_a:blob",
"value": "blob"
}
}
},
{
"terms": {
"_name": "blob:related:repositories",
"blob.rid": [
7
]
}
}
]
}
},
"size": 0,
"aggs": {
"language": {
"composite": {
"sources": [
{
"language": {
"terms": {
"field": "blob.language"
}
}
}
]
}
}
}
}
Screenshots or screen recordings
These are strongly recommended to assist reviewers and reduce the time to merge your change.
How to set up and validate locally
- Ensure that GDK is set up for Elasticsearch and that Advanced Search is enabled via the Admin Setting
- Index all projects prior to testing (`bundle exec rake gitlab:elastic:index`)
- Make sure the feature flag `search_blobs_language_aggregation` is enabled locally
- Enable performance bar
- Run a search for `test` in the code tab: http://gdk.test:3000/search?scope=blobs&search=test
- Validate two Elasticsearch calls are made via the performance bar
- Run a search for `test` in a non-code tab (issues, merge requests, etc)
- Validate one Elasticsearch call is made via the performance bar
- Validate that one Elasticsearch call is made for the blobs counts call via the performance bar
- Disable the feature flag `search_blobs_language_aggregation`
- Run a search for `test` in the code tab: http://gdk.test:3000/search?scope=blobs&search=test
- Validate one Elasticsearch call is made via the performance bar
MR acceptance checklist
This checklist encourages us to confirm any changes have been analyzed to reduce risks in quality, performance, reliability, security, and maintainability.
- I have evaluated the MR acceptance checklist for this MR.
Edited by Terri Chu