Improve performance of Search API (Advanced): users scope

Summary

In &3166 (closed) a performance issue was raised around the response time of the Advanced Search API. This issue deals specifically with the users scope, at the Global, Group, and Project levels. Below are results of a performance test that was run on the 10k reference architecture:

API - Global Search
    ✓ { endpoint:users }............: avg=8298.67ms  min=31.19ms  med=10539.07ms max=12639.17ms p(90)=11797.77ms p(95)=11981.41ms
API - Group Search
    ✓ { endpoint:users }............: avg=11049.06ms min=573.63ms  med=10853.37ms max=23765.19ms p(90)=17645.64ms p(95)=19981.77ms
API - Project Search
    ✓ { endpoint:users }............: avg=3262.30ms  min=198.66ms  med=3525.94ms  max=7031.63ms  p(90)=4491.66ms  p(95)=4967.65ms

Improvements

As seen here, this scope does not query Elasticsearch. Rather, the database is used for search, via the UsersFinder.

Global

Queries
Started GET "/api/v4/search?scope=users&search=reported"
  PersonalAccessToken Load (0.3ms)  SELECT "personal_access_tokens".* FROM "personal_access_tokens" WHERE "personal_access_tokens"."token_digest" = '' LIMIT 1
  ↳ app/models/concerns/token_authenticatable_strategies/digest.rb:8:in `find_token_authenticatable'
  User Load (0.5ms)  SELECT "users".* FROM "users" WHERE "users"."id" = 1 LIMIT 1
  ↳ lib/gitlab/auth/auth_finders.rb:103:in `find_user_from_access_token'
  License Load (0.2ms)  SELECT "licenses".* FROM "licenses" ORDER BY "licenses"."id" DESC LIMIT 1
  ↳ ee/app/models/license.rb:268:in `load_license'
  User Load (0.9ms)  SELECT "users".* FROM "users" WHERE (("users"."name" ILIKE '%reported%' OR "users"."username" ILIKE '%reported%') OR "users"."email" = 'reported') ORDER BY CASE
  WHEN users.name = 'reported' THEN 0
  WHEN users.username = 'reported' THEN 1
  WHEN users.email = 'reported' THEN 2
  ELSE 3
END
, "users"."name" ASC LIMIT 20 OFFSET 0
  ↳ app/services/search_service.rb:75:in `redact_unauthorized_results'
   (0.5ms)  SELECT COUNT(*) FROM "users" WHERE (("users"."name" ILIKE '%reported%' OR "users"."username" ILIKE '%reported%') OR "users"."email" = 'reported') AND 1=1
  ↳ lib/gitlab/pagination/offset_pagination.rb:47:in `add_pagination_headers'
  User Load (0.8ms)  SELECT "users".* FROM "users" WHERE (("users"."name" ILIKE '%reported%' OR "users"."username" ILIKE '%reported%') OR "users"."email" = 'reported') AND 1=1 ORDER BY CASE
  WHEN users.name = 'reported' THEN 0
  WHEN users.username = 'reported' THEN 1
  WHEN users.email = 'reported' THEN 2
  ELSE 3
END
, "users"."name" ASC LIMIT 20 OFFSET 0
  ↳ lib/api/search.rb:75:in `block (2 levels) in <class:Search>'

It looks like the same query for Users is being run twice. The only difference between the queries is that the second one has AND 1=1.

Group

Queries
Started GET "/api/v4/groups/28/search?scope=users&search=root"
  ApplicationSetting Load (0.8ms)  SELECT "application_settings".* FROM "application_settings" ORDER BY "application_settings"."id" DESC LIMIT 1
  ↳ app/models/concerns/cacheable_attributes.rb:19:in `current_without_cache'
  PersonalAccessToken Load (0.2ms)  SELECT "personal_access_tokens".* FROM "personal_access_tokens" WHERE "personal_access_tokens"."token_digest" = '' LIMIT 1
  ↳ app/models/concerns/token_authenticatable_strategies/digest.rb:8:in `find_token_authenticatable'
  User Load (0.5ms)  SELECT "users".* FROM "users" WHERE "users"."id" = 1 LIMIT 1
  ↳ lib/gitlab/auth/auth_finders.rb:103:in `find_user_from_access_token'
   (0.2ms)  SELECT "features"."key" FROM "features"
  ↳ lib/feature.rb:15:in `feature_names'
  License Load (0.2ms)  SELECT "licenses".* FROM "licenses" ORDER BY "licenses"."id" DESC LIMIT 1
  ↳ ee/app/models/license.rb:268:in `load_license'
  Group Load (0.4ms)  SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28 LIMIT 1
  ↳ lib/api/helpers.rb:128:in `find_group'
  IpRestriction Load (0.2ms)  SELECT "ip_restrictions".* FROM "ip_restrictions" WHERE "ip_restrictions"."group_id" = 28
  ↳ ee/lib/gitlab/ip_restriction/enforcer.rb:31:in `allows_address?'
  Group Load (0.4ms)  SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28 LIMIT 1
  ↳ app/services/search_service.rb:34:in `group'
  Route Load (0.3ms)  SELECT "routes".* FROM "routes" WHERE "routes"."source_id" = 28 AND "routes"."source_type" = 'Namespace' LIMIT 1
  ↳ app/models/concerns/routable.rb:77:in `full_path'
   (0.9ms)  SELECT "projects"."id" FROM "projects" INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project' WHERE (EXISTS (SELECT 1 FROM "project_authorizations" WHERE "project_authorizations"."user_id" = 1 AND (project_authorizations.project_id = projects.id)) OR projects.visibility_level IN (0,10,20)) AND "projects"."archived" = FALSE AND (rs.path LIKE 'h5bp/%') ORDER BY "projects"."id" DESC
  ↳ ee/app/services/ee/search/group_service.rb:15:in `elastic_projects'
  User Load (10.5ms)  SELECT "users".* FROM "users" WHERE (("users"."name" ILIKE '%root%' OR "users"."username" ILIKE '%root%') OR "users"."email" = 'root') AND "users"."id" IN (SELECT "users"."id" FROM ((SELECT "users".* FROM "users" WHERE "users"."id" IN (SELECT "members"."user_id" FROM "members" LEFT OUTER JOIN "users" ON "members"."user_id" = "users"."id" WHERE "members"."type" = 'GroupMember' AND "members"."source_type" = 'Namespace' AND "users"."state" = 'active' AND "members"."requested_at" IS NULL AND "members"."source_id" IN (WITH RECURSIVE "base_and_ancestors" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = "base_and_ancestors"."parent_id")), "base_and_descendants" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_descendants" WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id")) SELECT "namespaces"."id" FROM ((SELECT "namespaces".* FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."type" = 'Group')
UNION
(SELECT "namespaces".* FROM "base_and_descendants" AS "namespaces" WHERE "namespaces"."type" = 'Group')) namespaces WHERE "namespaces"."type" = 'Group')))
UNION
(SELECT "users".* FROM "users" INNER JOIN "members" ON "members"."source_type" = 'Project' AND "members"."user_id" = "users"."id" AND "members"."type" = 'ProjectMember' AND "members"."requested_at" IS NULL INNER JOIN "projects" ON "projects"."id" = "members"."source_id" INNER JOIN "namespaces" ON "namespaces"."id" = "projects"."namespace_id" AND "namespaces"."type" = 'Group' WHERE "namespaces"."id" IN (WITH RECURSIVE "base_and_descendants" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_descendants" WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id")) SELECT "id" FROM "base_and_descendants" AS "namespaces"))) users) AND "users"."id" IN (SELECT members.user_id FROM "namespaces" INNER JOIN "members" ON "members"."source_type" = 'Namespace' AND "members"."source_id" = "namespaces"."id" AND "members"."source_type" = 'Namespace' AND "members"."type" = 'GroupMember' AND "members"."requested_at" IS NULL INNER JOIN "users" ON "users"."id" = "members"."user_id" WHERE "namespaces"."type" = 'Group' ORDER BY "namespaces"."id" DESC) ORDER BY CASE
  WHEN users.name = 'root' THEN 0
  WHEN users.username = 'root' THEN 1
  WHEN users.email = 'root' THEN 2
  ELSE 3
END
, "users"."name" ASC LIMIT 20 OFFSET 0
  ↳ app/services/search_service.rb:75:in `redact_unauthorized_results'
   (8.7ms)  SELECT COUNT(*) FROM "users" WHERE (("users"."name" ILIKE '%root%' OR "users"."username" ILIKE '%root%') OR "users"."email" = 'root') AND "users"."id" IN (SELECT "users"."id" FROM ((SELECT "users".* FROM "users" WHERE "users"."id" IN (SELECT "members"."user_id" FROM "members" LEFT OUTER JOIN "users" ON "members"."user_id" = "users"."id" WHERE "members"."type" = 'GroupMember' AND "members"."source_type" = 'Namespace' AND "users"."state" = 'active' AND "members"."requested_at" IS NULL AND "members"."source_id" IN (WITH RECURSIVE "base_and_ancestors" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = "base_and_ancestors"."parent_id")), "base_and_descendants" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_descendants" WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id")) SELECT "namespaces"."id" FROM ((SELECT "namespaces".* FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."type" = 'Group')
UNION
(SELECT "namespaces".* FROM "base_and_descendants" AS "namespaces" WHERE "namespaces"."type" = 'Group')) namespaces WHERE "namespaces"."type" = 'Group')))
UNION
(SELECT "users".* FROM "users" INNER JOIN "members" ON "members"."source_type" = 'Project' AND "members"."user_id" = "users"."id" AND "members"."type" = 'ProjectMember' AND "members"."requested_at" IS NULL INNER JOIN "projects" ON "projects"."id" = "members"."source_id" INNER JOIN "namespaces" ON "namespaces"."id" = "projects"."namespace_id" AND "namespaces"."type" = 'Group' WHERE "namespaces"."id" IN (WITH RECURSIVE "base_and_descendants" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_descendants" WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id")) SELECT "id" FROM "base_and_descendants" AS "namespaces"))) users) AND "users"."id" IN (SELECT members.user_id FROM "namespaces" INNER JOIN "members" ON "members"."source_type" = 'Namespace' AND "members"."source_id" = "namespaces"."id" AND "members"."source_type" = 'Namespace' AND "members"."type" = 'GroupMember' AND "members"."requested_at" IS NULL INNER JOIN "users" ON "users"."id" = "members"."user_id" WHERE "namespaces"."type" = 'Group' ORDER BY "namespaces"."id" DESC) AND 1=1
  ↳ lib/gitlab/pagination/offset_pagination.rb:47:in `add_pagination_headers'
  User Load (9.6ms)  SELECT "users".* FROM "users" WHERE (("users"."name" ILIKE '%root%' OR "users"."username" ILIKE '%root%') OR "users"."email" = 'root') AND "users"."id" IN (SELECT "users"."id" FROM ((SELECT "users".* FROM "users" WHERE "users"."id" IN (SELECT "members"."user_id" FROM "members" LEFT OUTER JOIN "users" ON "members"."user_id" = "users"."id" WHERE "members"."type" = 'GroupMember' AND "members"."source_type" = 'Namespace' AND "users"."state" = 'active' AND "members"."requested_at" IS NULL AND "members"."source_id" IN (WITH RECURSIVE "base_and_ancestors" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = "base_and_ancestors"."parent_id")), "base_and_descendants" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_descendants" WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id")) SELECT "namespaces"."id" FROM ((SELECT "namespaces".* FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."type" = 'Group')
UNION
(SELECT "namespaces".* FROM "base_and_descendants" AS "namespaces" WHERE "namespaces"."type" = 'Group')) namespaces WHERE "namespaces"."type" = 'Group')))
UNION
(SELECT "users".* FROM "users" INNER JOIN "members" ON "members"."source_type" = 'Project' AND "members"."user_id" = "users"."id" AND "members"."type" = 'ProjectMember' AND "members"."requested_at" IS NULL INNER JOIN "projects" ON "projects"."id" = "members"."source_id" INNER JOIN "namespaces" ON "namespaces"."id" = "projects"."namespace_id" AND "namespaces"."type" = 'Group' WHERE "namespaces"."id" IN (WITH RECURSIVE "base_and_descendants" AS ((SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 28)
UNION
(SELECT "namespaces".* FROM "namespaces", "base_and_descendants" WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id")) SELECT "id" FROM "base_and_descendants" AS "namespaces"))) users) AND "users"."id" IN (SELECT members.user_id FROM "namespaces" INNER JOIN "members" ON "members"."source_type" = 'Namespace' AND "members"."source_id" = "namespaces"."id" AND "members"."source_type" = 'Namespace' AND "members"."type" = 'GroupMember' AND "members"."requested_at" IS NULL INNER JOIN "users" ON "users"."id" = "members"."user_id" WHERE "namespaces"."type" = 'Group' ORDER BY "namespaces"."id" DESC) AND 1=1 ORDER BY CASE
  WHEN users.name = 'root' THEN 0
  WHEN users.username = 'root' THEN 1
  WHEN users.email = 'root' THEN 2
  ELSE 3
END
, "users"."name" ASC LIMIT 20 OFFSET 0
  ↳ lib/api/search.rb:96:in `block (2 levels) in <class:Search>'

The query to find Users seems quite complex, and does take longer relative to the Global query. Again, it looks like the same query is being performed twice.

Project

Queries
Started GET "/api/v4/projects/1/search?scope=users&search=root"
   (0.4ms)  SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::float as lag
  ↳ app/models/concerns/cacheable_attributes.rb:19:in `current_without_cache'
  ApplicationSetting Load (2.5ms)  SELECT "application_settings".* FROM "application_settings" ORDER BY "application_settings"."id" DESC LIMIT 1
  ↳ app/models/concerns/cacheable_attributes.rb:19:in `current_without_cache'
  PersonalAccessToken Load (0.8ms)  SELECT "personal_access_tokens".* FROM "personal_access_tokens" WHERE "personal_access_tokens"."token_digest" = '' LIMIT 1
  ↳ app/models/concerns/token_authenticatable_strategies/digest.rb:8:in `find_token_authenticatable'
  User Load (2.5ms)  SELECT "users".* FROM "users" WHERE "users"."id" = 1 LIMIT 1
  ↳ lib/gitlab/auth/auth_finders.rb:103:in `find_user_from_access_token'
   (0.4ms)  SELECT "features"."key" FROM "features"
  ↳ lib/feature.rb:15:in `feature_names'
  License Load (0.4ms)  SELECT "licenses".* FROM "licenses" ORDER BY "licenses"."id" DESC LIMIT 1
  ↳ ee/app/models/license.rb:268:in `load_license'
  Project Load (3.1ms)  SELECT "projects".* FROM "projects" WHERE "projects"."pending_delete" = FALSE AND "projects"."id" = 1 LIMIT 1
  ↳ lib/api/helpers.rb:108:in `find_project'
  Group Load (1.4ms)  SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = 22 AND "namespaces"."type" = 'Group' LIMIT 1
  ↳ ee/app/policies/ee/project_policy.rb:308:in `block (2 levels) in <module:ProjectPolicy>'
  IpRestriction Load (0.4ms)  SELECT "ip_restrictions".* FROM "ip_restrictions" WHERE "ip_restrictions"."group_id" = 22
  ↳ ee/lib/gitlab/ip_restriction/enforcer.rb:31:in `allows_address?'
  Project Load (0.6ms)  SELECT "projects".* FROM "projects" WHERE "projects"."id" = 1 LIMIT 1
  ↳ app/services/search_service.rb:20:in `project'
  Route Load (0.6ms)  SELECT "routes".* FROM "routes" WHERE "routes"."source_id" = 1 AND "routes"."source_type" = 'Project' LIMIT 1
  ↳ app/models/concerns/routable.rb:77:in `full_path'
  User Load (2.7ms)  SELECT "users".* FROM "users" WHERE (("users"."name" ILIKE '%root%' OR "users"."username" ILIKE '%root%') OR "users"."email" = 'root') AND "users"."id" IN (SELECT "users"."id" FROM "users" INNER JOIN "project_authorizations" ON "users"."id" = "project_authorizations"."user_id" WHERE "project_authorizations"."project_id" = 1) ORDER BY CASE
  WHEN users.name = 'root' THEN 0
  WHEN users.username = 'root' THEN 1
  WHEN users.email = 'root' THEN 2
  ELSE 3
END
, "users"."name" ASC LIMIT 20 OFFSET 0
  ↳ app/services/search_service.rb:75:in `redact_unauthorized_results'
   (1.2ms)  SELECT COUNT(*) FROM "users" WHERE (("users"."name" ILIKE '%root%' OR "users"."username" ILIKE '%root%') OR "users"."email" = 'root') AND "users"."id" IN (SELECT "users"."id" FROM "users" INNER JOIN "project_authorizations" ON "users"."id" = "project_authorizations"."user_id" WHERE "project_authorizations"."project_id" = 1) AND 1=1
  ↳ lib/gitlab/pagination/offset_pagination.rb:47:in `add_pagination_headers'
  User Load (1.4ms)  SELECT "users".* FROM "users" WHERE (("users"."name" ILIKE '%root%' OR "users"."username" ILIKE '%root%') OR "users"."email" = 'root') AND "users"."id" IN (SELECT "users"."id" FROM "users" INNER JOIN "project_authorizations" ON "users"."id" = "project_authorizations"."user_id" WHERE "project_authorizations"."project_id" = 1) AND 1=1 ORDER BY CASE
  WHEN users.name = 'root' THEN 0
  WHEN users.username = 'root' THEN 1
  WHEN users.email = 'root' THEN 2
  ELSE 3
END
, "users"."name" ASC LIMIT 20 OFFSET 0
  ↳ lib/api/search.rb:117:in `block (2 levels) in <class:Search>'

Same issue with duplicate query.

Edited Apr 24, 2020 by Alishan Ladhani
Assignee Loading
Time tracking Loading