Optimize groups storage size sorting in Admin Area

As observed in Add GraphQL support to sorting by storage_size (!194655 - merged), sorting groups in the Admin Area by storage size has a really poor performance.

The storage size for each groups is computed on the fly with a lateral join. This does not work well for decently sized instance as it has to scan and aggregate the data for all groups.

https://console.postgres.ai/gitlab/gitlab-production-main/sessions/41513/commands/127527

SELECT "namespaces".*, "statistics".*
FROM (SELECT "namespaces"."id",
             "namespaces"."name",
             "namespaces"."path",
             "namespaces"."owner_id",
             "namespaces"."created_at",
             "namespaces"."updated_at",
             "namespaces"."type",
             "namespaces"."avatar",
             "namespaces"."membership_lock",
             "namespaces"."share_with_group_lock",
             "namespaces"."visibility_level",
             "namespaces"."request_access_enabled",
             "namespaces"."ldap_sync_status",
             "namespaces"."ldap_sync_error",
             "namespaces"."ldap_sync_last_update_at",
             "namespaces"."ldap_sync_last_successful_update_at",
             "namespaces"."ldap_sync_last_sync_at",
             "namespaces"."lfs_enabled",
             "namespaces"."parent_id",
             "namespaces"."shared_runners_minutes_limit",
             "namespaces"."repository_size_limit",
             "namespaces"."require_two_factor_authentication",
             "namespaces"."two_factor_grace_period",
             "namespaces"."project_creation_level",
             "namespaces"."runners_token",
             "namespaces"."file_template_project_id",
             "namespaces"."saml_discovery_token",
             "namespaces"."runners_token_encrypted",
             "namespaces"."custom_project_templates_group_id",
             "namespaces"."auto_devops_enabled",
             "namespaces"."extra_shared_runners_minutes_limit",
             "namespaces"."last_ci_minutes_notification_at",
             "namespaces"."last_ci_minutes_usage_notification_level",
             "namespaces"."subgroup_creation_level",
             "namespaces"."max_pages_size",
             "namespaces"."max_artifacts_size",
             "namespaces"."mentions_disabled",
             "namespaces"."default_branch_protection",
             "namespaces"."max_personal_access_token_lifetime",
             "namespaces"."push_rule_id",
             "namespaces"."shared_runners_enabled",
             "namespaces"."allow_descendants_override_disabled_shared_runners",
             "namespaces"."traversal_ids",
             "namespaces"."organization_id"
      FROM "namespaces"
      WHERE "namespaces"."type" = 'Group') namespaces,
     LATERAL (SELECT COALESCE(SUM("project_statistics"."storage_size"), 0)            AS storage_size,
                     COALESCE(SUM("project_statistics"."repository_size"), 0)         AS repository_size,
                     COALESCE(SUM("project_statistics"."wiki_size"), 0)               AS wiki_size,
                     COALESCE(SUM("project_statistics"."snippets_size"), 0)           AS snippets_size,
                     COALESCE(SUM("project_statistics"."lfs_objects_size"), 0)        AS lfs_objects_size,
                     COALESCE(SUM("project_statistics"."build_artifacts_size"), 0)    AS build_artifacts_size,
                     COALESCE(SUM("project_statistics"."pipeline_artifacts_size"), 0) AS pipeline_artifacts_size,
                     COALESCE(SUM("project_statistics"."packages_size"), 0)           AS packages_size,
                     COALESCE(SUM("project_statistics"."uploads_size"), 0)            AS uploads_size
              FROM "project_statistics"
              WHERE "project_statistics"."namespace_id" = "namespaces"."id") statistics
WHERE "namespaces"."type" = 'Group'
ORDER BY storage_size DESC, namespaces.id DESC
LIMIT 20 OFFSET 0

Implementation Guide

Needs refinement

Edited by Shane Maglangit