From d231961c094f1de2904e07456ef927afb2e4198c Mon Sep 17 00:00:00 2001
From: Hercules Merscher <hmerscher@gitlab.com>
Date: Thu, 6 Jun 2024 12:34:21 +0200
Subject: [PATCH 1/7] feat: zoekt service definition

---
 metrics-catalog/services/all.jsonnet   |  1 +
 metrics-catalog/services/zoekt.jsonnet | 12 ++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 metrics-catalog/services/zoekt.jsonnet

diff --git a/metrics-catalog/services/all.jsonnet b/metrics-catalog/services/all.jsonnet
index 26a1042167..969b21dcd8 100644
--- a/metrics-catalog/services/all.jsonnet
+++ b/metrics-catalog/services/all.jsonnet
@@ -74,4 +74,5 @@
   import 'web.jsonnet',
   import 'websockets.jsonnet',
   import 'woodhouse.jsonnet',
+  import 'zoekt.jsonnet',
 ]
diff --git a/metrics-catalog/services/zoekt.jsonnet b/metrics-catalog/services/zoekt.jsonnet
new file mode 100644
index 0000000000..a79d9a550e
--- /dev/null
+++ b/metrics-catalog/services/zoekt.jsonnet
@@ -0,0 +1,12 @@
+local metricsCatalog = import 'servicemetrics/metrics.libsonnet';
+local rateMetric = metricsCatalog.rateMetric;
+
+metricsCatalog.serviceDefinition({
+  type: 'zoekt',
+  tier: 'inf',
+  monitoringThresholds: {
+    apdexScore: 0.999,
+    errorRatio: 0.999,
+  },
+  serviceLevelIndicators: {},
+})
-- 
GitLab
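
Note on the service definition above: rateMetric is imported but serviceLevelIndicators is left empty, so no SLIs are registered yet. A minimal sketch of how an indicator could later be filled in, assuming a hypothetical Zoekt webserver counter pair (the metric names and the choice of SLI fields are illustrative assumptions, not part of this patch):

  serviceLevelIndicators: {
    search: {
      userImpacting: true,
      description: |||
        Code search requests served by the Zoekt web service.
      |||,
      // Placeholder metric names; substitute the counters actually exported by zoekt-webserver.
      requestRate: rateMetric(
        counter='zoekt_search_requests_total',
        selector={ type: 'zoekt' },
      ),
      errorRate: rateMetric(
        counter='zoekt_search_failed_total',
        selector={ type: 'zoekt' },
      ),
      significantLabels: ['fqdn'],
    },
  },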


From fb3f964cd913c4e65199d3976b057ec8aabd4db8 Mon Sep 17 00:00:00 2001
From: Hercules Merscher <hmerscher@gitlab.com>
Date: Thu, 6 Jun 2024 12:46:12 +0200
Subject: [PATCH 2/7] chore: make generate

---
 dashboards/zoekt/main.dashboard.jsonnet       |   6 +
 .../autogenerated-saturation.yml              |  28 +-
 ...tlab-gprd-gprd-zoekt-saturation-alerts.yml | 628 ++++++++++++++++++
 ...ab-gprd-gprd-zoekt-saturation-metadata.yml |  78 +++
 ...ated-gitlab-gprd-gprd-zoekt-saturation.yml | 145 ++++
 ...zoekt-service-anomaly-detection-alerts.yml |  79 +++
 ...rd-zoekt-service_ops_anomaly_detection.yml |  38 ++
 ...lab-gprd-gprd-zoekt-zoekt-service-slos.yml |  18 +
 ...tlab-gstg-gstg-zoekt-saturation-alerts.yml | 628 ++++++++++++++++++
 ...ab-gstg-gstg-zoekt-saturation-metadata.yml |  78 +++
 ...ated-gitlab-gstg-gstg-zoekt-saturation.yml | 145 ++++
 ...zoekt-service-anomaly-detection-alerts.yml |  79 +++
 ...tg-zoekt-service_ops_anomaly_detection.yml |  38 ++
 ...lab-gstg-gstg-zoekt-zoekt-service-slos.yml |  18 +
 thanos-rules/autogenerated-service-slos.yml   |  10 +
 15 files changed, 2002 insertions(+), 14 deletions(-)
 create mode 100644 dashboards/zoekt/main.dashboard.jsonnet
 create mode 100644 mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-alerts.yml
 create mode 100644 mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-metadata.yml
 create mode 100644 mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation.yml
 create mode 100644 mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-service-anomaly-detection-alerts.yml
 create mode 100644 mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-service_ops_anomaly_detection.yml
 create mode 100644 mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-zoekt-service-slos.yml
 create mode 100644 mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-alerts.yml
 create mode 100644 mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-metadata.yml
 create mode 100644 mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation.yml
 create mode 100644 mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-service-anomaly-detection-alerts.yml
 create mode 100644 mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-service_ops_anomaly_detection.yml
 create mode 100644 mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-zoekt-service-slos.yml

diff --git a/dashboards/zoekt/main.dashboard.jsonnet b/dashboards/zoekt/main.dashboard.jsonnet
new file mode 100644
index 0000000000..16472d53f1
--- /dev/null
+++ b/dashboards/zoekt/main.dashboard.jsonnet
@@ -0,0 +1,6 @@
+// This file is autogenerated using scripts/generate-service-dashboards
+// Please feel free to customize this file.
+local serviceDashboard = import 'gitlab-dashboards/service_dashboard.libsonnet';
+
+serviceDashboard.overview('zoekt')
+.overviewTrailer()
diff --git a/legacy-prometheus-rules/autogenerated-saturation.yml b/legacy-prometheus-rules/autogenerated-saturation.yml
index fd03514a2a..ca319dc3c2 100644
--- a/legacy-prometheus-rules/autogenerated-saturation.yml
+++ b/legacy-prometheus-rules/autogenerated-saturation.yml
@@ -79,7 +79,7 @@ groups:
         clamp_min(
           clamp_max(
             1 - avg by (environment, tier, type, stage, shard) (
-              rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[5m])
+              rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[5m])
             )
             ,
             1)
@@ -94,9 +94,9 @@ groups:
         clamp_min(
           clamp_max(
             1 - (
-              node_filesystem_files_free{fstype=~"(ext.|xfs)", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
+              node_filesystem_files_free{fstype=~"(ext.|xfs)", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
               /
-              node_filesystem_files{fstype=~"(ext.|xfs)", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
+              node_filesystem_files{fstype=~"(ext.|xfs)", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
             )
             ,
             1)
@@ -111,7 +111,7 @@ groups:
         clamp_min(
           clamp_max(
             (
-              1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
+              1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
             )
             ,
             1)
@@ -523,7 +523,7 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            instance:node_memory_utilization:ratio{type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"} or instance:node_memory_utilisation:ratio{type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
+            instance:node_memory_utilization:ratio{type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"} or instance:node_memory_utilisation:ratio{type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
             ,
             1)
         ,
@@ -578,9 +578,9 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            max_over_time(node_nf_conntrack_entries{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[1m])
+            max_over_time(node_nf_conntrack_entries{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[1m])
             /
-            node_nf_conntrack_entries_limit{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
+            node_nf_conntrack_entries_limit{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
             ,
             1)
         ,
@@ -593,7 +593,7 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            avg without (cpu) (rate(node_schedstat_waiting_seconds_total{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[1h]))
+            avg without (cpu) (rate(node_schedstat_waiting_seconds_total{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[1h]))
             ,
             1)
         ,
@@ -607,15 +607,15 @@ groups:
         clamp_min(
           clamp_max(
             (
-              process_open_fds{type=~"ai-assisted|api|atlantis|camoproxy|ci-runners|cloudflare|consul|customersdot|external-dns|frontend|git|gitaly|google-cloud-storage|internal-api|istio|jaeger|kas|logging|mailgun|mailroom|memorystore|monitoring|nginx|ops-gitlab-net|packagecloud|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|plantuml|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|runway|search|sentry|sidekiq|vault|web-pages|web|websockets|woodhouse"}
+              process_open_fds{type=~"ai-assisted|api|atlantis|camoproxy|ci-runners|cloudflare|consul|customersdot|external-dns|frontend|git|gitaly|google-cloud-storage|internal-api|istio|jaeger|kas|logging|mailgun|mailroom|memorystore|monitoring|nginx|ops-gitlab-net|packagecloud|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|plantuml|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|runway|search|sentry|sidekiq|vault|web-pages|web|websockets|woodhouse|zoekt"}
               /
-              process_max_fds{type=~"ai-assisted|api|atlantis|camoproxy|ci-runners|cloudflare|consul|customersdot|external-dns|frontend|git|gitaly|google-cloud-storage|internal-api|istio|jaeger|kas|logging|mailgun|mailroom|memorystore|monitoring|nginx|ops-gitlab-net|packagecloud|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|plantuml|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|runway|search|sentry|sidekiq|vault|web-pages|web|websockets|woodhouse"}
+              process_max_fds{type=~"ai-assisted|api|atlantis|camoproxy|ci-runners|cloudflare|consul|customersdot|external-dns|frontend|git|gitaly|google-cloud-storage|internal-api|istio|jaeger|kas|logging|mailgun|mailroom|memorystore|monitoring|nginx|ops-gitlab-net|packagecloud|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|plantuml|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|runway|search|sentry|sidekiq|vault|web-pages|web|websockets|woodhouse|zoekt"}
             )
             or
             (
-              ruby_file_descriptors{type=~"ai-assisted|api|atlantis|camoproxy|ci-runners|cloudflare|consul|customersdot|external-dns|frontend|git|gitaly|google-cloud-storage|internal-api|istio|jaeger|kas|logging|mailgun|mailroom|memorystore|monitoring|nginx|ops-gitlab-net|packagecloud|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|plantuml|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|runway|search|sentry|sidekiq|vault|web-pages|web|websockets|woodhouse"}
+              ruby_file_descriptors{type=~"ai-assisted|api|atlantis|camoproxy|ci-runners|cloudflare|consul|customersdot|external-dns|frontend|git|gitaly|google-cloud-storage|internal-api|istio|jaeger|kas|logging|mailgun|mailroom|memorystore|monitoring|nginx|ops-gitlab-net|packagecloud|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|plantuml|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|runway|search|sentry|sidekiq|vault|web-pages|web|websockets|woodhouse|zoekt"}
               /
-              ruby_process_max_fds{type=~"ai-assisted|api|atlantis|camoproxy|ci-runners|cloudflare|consul|customersdot|external-dns|frontend|git|gitaly|google-cloud-storage|internal-api|istio|jaeger|kas|logging|mailgun|mailroom|memorystore|monitoring|nginx|ops-gitlab-net|packagecloud|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|plantuml|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|runway|search|sentry|sidekiq|vault|web-pages|web|websockets|woodhouse"}
+              ruby_process_max_fds{type=~"ai-assisted|api|atlantis|camoproxy|ci-runners|cloudflare|consul|customersdot|external-dns|frontend|git|gitaly|google-cloud-storage|internal-api|istio|jaeger|kas|logging|mailgun|mailroom|memorystore|monitoring|nginx|ops-gitlab-net|packagecloud|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|plantuml|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|runway|search|sentry|sidekiq|vault|web-pages|web|websockets|woodhouse|zoekt"}
             )
             ,
             1)
@@ -1294,7 +1294,7 @@ groups:
         clamp_min(
           clamp_max(
             1 - avg by (environment, tier, type, stage, shard, shard) (
-              rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[5m])
+              rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[5m])
             )
             ,
             1)
@@ -1361,7 +1361,7 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[5m]))
+            avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[5m]))
             ,
             1)
         ,
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-alerts.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-alerts.yml
new file mode 100644
index 0000000000..191ec4be99
--- /dev/null
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-alerts.yml
@@ -0,0 +1,628 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/saturation.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: GitLab Component Saturation Statistics
+  interval: 5m
+  rules:
+  - record: gitlab_component_saturation:ratio_quantile95_1w
+    expr: quantile_over_time(0.95, gitlab_component_saturation:ratio{env="gprd",type="zoekt"}[1w])
+  - record: gitlab_component_saturation:ratio_quantile99_1w
+    expr: quantile_over_time(0.99, gitlab_component_saturation:ratio{env="gprd",type="zoekt"}[1w])
+  - record: gitlab_component_saturation:ratio_quantile95_1h
+    expr: quantile_over_time(0.95, gitlab_component_saturation:ratio{env="gprd",type="zoekt"}[1h])
+  - record: gitlab_component_saturation:ratio_quantile99_1h
+    expr: quantile_over_time(0.99, gitlab_component_saturation:ratio{env="gprd",type="zoekt"}[1h])
+  - record: gitlab_component_saturation:ratio_avg_1h
+    expr: avg_over_time(gitlab_component_saturation:ratio{env="gprd",type="zoekt"}[1h])
+- name: GitLab Saturation Alerts
+  interval: 1m
+  rules:
+  - alert: component_saturation_slo_out_of_bounds:cpu
+    for: 5m
+    annotations:
+      title: The Average Service CPU Utilization resource of the {{ $labels.type }}
+        service ({{ $labels.stage }} stage) has a saturation exceeding SLO and is
+        close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Average Service CPU Utilization resource:
+
+        This resource measures average CPU utilization across all cores in a service fleet. If it is becoming saturated, it may indicate that the fleet needs horizontal or vertical scaling.
+      grafana_dashboard_id: alerts-sat_cpu
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1465724101"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard) (
+          clamp_min(
+            clamp_max(
+              1 - avg by (environment, tier, type, stage, shard) (
+                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard) (
+          clamp_min(
+            clamp_max(
+              1 - avg by (environment, tier, type, stage, shard) (
+                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s3
+    expr: |
+      gitlab_component_saturation:ratio{component="cpu",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="cpu"}
+  - alert: component_saturation_slo_out_of_bounds:disk_inodes
+    for: 15m
+    annotations:
+      title: The Disk inode Utilization per Device per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Disk inode Utilization per Device per Node resource:
+
+        Disk inode utilization per device per node.
+
+        If this is too high, it's possible that a directory is filling up with files. Consider logging in and checking temp directories for large numbers of files
+      grafana_dashboard_id: alerts-sat_disk_inodes
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_inodes?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "39965907"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              1 - (
+                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              1 - (
+                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      gitlab_component_saturation:ratio{component="disk_inodes",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="disk_inodes"}
+  - alert: ComponentResourceRunningOut_disk_inodes
+    for: 15m
+    annotations:
+      title: The Disk inode Utilization per Device per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) is on track to hit capacity within
+        6h
+      description: |
+        This means that this resource is growing rapidly and is predicted to exceed the saturation threshold within 6h.
+
+        Details of the Disk inode Utilization per Device per Node resource:
+
+        Disk inode utilization per device per node.
+
+        If this is too high, it's possible that a directory is filling up with files. Consider logging in and checking temp directories for large numbers of files
+      grafana_dashboard_id: alerts-sat_disk_inodes
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_inodes?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "39965907"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              1 - (
+                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              1 - (
+                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      linear_prediction_saturation_alert: 6h
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      predict_linear(gitlab_component_saturation:ratio{component="disk_inodes",env="gprd",type="zoekt"}[6h], 21600)
+      > on (component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="disk_inodes"}
+  - alert: component_saturation_slo_out_of_bounds:disk_space
+    for: 15m
+    annotations:
+      title: The Disk Space Utilization per Device per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Disk Space Utilization per Device per Node resource:
+
+        Disk space utilization per device per node.
+      grafana_dashboard_id: alerts-sat_disk_space
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_space?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "2661375984"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              (
+                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              (
+                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      gitlab_component_saturation:ratio{component="disk_space",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="disk_space"}
+  - alert: ComponentResourceRunningOut_disk_space
+    for: 15m
+    annotations:
+      title: The Disk Space Utilization per Device per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) is on track to hit capacity within
+        6h
+      description: |
+        This means that this resource is growing rapidly and is predicted to exceed the saturation threshold within 6h.
+
+        Details of the Disk Space Utilization per Device per Node resource:
+
+        Disk space utilization per device per node.
+      grafana_dashboard_id: alerts-sat_disk_space
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_space?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "2661375984"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              (
+                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              (
+                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      linear_prediction_saturation_alert: 6h
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      predict_linear(gitlab_component_saturation:ratio{component="disk_space",env="gprd",type="zoekt"}[6h], 21600)
+      > on (component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="disk_space"}
+  - alert: component_saturation_slo_out_of_bounds:memory
+    for: 5m
+    annotations:
+      title: The Memory Utilization per Node resource of the {{ $labels.type }} service
+        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
+        its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Memory Utilization per Node resource:
+
+        Memory utilization per device per node.
+      grafana_dashboard_id: alerts-sat_memory
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_memory?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1955556769"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn) (
+          clamp_min(
+            clamp_max(
+              instance:node_memory_utilization:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} or instance:node_memory_utilisation:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn) (
+          clamp_min(
+            clamp_max(
+              instance:node_memory_utilization:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} or instance:node_memory_utilisation:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      gitlab_component_saturation:ratio{component="memory",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="memory"}
+  - alert: component_saturation_slo_out_of_bounds:nf_conntrack_entries
+    for: 5m
+    annotations:
+      title: The conntrack Entries per Node resource of the {{ $labels.type }} service
+        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
+        its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the conntrack Entries per Node resource:
+
+        Netfilter connection tracking table utilization per node.
+
+        When saturated, new connection attempts (incoming SYN packets) are dropped with no reply, leaving clients to slowly retry (and typically fail again) over the next several seconds.  When packets are being dropped due to this condition, the kernel will log the event as: "nf_conntrack: table full, dropping packet".
+      grafana_dashboard_id: alerts-sat_conntrack
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_conntrack?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "503581002"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, instance) (
+          clamp_min(
+            clamp_max(
+              max_over_time(node_nf_conntrack_entries{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1m])
+              /
+              node_nf_conntrack_entries_limit{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, instance) (
+          clamp_min(
+            clamp_max(
+              max_over_time(node_nf_conntrack_entries{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1m])
+              /
+              node_nf_conntrack_entries_limit{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s3
+    expr: |
+      gitlab_component_saturation:ratio{component="nf_conntrack_entries",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="nf_conntrack_entries"}
+  - alert: component_saturation_slo_out_of_bounds:node_schedstat_waiting
+    for: 90m
+    annotations:
+      title: The Node Scheduler Waiting Time resource of the {{ $labels.type }} service
+        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
+        its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Node Scheduler Waiting Time resource:
+
+        Measures the amount of time that processes spend waiting to be scheduled, according to [`CPU Scheduling Metrics`](https://www.robustperception.io/cpu-scheduling-metrics-from-the-node-exporter).
+
+        A high value indicates that a node has more processes to be run than CPU time available to handle them, and may lead to degraded responsiveness and performance from the application.
+
+        Additionally, it may indicate that the fleet is under-provisioned.
+      grafana_dashboard_id: alerts-sat_node_schedstat_waiting
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_node_schedstat_waiting?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1415313189"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, shard) (
+          clamp_min(
+            clamp_max(
+              avg without (cpu) (rate(node_schedstat_waiting_seconds_total{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1h]))
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, shard) (
+          clamp_min(
+            clamp_max(
+              avg without (cpu) (rate(node_schedstat_waiting_seconds_total{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1h]))
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      gitlab_component_saturation:ratio{component="node_schedstat_waiting",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="node_schedstat_waiting"}
+  - alert: component_saturation_slo_out_of_bounds:open_fds
+    for: 5m
+    annotations:
+      title: The Open file descriptor utilization per instance resource of the {{
+        $labels.type }} service ({{ $labels.stage }} stage) has a saturation exceeding
+        SLO and is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Open file descriptor utilization per instance resource:
+
+        Open file descriptor utilization per instance.
+
+        Saturation on file descriptor limits may indicate a resource-descriptor leak in the application.
+
+        As a temporary fix, you may want to consider restarting the affected process.
+      grafana_dashboard_id: alerts-sat_open_fds
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_open_fds?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1001792825"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, job, instance) (
+          clamp_min(
+            clamp_max(
+              (
+                process_open_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                process_max_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              or
+              (
+                ruby_file_descriptors{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                ruby_process_max_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, job, instance) (
+          clamp_min(
+            clamp_max(
+              (
+                process_open_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                process_max_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              or
+              (
+                ruby_file_descriptors{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                ruby_process_max_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      gitlab_component_saturation:ratio{component="open_fds",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="open_fds"}
+  - alert: component_saturation_slo_out_of_bounds:shard_cpu
+    for: 5m
+    annotations:
+      title: The Average CPU Utilization per Shard resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Average CPU Utilization per Shard resource:
+
+        This resource measures average CPU utilization across all cores in a shard of a service fleet. If it is becoming saturated, it may indicate that the shard needs horizontal or vertical scaling.
+      grafana_dashboard_id: alerts-sat_shard_cpu
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_shard_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1472933476"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, shard) (
+          clamp_min(
+            clamp_max(
+              1 - avg by (environment, tier, type, stage, shard, shard) (
+                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, shard) (
+          clamp_min(
+            clamp_max(
+              1 - avg by (environment, tier, type, stage, shard, shard) (
+                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s3
+    expr: |
+      gitlab_component_saturation:ratio{component="shard_cpu",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="shard_cpu"}
+  - alert: component_saturation_slo_out_of_bounds:single_node_cpu
+    for: 10m
+    annotations:
+      title: The Average CPU Utilization per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Average CPU Utilization per Node resource:
+
+        Average CPU utilization per Node.
+
+        If average CPU is saturated, it may indicate that a fleet is in need of horizontal or vertical scaling. It may also indicate imbalances in load in a fleet.
+      grafana_dashboard_id: alerts-sat_single_node_cpu
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_single_node_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "3372411356"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn) (
+          clamp_min(
+            clamp_max(
+              avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m]))
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn) (
+          clamp_min(
+            clamp_max(
+              avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m]))
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      gitlab_component_saturation:ratio{component="single_node_cpu",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="single_node_cpu"}
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-metadata.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-metadata.yml
new file mode 100644
index 0000000000..fd74401a84
--- /dev/null
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-metadata.yml
@@ -0,0 +1,78 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/saturation.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: GitLab Component Saturation Max SLOs
+  interval: 5m
+  rules:
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: cpu
+    expr: "0.8"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: cpu
+    expr: "0.9"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: disk_inodes
+    expr: "0.75"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: disk_inodes
+    expr: "0.8"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: disk_space
+    expr: "0.85"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: disk_space
+    expr: "0.9"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: memory
+    expr: "0.9"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: memory
+    expr: "0.98"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: nf_conntrack_entries
+    expr: "0.95"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: nf_conntrack_entries
+    expr: "0.98"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: node_schedstat_waiting
+    expr: "0.1"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: node_schedstat_waiting
+    expr: "0.15"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: open_fds
+    expr: "0.8"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: open_fds
+    expr: "0.9"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: shard_cpu
+    expr: "0.85"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: shard_cpu
+    expr: "0.95"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: single_node_cpu
+    expr: "0.9"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: single_node_cpu
+    expr: "0.95"
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation.yml
new file mode 100644
index 0000000000..ff447b39c1
--- /dev/null
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation.yml
@@ -0,0 +1,145 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/saturation.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: Saturation Rules (autogenerated)
+  interval: 1m
+  rules:
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: cpu
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            1 - avg by (env, environment, tier, type, stage, shard) (
+              rate(node_cpu_seconds_total{mode="idle", env="gprd",type="zoekt"}[5m])
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: disk_inodes
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            1 - (
+              node_filesystem_files_free{fstype=~"(ext.|xfs)", env="gprd",type="zoekt"}
+              /
+              node_filesystem_files{fstype=~"(ext.|xfs)", env="gprd",type="zoekt"}
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: disk_space
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            (
+              1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", env="gprd",type="zoekt"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", env="gprd",type="zoekt"}
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: memory
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            instance:node_memory_utilization:ratio{env="gprd",type="zoekt"} or instance:node_memory_utilisation:ratio{env="gprd",type="zoekt"}
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: nf_conntrack_entries
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            max_over_time(node_nf_conntrack_entries{env="gprd",type="zoekt"}[1m])
+            /
+            node_nf_conntrack_entries_limit{env="gprd",type="zoekt"}
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: node_schedstat_waiting
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            avg without (cpu) (rate(node_schedstat_waiting_seconds_total{env="gprd",type="zoekt"}[1h]))
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: open_fds
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            (
+              process_open_fds{env="gprd",type="zoekt"}
+              /
+              process_max_fds{env="gprd",type="zoekt"}
+            )
+            or
+            (
+              ruby_file_descriptors{env="gprd",type="zoekt"}
+              /
+              ruby_process_max_fds{env="gprd",type="zoekt"}
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: shard_cpu
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            1 - avg by (env, environment, tier, type, stage, shard, shard) (
+              rate(node_cpu_seconds_total{mode="idle", env="gprd",type="zoekt"}[5m])
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: single_node_cpu
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", env="gprd",type="zoekt"}[5m]))
+            ,
+            1)
+        ,
+        0)
+      )
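+# Note: every recording rule above shares one shape: the raw utilisation value is first
+# bounded to [0, 1] with clamp_max(..., 1) and clamp_min(..., 0), then the worst (max)
+# value is kept per env/environment/tier/type/stage/shard. A minimal sketch of the
+# pattern, assuming a hypothetical 0-1 gauge named my_utilisation_ratio:
+#
+#   max by(env, environment, tier, type, stage, shard) (
+#     clamp_min(clamp_max(my_utilisation_ratio{env="gprd",type="zoekt"}, 1), 0)
+#   )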
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-service-anomaly-detection-alerts.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-service-anomaly-detection-alerts.yml
new file mode 100644
index 0000000000..937aad313d
--- /dev/null
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-service-anomaly-detection-alerts.yml
@@ -0,0 +1,79 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/service-anomaly-detection-alerts.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: zoekt - service_ops_anomaly_detection
+  rules:
+  - alert: service_ops_out_of_bounds_upper_5m
+    for: 5m
+    annotations:
+      title: 'Anomaly detection: The `{{ $labels.type }}` service (`{{ $labels.stage
+        }}` stage) is receiving more requests than normal'
+      description: |
+        The `{{ $labels.type }}` service (`{{ $labels.stage }}` stage) is receiving more requests than normal. This is often caused by user-generated traffic, sometimes abuse. It can also be caused by application changes that lead to higher operation rates, or by retries in the event of errors. Check the abuse-reporting watches in Elastic (ELK) for possible abuse, and error rates (possibly on upstream services) for the root cause.
+      grafana_dashboard_id: general-service/service-platform-metrics
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/general-service/service-platform-metrics?from=now-12h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "12"
+      grafana_panel_id: "2633741645"
+      grafana_variables: environment,type,stage
+      link1_title: Definition
+      link1_url: https://gitlab.com/gitlab-com/runbooks/blob/master/docs/monitoring/definition-service-ops-rate.md
+      promql_template_1: gitlab_service_ops:rate{environment="$environment", type="$type",
+        stage="$stage"}
+      promql_template_2: gitlab_component_ops:rate{environment="$environment", type="$type",
+        stage="$stage"}
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      (
+          (
+            (gitlab_service_ops:rate{env="gprd",monitor="global",type="zoekt"} -  gitlab_service_ops:rate:prediction{env="gprd",monitor="global",type="zoekt"}) /
+          gitlab_service_ops:rate:stddev_over_time_1w{env="gprd",monitor="global",type="zoekt"}
+        )
+        >
+        3
+      )
+      unless on(tier, type)
+      gitlab_service:mapping:disable_ops_rate_prediction{monitor="global"}
+  - alert: service_ops_out_of_bounds_lower_5m
+    for: 5m
+    annotations:
+      title: 'Anomaly detection: The `{{ $labels.type }}` service (`{{ $labels.stage
+        }}` stage) is receiving fewer requests than normal'
+      description: |
+        The `{{ $labels.type }}` service (`{{ $labels.stage }}` stage) is receiving fewer requests than normal. This is often caused by a failure in an upstream service - for example, an upstream load balancer rejecting all incoming traffic. In many cases, this is as serious as, or more serious than, a traffic spike. Check upstream services for errors that may be causing traffic flow issues in downstream services.
+      grafana_dashboard_id: general-service/service-platform-metrics
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/general-service/service-platform-metrics?from=now-12h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "12"
+      grafana_panel_id: "2633741645"
+      grafana_variables: environment,type,stage
+      link1_title: Definition
+      link1_url: https://gitlab.com/gitlab-com/runbooks/blob/master/docs/monitoring/definition-service-ops-rate.md
+      promql_template_1: gitlab_service_ops:rate{environment="$environment", type="$type",
+        stage="$stage"}
+      promql_template_2: gitlab_component_ops:rate{environment="$environment", type="$type",
+        stage="$stage"}
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      (
+          (
+            (gitlab_service_ops:rate{env="gprd",monitor="global",type="zoekt"} -  gitlab_service_ops:rate:prediction{env="gprd",monitor="global",type="zoekt"}) /
+          gitlab_service_ops:rate:stddev_over_time_1w{env="gprd",monitor="global",type="zoekt"}
+        )
+        <
+        -3
+      )
+      unless on(tier, type)
+      gitlab_service:mapping:disable_ops_rate_prediction{monitor="global"}
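+# Note: both alerts compute a z-score for the current operation rate,
+#   (gitlab_service_ops:rate - gitlab_service_ops:rate:prediction) / gitlab_service_ops:rate:stddev_over_time_1w
+# and fire when it stays above +3 (upper) or below -3 (lower) for 5 minutes, unless the
+# service opts out via gitlab_service:mapping:disable_ops_rate_prediction.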
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-service_ops_anomaly_detection.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-service_ops_anomaly_detection.yml
new file mode 100644
index 0000000000..b821aa7213
--- /dev/null
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-service_ops_anomaly_detection.yml
@@ -0,0 +1,38 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/service-ops-anomaly-detection.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: 'zoekt operation rate weekly statistics: {"env": "gprd", "type": "zoekt"}'
+  interval: 5m
+  rules:
+  - record: gitlab_service_ops:rate:avg_over_time_1w
+    expr: |
+      avg_over_time(gitlab_service_ops:rate_5m{env="gprd",monitor="global",type="zoekt"}[1w])
+      unless on(tier, type)
+      gitlab_service:mapping:disable_ops_rate_prediction{monitor="global",type="zoekt"}
+  - record: gitlab_service_ops:rate:stddev_over_time_1w
+    expr: |
+      stddev_over_time(gitlab_service_ops:rate_5m{env="gprd",monitor="global",type="zoekt"}[1w])
+      unless on(tier, type)
+      gitlab_service:mapping:disable_ops_rate_prediction{monitor="global",type="zoekt"}
+- name: 'zoekt ops rate weekly prediction values: {"env": "gprd", "type": "zoekt"}'
+  interval: 5m
+  rules:
+  - record: gitlab_service_ops:rate:prediction
+    expr: |
+      quantile(0.5,
+        label_replace(
+          gitlab_service_ops:rate_1h{env="gprd",monitor="global",type="zoekt"} offset 10050m # 1 week - 30mins
+          + delta(gitlab_service_ops:rate:avg_over_time_1w{env="gprd",monitor="global",type="zoekt"}[1w])
+          , "p", "1w", "", "")
+        or
+        label_replace(
+          gitlab_service_ops:rate_1h{env="gprd",monitor="global",type="zoekt"} offset 20130m # 2 weeks - 30mins
+          + delta(gitlab_service_ops:rate:avg_over_time_1w{env="gprd",monitor="global",type="zoekt"}[2w])
+          , "p", "2w", "", "")
+        or
+        label_replace(
+          gitlab_service_ops:rate_1h{env="gprd",monitor="global",type="zoekt"} offset 30210m # 3 weeks - 30mins
+          + delta(gitlab_service_ops:rate:avg_over_time_1w{env="gprd",monitor="global",type="zoekt"}[3w])
+          , "p", "3w", "", "")
+      )
+      without (p)
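+# Note: the prediction is the median (quantile 0.5) of three candidates, each taking the
+# ops rate from 1, 2 or 3 weeks ago and adding the drift of the weekly average over the
+# same window. Offsets are in minutes: one week is 7 * 24 * 60 = 10080, so the offsets are
+# 10080 - 30 = 10050m, 20160 - 30 = 20130m and 30240 - 30 = 30210m; the 30-minute shift
+# presumably keeps the lookup on an already-materialised gitlab_service_ops:rate_1h sample.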
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-zoekt-service-slos.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-zoekt-service-slos.yml
new file mode 100644
index 0000000000..c5401614d1
--- /dev/null
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-zoekt-service-slos.yml
@@ -0,0 +1,18 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/service-slos.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: Autogenerated Service SLOs
+  interval: 5m
+  rules:
+  - record: slo:min:events:gitlab_service_apdex:ratio
+    labels:
+      monitor: global
+      tier: inf
+      type: zoekt
+    expr: "0.999000"
+  - record: slo:max:events:gitlab_service_errors:ratio
+    labels:
+      monitor: global
+      tier: inf
+      type: zoekt
+    expr: "0.001000"
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-alerts.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-alerts.yml
new file mode 100644
index 0000000000..221970a5e4
--- /dev/null
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-alerts.yml
@@ -0,0 +1,628 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/saturation.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: GitLab Component Saturation Statistics
+  interval: 5m
+  rules:
+  - record: gitlab_component_saturation:ratio_quantile95_1w
+    expr: quantile_over_time(0.95, gitlab_component_saturation:ratio{env="gstg",type="zoekt"}[1w])
+  - record: gitlab_component_saturation:ratio_quantile99_1w
+    expr: quantile_over_time(0.99, gitlab_component_saturation:ratio{env="gstg",type="zoekt"}[1w])
+  - record: gitlab_component_saturation:ratio_quantile95_1h
+    expr: quantile_over_time(0.95, gitlab_component_saturation:ratio{env="gstg",type="zoekt"}[1h])
+  - record: gitlab_component_saturation:ratio_quantile99_1h
+    expr: quantile_over_time(0.99, gitlab_component_saturation:ratio{env="gstg",type="zoekt"}[1h])
+  - record: gitlab_component_saturation:ratio_avg_1h
+    expr: avg_over_time(gitlab_component_saturation:ratio{env="gstg",type="zoekt"}[1h])
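+  # Note: these records pre-compute weekly and hourly p95/p99 quantiles and an hourly
+  # average of the saturation ratio per component. The alerting group below compares the
+  # raw ratio against its SLO, so these summaries are presumably consumed by dashboards
+  # and capacity reviews rather than by the alerts in this file.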
+- name: GitLab Saturation Alerts
+  interval: 1m
+  rules:
+  - alert: component_saturation_slo_out_of_bounds:cpu
+    for: 5m
+    annotations:
+      title: The Average Service CPU Utilization resource of the {{ $labels.type }}
+        service ({{ $labels.stage }} stage) has a saturation exceeding SLO and is
+        close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Average Service CPU Utilization resource:
+
+        This resource measures average CPU utilization across all cores in a service fleet. If it is becoming saturated, it may indicate that the fleet needs horizontal or vertical scaling.
+      grafana_dashboard_id: alerts-sat_cpu
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1465724101"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard) (
+          clamp_min(
+            clamp_max(
+              1 - avg by (environment, tier, type, stage, shard) (
+                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard) (
+          clamp_min(
+            clamp_max(
+              1 - avg by (environment, tier, type, stage, shard) (
+                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s3
+    expr: |
+      gitlab_component_saturation:ratio{component="cpu",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="cpu"}
+  - alert: component_saturation_slo_out_of_bounds:disk_inodes
+    for: 15m
+    annotations:
+      title: The Disk inode Utilization per Device per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Disk inode Utilization per Device per Node resource:
+
+        Disk inode utilization per device per node.
+
+        If this is too high, it's possible that a directory is filling up with files. Consider logging in and checking temp directories for large numbers of files.
+      grafana_dashboard_id: alerts-sat_disk_inodes
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_inodes?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "39965907"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              1 - (
+                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              1 - (
+                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      gitlab_component_saturation:ratio{component="disk_inodes",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="disk_inodes"}
+  - alert: ComponentResourceRunningOut_disk_inodes
+    for: 15m
+    annotations:
+      title: The Disk inode Utilization per Device per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) is on track to hit capacity within
+        6h
+      description: |
+        This means that this resource is growing rapidly and is predicted to exceed saturation threshold within 6h.
+
+        Details of the Disk inode Utilization per Device per Node resource:
+
+        Disk inode utilization per device per node.
+
+        If this is too high, it's possible that a directory is filling up with files. Consider logging in and checking temp directories for large numbers of files.
+      grafana_dashboard_id: alerts-sat_disk_inodes
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_inodes?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "39965907"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              1 - (
+                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              1 - (
+                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      linear_prediction_saturation_alert: 6h
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      predict_linear(gitlab_component_saturation:ratio{component="disk_inodes",env="gstg",type="zoekt"}[6h], 21600)
+      > on (component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="disk_inodes"}
+  - alert: component_saturation_slo_out_of_bounds:disk_space
+    for: 15m
+    annotations:
+      title: The Disk Space Utilization per Device per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Disk Space Utilization per Device per Node resource:
+
+        Disk space utilization per device per node.
+      grafana_dashboard_id: alerts-sat_disk_space
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_space?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "2661375984"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              (
+                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              (
+                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      gitlab_component_saturation:ratio{component="disk_space",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="disk_space"}
+  - alert: ComponentResourceRunningOut_disk_space
+    for: 15m
+    annotations:
+      title: The Disk Space Utilization per Device per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) is on track to hit capacity within
+        6h
+      description: |
+        This means that this resource is growing rapidly and is predicted to exceed saturation threshold within 6h.
+
+        Details of the Disk Space Utilization per Device per Node resource:
+
+        Disk space utilization per device per node.
+      grafana_dashboard_id: alerts-sat_disk_space
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_space?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "2661375984"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              (
+                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, device) (
+          clamp_min(
+            clamp_max(
+              (
+                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      linear_prediction_saturation_alert: 6h
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      predict_linear(gitlab_component_saturation:ratio{component="disk_space",env="gstg",type="zoekt"}[6h], 21600)
+      > on (component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="disk_space"}
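+  # Note: the two ComponentResourceRunningOut_* alerts above alert on a forecast rather
+  # than the current value: predict_linear(...[6h], 21600) extrapolates the last 6 hours
+  # of the saturation ratio 21600 seconds (6 hours) into the future and fires when that
+  # forecast crosses the same hard threshold, giving roughly 6 hours of lead time.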
+  - alert: component_saturation_slo_out_of_bounds:memory
+    for: 5m
+    annotations:
+      title: The Memory Utilization per Node resource of the {{ $labels.type }} service
+        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
+        its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Memory Utilization per Node resource:
+
+        Memory utilization per node.
+      grafana_dashboard_id: alerts-sat_memory
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_memory?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1955556769"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn) (
+          clamp_min(
+            clamp_max(
+              instance:node_memory_utilization:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} or instance:node_memory_utilisation:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn) (
+          clamp_min(
+            clamp_max(
+              instance:node_memory_utilization:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} or instance:node_memory_utilisation:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      gitlab_component_saturation:ratio{component="memory",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="memory"}
+  - alert: component_saturation_slo_out_of_bounds:nf_conntrack_entries
+    for: 5m
+    annotations:
+      title: The conntrack Entries per Node resource of the {{ $labels.type }} service
+        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
+        its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the conntrack Entries per Node resource:
+
+        Netfilter connection tracking table utilization per node.
+
+        When saturated, new connection attempts (incoming SYN packets) are dropped with no reply, leaving clients to slowly retry (and typically fail again) over the next several seconds. When packets are being dropped due to this condition, the kernel will log the event as: "nf_conntrack: table full, dropping packet".
+      grafana_dashboard_id: alerts-sat_conntrack
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_conntrack?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "503581002"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, instance) (
+          clamp_min(
+            clamp_max(
+              max_over_time(node_nf_conntrack_entries{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1m])
+              /
+              node_nf_conntrack_entries_limit{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, instance) (
+          clamp_min(
+            clamp_max(
+              max_over_time(node_nf_conntrack_entries{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1m])
+              /
+              node_nf_conntrack_entries_limit{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s3
+    expr: |
+      gitlab_component_saturation:ratio{component="nf_conntrack_entries",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="nf_conntrack_entries"}
+  - alert: component_saturation_slo_out_of_bounds:node_schedstat_waiting
+    for: 90m
+    annotations:
+      title: The Node Scheduler Waiting Time resource of the {{ $labels.type }} service
+        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
+        its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Node Scheduler Waiting Time resource:
+
+        Measures the amount of time that processes spend waiting to be scheduled, according to [`CPU Scheduling Metrics`](https://www.robustperception.io/cpu-scheduling-metrics-from-the-node-exporter).
+
+        A high value indicates that a node has more processes to be run than CPU time available to handle them, and may lead to degraded responsiveness and performance from the application.
+
+        Additionally, it may indicate that the fleet is under-provisioned.
+      grafana_dashboard_id: alerts-sat_node_schedstat_waiting
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_node_schedstat_waiting?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1415313189"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn, shard) (
+          clamp_min(
+            clamp_max(
+              avg without (cpu) (rate(node_schedstat_waiting_seconds_total{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1h]))
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn, shard) (
+          clamp_min(
+            clamp_max(
+              avg without (cpu) (rate(node_schedstat_waiting_seconds_total{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1h]))
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      gitlab_component_saturation:ratio{component="node_schedstat_waiting",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="node_schedstat_waiting"}
+  - alert: component_saturation_slo_out_of_bounds:open_fds
+    for: 5m
+    annotations:
+      title: The Open file descriptor utilization per instance resource of the {{
+        $labels.type }} service ({{ $labels.stage }} stage) has a saturation exceeding
+        SLO and is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Open file descriptor utilization per instance resource:
+
+        Open file descriptor utilization per instance.
+
+        Saturation on file descriptor limits may indicate a resource-descriptor leak in the application.
+
+        As a temporary fix, you may want to consider restarting the affected process.
+      grafana_dashboard_id: alerts-sat_open_fds
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_open_fds?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1001792825"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, job, instance) (
+          clamp_min(
+            clamp_max(
+              (
+                process_open_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                process_max_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              or
+              (
+                ruby_file_descriptors{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                ruby_process_max_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, job, instance) (
+          clamp_min(
+            clamp_max(
+              (
+                process_open_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                process_max_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              or
+              (
+                ruby_file_descriptors{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                /
+                ruby_process_max_fds{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      pager: pagerduty
+      rules_domain: general
+      severity: s2
+    expr: |
+      gitlab_component_saturation:ratio{component="open_fds",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="open_fds"}
+  - alert: component_saturation_slo_out_of_bounds:shard_cpu
+    for: 5m
+    annotations:
+      title: The Average CPU Utilization per Shard resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Average CPU Utilization per Shard resource:
+
+        This resource measures average CPU utilization across all cores in a shard of a service fleet. If it is becoming saturated, it may indicate that the shard needs horizontal or vertical scaling.
+      grafana_dashboard_id: alerts-sat_shard_cpu
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_shard_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "1472933476"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, shard) (
+          clamp_min(
+            clamp_max(
+              1 - avg by (environment, tier, type, stage, shard, shard) (
+                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, shard) (
+          clamp_min(
+            clamp_max(
+              1 - avg by (environment, tier, type, stage, shard, shard) (
+                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s3
+    expr: |
+      gitlab_component_saturation:ratio{component="shard_cpu",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="shard_cpu"}
+  - alert: component_saturation_slo_out_of_bounds:single_node_cpu
+    for: 10m
+    annotations:
+      title: The Average CPU Utilization per Node resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
+      description: |
+        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
+
+        Details of the Average CPU Utilization per Node resource:
+
+        Average CPU utilization per Node.
+
+        If average CPU is saturated, it may indicate that the fleet is in need of horizontal or vertical scaling. It may also indicate load imbalances within the fleet.
+      grafana_dashboard_id: alerts-sat_single_node_cpu
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_single_node_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_panel_id: "3372411356"
+      grafana_variables: environment,type,stage
+      promql_query: |
+        max by(environment, tier, type, stage, shard, fqdn) (
+          clamp_min(
+            clamp_max(
+              avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m]))
+              ,
+              1)
+          ,
+          0)
+        )
+      promql_template_1: |
+        max by(environment, tier, type, stage, shard, fqdn) (
+          clamp_min(
+            clamp_max(
+              avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m]))
+              ,
+              1)
+          ,
+          0)
+        )
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      gitlab_component_saturation:ratio{component="single_node_cpu",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="single_node_cpu"}
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-metadata.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-metadata.yml
new file mode 100644
index 0000000000..fd74401a84
--- /dev/null
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-metadata.yml
@@ -0,0 +1,78 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/saturation.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: GitLab Component Saturation Max SLOs
+  interval: 5m
+  rules:
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: cpu
+    expr: "0.8"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: cpu
+    expr: "0.9"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: disk_inodes
+    expr: "0.75"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: disk_inodes
+    expr: "0.8"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: disk_space
+    expr: "0.85"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: disk_space
+    expr: "0.9"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: memory
+    expr: "0.9"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: memory
+    expr: "0.98"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: nf_conntrack_entries
+    expr: "0.95"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: nf_conntrack_entries
+    expr: "0.98"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: node_schedstat_waiting
+    expr: "0.1"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: node_schedstat_waiting
+    expr: "0.15"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: open_fds
+    expr: "0.8"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: open_fds
+    expr: "0.9"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: shard_cpu
+    expr: "0.85"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: shard_cpu
+    expr: "0.95"
+  - record: slo:max:soft:gitlab_component_saturation:ratio
+    labels:
+      component: single_node_cpu
+    expr: "0.9"
+  - record: slo:max:hard:gitlab_component_saturation:ratio
+    labels:
+      component: single_node_cpu
+    expr: "0.95"
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation.yml
new file mode 100644
index 0000000000..14e37fdc5d
--- /dev/null
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation.yml
@@ -0,0 +1,145 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/saturation.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: Saturation Rules (autogenerated)
+  interval: 1m
+  rules:
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: cpu
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            1 - avg by (env, environment, tier, type, stage, shard) (
+              rate(node_cpu_seconds_total{mode="idle", env="gstg",type="zoekt"}[5m])
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: disk_inodes
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            1 - (
+              node_filesystem_files_free{fstype=~"(ext.|xfs)", env="gstg",type="zoekt"}
+              /
+              node_filesystem_files{fstype=~"(ext.|xfs)", env="gstg",type="zoekt"}
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: disk_space
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            (
+              1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", env="gstg",type="zoekt"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", env="gstg",type="zoekt"}
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: memory
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            instance:node_memory_utilization:ratio{env="gstg",type="zoekt"} or instance:node_memory_utilisation:ratio{env="gstg",type="zoekt"}
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: nf_conntrack_entries
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            max_over_time(node_nf_conntrack_entries{env="gstg",type="zoekt"}[1m])
+            /
+            node_nf_conntrack_entries_limit{env="gstg",type="zoekt"}
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: node_schedstat_waiting
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            avg without (cpu) (rate(node_schedstat_waiting_seconds_total{env="gstg",type="zoekt"}[1h]))
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: open_fds
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            (
+              process_open_fds{env="gstg",type="zoekt"}
+              /
+              process_max_fds{env="gstg",type="zoekt"}
+            )
+            or
+            (
+              ruby_file_descriptors{env="gstg",type="zoekt"}
+              /
+              ruby_process_max_fds{env="gstg",type="zoekt"}
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: shard_cpu
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            1 - avg by (env, environment, tier, type, stage, shard, shard) (
+              rate(node_cpu_seconds_total{mode="idle", env="gstg",type="zoekt"}[5m])
+            )
+            ,
+            1)
+        ,
+        0)
+      )
+  - record: gitlab_component_saturation:ratio
+    labels:
+      component: single_node_cpu
+    expr: |
+      max by(env, environment, tier, type, stage, shard) (
+        clamp_min(
+          clamp_max(
+            avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", env="gstg",type="zoekt"}[5m]))
+            ,
+            1)
+        ,
+        0)
+      )
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-service-anomaly-detection-alerts.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-service-anomaly-detection-alerts.yml
new file mode 100644
index 0000000000..b9c2f56dfc
--- /dev/null
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-service-anomaly-detection-alerts.yml
@@ -0,0 +1,79 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/service-anomaly-detection-alerts.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: zoekt - service_ops_anomaly_detection
+  rules:
+  - alert: service_ops_out_of_bounds_upper_5m
+    for: 5m
+    annotations:
+      title: 'Anomaly detection: The `{{ $labels.type }}` service (`{{ $labels.stage
+        }}` stage) is receiving more requests than normal'
+      description: |
+        The `{{ $labels.type }}` service (`{{ $labels.stage }}` stage) is receiving more requests than normal. This is often caused by user-generated traffic, sometimes abuse. It can also be caused by application changes that lead to higher operation rates, or by retries in the event of errors. Check the abuse-reporting watches in Elastic (ELK) for possible abuse, and error rates (possibly on upstream services) for the root cause.
+      grafana_dashboard_id: general-service/service-platform-metrics
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/general-service/service-platform-metrics?from=now-12h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "12"
+      grafana_panel_id: "2633741645"
+      grafana_variables: environment,type,stage
+      link1_title: Definition
+      link1_url: https://gitlab.com/gitlab-com/runbooks/blob/master/docs/monitoring/definition-service-ops-rate.md
+      promql_template_1: gitlab_service_ops:rate{environment="$environment", type="$type",
+        stage="$stage"}
+      promql_template_2: gitlab_component_ops:rate{environment="$environment", type="$type",
+        stage="$stage"}
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      (
+          (
+            (gitlab_service_ops:rate{env="gstg",monitor="global",type="zoekt"} -  gitlab_service_ops:rate:prediction{env="gstg",monitor="global",type="zoekt"}) /
+          gitlab_service_ops:rate:stddev_over_time_1w{env="gstg",monitor="global",type="zoekt"}
+        )
+        >
+        3
+      )
+      unless on(tier, type)
+      gitlab_service:mapping:disable_ops_rate_prediction{monitor="global"}
+  - alert: service_ops_out_of_bounds_lower_5m
+    for: 5m
+    annotations:
+      title: 'Anomaly detection: The `{{ $labels.type }}` service (`{{ $labels.stage
+        }}` stage) is receiving fewer requests than normal'
+      description: |
+        The `{{ $labels.type }}` service (`{{ $labels.stage }}` stage) is receiving fewer requests than normal. This is often caused by a failure in an upstream service - for example, an upstream load balancer rejecting all incoming traffic. In many cases, this is as serious as, or more serious than, a traffic spike. Check upstream services for errors that may be causing traffic flow issues in downstream services.
+      grafana_dashboard_id: general-service/service-platform-metrics
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/general-service/service-platform-metrics?from=now-12h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "12"
+      grafana_panel_id: "2633741645"
+      grafana_variables: environment,type,stage
+      link1_title: Definition
+      link1_url: https://gitlab.com/gitlab-com/runbooks/blob/master/docs/monitoring/definition-service-ops-rate.md
+      promql_template_1: gitlab_service_ops:rate{environment="$environment", type="$type",
+        stage="$stage"}
+      promql_template_2: gitlab_component_ops:rate{environment="$environment", type="$type",
+        stage="$stage"}
+      runbook: docs/{{ $labels.type }}/README.md
+    labels:
+      alert_type: cause
+      rules_domain: general
+      severity: s4
+    expr: |
+      (
+          (
+            (gitlab_service_ops:rate{env="gstg",monitor="global",type="zoekt"} -  gitlab_service_ops:rate:prediction{env="gstg",monitor="global",type="zoekt"}) /
+          gitlab_service_ops:rate:stddev_over_time_1w{env="gstg",monitor="global",type="zoekt"}
+        )
+        <
+        -3
+      )
+      unless on(tier, type)
+      gitlab_service:mapping:disable_ops_rate_prediction{monitor="global"}
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-service_ops_anomaly_detection.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-service_ops_anomaly_detection.yml
new file mode 100644
index 0000000000..1c1e1feab1
--- /dev/null
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-service_ops_anomaly_detection.yml
@@ -0,0 +1,38 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/service-ops-anomaly-detection.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: 'zoekt operation rate weekly statistics: {"env": "gstg", "type": "zoekt"}'
+  interval: 5m
+  rules:
+  - record: gitlab_service_ops:rate:avg_over_time_1w
+    expr: |
+      avg_over_time(gitlab_service_ops:rate_5m{env="gstg",monitor="global",type="zoekt"}[1w])
+      unless on(tier, type)
+      gitlab_service:mapping:disable_ops_rate_prediction{monitor="global",type="zoekt"}
+  - record: gitlab_service_ops:rate:stddev_over_time_1w
+    expr: |
+      stddev_over_time(gitlab_service_ops:rate_5m{env="gstg",monitor="global",type="zoekt"}[1w])
+      unless on(tier, type)
+      gitlab_service:mapping:disable_ops_rate_prediction{monitor="global",type="zoekt"}
+- name: 'zoekt ops rate weekly prediction values: {"env": "gstg", "type": "zoekt"}'
+  interval: 5m
+  rules:
+  - record: gitlab_service_ops:rate:prediction
+    expr: |
+      quantile(0.5,
+        label_replace(
+          gitlab_service_ops:rate_1h{env="gstg",monitor="global",type="zoekt"} offset 10050m # 1 week - 30mins
+          + delta(gitlab_service_ops:rate:avg_over_time_1w{env="gstg",monitor="global",type="zoekt"}[1w])
+          , "p", "1w", "", "")
+        or
+        label_replace(
+          gitlab_service_ops:rate_1h{env="gstg",monitor="global",type="zoekt"} offset 20130m # 2 weeks - 30mins
+          + delta(gitlab_service_ops:rate:avg_over_time_1w{env="gstg",monitor="global",type="zoekt"}[2w])
+          , "p", "2w", "", "")
+        or
+        label_replace(
+          gitlab_service_ops:rate_1h{env="gstg",monitor="global",type="zoekt"} offset 30210m # 3 weeks - 30mins
+          + delta(gitlab_service_ops:rate:avg_over_time_1w{env="gstg",monitor="global",type="zoekt"}[3w])
+          , "p", "3w", "", "")
+      )
+      without (p)
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-zoekt-service-slos.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-zoekt-service-slos.yml
new file mode 100644
index 0000000000..c5401614d1
--- /dev/null
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-zoekt-service-slos.yml
@@ -0,0 +1,18 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/service-slos.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: Autogenerated Service SLOs
+  interval: 5m
+  rules:
+  - record: slo:min:events:gitlab_service_apdex:ratio
+    labels:
+      monitor: global
+      tier: inf
+      type: zoekt
+    expr: "0.999000"
+  - record: slo:max:events:gitlab_service_errors:ratio
+    labels:
+      monitor: global
+      tier: inf
+      type: zoekt
+    expr: "0.001000"
diff --git a/thanos-rules/autogenerated-service-slos.yml b/thanos-rules/autogenerated-service-slos.yml
index 5b40661342..f391786a05 100644
--- a/thanos-rules/autogenerated-service-slos.yml
+++ b/thanos-rules/autogenerated-service-slos.yml
@@ -3273,6 +3273,16 @@ groups:
       tier: sv
       type: woodhouse
     expr: "0.001000"
+  - record: slo:min:events:gitlab_service_apdex:ratio
+    labels:
+      tier: inf
+      type: zoekt
+    expr: "0.999000"
+  - record: slo:max:events:gitlab_service_errors:ratio
+    labels:
+      tier: inf
+      type: zoekt
+    expr: "0.001000"
   - record: gitlab_service:mapping:disable_ops_rate_prediction
     labels:
       tier: inf
-- 
GitLab
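
The generated rules above reduce to a small amount of arithmetic. The prediction series is the median of the hourly ops rate from roughly one, two, and three weeks ago (offsets of 10050m, 20130m, and 30210m, i.e. each whole week minus 30 minutes), each adjusted by the drift in the one-week rolling average; the anomaly alerts then fire when the current rate deviates from that prediction by more than three weekly standard deviations for five minutes. The SLO records simply restate the catalog thresholds, with the error ceiling expressed as 1 - 0.999 = 0.001. A minimal Python sketch of that logic, using illustrative sample numbers rather than real Mimir series (weekly_prediction and ops_rate_out_of_bounds are hypothetical helper names, not part of the generator):

    from statistics import median, pstdev

    def weekly_prediction(offset_rates, weekly_drift):
        # Median of the ~1/2/3-week-old hourly rates, each adjusted by the
        # delta of the one-week rolling average over the same span.
        return median(rate + drift for rate, drift in zip(offset_rates, weekly_drift))

    def ops_rate_out_of_bounds(current_rate, prediction, stddev_1w, threshold=3.0):
        # Combined form of the two alerts: (rate - prediction) / stddev_1w
        # must exceed +3 (upper alert) or fall below -3 (lower alert).
        if stddev_1w == 0:
            return False  # PromQL would simply produce no sample here
        z = (current_rate - prediction) / stddev_1w
        return z > threshold or z < -threshold

    # Illustrative inputs only.
    prediction = weekly_prediction([120.0, 115.0, 130.0], [2.0, 1.5, -0.5])
    stddev_1w = pstdev([118.0, 122.0, 119.0, 121.0])
    print(ops_rate_out_of_bounds(200.0, prediction, stddev_1w))  # True: well above normal

    # The service SLO records restate the catalog thresholds directly:
    apdex_score = 0.999   # -> slo:min:events:gitlab_service_apdex:ratio = 0.999
    error_ratio = 0.999   # -> slo:max:events:gitlab_service_errors:ratio = 1 - 0.999
    print(round(1 - error_ratio, 6))  # 0.001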


From 48ca531acd97cd68dd86fc5400ae4bc16aa595b9 Mon Sep 17 00:00:00 2001
From: Hercules Merscher <hmerscher@gitlab.com>
Date: Mon, 10 Jun 2024 16:11:57 +0200
Subject: [PATCH 3/7] feat(zoekt): skippedMaturityCriteria

---
 metrics-catalog/services/zoekt.jsonnet | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/metrics-catalog/services/zoekt.jsonnet b/metrics-catalog/services/zoekt.jsonnet
index a79d9a550e..5a99784501 100644
--- a/metrics-catalog/services/zoekt.jsonnet
+++ b/metrics-catalog/services/zoekt.jsonnet
@@ -9,4 +9,7 @@ metricsCatalog.serviceDefinition({
     errorRatio: 0.999,
   },
   serviceLevelIndicators: {},
+  skippedMaturityCriteria: {
+    'Structured logs available in Kibana': 'zoekt is an infrastructure component, developers do not interact with it',
+  },
 })
-- 
GitLab


From af9e6cf6e3b91ae2737264dfdf14df1facb90412 Mon Sep 17 00:00:00 2001
From: Hercules Merscher <hmerscher@gitlab.com>
Date: Mon, 10 Jun 2024 16:17:33 +0200
Subject: [PATCH 4/7] feat(zoekt): serviceDependencies

---
 metrics-catalog/services/api.jsonnet     | 1 +
 metrics-catalog/services/sidekiq.jsonnet | 3 ++-
 metrics-catalog/services/web.jsonnet     | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/metrics-catalog/services/api.jsonnet b/metrics-catalog/services/api.jsonnet
index 082f2838c9..5fa980e799 100644
--- a/metrics-catalog/services/api.jsonnet
+++ b/metrics-catalog/services/api.jsonnet
@@ -61,6 +61,7 @@ metricsCatalog.serviceDefinition({
     search: true,
     consul: true,
     'google-cloud-storage': true,
+    zoekt: true,
   },
   provisioning: {
     vms: false,
diff --git a/metrics-catalog/services/sidekiq.jsonnet b/metrics-catalog/services/sidekiq.jsonnet
index 6f0229efef..e4bca9f108 100644
--- a/metrics-catalog/services/sidekiq.jsonnet
+++ b/metrics-catalog/services/sidekiq.jsonnet
@@ -22,7 +22,7 @@ local baseSelector = { type: 'sidekiq' } + ignoredWorkers;
 metricsCatalog.serviceDefinition({
   type: 'sidekiq',
   tier: 'sv',
-  tenants: [ 'gitlab-gprd', 'gitlab-gstg', 'gitlab-pre' ],
+  tenants: ['gitlab-gprd', 'gitlab-gstg', 'gitlab-pre'],
   tags: ['rails'],
 
   // overrides monitoringThresholds for specific shards and SLIs
@@ -82,6 +82,7 @@ metricsCatalog.serviceDefinition({
     search: true,
     consul: true,
     'google-cloud-storage': true,
+    zoekt: true,
   },
   provisioning: {
     kubernetes: true,
diff --git a/metrics-catalog/services/web.jsonnet b/metrics-catalog/services/web.jsonnet
index 56c4e4a28f..9f26798af9 100644
--- a/metrics-catalog/services/web.jsonnet
+++ b/metrics-catalog/services/web.jsonnet
@@ -14,7 +14,7 @@ local railsSelector = { job: 'gitlab-rails', type: 'web' };
 metricsCatalog.serviceDefinition({
   type: 'web',
   tier: 'sv',
-  tenants: [ 'gitlab-gprd', 'gitlab-gstg', 'gitlab-pre' ],
+  tenants: ['gitlab-gprd', 'gitlab-gstg', 'gitlab-pre'],
 
   tags: ['golang', 'rails', 'puma'],
 
@@ -61,6 +61,7 @@ metricsCatalog.serviceDefinition({
     search: true,
     consul: true,
     'google-cloud-storage': true,
+    zoekt: true,
   },
   recordingRuleMetrics: [
     'http_requests_total',
-- 
GitLab


From 276e7ba0af6b14d8ed2514984466c84fcbb01429 Mon Sep 17 00:00:00 2001
From: Hercules Merscher <hmerscher@gitlab.com>
Date: Wed, 12 Jun 2024 15:11:51 +0000
Subject: [PATCH 5/7] feat(zoekt): Kibana url for zoekt logs

---
 metrics-catalog/services/zoekt.jsonnet | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metrics-catalog/services/zoekt.jsonnet b/metrics-catalog/services/zoekt.jsonnet
index 5a99784501..e3d6430c99 100644
--- a/metrics-catalog/services/zoekt.jsonnet
+++ b/metrics-catalog/services/zoekt.jsonnet
@@ -10,6 +10,6 @@ metricsCatalog.serviceDefinition({
   },
   serviceLevelIndicators: {},
   skippedMaturityCriteria: {
-    'Structured logs available in Kibana': 'zoekt is an infrastructure component, developers do not interact with it',
+    'Structured logs available in Kibana': 'logs are available at https://log.gprd.gitlab.net/app/r/s/U9Av8, but not linked to SLIs as there are no SLIs for now.',
   },
 })
-- 
GitLab


From b16811adb7789f8f04d9ee20e691c0a61397540b Mon Sep 17 00:00:00 2001
From: Hercules Merscher <hmerscher@gitlab.com>
Date: Thu, 13 Jun 2024 17:14:22 +0200
Subject: [PATCH 6/7] feat(zoekt): kube resources

---
 metrics-catalog/services/zoekt.jsonnet | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/metrics-catalog/services/zoekt.jsonnet b/metrics-catalog/services/zoekt.jsonnet
index e3d6430c99..7764076143 100644
--- a/metrics-catalog/services/zoekt.jsonnet
+++ b/metrics-catalog/services/zoekt.jsonnet
@@ -8,6 +8,26 @@ metricsCatalog.serviceDefinition({
     apdexScore: 0.999,
     errorRatio: 0.999,
   },
+  provisioning: {
+    kubernetes: true,
+    vms: false,
+  },
+  kubeResources: {
+    'gitlab-zoekt': {
+      kind: 'StatefulSet',
+      containers: [
+        'zoekt-indexer',
+        'zoekt-webserver',
+        'zoekt-internal-gateway',
+      ],
+    },
+    'gitlab-zoekt-gateway': {
+      kind: 'Deployment',
+      containers: [
+        'zoekt-external-gateway',
+      ],
+    },
+  },
   serviceLevelIndicators: {},
   skippedMaturityCriteria: {
     'Structured logs available in Kibana': 'logs are available at https://log.gprd.gitlab.net/app/r/s/U9Av8, but not linked to SLIs as there are no SLIs for now.',
-- 
GitLab
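
Declaring provisioning and kubeResources for zoekt is what drives the per-container Kubernetes rules generated in the next commit (the kube-cause-alerts and kube-state-metrics files). A rough sketch of the kind of per-workload container selector those rules end up scoping to, assuming a hypothetical container_selector helper and taking the container lists above as the only inputs; the real generator lives in the runbooks jsonnet, not here:

    # Illustrative only: how the kubeResources map could be rendered into
    # per-workload PromQL label selectors.
    kube_resources = {
        "gitlab-zoekt": {
            "kind": "StatefulSet",
            "containers": ["zoekt-indexer", "zoekt-webserver", "zoekt-internal-gateway"],
        },
        "gitlab-zoekt-gateway": {
            "kind": "Deployment",
            "containers": ["zoekt-external-gateway"],
        },
    }

    def container_selector(resource_name: str) -> str:
        # Join the declared containers into a regex-style matcher.
        containers = "|".join(kube_resources[resource_name]["containers"])
        return f'{{type="zoekt", container=~"{containers}"}}'

    print(container_selector("gitlab-zoekt"))
    # {type="zoekt", container=~"zoekt-indexer|zoekt-webserver|zoekt-internal-gateway"}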


From d47250b7cef8b4cce624f62fb83ac646a1736c1b Mon Sep 17 00:00:00 2001
From: Hercules Merscher <hmerscher@gitlab.com>
Date: Thu, 13 Jun 2024 17:38:34 +0200
Subject: [PATCH 7/7] chore: make generate

---
 ...ted-kube-state-metrics-recording-rules.yml | 188 ++++---
 .../autogenerated-saturation.yml              |  44 +-
 ...tlab-gprd-gprd-zoekt-kube-cause-alerts.yml |  39 ++
 ...lab-gprd-gprd-zoekt-kube-state-metrics.yml | 446 ++++++++++++++++
 ...tlab-gprd-gprd-zoekt-saturation-alerts.yml | 488 ++++--------------
 ...ab-gprd-gprd-zoekt-saturation-metadata.yml |  62 +--
 ...ated-gitlab-gprd-gprd-zoekt-saturation.yml |  96 ++--
 ...tlab-gstg-gstg-zoekt-kube-cause-alerts.yml |  39 ++
 ...lab-gstg-gstg-zoekt-kube-state-metrics.yml | 446 ++++++++++++++++
 ...tlab-gstg-gstg-zoekt-saturation-alerts.yml | 488 ++++--------------
 ...ab-gstg-gstg-zoekt-saturation-metadata.yml |  62 +--
 ...ated-gitlab-gstg-gstg-zoekt-saturation.yml |  96 ++--
 12 files changed, 1415 insertions(+), 1079 deletions(-)
 create mode 100644 mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-kube-cause-alerts.yml
 create mode 100644 mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-kube-state-metrics.yml
 create mode 100644 mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-kube-cause-alerts.yml
 create mode 100644 mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-kube-state-metrics.yml

diff --git a/legacy-prometheus-rules/autogenerated-kube-state-metrics-recording-rules.yml b/legacy-prometheus-rules/autogenerated-kube-state-metrics-recording-rules.yml
index fd195a01d9..9661ccc6fe 100644
--- a/legacy-prometheus-rules/autogenerated-kube-state-metrics-recording-rules.yml
+++ b/legacy-prometheus-rules/autogenerated-kube-state-metrics-recording-rules.yml
@@ -2216,6 +2216,68 @@ groups:
           "shard", "$0", "label_shard", ".*"
         )
       )
+- name: 'kube-state-metrics-recording-rules: zoekt'
+  interval: 1m
+  rules:
+  - record: kube_pod_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard,label_deployment) (
+        label_replace(
+          label_replace(
+            label_replace(
+              topk by(environment,cluster,pod) (1, kube_pod_labels{label_type="zoekt"}),
+              "stage", "$0", "label_stage", ".*"
+            ),
+            "shard", "$0", "label_shard", ".*"
+          ),
+          "deployment", "$0", "label_deployment", ".*"
+        )
+      )
+  - record: kube_horizontalpodautoscaler_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels{label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
+  - record: kube_ingress_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,ingress) (1, kube_ingress_labels{label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
+  - record: kube_deployment_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,deployment) (1, kube_deployment_labels{label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
 - name: 'kube-state-metrics-recording-rules: enriched label recording rules'
   interval: 1m
   rules:
@@ -2224,376 +2286,376 @@ groups:
       container_start_time_seconds{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_cpu_cfs_periods_total:labeled
     expr: |
       container_cpu_cfs_periods_total{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_cpu_cfs_throttled_periods_total:labeled
     expr: |
       container_cpu_cfs_throttled_periods_total{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_cpu_cfs_throttled_seconds_total:labeled
     expr: |
       container_cpu_cfs_throttled_seconds_total{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_cpu_usage_seconds_total:labeled
     expr: |
       container_cpu_usage_seconds_total{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_memory_cache:labeled
     expr: |
       container_memory_cache{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_memory_rss:labeled
     expr: |
       container_memory_rss{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_memory_swap:labeled
     expr: |
       container_memory_swap{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_memory_usage_bytes:labeled
     expr: |
       container_memory_usage_bytes{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_memory_working_set_bytes:labeled
     expr: |
       container_memory_working_set_bytes{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_network_receive_bytes_total:labeled
     expr: |
       container_network_receive_bytes_total{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_network_transmit_bytes_total:labeled
     expr: |
       container_network_transmit_bytes_total{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_spec_cpu_period:labeled
     expr: |
       container_spec_cpu_period{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_spec_cpu_quota:labeled
     expr: |
       container_spec_cpu_quota{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_spec_cpu_shares:labeled
     expr: |
       container_spec_cpu_shares{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: container_spec_memory_limit_bytes:labeled
     expr: |
       container_spec_memory_limit_bytes{metrics_path="/metrics/cadvisor"}
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_resource_limits:labeled
     expr: |
       kube_pod_container_resource_limits
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_resource_requests:labeled
     expr: |
       kube_pod_container_resource_requests
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_status_last_terminated_reason:labeled
     expr: |
       kube_pod_container_status_last_terminated_reason
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_status_ready:labeled
     expr: |
       kube_pod_container_status_ready
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_status_restarts_total:labeled
     expr: |
       kube_pod_container_status_restarts_total
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_status_running:labeled
     expr: |
       kube_pod_container_status_running
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_status_terminated:labeled
     expr: |
       kube_pod_container_status_terminated
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_status_terminated_reason:labeled
     expr: |
       kube_pod_container_status_terminated_reason
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_status_waiting:labeled
     expr: |
       kube_pod_container_status_waiting
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_pod_container_status_waiting_reason:labeled
     expr: |
       kube_pod_container_status_waiting_reason
       *
       on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
-      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_horizontalpodautoscaler_spec_target_metric:labeled
     expr: |
       kube_horizontalpodautoscaler_spec_target_metric
       *
       on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_horizontalpodautoscaler_status_condition:labeled
     expr: |
       kube_horizontalpodautoscaler_status_condition
       *
       on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_horizontalpodautoscaler_status_current_replicas:labeled
     expr: |
       kube_horizontalpodautoscaler_status_current_replicas
       *
       on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_horizontalpodautoscaler_status_desired_replicas:labeled
     expr: |
       kube_horizontalpodautoscaler_status_desired_replicas
       *
       on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_horizontalpodautoscaler_metadata_generation:labeled
     expr: |
       kube_horizontalpodautoscaler_metadata_generation
       *
       on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_horizontalpodautoscaler_spec_max_replicas:labeled
     expr: |
       kube_horizontalpodautoscaler_spec_max_replicas
       *
       on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_horizontalpodautoscaler_spec_min_replicas:labeled
     expr: |
       kube_horizontalpodautoscaler_spec_min_replicas
       *
       on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_node_status_capacity:labeled
     expr: |
       kube_node_status_capacity
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_node_status_allocatable:labeled
     expr: |
       kube_node_status_allocatable
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_node_status_condition:labeled
     expr: |
       kube_node_status_condition
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_schedstat_waiting_seconds_total:labeled
     expr: |
       node_schedstat_waiting_seconds_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_cpu_seconds_total:labeled
     expr: |
       node_cpu_seconds_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_network_transmit_bytes_total:labeled
     expr: |
       node_network_transmit_bytes_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_network_receive_bytes_total:labeled
     expr: |
       node_network_receive_bytes_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_disk_reads_completed_total:labeled
     expr: |
       node_disk_reads_completed_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_disk_writes_completed_total:labeled
     expr: |
       node_disk_writes_completed_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_disk_read_bytes_total:labeled
     expr: |
       node_disk_read_bytes_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_disk_written_bytes_total:labeled
     expr: |
       node_disk_written_bytes_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_disk_read_time_seconds_total:labeled
     expr: |
       node_disk_read_time_seconds_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_disk_write_time_seconds_total:labeled
     expr: |
       node_disk_write_time_seconds_total
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_load1:labeled
     expr: |
       node_load1
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_load5:labeled
     expr: |
       node_load5
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_load15:labeled
     expr: |
       node_load15
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: node_vmstat_oom_kill:labeled
     expr: |
       node_vmstat_oom_kill
       *
       on(environment,cluster,node) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: nginx_ingress_controller_requests:labeled
     expr: |
       nginx_ingress_controller_requests
       *
       on(environment,cluster,ingress) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,ingress) (1, kube_ingress_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,ingress) (1, kube_ingress_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_status_replicas_unavailable:labeled
     expr: |
       kube_deployment_status_replicas_unavailable
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_status_replicas_updated:labeled
     expr: |
       kube_deployment_status_replicas_updated
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_spec_paused:labeled
     expr: |
       kube_deployment_spec_paused
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_spec_replicas:labeled
     expr: |
       kube_deployment_spec_replicas
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_spec_strategy_rollingupdate_max_surge:labeled
     expr: |
       kube_deployment_spec_strategy_rollingupdate_max_surge
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_spec_strategy_rollingupdate_max_unavailable:labeled
     expr: |
       kube_deployment_spec_strategy_rollingupdate_max_unavailable
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_status_condition:labeled
     expr: |
       kube_deployment_status_condition
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_status_replicas_available:labeled
     expr: |
       kube_deployment_status_replicas_available
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_created:labeled
     expr: |
       kube_deployment_created
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_metadata_generation:labeled
     expr: |
       kube_deployment_metadata_generation
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_status_observed_generation:labeled
     expr: |
       kube_deployment_status_observed_generation
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
   - record: kube_deployment_status_replicas:labeled
     expr: |
       kube_deployment_status_replicas
       *
       on(environment,cluster,deployment) group_left(tier,type,stage,shard)
-      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse"})
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{type=~"ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|registry|sidekiq|vault|web|web-pages|websockets|woodhouse|zoekt"})
diff --git a/legacy-prometheus-rules/autogenerated-saturation.yml b/legacy-prometheus-rules/autogenerated-saturation.yml
index ca319dc3c2..a4eb4b10bb 100644
--- a/legacy-prometheus-rules/autogenerated-saturation.yml
+++ b/legacy-prometheus-rules/autogenerated-saturation.yml
@@ -79,7 +79,7 @@ groups:
         clamp_min(
           clamp_max(
             1 - avg by (environment, tier, type, stage, shard) (
-              rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[5m])
+              rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[5m])
             )
             ,
             1)
@@ -94,9 +94,9 @@ groups:
         clamp_min(
           clamp_max(
             1 - (
-              node_filesystem_files_free{fstype=~"(ext.|xfs)", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
+              node_filesystem_files_free{fstype=~"(ext.|xfs)", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
               /
-              node_filesystem_files{fstype=~"(ext.|xfs)", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
+              node_filesystem_files{fstype=~"(ext.|xfs)", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
             )
             ,
             1)
@@ -111,7 +111,7 @@ groups:
         clamp_min(
           clamp_max(
             (
-              1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
+              1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
             )
             ,
             1)
@@ -366,15 +366,15 @@ groups:
           clamp_max(
             (
               sum by (environment, tier, type, stage, shard, pod, container) (
-                rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse"}[1h])
+                rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt"}[1h])
               )
               unless on(environment, tier, type, stage, shard, pod, container) (
-                container_spec_cpu_quota:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse"}
+                container_spec_cpu_quota:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt"}
               )
             )
             /
             sum by(environment, tier, type, stage, shard, pod, container) (
-              kube_pod_container_resource_requests:labeled{container!="", container!="POD", resource="cpu", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse"}
+              kube_pod_container_resource_requests:labeled{container!="", container!="POD", resource="cpu", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt"}
             )
             ,
             1)
@@ -389,13 +389,13 @@ groups:
         clamp_min(
           clamp_max(
             sum by (environment, tier, type, stage, shard, pod, container) (
-              rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse"}[5m])
+              rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt"}[5m])
             )
             /
             sum by(environment, tier, type, stage, shard, pod, container) (
-              container_spec_cpu_quota:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse"}
+              container_spec_cpu_quota:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt"}
               /
-              container_spec_cpu_period:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse"}
+              container_spec_cpu_period:labeled{container!="", container!="POD", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt"}
             )
             ,
             1)
@@ -409,9 +409,9 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            container_memory_working_set_bytes:labeled{container!="", container!="POD", type=~"atlantis|camoproxy|consul|external-dns|istio|kas|kube|logging|mailroom|monitoring|nginx|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|vault|web-pages|woodhouse"}
+            container_memory_working_set_bytes:labeled{container!="", container!="POD", type=~"atlantis|camoproxy|consul|external-dns|istio|kas|kube|logging|mailroom|monitoring|nginx|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|vault|web-pages|woodhouse|zoekt"}
             /
-            (container_spec_memory_limit_bytes:labeled{container!="", container!="POD", type=~"atlantis|camoproxy|consul|external-dns|istio|kas|kube|logging|mailroom|monitoring|nginx|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|vault|web-pages|woodhouse"} > 0)
+            (container_spec_memory_limit_bytes:labeled{container!="", container!="POD", type=~"atlantis|camoproxy|consul|external-dns|istio|kas|kube|logging|mailroom|monitoring|nginx|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|vault|web-pages|woodhouse|zoekt"} > 0)
             ,
             1)
         ,
@@ -441,9 +441,9 @@ groups:
         clamp_min(
           clamp_max(
             avg by (environment, tier, type, stage, shard, pod, container)(
-              rate(container_cpu_cfs_throttled_periods_total:labeled{container!="", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse"}[5m])
+              rate(container_cpu_cfs_throttled_periods_total:labeled{container!="", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt"}[5m])
               /
-              rate(container_cpu_cfs_periods_total:labeled{container!="", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse"}[5m])
+              rate(container_cpu_cfs_periods_total:labeled{container!="", type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt"}[5m])
             )
             ,
             1)
@@ -457,9 +457,9 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            kube_horizontalpodautoscaler_status_desired_replicas:labeled{type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
+            kube_horizontalpodautoscaler_status_desired_replicas:labeled{type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
             /
-            kube_horizontalpodautoscaler_spec_max_replicas:labeled{type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
+            kube_horizontalpodautoscaler_spec_max_replicas:labeled{type=~"web|ai-assisted|api|atlantis|camoproxy|consul|external-dns|git|internal-api|istio|kas|kube|logging|mailroom|monitoring|nginx|ops-gitlab-net|packagecloud|plantuml|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sidekiq|vault|web-pages|websockets|woodhouse|zoekt", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
             ,
             1)
         ,
@@ -523,7 +523,7 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            instance:node_memory_utilization:ratio{type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"} or instance:node_memory_utilisation:ratio{type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
+            instance:node_memory_utilization:ratio{type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"} or instance:node_memory_utilisation:ratio{type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
             ,
             1)
         ,
@@ -578,9 +578,9 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            max_over_time(node_nf_conntrack_entries{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[1m])
+            max_over_time(node_nf_conntrack_entries{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[1m])
             /
-            node_nf_conntrack_entries_limit{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}
+            node_nf_conntrack_entries_limit{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}
             ,
             1)
         ,
@@ -593,7 +593,7 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            avg without (cpu) (rate(node_schedstat_waiting_seconds_total{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[1h]))
+            avg without (cpu) (rate(node_schedstat_waiting_seconds_total{type=~"patroni|ci-runners|consul|customersdot|frontend|gitaly|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[1h]))
             ,
             1)
         ,
@@ -1294,7 +1294,7 @@ groups:
         clamp_min(
           clamp_max(
             1 - avg by (environment, tier, type, stage, shard, shard) (
-              rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[5m])
+              rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[5m])
             )
             ,
             1)
@@ -1361,7 +1361,7 @@ groups:
       max by(environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages|zoekt"}[5m]))
+            avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", type=~"gitaly|ci-runners|consul|customersdot|frontend|jaeger|monitoring|patroni-ci|patroni-embedding|patroni-registry|patroni|pgbouncer-ci|pgbouncer-embedding|pgbouncer-registry|pgbouncer|postgres-archive|redis-cluster-cache|redis-cluster-chat-cache|redis-cluster-feature-flag|redis-cluster-queues-meta|redis-cluster-ratelimiting|redis-cluster-repo-cache|redis-cluster-shared-state|redis-db-load-balancing|redis-pubsub|redis-registry-cache|redis-sessions|redis-sidekiq|redis-tracechunks|redis|registry|sentry|web-pages"}[5m]))
             ,
             1)
         ,
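
The hunks above add zoekt to the container-level (Kubernetes) saturation expressions and remove it from the node-level ones. For the kube_container_cpu_limit resource, the generated expression reduces to the following per-container ratio when restricted to the new type; this is an illustrative PromQL sketch of what the rule evaluates for zoekt, not an additional rule:

# Illustrative sketch: 5m CPU usage per zoekt container, compared against its
# CPU limit in cores (spec quota divided by the CFS period).
sum by (environment, tier, type, stage, shard, pod, container) (
  rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", type="zoekt"}[5m])
)
/
sum by (environment, tier, type, stage, shard, pod, container) (
  container_spec_cpu_quota:labeled{container!="", container!="POD", type="zoekt"}
  /
  container_spec_cpu_period:labeled{container!="", container!="POD", type="zoekt"}
)

The clamp_max/clamp_min wrappers in the recording rule only bound this ratio to [0, 1]; the saturation alerts later in this patch then compare the recorded gitlab_component_saturation:ratio against the hard SLO for each component.
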
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-kube-cause-alerts.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-kube-cause-alerts.yml
new file mode 100644
index 0000000000..73eb427571
--- /dev/null
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-kube-cause-alerts.yml
@@ -0,0 +1,39 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/kube-cause-alerts.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: kube_cause_alerts
+  rules:
+  - alert: KubeContainersWaitingInError
+    for: 20m
+    annotations:
+      title: Containers for the `{{ $labels.type }}` service, `{{ $labels.stage }}`
+        are unable to start.
+      description: |
+        More than 50% of the deployment's `maxSurge` setting consists of containers unable to start for reasons other than `ContainerCreating`.
+      grafana_dashboard_id: alerts-kube_containers_waiting/alerts-containers-waiting
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-kube_containers_waiting/alerts-containers-waiting?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}&var-cluster={{ $labels.cluster }}
+      grafana_datasource_id: mimir-gitlab-gprd
+      grafana_min_zoom_hours: "6"
+      grafana_variables: environment,type,stage,cluster
+    labels:
+      alert_type: cause
+      pager: pagerduty
+      runbook: docs/kube/alerts/KubeContainersWaitingInError.md
+      severity: s2
+      team: sre_reliability
+    expr: |
+      sum by (type, env, tier, stage, cluster) (
+        kube_pod_container_status_waiting_reason:labeled{
+          env="gprd",type="zoekt",
+          reason!="ContainerCreating",
+        }
+      )
+      > 0
+      >= on(type, env, tier, stage, cluster) (
+        topk by(type, env, tier, stage, cluster) (1,
+          kube_deployment_spec_strategy_rollingupdate_max_surge:labeled{env="gprd",type="zoekt"}
+        )
+        * 0.5
+      )
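
The KubeContainersWaitingInError expression above fires when containers stuck in a waiting state for any reason other than ContainerCreating make up at least half of the deployment's maxSurge for 20 minutes. As a hypothetical drill-down while the alert is firing (not part of the generated rules), the same labeled metric can be grouped by reason to see what is blocking the zoekt rollout:

# Hypothetical drill-down: which waiting reasons are contributing, per stage and cluster.
sum by (reason, stage, cluster) (
  kube_pod_container_status_waiting_reason:labeled{env="gprd", type="zoekt", reason!="ContainerCreating"}
) > 0
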
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-kube-state-metrics.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-kube-state-metrics.yml
new file mode 100644
index 0000000000..41032f28d0
--- /dev/null
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-kube-state-metrics.yml
@@ -0,0 +1,446 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/kube-state-metrics-recording-rules.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: 'kube-state-metrics-recording-rules: zoekt'
+  interval: 1m
+  rules:
+  - record: kube_pod_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard,label_deployment) (
+        label_replace(
+          label_replace(
+            label_replace(
+              topk by(environment,cluster,pod) (1, kube_pod_labels{env="gprd",label_type="zoekt"}),
+              "stage", "$0", "label_stage", ".*"
+            ),
+            "shard", "$0", "label_shard", ".*"
+          ),
+          "deployment", "$0", "label_deployment", ".*"
+        )
+      )
+  - record: kube_horizontalpodautoscaler_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels{env="gprd",label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
+  - record: kube_ingress_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,ingress) (1, kube_ingress_labels{env="gprd",label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
+  - record: kube_deployment_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,deployment) (1, kube_deployment_labels{env="gprd",label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
+- name: 'kube-state-metrics-recording-rules: enriched label recording rules'
+  interval: 1m
+  rules:
+  - record: container_start_time_seconds:labeled
+    expr: |
+      container_start_time_seconds{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_cpu_cfs_periods_total:labeled
+    expr: |
+      container_cpu_cfs_periods_total{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_cpu_cfs_throttled_periods_total:labeled
+    expr: |
+      container_cpu_cfs_throttled_periods_total{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_cpu_cfs_throttled_seconds_total:labeled
+    expr: |
+      container_cpu_cfs_throttled_seconds_total{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_cpu_usage_seconds_total:labeled
+    expr: |
+      container_cpu_usage_seconds_total{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_memory_cache:labeled
+    expr: |
+      container_memory_cache{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_memory_rss:labeled
+    expr: |
+      container_memory_rss{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_memory_swap:labeled
+    expr: |
+      container_memory_swap{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_memory_usage_bytes:labeled
+    expr: |
+      container_memory_usage_bytes{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_memory_working_set_bytes:labeled
+    expr: |
+      container_memory_working_set_bytes{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_network_receive_bytes_total:labeled
+    expr: |
+      container_network_receive_bytes_total{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_network_transmit_bytes_total:labeled
+    expr: |
+      container_network_transmit_bytes_total{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_spec_cpu_period:labeled
+    expr: |
+      container_spec_cpu_period{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_spec_cpu_quota:labeled
+    expr: |
+      container_spec_cpu_quota{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_spec_cpu_shares:labeled
+    expr: |
+      container_spec_cpu_shares{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: container_spec_memory_limit_bytes:labeled
+    expr: |
+      container_spec_memory_limit_bytes{env="gprd",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_resource_limits:labeled
+    expr: |
+      kube_pod_container_resource_limits{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_resource_requests:labeled
+    expr: |
+      kube_pod_container_resource_requests{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_status_last_terminated_reason:labeled
+    expr: |
+      kube_pod_container_status_last_terminated_reason{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_status_ready:labeled
+    expr: |
+      kube_pod_container_status_ready{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_status_restarts_total:labeled
+    expr: |
+      kube_pod_container_status_restarts_total{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_status_running:labeled
+    expr: |
+      kube_pod_container_status_running{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_status_terminated:labeled
+    expr: |
+      kube_pod_container_status_terminated{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_status_terminated_reason:labeled
+    expr: |
+      kube_pod_container_status_terminated_reason{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_status_waiting:labeled
+    expr: |
+      kube_pod_container_status_waiting{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_pod_container_status_waiting_reason:labeled
+    expr: |
+      kube_pod_container_status_waiting_reason{env="gprd"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_spec_target_metric:labeled
+    expr: |
+      kube_horizontalpodautoscaler_spec_target_metric{env="gprd"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_status_condition:labeled
+    expr: |
+      kube_horizontalpodautoscaler_status_condition{env="gprd"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_status_current_replicas:labeled
+    expr: |
+      kube_horizontalpodautoscaler_status_current_replicas{env="gprd"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_status_desired_replicas:labeled
+    expr: |
+      kube_horizontalpodautoscaler_status_desired_replicas{env="gprd"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_metadata_generation:labeled
+    expr: |
+      kube_horizontalpodautoscaler_metadata_generation{env="gprd"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_spec_max_replicas:labeled
+    expr: |
+      kube_horizontalpodautoscaler_spec_max_replicas{env="gprd"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_spec_min_replicas:labeled
+    expr: |
+      kube_horizontalpodautoscaler_spec_min_replicas{env="gprd"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_node_status_capacity:labeled
+    expr: |
+      kube_node_status_capacity{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_node_status_allocatable:labeled
+    expr: |
+      kube_node_status_allocatable{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_node_status_condition:labeled
+    expr: |
+      kube_node_status_condition{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_schedstat_waiting_seconds_total:labeled
+    expr: |
+      node_schedstat_waiting_seconds_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_cpu_seconds_total:labeled
+    expr: |
+      node_cpu_seconds_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_network_transmit_bytes_total:labeled
+    expr: |
+      node_network_transmit_bytes_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_network_receive_bytes_total:labeled
+    expr: |
+      node_network_receive_bytes_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_disk_reads_completed_total:labeled
+    expr: |
+      node_disk_reads_completed_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_disk_writes_completed_total:labeled
+    expr: |
+      node_disk_writes_completed_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_disk_read_bytes_total:labeled
+    expr: |
+      node_disk_read_bytes_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_disk_written_bytes_total:labeled
+    expr: |
+      node_disk_written_bytes_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_disk_read_time_seconds_total:labeled
+    expr: |
+      node_disk_read_time_seconds_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_disk_write_time_seconds_total:labeled
+    expr: |
+      node_disk_write_time_seconds_total{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_load1:labeled
+    expr: |
+      node_load1{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_load5:labeled
+    expr: |
+      node_load5{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_load15:labeled
+    expr: |
+      node_load15{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: node_vmstat_oom_kill:labeled
+    expr: |
+      node_vmstat_oom_kill{env="gprd"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: nginx_ingress_controller_requests:labeled
+    expr: |
+      nginx_ingress_controller_requests{env="gprd"}
+      *
+      on(environment,cluster,ingress) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,ingress) (1, kube_ingress_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_status_replicas_unavailable:labeled
+    expr: |
+      kube_deployment_status_replicas_unavailable{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_status_replicas_updated:labeled
+    expr: |
+      kube_deployment_status_replicas_updated{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_spec_paused:labeled
+    expr: |
+      kube_deployment_spec_paused{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_spec_replicas:labeled
+    expr: |
+      kube_deployment_spec_replicas{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_spec_strategy_rollingupdate_max_surge:labeled
+    expr: |
+      kube_deployment_spec_strategy_rollingupdate_max_surge{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_spec_strategy_rollingupdate_max_unavailable:labeled
+    expr: |
+      kube_deployment_spec_strategy_rollingupdate_max_unavailable{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_status_condition:labeled
+    expr: |
+      kube_deployment_status_condition{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_status_replicas_available:labeled
+    expr: |
+      kube_deployment_status_replicas_available{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_created:labeled
+    expr: |
+      kube_deployment_created{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_metadata_generation:labeled
+    expr: |
+      kube_deployment_metadata_generation{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_status_observed_generation:labeled
+    expr: |
+      kube_deployment_status_observed_generation{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
+  - record: kube_deployment_status_replicas:labeled
+    expr: |
+      kube_deployment_status_replicas{env="gprd"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gprd",type=~"zoekt"})
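
Every recording rule in this file applies the same enrichment join: a raw cAdvisor, kube-state-metrics, or node-level series is multiplied against an info-style :labeled metric so that the result carries the tier, type, stage, shard (and, for pods, deployment) labels that the saturation expressions earlier in this patch select on. A commented sketch of the pattern, taken from one of the rules above and illustrative only:

# Raw cAdvisor series, one per container, joined many-to-one against a single
# kube_pod_labels:labeled series per pod; topk(1, ...) deduplicates the info
# metric so the right-hand side of the join is unambiguous, and group_left
# copies the service labels onto the result.
container_memory_working_set_bytes{env="gprd", metrics_path="/metrics/cadvisor"}
* on(environment, cluster, pod) group_left(tier, type, stage, shard, deployment)
topk by (environment, cluster, pod) (1, kube_pod_labels:labeled{env="gprd", type=~"zoekt"})

The recorded result is container_memory_working_set_bytes:labeled, which is what the type=~"...|zoekt" selectors in the saturation rules above match against.
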
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-alerts.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-alerts.yml
index 191ec4be99..059e78629f 100644
--- a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-alerts.yml
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-alerts.yml
@@ -17,147 +17,40 @@ groups:
 - name: GitLab Saturation Alerts
   interval: 1m
   rules:
-  - alert: component_saturation_slo_out_of_bounds:cpu
-    for: 5m
-    annotations:
-      title: The Average Service CPU Utilization resource of the {{ $labels.type }}
-        service ({{ $labels.stage }} stage) has a saturation exceeding SLO and is
-        close to its capacity limit.
-      description: |
-        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
-
-        Details of the Average Service CPU Utilization resource:
-
-        This resource measures average CPU utilization across an all cores in a service fleet. If it is becoming saturated, it may indicate that the fleet needs horizontal or vertical scaling.
-      grafana_dashboard_id: alerts-sat_cpu
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gprd
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "1465724101"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard) (
-          clamp_min(
-            clamp_max(
-              1 - avg by (environment, tier, type, stage, shard) (
-                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard) (
-          clamp_min(
-            clamp_max(
-              1 - avg by (environment, tier, type, stage, shard) (
-                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      rules_domain: general
-      severity: s3
-    expr: |
-      gitlab_component_saturation:ratio{component="cpu",env="gprd",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="cpu"}
-  - alert: component_saturation_slo_out_of_bounds:disk_inodes
+  - alert: component_saturation_slo_out_of_bounds:kube_container_cpu_limit
     for: 15m
     annotations:
-      title: The Disk inode Utilization per Device per Node resource of the {{ $labels.type
+      title: The Kube Container CPU over-utilization resource of the {{ $labels.type
         }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
         is close to its capacity limit.
       description: |
         This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
 
-        Details of the Disk inode Utilization per Device per Node resource:
+        Details of the Kube Container CPU over-utilization resource:
 
-        Disk inode utilization per device per node.
+        Kubernetes containers can have a limit configured on how much CPU they can consume in a burst. If we are at this limit, exceeding the allocated requested resources, we should consider revisiting the container's HPA configuration.
 
-        If this is too high, its possible that a directory is filling up with files. Consider logging in an checking temp directories for large numbers of files
-      grafana_dashboard_id: alerts-sat_disk_inodes
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_inodes?from=now-6h/m&to=now-1m/m&var-environment={{
+        When a container is utilizing CPU resources up to its configured limit for extended periods of time, this could cause it and other running containers to be throttled.
+      grafana_dashboard_id: alerts-sat_kube_container_cpu_limit
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_kube_container_cpu_limit?from=now-6h/m&to=now-1m/m&var-environment={{
         $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
         }}
       grafana_datasource_id: mimir-gitlab-gprd
       grafana_min_zoom_hours: "6"
-      grafana_panel_id: "39965907"
+      grafana_panel_id: "1262336683"
       grafana_variables: environment,type,stage
       promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              1 - (
-                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-                /
-                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
+        max by(environment, tier, type, stage, shard, pod, container) (
           clamp_min(
             clamp_max(
-              1 - (
-                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-                /
-                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              sum by (environment, tier, type, stage, shard, pod, container) (
+                rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
               )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      pager: pagerduty
-      rules_domain: general
-      severity: s2
-    expr: |
-      gitlab_component_saturation:ratio{component="disk_inodes",env="gprd",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="disk_inodes"}
-  - alert: ComponentResourceRunningOut_disk_inodes
-    for: 15m
-    annotations:
-      title: The Disk inode Utilization per Device per Node resource of the {{ $labels.type
-        }} service ({{ $labels.stage }} stage) is on track to hit capacity within
-        6h
-      description: |
-        This means that this resource is growing rapidly and is predicted to exceed saturation threshold within 6h.
-
-        Details of the Disk inode Utilization per Device per Node resource:
-
-        Disk inode utilization per device per node.
-
-        If this is too high, its possible that a directory is filling up with files. Consider logging in an checking temp directories for large numbers of files
-      grafana_dashboard_id: alerts-sat_disk_inodes
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_inodes?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gprd
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "39965907"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              1 - (
-                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              /
+              sum by(environment, tier, type, stage, shard, pod, container) (
+                container_spec_cpu_quota:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
                 /
-                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                container_spec_cpu_period:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
               )
               ,
               1)
@@ -165,13 +58,17 @@ groups:
           0)
         )
       promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
+        max by(environment, tier, type, stage, shard, pod, container) (
           clamp_min(
             clamp_max(
-              1 - (
-                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              sum by (environment, tier, type, stage, shard, pod, container) (
+                rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              /
+              sum by(environment, tier, type, stage, shard, pod, container) (
+                container_spec_cpu_quota:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
                 /
-                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                container_spec_cpu_period:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
               )
               ,
               1)
@@ -181,157 +78,50 @@ groups:
       runbook: docs/{{ $labels.type }}/README.md
     labels:
       alert_type: cause
-      linear_prediction_saturation_alert: 6h
-      pager: pagerduty
       rules_domain: general
-      severity: s2
+      severity: s4
     expr: |
-      predict_linear(gitlab_component_saturation:ratio{component="disk_inodes",env="gprd",type="zoekt"}[6h], 21600)
-      > on (component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="disk_inodes"}
-  - alert: component_saturation_slo_out_of_bounds:disk_space
+      gitlab_component_saturation:ratio{component="kube_container_cpu_limit",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="kube_container_cpu_limit"}
+  - alert: component_saturation_slo_out_of_bounds:kube_container_memory
     for: 15m
     annotations:
-      title: The Disk Space Utilization per Device per Node resource of the {{ $labels.type
+      title: The Kube Container Memory Utilization resource of the {{ $labels.type
         }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
         is close to its capacity limit.
       description: |
         This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
 
-        Details of the Disk Space Utilization per Device per Node resource:
+        Details of the Kube Container Memory Utilization resource:
 
-        Disk space utilization per device per node.
-      grafana_dashboard_id: alerts-sat_disk_space
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_space?from=now-6h/m&to=now-1m/m&var-environment={{
+        This uses the working set size from cAdvisor for the cgroup's memory usage. That may not be a good measure as it includes filesystem cache pages that are not necessarily attributable to the application inside the cgroup, and are permitted to be evicted instead of being OOM killed.
+      grafana_dashboard_id: alerts-sat_kube_container_memory
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_kube_container_memory?from=now-6h/m&to=now-1m/m&var-environment={{
         $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
         }}
       grafana_datasource_id: mimir-gitlab-gprd
       grafana_min_zoom_hours: "6"
-      grafana_panel_id: "2661375984"
+      grafana_panel_id: "172578411"
       grafana_variables: environment,type,stage
       promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              (
-                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              (
-                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      pager: pagerduty
-      rules_domain: general
-      severity: s2
-    expr: |
-      gitlab_component_saturation:ratio{component="disk_space",env="gprd",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="disk_space"}
-  - alert: ComponentResourceRunningOut_disk_space
-    for: 15m
-    annotations:
-      title: The Disk Space Utilization per Device per Node resource of the {{ $labels.type
-        }} service ({{ $labels.stage }} stage) is on track to hit capacity within
-        6h
-      description: |
-        This means that this resource is growing rapidly and is predicted to exceed saturation threshold within 6h.
-
-        Details of the Disk Space Utilization per Device per Node resource:
-
-        Disk space utilization per device per node.
-      grafana_dashboard_id: alerts-sat_disk_space
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_space?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gprd
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "2661375984"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              (
-                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              (
-                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      linear_prediction_saturation_alert: 6h
-      pager: pagerduty
-      rules_domain: general
-      severity: s2
-    expr: |
-      predict_linear(gitlab_component_saturation:ratio{component="disk_space",env="gprd",type="zoekt"}[6h], 21600)
-      > on (component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="disk_space"}
-  - alert: component_saturation_slo_out_of_bounds:memory
-    for: 5m
-    annotations:
-      title: The Memory Utilization per Node resource of the {{ $labels.type }} service
-        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
-        its capacity limit.
-      description: |
-        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
-
-        Details of the Memory Utilization per Node resource:
-
-        Memory utilization per device per node.
-      grafana_dashboard_id: alerts-sat_memory
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_memory?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gprd
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "1955556769"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn) (
+        max by(environment, tier, type, stage, shard) (
           clamp_min(
             clamp_max(
-              instance:node_memory_utilization:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} or instance:node_memory_utilisation:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              container_memory_working_set_bytes:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              /
+              (container_spec_memory_limit_bytes:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} > 0)
               ,
               1)
           ,
           0)
         )
       promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn) (
+        max by(environment, tier, type, stage, shard) (
           clamp_min(
             clamp_max(
-              instance:node_memory_utilization:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} or instance:node_memory_utilisation:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              container_memory_working_set_bytes:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              /
+              (container_spec_memory_limit_bytes:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} > 0)
               ,
               1)
           ,
@@ -343,49 +133,57 @@ groups:
       rules_domain: general
       severity: s4
     expr: |
-      gitlab_component_saturation:ratio{component="memory",env="gprd",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="memory"}
-  - alert: component_saturation_slo_out_of_bounds:nf_conntrack_entries
-    for: 5m
+      gitlab_component_saturation:ratio{component="kube_container_memory",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="kube_container_memory"}
+  - alert: component_saturation_slo_out_of_bounds:kube_container_throttling
+    for: 10m
     annotations:
-      title: The conntrack Entries per Node resource of the {{ $labels.type }} service
+      title: The Kube container throttling resource of the {{ $labels.type }} service
         ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
         its capacity limit.
       description: |
         This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
 
-        Details of the conntrack Entries per Node resource:
+        Details of the Kube container throttling resource:
 
-        Netfilter connection tracking table utilization per node.
+        Kube container throttling
 
-        When saturated, new connection attempts (incoming SYN packets) are dropped with no reply, leaving clients to slowly retry (and typically fail again) over the next several seconds.  When packets are being dropped due to this condition, kernel will log the event as: "nf_conntrack: table full, dropping packet".
-      grafana_dashboard_id: alerts-sat_conntrack
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_conntrack?from=now-6h/m&to=now-1m/m&var-environment={{
+        A container will be throttled if it reaches the configured CPU limit for the horizontal pod autoscaler, or when other containers on the node are overutilizing the CPU.
+
+        To get around this, consider increasing the limit for this workload, taking into consideration the requested resources.
+      grafana_dashboard_id: alerts-kube_container_throttling
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-kube_container_throttling?from=now-6h/m&to=now-1m/m&var-environment={{
         $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
         }}
       grafana_datasource_id: mimir-gitlab-gprd
       grafana_min_zoom_hours: "6"
-      grafana_panel_id: "503581002"
+      grafana_panel_id: "54512634"
       grafana_variables: environment,type,stage
       promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, instance) (
+        quantile by(environment, tier, type, stage, shard, pod, container) (
+          0.99,
           clamp_min(
             clamp_max(
-              max_over_time(node_nf_conntrack_entries{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1m])
-              /
-              node_nf_conntrack_entries_limit{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              avg by (environment, tier, type, stage, shard, pod, container)(
+                rate(container_cpu_cfs_throttled_periods_total:labeled{container!="", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+                /
+                rate(container_cpu_cfs_periods_total:labeled{container!="", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
               ,
               1)
           ,
           0)
         )
       promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, instance) (
+        quantile by(environment, tier, type, stage, shard, pod, container) (
+          0.99,
           clamp_min(
             clamp_max(
-              max_over_time(node_nf_conntrack_entries{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1m])
-              /
-              node_nf_conntrack_entries_limit{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              avg by (environment, tier, type, stage, shard, pod, container)(
+                rate(container_cpu_cfs_throttled_periods_total:labeled{container!="", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+                /
+                rate(container_cpu_cfs_periods_total:labeled{container!="", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
               ,
               1)
           ,
@@ -397,60 +195,62 @@ groups:
       rules_domain: general
       severity: s3
     expr: |
-      gitlab_component_saturation:ratio{component="nf_conntrack_entries",env="gprd",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="nf_conntrack_entries"}
-  - alert: component_saturation_slo_out_of_bounds:node_schedstat_waiting
-    for: 90m
+      gitlab_component_saturation:ratio{component="kube_container_throttling",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="kube_container_throttling"}
+  - alert: component_saturation_slo_out_of_bounds:kube_horizontalpodautoscaler_desired_replicas
+    for: 25m
     annotations:
-      title: The Node Scheduler Waiting Time resource of the {{ $labels.type }} service
-        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
-        its capacity limit.
+      title: The Horizontal Pod Autoscaler Desired Replicas resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
       description: |
         This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
 
-        Details of the Node Scheduler Waiting Time resource:
-
-        Measures the amount of scheduler waiting time that processes are waiting to be scheduled, according to [`CPU Scheduling Metrics`](https://www.robustperception.io/cpu-scheduling-metrics-from-the-node-exporter).
+        Details of the Horizontal Pod Autoscaler Desired Replicas resource:
 
-        A high value indicates that a node has more processes to be run than CPU time available to handle them, and may lead to degraded responsiveness and performance from the application.
+        The [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) automatically scales the number of Pods in a deployment based on metrics.
 
-        Additionally, it may indicate that the fleet is under-provisioned.
-      grafana_dashboard_id: alerts-sat_node_schedstat_waiting
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_node_schedstat_waiting?from=now-6h/m&to=now-1m/m&var-environment={{
+        The Horizontal Pod Autoscaler has a configured upper maximum. When this limit is reached, the HPA will not increase the number of pods, and other resource saturation (e.g., CPU, memory) may occur.
+      grafana_dashboard_id: alerts-sat_kube_horizontalpodautoscaler
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_kube_horizontalpodautoscaler?from=now-6h/m&to=now-1m/m&var-environment={{
         $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
         }}
       grafana_datasource_id: mimir-gitlab-gprd
       grafana_min_zoom_hours: "6"
-      grafana_panel_id: "1415313189"
+      grafana_panel_id: "351198712"
       grafana_variables: environment,type,stage
       promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, shard) (
+        max by(environment, tier, type, stage, shard, horizontalpodautoscaler, shard) (
           clamp_min(
             clamp_max(
-              avg without (cpu) (rate(node_schedstat_waiting_seconds_total{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1h]))
+              kube_horizontalpodautoscaler_status_desired_replicas:labeled{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
+              /
+              kube_horizontalpodautoscaler_spec_max_replicas:labeled{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
               ,
               1)
           ,
           0)
         )
       promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, shard) (
+        max by(environment, tier, type, stage, shard, horizontalpodautoscaler, shard) (
           clamp_min(
             clamp_max(
-              avg without (cpu) (rate(node_schedstat_waiting_seconds_total{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1h]))
+              kube_horizontalpodautoscaler_status_desired_replicas:labeled{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
+              /
+              kube_horizontalpodautoscaler_spec_max_replicas:labeled{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
               ,
               1)
           ,
           0)
         )
-      runbook: docs/{{ $labels.type }}/README.md
+      runbook: docs/kube/kubernetes.md#hpascalecapability
     labels:
       alert_type: cause
       rules_domain: general
-      severity: s4
+      severity: s3
     expr: |
-      gitlab_component_saturation:ratio{component="node_schedstat_waiting",env="gprd",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="node_schedstat_waiting"}
+      gitlab_component_saturation:ratio{component="kube_horizontalpodautoscaler_desired_replicas",env="gprd",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="kube_horizontalpodautoscaler_desired_replicas"}
   - alert: component_saturation_slo_out_of_bounds:open_fds
     for: 5m
     annotations:
@@ -524,105 +324,3 @@ groups:
     expr: |
       gitlab_component_saturation:ratio{component="open_fds",env="gprd",type="zoekt"} > on(component) group_left
       slo:max:hard:gitlab_component_saturation:ratio{component="open_fds"}
-  - alert: component_saturation_slo_out_of_bounds:shard_cpu
-    for: 5m
-    annotations:
-      title: The Average CPU Utilization per Shard resource of the {{ $labels.type
-        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
-        is close to its capacity limit.
-      description: |
-        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
-
-        Details of the Average CPU Utilization per Shard resource:
-
-        This resource measures average CPU utilization across an all cores in a shard of a service fleet. If it is becoming saturated, it may indicate that the shard needs horizontal or vertical scaling.
-      grafana_dashboard_id: alerts-sat_shard_cpu
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_shard_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gprd
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "1472933476"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, shard) (
-          clamp_min(
-            clamp_max(
-              1 - avg by (environment, tier, type, stage, shard, shard) (
-                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, shard) (
-          clamp_min(
-            clamp_max(
-              1 - avg by (environment, tier, type, stage, shard, shard) (
-                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      rules_domain: general
-      severity: s3
-    expr: |
-      gitlab_component_saturation:ratio{component="shard_cpu",env="gprd",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="shard_cpu"}
-  - alert: component_saturation_slo_out_of_bounds:single_node_cpu
-    for: 10m
-    annotations:
-      title: The Average CPU Utilization per Node resource of the {{ $labels.type
-        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
-        is close to its capacity limit.
-      description: |
-        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
-
-        Details of the Average CPU Utilization per Node resource:
-
-        Average CPU utilization per Node.
-
-        If average CPU is saturated, it may indicate that a fleet is in need to horizontal or vertical scaling. It may also indicate imbalances in load in a fleet.
-      grafana_dashboard_id: alerts-sat_single_node_cpu
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_single_node_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gprd
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "3372411356"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn) (
-          clamp_min(
-            clamp_max(
-              avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m]))
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn) (
-          clamp_min(
-            clamp_max(
-              avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m]))
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      rules_domain: general
-      severity: s4
-    expr: |
-      gitlab_component_saturation:ratio{component="single_node_cpu",env="gprd",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="single_node_cpu"}
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-metadata.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-metadata.yml
index fd74401a84..c4b506c3e4 100644
--- a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-metadata.yml
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation-metadata.yml
@@ -6,52 +6,44 @@ groups:
   rules:
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: cpu
-    expr: "0.8"
+      component: kube_container_cpu
+    expr: "0.95"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: cpu
-    expr: "0.9"
+      component: kube_container_cpu
+    expr: "0.99"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: disk_inodes
-    expr: "0.75"
+      component: kube_container_cpu_limit
+    expr: "0.9"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: disk_inodes
-    expr: "0.8"
+      component: kube_container_cpu_limit
+    expr: "0.99"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: disk_space
-    expr: "0.85"
+      component: kube_container_memory
+    expr: "0.8"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: disk_space
+      component: kube_container_memory
     expr: "0.9"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: memory
-    expr: "0.9"
+      component: kube_container_throttling
+    expr: "0.4"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: memory
-    expr: "0.98"
+      component: kube_container_throttling
+    expr: "0.5"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: nf_conntrack_entries
-    expr: "0.95"
-  - record: slo:max:hard:gitlab_component_saturation:ratio
-    labels:
-      component: nf_conntrack_entries
-    expr: "0.98"
-  - record: slo:max:soft:gitlab_component_saturation:ratio
-    labels:
-      component: node_schedstat_waiting
-    expr: "0.1"
+      component: kube_horizontalpodautoscaler_desired_replicas
+    expr: "0.9"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: node_schedstat_waiting
-    expr: "0.15"
+      component: kube_horizontalpodautoscaler_desired_replicas
+    expr: "0.95"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
       component: open_fds
@@ -60,19 +52,3 @@ groups:
     labels:
       component: open_fds
     expr: "0.9"
-  - record: slo:max:soft:gitlab_component_saturation:ratio
-    labels:
-      component: shard_cpu
-    expr: "0.85"
-  - record: slo:max:hard:gitlab_component_saturation:ratio
-    labels:
-      component: shard_cpu
-    expr: "0.95"
-  - record: slo:max:soft:gitlab_component_saturation:ratio
-    labels:
-      component: single_node_cpu
-    expr: "0.9"
-  - record: slo:max:hard:gitlab_component_saturation:ratio
-    labels:
-      component: single_node_cpu
-    expr: "0.95"
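
How these thresholds are consumed: the saturation alerts earlier in this patch compare `gitlab_component_saturation:ratio` only against the `slo:max:hard:...` series; the `slo:max:soft:...` values are recorded but not referenced by any alert shown here. Read as a sketch, the new kube_container_throttling alert effectively evaluates

    gitlab_component_saturation:ratio{component="kube_container_throttling",env="gprd",type="zoekt"}
    > on(component) group_left
    slo:max:hard:gitlab_component_saturation:ratio{component="kube_container_throttling"}  # i.e. > 0.5

so it fires once the p99 per-pod throttled-period ratio stays above 50% for the alert's 10-minute `for` window.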
diff --git a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation.yml b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation.yml
index ff447b39c1..b64eba3ed6 100644
--- a/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation.yml
+++ b/mimir-rules/gitlab-gprd/gprd/zoekt/autogenerated-gitlab-gprd-gprd-zoekt-saturation.yml
@@ -6,13 +6,23 @@ groups:
   rules:
   - record: gitlab_component_saturation:ratio
     labels:
-      component: cpu
+      component: kube_container_cpu
     expr: |
-      max by(env, environment, tier, type, stage, shard) (
+      quantile by(env, environment, tier, type, stage, shard) (
+        0.99,
         clamp_min(
           clamp_max(
-            1 - avg by (env, environment, tier, type, stage, shard) (
-              rate(node_cpu_seconds_total{mode="idle", env="gprd",type="zoekt"}[5m])
+            (
+              sum by (env, environment, tier, type, stage, shard, pod, container) (
+                rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", env="gprd",type="zoekt"}[1h])
+              )
+              unless on(env, environment, tier, type, stage, shard, pod, container) (
+                container_spec_cpu_quota:labeled{container!="", container!="POD", env="gprd",type="zoekt"}
+              )
+            )
+            /
+            sum by(env, environment, tier, type, stage, shard, pod, container) (
+              kube_pod_container_resource_requests:labeled{container!="", container!="POD", resource="cpu", env="gprd",type="zoekt"}
             )
             ,
             1)
@@ -21,15 +31,19 @@ groups:
       )
   - record: gitlab_component_saturation:ratio
     labels:
-      component: disk_inodes
+      component: kube_container_cpu_limit
     expr: |
       max by(env, environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            1 - (
-              node_filesystem_files_free{fstype=~"(ext.|xfs)", env="gprd",type="zoekt"}
+            sum by (env, environment, tier, type, stage, shard, pod, container) (
+              rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", env="gprd",type="zoekt"}[5m])
+            )
+            /
+            sum by(env, environment, tier, type, stage, shard, pod, container) (
+              container_spec_cpu_quota:labeled{container!="", container!="POD", env="gprd",type="zoekt"}
               /
-              node_filesystem_files{fstype=~"(ext.|xfs)", env="gprd",type="zoekt"}
+              container_spec_cpu_period:labeled{container!="", container!="POD", env="gprd",type="zoekt"}
             )
             ,
             1)
@@ -38,14 +52,14 @@ groups:
       )
   - record: gitlab_component_saturation:ratio
     labels:
-      component: disk_space
+      component: kube_container_memory
     expr: |
       max by(env, environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            (
-              1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", env="gprd",type="zoekt"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", env="gprd",type="zoekt"}
-            )
+            container_memory_working_set_bytes:labeled{container!="", container!="POD", env="gprd",type="zoekt"}
+            /
+            (container_spec_memory_limit_bytes:labeled{container!="", container!="POD", env="gprd",type="zoekt"} > 0)
             ,
             1)
         ,
@@ -53,12 +67,17 @@ groups:
       )
   - record: gitlab_component_saturation:ratio
     labels:
-      component: memory
+      component: kube_container_throttling
     expr: |
-      max by(env, environment, tier, type, stage, shard) (
+      quantile by(env, environment, tier, type, stage, shard) (
+        0.99,
         clamp_min(
           clamp_max(
-            instance:node_memory_utilization:ratio{env="gprd",type="zoekt"} or instance:node_memory_utilisation:ratio{env="gprd",type="zoekt"}
+            avg by (env, environment, tier, type, stage, shard, pod, container)(
+              rate(container_cpu_cfs_throttled_periods_total:labeled{container!="", env="gprd",type="zoekt"}[5m])
+              /
+              rate(container_cpu_cfs_periods_total:labeled{container!="", env="gprd",type="zoekt"}[5m])
+            )
             ,
             1)
         ,
@@ -66,27 +85,14 @@ groups:
       )
   - record: gitlab_component_saturation:ratio
     labels:
-      component: nf_conntrack_entries
+      component: kube_horizontalpodautoscaler_desired_replicas
     expr: |
       max by(env, environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            max_over_time(node_nf_conntrack_entries{env="gprd",type="zoekt"}[1m])
+            kube_horizontalpodautoscaler_status_desired_replicas:labeled{env="gprd",type="zoekt", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
             /
-            node_nf_conntrack_entries_limit{env="gprd",type="zoekt"}
-            ,
-            1)
-        ,
-        0)
-      )
-  - record: gitlab_component_saturation:ratio
-    labels:
-      component: node_schedstat_waiting
-    expr: |
-      max by(env, environment, tier, type, stage, shard) (
-        clamp_min(
-          clamp_max(
-            avg without (cpu) (rate(node_schedstat_waiting_seconds_total{env="gprd",type="zoekt"}[1h]))
+            kube_horizontalpodautoscaler_spec_max_replicas:labeled{env="gprd",type="zoekt", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
             ,
             1)
         ,
@@ -115,31 +121,3 @@ groups:
         ,
         0)
       )
-  - record: gitlab_component_saturation:ratio
-    labels:
-      component: shard_cpu
-    expr: |
-      max by(env, environment, tier, type, stage, shard) (
-        clamp_min(
-          clamp_max(
-            1 - avg by (env, environment, tier, type, stage, shard, shard) (
-              rate(node_cpu_seconds_total{mode="idle", env="gprd",type="zoekt"}[5m])
-            )
-            ,
-            1)
-        ,
-        0)
-      )
-  - record: gitlab_component_saturation:ratio
-    labels:
-      component: single_node_cpu
-    expr: |
-      max by(env, environment, tier, type, stage, shard) (
-        clamp_min(
-          clamp_max(
-            avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", env="gprd",type="zoekt"}[5m]))
-            ,
-            1)
-        ,
-        0)
-      )
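
The two container CPU ratios above are complementary: kube_container_cpu divides one-hour CPU usage by the container's CPU requests and uses `unless` to skip containers that have a CPU quota defined, while kube_container_cpu_limit divides five-minute usage by the limit derived from that quota (container_spec_cpu_quota / container_spec_cpu_period). As a rough worked reading (hypothetical numbers, not taken from this patch): a container requesting 1 core, limited to 2 cores, and using 1.5 cores would report a kube_container_cpu_limit saturation of 1.5 / 2 = 0.75, and would be excluded from kube_container_cpu because its quota is set.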
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-kube-cause-alerts.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-kube-cause-alerts.yml
new file mode 100644
index 0000000000..23d0dfcfc8
--- /dev/null
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-kube-cause-alerts.yml
@@ -0,0 +1,39 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/kube-cause-alerts.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: kube_cause_alerts
+  rules:
+  - alert: KubeContainersWaitingInError
+    for: 20m
+    annotations:
+      title: Containers for the `{{ $labels.type }}` service, `{{ $labels.stage }}`
+        are unable to start.
+      description: |
+        More than 50% of the deployment's `maxSurge` setting consists of containers unable to start for reasons other than `ContainerCreating`.
+      grafana_dashboard_id: alerts-kube_containers_waiting/alerts-containers-waiting
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-kube_containers_waiting/alerts-containers-waiting?from=now-6h/m&to=now-1m/m&var-environment={{
+        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
+        }}&var-cluster={{ $labels.cluster }}
+      grafana_datasource_id: mimir-gitlab-gstg
+      grafana_min_zoom_hours: "6"
+      grafana_variables: environment,type,stage,cluster
+    labels:
+      alert_type: cause
+      pager: pagerduty
+      runbook: docs/kube/alerts/KubeContainersWaitingInError.md
+      severity: s2
+      team: sre_reliability
+    expr: |
+      sum by (type, env, tier, stage, cluster) (
+        kube_pod_container_status_waiting_reason:labeled{
+          env="gstg",type="zoekt",
+          reason!="ContainerCreating",
+        }
+      )
+      > 0
+      >= on(type, env, tier, stage, cluster) (
+        topk by(type, env, tier, stage, cluster) (1,
+          kube_deployment_spec_strategy_rollingupdate_max_surge:labeled{env="gstg",type="zoekt"}
+        )
+        * 0.5
+      )
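
A worked reading of the expression above (hypothetical numbers, not from this patch): if kube_deployment_spec_strategy_rollingupdate_max_surge:labeled resolves to 4 for a zoekt deployment, the right-hand side becomes 4 * 0.5 = 2, so the alert pages only once at least 2 containers have been waiting for a reason other than `ContainerCreating` throughout the 20-minute `for` window.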
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-kube-state-metrics.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-kube-state-metrics.yml
new file mode 100644
index 0000000000..604b4799a5
--- /dev/null
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-kube-state-metrics.yml
@@ -0,0 +1,446 @@
+# WARNING. DO NOT EDIT THIS FILE BY HAND. USE ./mimir-rules-jsonnet/kube-state-metrics-recording-rules.jsonnet TO GENERATE IT
+# YOUR CHANGES WILL BE OVERRIDDEN
+groups:
+- name: 'kube-state-metrics-recording-rules: zoekt'
+  interval: 1m
+  rules:
+  - record: kube_pod_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard,label_deployment) (
+        label_replace(
+          label_replace(
+            label_replace(
+              topk by(environment,cluster,pod) (1, kube_pod_labels{env="gstg",label_type="zoekt"}),
+              "stage", "$0", "label_stage", ".*"
+            ),
+            "shard", "$0", "label_shard", ".*"
+          ),
+          "deployment", "$0", "label_deployment", ".*"
+        )
+      )
+  - record: kube_horizontalpodautoscaler_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels{env="gstg",label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
+  - record: kube_ingress_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,ingress) (1, kube_ingress_labels{env="gstg",label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
+  - record: kube_deployment_labels:labeled
+    labels:
+      tier: inf
+      type: zoekt
+    expr: |
+      group without(label_stage,label_shard) (
+        label_replace(
+          label_replace(
+            topk by(environment,cluster,deployment) (1, kube_deployment_labels{env="gstg",label_type="zoekt"}),
+            "stage", "$0", "label_stage", ".*"
+          ),
+          "shard", "$0", "label_shard", ".*"
+        )
+      )
+- name: 'kube-state-metrics-recording-rules: enriched label recording rules'
+  interval: 1m
+  rules:
+  - record: container_start_time_seconds:labeled
+    expr: |
+      container_start_time_seconds{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_cpu_cfs_periods_total:labeled
+    expr: |
+      container_cpu_cfs_periods_total{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_cpu_cfs_throttled_periods_total:labeled
+    expr: |
+      container_cpu_cfs_throttled_periods_total{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_cpu_cfs_throttled_seconds_total:labeled
+    expr: |
+      container_cpu_cfs_throttled_seconds_total{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_cpu_usage_seconds_total:labeled
+    expr: |
+      container_cpu_usage_seconds_total{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_memory_cache:labeled
+    expr: |
+      container_memory_cache{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_memory_rss:labeled
+    expr: |
+      container_memory_rss{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_memory_swap:labeled
+    expr: |
+      container_memory_swap{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_memory_usage_bytes:labeled
+    expr: |
+      container_memory_usage_bytes{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_memory_working_set_bytes:labeled
+    expr: |
+      container_memory_working_set_bytes{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_network_receive_bytes_total:labeled
+    expr: |
+      container_network_receive_bytes_total{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_network_transmit_bytes_total:labeled
+    expr: |
+      container_network_transmit_bytes_total{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_spec_cpu_period:labeled
+    expr: |
+      container_spec_cpu_period{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_spec_cpu_quota:labeled
+    expr: |
+      container_spec_cpu_quota{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_spec_cpu_shares:labeled
+    expr: |
+      container_spec_cpu_shares{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: container_spec_memory_limit_bytes:labeled
+    expr: |
+      container_spec_memory_limit_bytes{env="gstg",metrics_path="/metrics/cadvisor"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_resource_limits:labeled
+    expr: |
+      kube_pod_container_resource_limits{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_resource_requests:labeled
+    expr: |
+      kube_pod_container_resource_requests{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_status_last_terminated_reason:labeled
+    expr: |
+      kube_pod_container_status_last_terminated_reason{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_status_ready:labeled
+    expr: |
+      kube_pod_container_status_ready{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_status_restarts_total:labeled
+    expr: |
+      kube_pod_container_status_restarts_total{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_status_running:labeled
+    expr: |
+      kube_pod_container_status_running{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_status_terminated:labeled
+    expr: |
+      kube_pod_container_status_terminated{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_status_terminated_reason:labeled
+    expr: |
+      kube_pod_container_status_terminated_reason{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_status_waiting:labeled
+    expr: |
+      kube_pod_container_status_waiting{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_pod_container_status_waiting_reason:labeled
+    expr: |
+      kube_pod_container_status_waiting_reason{env="gstg"}
+      *
+      on(environment,cluster,pod) group_left(tier,type,stage,shard,deployment)
+      topk by (environment,cluster,pod) (1, kube_pod_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_spec_target_metric:labeled
+    expr: |
+      kube_horizontalpodautoscaler_spec_target_metric{env="gstg"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_status_condition:labeled
+    expr: |
+      kube_horizontalpodautoscaler_status_condition{env="gstg"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_status_current_replicas:labeled
+    expr: |
+      kube_horizontalpodautoscaler_status_current_replicas{env="gstg"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_status_desired_replicas:labeled
+    expr: |
+      kube_horizontalpodautoscaler_status_desired_replicas{env="gstg"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_metadata_generation:labeled
+    expr: |
+      kube_horizontalpodautoscaler_metadata_generation{env="gstg"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_spec_max_replicas:labeled
+    expr: |
+      kube_horizontalpodautoscaler_spec_max_replicas{env="gstg"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_horizontalpodautoscaler_spec_min_replicas:labeled
+    expr: |
+      kube_horizontalpodautoscaler_spec_min_replicas{env="gstg"}
+      *
+      on(environment,cluster,horizontalpodautoscaler) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,horizontalpodautoscaler) (1, kube_horizontalpodautoscaler_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_node_status_capacity:labeled
+    expr: |
+      kube_node_status_capacity{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_node_status_allocatable:labeled
+    expr: |
+      kube_node_status_allocatable{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_node_status_condition:labeled
+    expr: |
+      kube_node_status_condition{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_schedstat_waiting_seconds_total:labeled
+    expr: |
+      node_schedstat_waiting_seconds_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_cpu_seconds_total:labeled
+    expr: |
+      node_cpu_seconds_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_network_transmit_bytes_total:labeled
+    expr: |
+      node_network_transmit_bytes_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_network_receive_bytes_total:labeled
+    expr: |
+      node_network_receive_bytes_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_disk_reads_completed_total:labeled
+    expr: |
+      node_disk_reads_completed_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_disk_writes_completed_total:labeled
+    expr: |
+      node_disk_writes_completed_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_disk_read_bytes_total:labeled
+    expr: |
+      node_disk_read_bytes_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_disk_written_bytes_total:labeled
+    expr: |
+      node_disk_written_bytes_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_disk_read_time_seconds_total:labeled
+    expr: |
+      node_disk_read_time_seconds_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_disk_write_time_seconds_total:labeled
+    expr: |
+      node_disk_write_time_seconds_total{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_load1:labeled
+    expr: |
+      node_load1{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_load5:labeled
+    expr: |
+      node_load5{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_load15:labeled
+    expr: |
+      node_load15{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: node_vmstat_oom_kill:labeled
+    expr: |
+      node_vmstat_oom_kill{env="gstg"}
+      *
+      on(environment,cluster,node) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,node) (1, kube_node_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: nginx_ingress_controller_requests:labeled
+    expr: |
+      nginx_ingress_controller_requests{env="gstg"}
+      *
+      on(environment,cluster,ingress) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,ingress) (1, kube_ingress_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_status_replicas_unavailable:labeled
+    expr: |
+      kube_deployment_status_replicas_unavailable{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_status_replicas_updated:labeled
+    expr: |
+      kube_deployment_status_replicas_updated{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_spec_paused:labeled
+    expr: |
+      kube_deployment_spec_paused{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_spec_replicas:labeled
+    expr: |
+      kube_deployment_spec_replicas{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_spec_strategy_rollingupdate_max_surge:labeled
+    expr: |
+      kube_deployment_spec_strategy_rollingupdate_max_surge{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_spec_strategy_rollingupdate_max_unavailable:labeled
+    expr: |
+      kube_deployment_spec_strategy_rollingupdate_max_unavailable{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_status_condition:labeled
+    expr: |
+      kube_deployment_status_condition{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_status_replicas_available:labeled
+    expr: |
+      kube_deployment_status_replicas_available{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_created:labeled
+    expr: |
+      kube_deployment_created{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_metadata_generation:labeled
+    expr: |
+      kube_deployment_metadata_generation{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_status_observed_generation:labeled
+    expr: |
+      kube_deployment_status_observed_generation{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
+  - record: kube_deployment_status_replicas:labeled
+    expr: |
+      kube_deployment_status_replicas{env="gstg"}
+      *
+      on(environment,cluster,deployment) group_left(tier,type,stage,shard)
+      topk by (environment,cluster,deployment) (1, kube_deployment_labels:labeled{env="gstg",type=~"zoekt"})
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-alerts.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-alerts.yml
index 221970a5e4..958e4afe31 100644
--- a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-alerts.yml
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-alerts.yml
@@ -17,147 +17,40 @@ groups:
 - name: GitLab Saturation Alerts
   interval: 1m
   rules:
-  - alert: component_saturation_slo_out_of_bounds:cpu
-    for: 5m
-    annotations:
-      title: The Average Service CPU Utilization resource of the {{ $labels.type }}
-        service ({{ $labels.stage }} stage) has a saturation exceeding SLO and is
-        close to its capacity limit.
-      description: |
-        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
-
-        Details of the Average Service CPU Utilization resource:
-
-        This resource measures average CPU utilization across an all cores in a service fleet. If it is becoming saturated, it may indicate that the fleet needs horizontal or vertical scaling.
-      grafana_dashboard_id: alerts-sat_cpu
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gstg
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "1465724101"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard) (
-          clamp_min(
-            clamp_max(
-              1 - avg by (environment, tier, type, stage, shard) (
-                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard) (
-          clamp_min(
-            clamp_max(
-              1 - avg by (environment, tier, type, stage, shard) (
-                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      rules_domain: general
-      severity: s3
-    expr: |
-      gitlab_component_saturation:ratio{component="cpu",env="gstg",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="cpu"}
-  - alert: component_saturation_slo_out_of_bounds:disk_inodes
+  - alert: component_saturation_slo_out_of_bounds:kube_container_cpu_limit
     for: 15m
     annotations:
-      title: The Disk inode Utilization per Device per Node resource of the {{ $labels.type
+      title: The Kube Container CPU over-utilization resource of the {{ $labels.type
         }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
         is close to its capacity limit.
       description: |
         This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
 
-        Details of the Disk inode Utilization per Device per Node resource:
+        Details of the Kube Container CPU over-utilization resource:
 
-        Disk inode utilization per device per node.
+        Kubernetes containers can have a limit configured on how much CPU they can consume in a burst. If we are at this limit, exceeding the allocated requested resources, we should consider revisiting the container's HPA configuration.
 
-        If this is too high, its possible that a directory is filling up with files. Consider logging in an checking temp directories for large numbers of files
-      grafana_dashboard_id: alerts-sat_disk_inodes
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_inodes?from=now-6h/m&to=now-1m/m&var-environment={{
+        When a container is utilizing CPU resources up to its configured limit for extended periods of time, this could cause it and other running containers to be throttled.
+      grafana_dashboard_id: alerts-sat_kube_container_cpu_limit
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_kube_container_cpu_limit?from=now-6h/m&to=now-1m/m&var-environment={{
         $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
         }}
       grafana_datasource_id: mimir-gitlab-gstg
       grafana_min_zoom_hours: "6"
-      grafana_panel_id: "39965907"
+      grafana_panel_id: "1262336683"
       grafana_variables: environment,type,stage
       promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              1 - (
-                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-                /
-                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
+        max by(environment, tier, type, stage, shard, pod, container) (
           clamp_min(
             clamp_max(
-              1 - (
-                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-                /
-                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              sum by (environment, tier, type, stage, shard, pod, container) (
+                rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
               )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      pager: pagerduty
-      rules_domain: general
-      severity: s2
-    expr: |
-      gitlab_component_saturation:ratio{component="disk_inodes",env="gstg",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="disk_inodes"}
-  - alert: ComponentResourceRunningOut_disk_inodes
-    for: 15m
-    annotations:
-      title: The Disk inode Utilization per Device per Node resource of the {{ $labels.type
-        }} service ({{ $labels.stage }} stage) is on track to hit capacity within
-        6h
-      description: |
-        This means that this resource is growing rapidly and is predicted to exceed saturation threshold within 6h.
-
-        Details of the Disk inode Utilization per Device per Node resource:
-
-        Disk inode utilization per device per node.
-
-        If this is too high, its possible that a directory is filling up with files. Consider logging in an checking temp directories for large numbers of files
-      grafana_dashboard_id: alerts-sat_disk_inodes
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_inodes?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gstg
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "39965907"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              1 - (
-                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              /
+              sum by(environment, tier, type, stage, shard, pod, container) (
+                container_spec_cpu_quota:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
                 /
-                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                container_spec_cpu_period:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
               )
               ,
               1)
@@ -165,13 +58,17 @@ groups:
           0)
         )
       promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
+        max by(environment, tier, type, stage, shard, pod, container) (
           clamp_min(
             clamp_max(
-              1 - (
-                node_filesystem_files_free{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              sum by (environment, tier, type, stage, shard, pod, container) (
+                rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
+              /
+              sum by(environment, tier, type, stage, shard, pod, container) (
+                container_spec_cpu_quota:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
                 /
-                node_filesystem_files{fstype=~"(ext.|xfs)", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+                container_spec_cpu_period:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
               )
               ,
               1)
@@ -181,157 +78,50 @@ groups:
       runbook: docs/{{ $labels.type }}/README.md
     labels:
       alert_type: cause
-      linear_prediction_saturation_alert: 6h
-      pager: pagerduty
       rules_domain: general
-      severity: s2
+      severity: s4
     expr: |
-      predict_linear(gitlab_component_saturation:ratio{component="disk_inodes",env="gstg",type="zoekt"}[6h], 21600)
-      > on (component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="disk_inodes"}
-  - alert: component_saturation_slo_out_of_bounds:disk_space
+      gitlab_component_saturation:ratio{component="kube_container_cpu_limit",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="kube_container_cpu_limit"}
+  - alert: component_saturation_slo_out_of_bounds:kube_container_memory
     for: 15m
     annotations:
-      title: The Disk Space Utilization per Device per Node resource of the {{ $labels.type
+      title: The Kube Container Memory Utilization resource of the {{ $labels.type
         }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
         is close to its capacity limit.
       description: |
         This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
 
-        Details of the Disk Space Utilization per Device per Node resource:
+        Details of the Kube Container Memory Utilization resource:
 
-        Disk space utilization per device per node.
-      grafana_dashboard_id: alerts-sat_disk_space
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_space?from=now-6h/m&to=now-1m/m&var-environment={{
+        This uses the working set size from cAdvisor for the cgroup's memory usage. That may not be a good measure, as it includes filesystem cache pages that are not necessarily attributable to the application inside the cgroup and are permitted to be evicted instead of being OOM killed.
+      grafana_dashboard_id: alerts-sat_kube_container_memory
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_kube_container_memory?from=now-6h/m&to=now-1m/m&var-environment={{
         $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
         }}
       grafana_datasource_id: mimir-gitlab-gstg
       grafana_min_zoom_hours: "6"
-      grafana_panel_id: "2661375984"
+      grafana_panel_id: "172578411"
       grafana_variables: environment,type,stage
       promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              (
-                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              (
-                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      pager: pagerduty
-      rules_domain: general
-      severity: s2
-    expr: |
-      gitlab_component_saturation:ratio{component="disk_space",env="gstg",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="disk_space"}
-  - alert: ComponentResourceRunningOut_disk_space
-    for: 15m
-    annotations:
-      title: The Disk Space Utilization per Device per Node resource of the {{ $labels.type
-        }} service ({{ $labels.stage }} stage) is on track to hit capacity within
-        6h
-      description: |
-        This means that this resource is growing rapidly and is predicted to exceed saturation threshold within 6h.
-
-        Details of the Disk Space Utilization per Device per Node resource:
-
-        Disk space utilization per device per node.
-      grafana_dashboard_id: alerts-sat_disk_space
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_disk_space?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gstg
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "2661375984"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              (
-                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, device) (
-          clamp_min(
-            clamp_max(
-              (
-                1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      linear_prediction_saturation_alert: 6h
-      pager: pagerduty
-      rules_domain: general
-      severity: s2
-    expr: |
-      predict_linear(gitlab_component_saturation:ratio{component="disk_space",env="gstg",type="zoekt"}[6h], 21600)
-      > on (component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="disk_space"}
-  - alert: component_saturation_slo_out_of_bounds:memory
-    for: 5m
-    annotations:
-      title: The Memory Utilization per Node resource of the {{ $labels.type }} service
-        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
-        its capacity limit.
-      description: |
-        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
-
-        Details of the Memory Utilization per Node resource:
-
-        Memory utilization per device per node.
-      grafana_dashboard_id: alerts-sat_memory
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_memory?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gstg
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "1955556769"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn) (
+        max by(environment, tier, type, stage, shard) (
           clamp_min(
             clamp_max(
-              instance:node_memory_utilization:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} or instance:node_memory_utilisation:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              container_memory_working_set_bytes:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              /
+              (container_spec_memory_limit_bytes:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} > 0)
               ,
               1)
           ,
           0)
         )
       promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn) (
+        max by(environment, tier, type, stage, shard) (
           clamp_min(
             clamp_max(
-              instance:node_memory_utilization:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} or instance:node_memory_utilisation:ratio{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              container_memory_working_set_bytes:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              /
+              (container_spec_memory_limit_bytes:labeled{container!="", container!="POD", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"} > 0)
               ,
               1)
           ,
@@ -343,49 +133,57 @@ groups:
       rules_domain: general
       severity: s4
     expr: |
-      gitlab_component_saturation:ratio{component="memory",env="gstg",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="memory"}
-  - alert: component_saturation_slo_out_of_bounds:nf_conntrack_entries
-    for: 5m
+      gitlab_component_saturation:ratio{component="kube_container_memory",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="kube_container_memory"}
+  - alert: component_saturation_slo_out_of_bounds:kube_container_throttling
+    for: 10m
     annotations:
-      title: The conntrack Entries per Node resource of the {{ $labels.type }} service
+      title: The Kube container throttling resource of the {{ $labels.type }} service
         ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
         its capacity limit.
       description: |
         This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
 
-        Details of the conntrack Entries per Node resource:
+        Details of the Kube container throttling resource:
 
-        Netfilter connection tracking table utilization per node.
+        Kube container throttling
 
-        When saturated, new connection attempts (incoming SYN packets) are dropped with no reply, leaving clients to slowly retry (and typically fail again) over the next several seconds.  When packets are being dropped due to this condition, kernel will log the event as: "nf_conntrack: table full, dropping packet".
-      grafana_dashboard_id: alerts-sat_conntrack
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_conntrack?from=now-6h/m&to=now-1m/m&var-environment={{
+        A container will be throttled if it reaches the configured CPU limit for the horizontal pod autoscaler, or when other containers on the node are overutilizing the CPU.
+
+        To get around this, consider increasing the limit for this workload, taking into consideration the requested resources.
+      grafana_dashboard_id: alerts-kube_container_throttling
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-kube_container_throttling?from=now-6h/m&to=now-1m/m&var-environment={{
         $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
         }}
       grafana_datasource_id: mimir-gitlab-gstg
       grafana_min_zoom_hours: "6"
-      grafana_panel_id: "503581002"
+      grafana_panel_id: "54512634"
       grafana_variables: environment,type,stage
       promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, instance) (
+        quantile by(environment, tier, type, stage, shard, pod, container) (
+          0.99,
           clamp_min(
             clamp_max(
-              max_over_time(node_nf_conntrack_entries{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1m])
-              /
-              node_nf_conntrack_entries_limit{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              avg by (environment, tier, type, stage, shard, pod, container)(
+                rate(container_cpu_cfs_throttled_periods_total:labeled{container!="", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+                /
+                rate(container_cpu_cfs_periods_total:labeled{container!="", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
               ,
               1)
           ,
           0)
         )
       promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, instance) (
+        quantile by(environment, tier, type, stage, shard, pod, container) (
+          0.99,
           clamp_min(
             clamp_max(
-              max_over_time(node_nf_conntrack_entries{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1m])
-              /
-              node_nf_conntrack_entries_limit{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}
+              avg by (environment, tier, type, stage, shard, pod, container)(
+                rate(container_cpu_cfs_throttled_periods_total:labeled{container!="", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+                /
+                rate(container_cpu_cfs_periods_total:labeled{container!="", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
+              )
               ,
               1)
           ,
@@ -397,60 +195,62 @@ groups:
       rules_domain: general
       severity: s3
     expr: |
-      gitlab_component_saturation:ratio{component="nf_conntrack_entries",env="gstg",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="nf_conntrack_entries"}
-  - alert: component_saturation_slo_out_of_bounds:node_schedstat_waiting
-    for: 90m
+      gitlab_component_saturation:ratio{component="kube_container_throttling",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="kube_container_throttling"}
+  - alert: component_saturation_slo_out_of_bounds:kube_horizontalpodautoscaler_desired_replicas
+    for: 25m
     annotations:
-      title: The Node Scheduler Waiting Time resource of the {{ $labels.type }} service
-        ({{ $labels.stage }} stage) has a saturation exceeding SLO and is close to
-        its capacity limit.
+      title: The Horizontal Pod Autoscaler Desired Replicas resource of the {{ $labels.type
+        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
+        is close to its capacity limit.
       description: |
         This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
 
-        Details of the Node Scheduler Waiting Time resource:
-
-        Measures the amount of scheduler waiting time that processes are waiting to be scheduled, according to [`CPU Scheduling Metrics`](https://www.robustperception.io/cpu-scheduling-metrics-from-the-node-exporter).
+        Details of the Horizontal Pod Autoscaler Desired Replicas resource:
 
-        A high value indicates that a node has more processes to be run than CPU time available to handle them, and may lead to degraded responsiveness and performance from the application.
+        The [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) automatically scales the number of Pods in a deployment based on metrics.
 
-        Additionally, it may indicate that the fleet is under-provisioned.
-      grafana_dashboard_id: alerts-sat_node_schedstat_waiting
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_node_schedstat_waiting?from=now-6h/m&to=now-1m/m&var-environment={{
+        The Horizontal Pod Autoscaler has a configured upper maximum. When this limit is reached, the HPA will not increase the number of pods and other resource saturation (eg, CPU, memory) may occur.
+      grafana_dashboard_id: alerts-sat_kube_horizontalpodautoscaler
+      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_kube_horizontalpodautoscaler?from=now-6h/m&to=now-1m/m&var-environment={{
         $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
         }}
       grafana_datasource_id: mimir-gitlab-gstg
       grafana_min_zoom_hours: "6"
-      grafana_panel_id: "1415313189"
+      grafana_panel_id: "351198712"
       grafana_variables: environment,type,stage
       promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn, shard) (
+        max by(environment, tier, type, stage, shard, horizontalpodautoscaler, shard) (
           clamp_min(
             clamp_max(
-              avg without (cpu) (rate(node_schedstat_waiting_seconds_total{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1h]))
+              kube_horizontalpodautoscaler_status_desired_replicas:labeled{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
+              /
+              kube_horizontalpodautoscaler_spec_max_replicas:labeled{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
               ,
               1)
           ,
           0)
         )
       promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn, shard) (
+        max by(environment, tier, type, stage, shard, horizontalpodautoscaler, shard) (
           clamp_min(
             clamp_max(
-              avg without (cpu) (rate(node_schedstat_waiting_seconds_total{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[1h]))
+              kube_horizontalpodautoscaler_status_desired_replicas:labeled{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
+              /
+              kube_horizontalpodautoscaler_spec_max_replicas:labeled{environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
               ,
               1)
           ,
           0)
         )
-      runbook: docs/{{ $labels.type }}/README.md
+      runbook: docs/kube/kubernetes.md#hpascalecapability
     labels:
       alert_type: cause
       rules_domain: general
-      severity: s4
+      severity: s3
     expr: |
-      gitlab_component_saturation:ratio{component="node_schedstat_waiting",env="gstg",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="node_schedstat_waiting"}
+      gitlab_component_saturation:ratio{component="kube_horizontalpodautoscaler_desired_replicas",env="gstg",type="zoekt"} > on(component) group_left
+      slo:max:hard:gitlab_component_saturation:ratio{component="kube_horizontalpodautoscaler_desired_replicas"}
   - alert: component_saturation_slo_out_of_bounds:open_fds
     for: 5m
     annotations:
@@ -524,105 +324,3 @@ groups:
     expr: |
       gitlab_component_saturation:ratio{component="open_fds",env="gstg",type="zoekt"} > on(component) group_left
       slo:max:hard:gitlab_component_saturation:ratio{component="open_fds"}
-  - alert: component_saturation_slo_out_of_bounds:shard_cpu
-    for: 5m
-    annotations:
-      title: The Average CPU Utilization per Shard resource of the {{ $labels.type
-        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
-        is close to its capacity limit.
-      description: |
-        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
-
-        Details of the Average CPU Utilization per Shard resource:
-
-        This resource measures average CPU utilization across an all cores in a shard of a service fleet. If it is becoming saturated, it may indicate that the shard needs horizontal or vertical scaling.
-      grafana_dashboard_id: alerts-sat_shard_cpu
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_shard_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gstg
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "1472933476"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, shard) (
-          clamp_min(
-            clamp_max(
-              1 - avg by (environment, tier, type, stage, shard, shard) (
-                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, shard) (
-          clamp_min(
-            clamp_max(
-              1 - avg by (environment, tier, type, stage, shard, shard) (
-                rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m])
-              )
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      rules_domain: general
-      severity: s3
-    expr: |
-      gitlab_component_saturation:ratio{component="shard_cpu",env="gstg",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="shard_cpu"}
-  - alert: component_saturation_slo_out_of_bounds:single_node_cpu
-    for: 10m
-    annotations:
-      title: The Average CPU Utilization per Node resource of the {{ $labels.type
-        }} service ({{ $labels.stage }} stage) has a saturation exceeding SLO and
-        is close to its capacity limit.
-      description: |
-        This means that this resource is running close to capacity and is at risk of exceeding its current capacity limit.
-
-        Details of the Average CPU Utilization per Node resource:
-
-        Average CPU utilization per Node.
-
-        If average CPU is saturated, it may indicate that a fleet is in need to horizontal or vertical scaling. It may also indicate imbalances in load in a fleet.
-      grafana_dashboard_id: alerts-sat_single_node_cpu
-      grafana_dashboard_link: https://dashboards.gitlab.net/d/alerts-sat_single_node_cpu?from=now-6h/m&to=now-1m/m&var-environment={{
-        $labels.environment }}&var-type={{ $labels.type }}&var-stage={{ $labels.stage
-        }}
-      grafana_datasource_id: mimir-gitlab-gstg
-      grafana_min_zoom_hours: "6"
-      grafana_panel_id: "3372411356"
-      grafana_variables: environment,type,stage
-      promql_query: |
-        max by(environment, tier, type, stage, shard, fqdn) (
-          clamp_min(
-            clamp_max(
-              avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m]))
-              ,
-              1)
-          ,
-          0)
-        )
-      promql_template_1: |
-        max by(environment, tier, type, stage, shard, fqdn) (
-          clamp_min(
-            clamp_max(
-              avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", environment="{{ $labels.environment }}",stage="{{ $labels.stage }}",type="{{ $labels.type }}"}[5m]))
-              ,
-              1)
-          ,
-          0)
-        )
-      runbook: docs/{{ $labels.type }}/README.md
-    labels:
-      alert_type: cause
-      rules_domain: general
-      severity: s4
-    expr: |
-      gitlab_component_saturation:ratio{component="single_node_cpu",env="gstg",type="zoekt"} > on(component) group_left
-      slo:max:hard:gitlab_component_saturation:ratio{component="single_node_cpu"}
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-metadata.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-metadata.yml
index fd74401a84..c4b506c3e4 100644
--- a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-metadata.yml
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation-metadata.yml
@@ -6,52 +6,44 @@ groups:
   rules:
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: cpu
-    expr: "0.8"
+      component: kube_container_cpu
+    expr: "0.95"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: cpu
-    expr: "0.9"
+      component: kube_container_cpu
+    expr: "0.99"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: disk_inodes
-    expr: "0.75"
+      component: kube_container_cpu_limit
+    expr: "0.9"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: disk_inodes
-    expr: "0.8"
+      component: kube_container_cpu_limit
+    expr: "0.99"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: disk_space
-    expr: "0.85"
+      component: kube_container_memory
+    expr: "0.8"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: disk_space
+      component: kube_container_memory
     expr: "0.9"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: memory
-    expr: "0.9"
+      component: kube_container_throttling
+    expr: "0.4"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: memory
-    expr: "0.98"
+      component: kube_container_throttling
+    expr: "0.5"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
-      component: nf_conntrack_entries
-    expr: "0.95"
-  - record: slo:max:hard:gitlab_component_saturation:ratio
-    labels:
-      component: nf_conntrack_entries
-    expr: "0.98"
-  - record: slo:max:soft:gitlab_component_saturation:ratio
-    labels:
-      component: node_schedstat_waiting
-    expr: "0.1"
+      component: kube_horizontalpodautoscaler_desired_replicas
+    expr: "0.9"
   - record: slo:max:hard:gitlab_component_saturation:ratio
     labels:
-      component: node_schedstat_waiting
-    expr: "0.15"
+      component: kube_horizontalpodautoscaler_desired_replicas
+    expr: "0.95"
   - record: slo:max:soft:gitlab_component_saturation:ratio
     labels:
       component: open_fds
@@ -60,19 +52,3 @@ groups:
     labels:
       component: open_fds
     expr: "0.9"
-  - record: slo:max:soft:gitlab_component_saturation:ratio
-    labels:
-      component: shard_cpu
-    expr: "0.85"
-  - record: slo:max:hard:gitlab_component_saturation:ratio
-    labels:
-      component: shard_cpu
-    expr: "0.95"
-  - record: slo:max:soft:gitlab_component_saturation:ratio
-    labels:
-      component: single_node_cpu
-    expr: "0.9"
-  - record: slo:max:hard:gitlab_component_saturation:ratio
-    labels:
-      component: single_node_cpu
-    expr: "0.95"
diff --git a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation.yml b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation.yml
index 14e37fdc5d..50aa2a03e8 100644
--- a/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation.yml
+++ b/mimir-rules/gitlab-gstg/gstg/zoekt/autogenerated-gitlab-gstg-gstg-zoekt-saturation.yml
@@ -6,13 +6,23 @@ groups:
   rules:
   - record: gitlab_component_saturation:ratio
     labels:
-      component: cpu
+      component: kube_container_cpu
     expr: |
-      max by(env, environment, tier, type, stage, shard) (
+      quantile by(env, environment, tier, type, stage, shard) (
+        0.99,
         clamp_min(
           clamp_max(
-            1 - avg by (env, environment, tier, type, stage, shard) (
-              rate(node_cpu_seconds_total{mode="idle", env="gstg",type="zoekt"}[5m])
+            (
+              sum by (env, environment, tier, type, stage, shard, pod, container) (
+                rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", env="gstg",type="zoekt"}[1h])
+              )
+              unless on(env, environment, tier, type, stage, shard, pod, container) (
+                container_spec_cpu_quota:labeled{container!="", container!="POD", env="gstg",type="zoekt"}
+              )
+            )
+            /
+            sum by(env, environment, tier, type, stage, shard, pod, container) (
+              kube_pod_container_resource_requests:labeled{container!="", container!="POD", resource="cpu", env="gstg",type="zoekt"}
             )
             ,
             1)
@@ -21,15 +31,19 @@ groups:
       )
   - record: gitlab_component_saturation:ratio
     labels:
-      component: disk_inodes
+      component: kube_container_cpu_limit
     expr: |
       max by(env, environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            1 - (
-              node_filesystem_files_free{fstype=~"(ext.|xfs)", env="gstg",type="zoekt"}
+            sum by (env, environment, tier, type, stage, shard, pod, container) (
+              rate(container_cpu_usage_seconds_total:labeled{container!="", container!="POD", env="gstg",type="zoekt"}[5m])
+            )
+            /
+            sum by(env, environment, tier, type, stage, shard, pod, container) (
+              container_spec_cpu_quota:labeled{container!="", container!="POD", env="gstg",type="zoekt"}
               /
-              node_filesystem_files{fstype=~"(ext.|xfs)", env="gstg",type="zoekt"}
+              container_spec_cpu_period:labeled{container!="", container!="POD", env="gstg",type="zoekt"}
             )
             ,
             1)
@@ -38,14 +52,14 @@ groups:
       )
   - record: gitlab_component_saturation:ratio
     labels:
-      component: disk_space
+      component: kube_container_memory
     expr: |
       max by(env, environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            (
-              1 - node_filesystem_avail_bytes{fstype=~"ext.|xfs", env="gstg",type="zoekt"} / node_filesystem_size_bytes{fstype=~"ext.|xfs", env="gstg",type="zoekt"}
-            )
+            container_memory_working_set_bytes:labeled{container!="", container!="POD", env="gstg",type="zoekt"}
+            /
+            (container_spec_memory_limit_bytes:labeled{container!="", container!="POD", env="gstg",type="zoekt"} > 0)
             ,
             1)
         ,
@@ -53,12 +67,17 @@ groups:
       )
   - record: gitlab_component_saturation:ratio
     labels:
-      component: memory
+      component: kube_container_throttling
     expr: |
-      max by(env, environment, tier, type, stage, shard) (
+      quantile by(env, environment, tier, type, stage, shard) (
+        0.99,
         clamp_min(
           clamp_max(
-            instance:node_memory_utilization:ratio{env="gstg",type="zoekt"} or instance:node_memory_utilisation:ratio{env="gstg",type="zoekt"}
+            avg by (env, environment, tier, type, stage, shard, pod, container)(
+              rate(container_cpu_cfs_throttled_periods_total:labeled{container!="", env="gstg",type="zoekt"}[5m])
+              /
+              rate(container_cpu_cfs_periods_total:labeled{container!="", env="gstg",type="zoekt"}[5m])
+            )
             ,
             1)
         ,
@@ -66,27 +85,14 @@ groups:
       )
   - record: gitlab_component_saturation:ratio
     labels:
-      component: nf_conntrack_entries
+      component: kube_horizontalpodautoscaler_desired_replicas
     expr: |
       max by(env, environment, tier, type, stage, shard) (
         clamp_min(
           clamp_max(
-            max_over_time(node_nf_conntrack_entries{env="gstg",type="zoekt"}[1m])
+            kube_horizontalpodautoscaler_status_desired_replicas:labeled{env="gstg",type="zoekt", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
             /
-            node_nf_conntrack_entries_limit{env="gstg",type="zoekt"}
-            ,
-            1)
-        ,
-        0)
-      )
-  - record: gitlab_component_saturation:ratio
-    labels:
-      component: node_schedstat_waiting
-    expr: |
-      max by(env, environment, tier, type, stage, shard) (
-        clamp_min(
-          clamp_max(
-            avg without (cpu) (rate(node_schedstat_waiting_seconds_total{env="gstg",type="zoekt"}[1h]))
+            kube_horizontalpodautoscaler_spec_max_replicas:labeled{env="gstg",type="zoekt", shard!~"database-throttled|elasticsearch|gitaly-throttled|urgent-authorized-projects|urgent-other", namespace!~"pubsubbeat"}
             ,
             1)
         ,
@@ -115,31 +121,3 @@ groups:
         ,
         0)
       )
-  - record: gitlab_component_saturation:ratio
-    labels:
-      component: shard_cpu
-    expr: |
-      max by(env, environment, tier, type, stage, shard) (
-        clamp_min(
-          clamp_max(
-            1 - avg by (env, environment, tier, type, stage, shard, shard) (
-              rate(node_cpu_seconds_total{mode="idle", env="gstg",type="zoekt"}[5m])
-            )
-            ,
-            1)
-        ,
-        0)
-      )
-  - record: gitlab_component_saturation:ratio
-    labels:
-      component: single_node_cpu
-    expr: |
-      max by(env, environment, tier, type, stage, shard) (
-        clamp_min(
-          clamp_max(
-            avg without(cpu, mode) (1 - rate(node_cpu_seconds_total{mode="idle", env="gstg",type="zoekt"}[5m]))
-            ,
-            1)
-        ,
-        0)
-      )
-- 
GitLab