Reduce timeseries data cardinality when calling `ObserveHistogramsService`
Why are we doing this work
In !157955 (merged) and !157955 (merged) we introduced the following Prometheus histograms and corresponding labels:
- `gitlab_security_policies_scan_result_process_duration_seconds` with labels `project_id`, `configuration_id`
- `gitlab_security_policies_scan_execution_configuration_rendering_seconds` with labels `project_id`, `action_count`
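Because every distinct combination of label values produces its own time series, labelling observations with per-project and per-configuration values leads to high data cardinality. We should remove these labels from the histograms and instead use structured logging for that level of detail.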
Additionally, we need to reduce the bucket count for `gitlab_security_policies_scan_execution_configuration_rendering_seconds` (from the eleven buckets `0..10` down to `[1, 3, 5, 10]`).
Implementation Plan
- backend Remove the histogram labels and reduce bucket count:
diff --git a/ee/app/services/security/security_orchestration_policies/observe_histograms_service.rb b/ee/app/services/security/security_orchestration_policies/observe_histograms_service.rb
index c44f27971baa..58b378bf9d31 100644
--- a/ee/app/services/security/security_orchestration_policies/observe_histograms_service.rb
+++ b/ee/app/services/security/security_orchestration_policies/observe_histograms_service.rb
@@ -6,7 +6,7 @@ class ObserveHistogramsService
HISTOGRAMS = {
gitlab_security_policies_scan_execution_configuration_rendering_seconds: {
description: 'The amount of time to render scan execution policy CI configurations',
- buckets: (0..10).to_a.freeze
+ buckets: [1, 3, 5, 10].freeze
},
gitlab_security_policies_scan_result_process_duration_seconds: {
description: 'The amount of time to process scan result policies',
diff --git a/ee/app/services/security/security_orchestration_policies/scan_pipeline_service.rb b/ee/app/services/security/security_orchestration_policies/scan_pipeline_service.rb
index c81c7b68d160..d352dd518852 100644
--- a/ee/app/services/security/security_orchestration_policies/scan_pipeline_service.rb
+++ b/ee/app/services/security/security_orchestration_policies/scan_pipeline_service.rb
@@ -38,7 +38,7 @@ def initialize(context, base_variables: {})
end
def execute(actions)
- measure(HISTOGRAM, project_id: project.id, action_count: actions.size) do
+ measure(HISTOGRAM) do
actions = actions.select do |action|
valid_scan_type?(action[:scan]) && pipeline_scan_type?(action[:scan].to_s)
end
diff --git a/ee/app/workers/security/process_scan_result_policy_worker.rb b/ee/app/workers/security/process_scan_result_policy_worker.rb
index 47feea919ebb..93c0926e3821 100644
--- a/ee/app/workers/security/process_scan_result_policy_worker.rb
+++ b/ee/app/workers/security/process_scan_result_policy_worker.rb
@@ -14,7 +14,7 @@ class ProcessScanResultPolicyWorker
HISTOGRAM = :gitlab_security_policies_scan_result_process_duration_seconds
def perform(project_id, configuration_id)
- measure(HISTOGRAM, project_id: project_id, configuration_id: configuration_id) do
+ measure(HISTOGRAM) do
project = Project.find_by_id(project_id)
configuration = Security::OrchestrationPolicyConfiguration.find_by_id(configuration_id)
break unless project && configuration
Edited by Dominic Bauer