From a64c716817e7efd4f59ac0d00de66fb4e0902ee6 Mon Sep 17 00:00:00 2001 From: Chad Woolley <cwoolley@gitlab.com> Date: Sun, 10 Mar 2024 20:03:35 -0700 Subject: [PATCH] Add OpenTelemetry instrumentation and docs - Follow https://docs.honeycomb.io/getting-data-in/opentelemetry/ruby/ - Set up initializer to only enable in development and staging environments - Add some minimal developer docs with instructions for pointing the OpenTelemetry exporter to a GitLab instance --- Gemfile | 31 ++++ Gemfile.checksum | 30 ++++ Gemfile.lock | 132 ++++++++++++++++++ config/initializers/opentelemetry.rb | 9 ++ ...itlab_instrumentation_for_opentelemetry.md | 66 +++++++++ .../stage_group_observability/index.md | 6 + 6 files changed, 274 insertions(+) create mode 100644 config/initializers/opentelemetry.rb create mode 100644 doc/development/stage_group_observability/gitlab_instrumentation_for_opentelemetry.md diff --git a/Gemfile b/Gemfile index 8cc5d88688d0fe..42c00f5eafcb54 100644 --- a/Gemfile +++ b/Gemfile @@ -399,6 +399,37 @@ gem 'snowplow-tracker', '~> 0.8.0' # rubocop:todo Gemfile/MissingFeatureCategory gem 'webrick', '~> 1.8.1', require: false # rubocop:todo Gemfile/MissingFeatureCategory gem 'prometheus-client-mmap', '~> 1.1', '>= 1.1.1', require: 'prometheus/client' # rubocop:todo Gemfile/MissingFeatureCategory +# OpenTelemetry +group :opentelemetry do + # Core OpenTelemetry gems + gem 'opentelemetry-sdk', feature_category: :tooling + gem 'opentelemetry-exporter-otlp', feature_category: :tooling + + # OpenTelemetry gems selected from full set in `opentelemetry-instrumentation-all` metagem + gem 'opentelemetry-instrumentation-active_support', feature_category: :tooling + gem 'opentelemetry-instrumentation-action_pack', feature_category: :tooling + gem 'opentelemetry-instrumentation-active_job', feature_category: :tooling + gem 'opentelemetry-instrumentation-active_record', feature_category: :tooling + gem 'opentelemetry-instrumentation-action_view', feature_category: :tooling 
+ gem 'opentelemetry-instrumentation-aws_sdk', feature_category: :tooling + gem 'opentelemetry-instrumentation-http', feature_category: :tooling + gem 'opentelemetry-instrumentation-active_model_serializers', feature_category: :tooling + gem 'opentelemetry-instrumentation-concurrent_ruby', feature_category: :tooling + gem 'opentelemetry-instrumentation-ethon', feature_category: :tooling + gem 'opentelemetry-instrumentation-excon', feature_category: :tooling + gem 'opentelemetry-instrumentation-faraday', feature_category: :tooling + gem 'opentelemetry-instrumentation-grape', feature_category: :tooling + gem 'opentelemetry-instrumentation-graphql', feature_category: :tooling + gem 'opentelemetry-instrumentation-http_client', feature_category: :tooling + gem 'opentelemetry-instrumentation-net_http', feature_category: :tooling + gem 'opentelemetry-instrumentation-pg', feature_category: :tooling + gem 'opentelemetry-instrumentation-rack', feature_category: :tooling + gem 'opentelemetry-instrumentation-rails', feature_category: :tooling + gem 'opentelemetry-instrumentation-rake', feature_category: :tooling + gem 'opentelemetry-instrumentation-redis', feature_category: :tooling + gem 'opentelemetry-instrumentation-sidekiq', feature_category: :tooling +end + gem 'warning', '~> 1.3.0' # rubocop:todo Gemfile/MissingFeatureCategory group :development do diff --git a/Gemfile.checksum b/Gemfile.checksum index 87a456429b6b76..c747464acfbbc3 100644 --- a/Gemfile.checksum +++ b/Gemfile.checksum @@ -446,6 +446,36 @@ {"name":"openid_connect","version":"1.3.0","platform":"ruby","checksum":"a796855096850cc01140e37ea6ae9fd14f2be818b9b5bc698418063dfe228770"}, {"name":"openssl","version":"3.1.0","platform":"ruby","checksum":"e3a01279e918a7a5cf741db69b124864878b1a9783b1f2d34854bc1d444ac430"}, {"name":"openssl-signature_algorithm","version":"1.3.0","platform":"ruby","checksum":"a3b40b5e8276162d4a6e50c7c97cdaf1446f9b2c3946a6fa2c14628e0c957e80"}, 
+{"name":"opentelemetry-api","version":"1.2.5","platform":"ruby","checksum":"ab3d9a0566cd2ee068ade40e840bc973383ab8568e693c0c5712f0c789122cc9"}, +{"name":"opentelemetry-common","version":"0.20.1","platform":"ruby","checksum":"f8cd7284d4a58f89b80d26aeb0e86306bc730df015d171af0d7796cb1388e7de"}, +{"name":"opentelemetry-exporter-otlp","version":"0.26.3","platform":"ruby","checksum":"fc1deea7924c74e3536983b02684d1bc7e9737baa5c38f3aff9809a7fd330399"}, +{"name":"opentelemetry-helpers-sql-obfuscation","version":"0.1.0","platform":"ruby","checksum":"bc6ef1373dbcf979647091b3bfc99d7b6fb9669f74c3ae184f58b48adfc8d432"}, +{"name":"opentelemetry-instrumentation-action_pack","version":"0.9.0","platform":"ruby","checksum":"c5df8472afc9cdbfc1425d9af7816b9cfc1a1a69b86621f1fc624974bd9acb9a"}, +{"name":"opentelemetry-instrumentation-action_view","version":"0.7.0","platform":"ruby","checksum":"bc7c714be0b4bb76843085c29ecc9465e65cb7fe6722e34c71629e44f8c3cb75"}, +{"name":"opentelemetry-instrumentation-active_job","version":"0.7.1","platform":"ruby","checksum":"da24806c9d92fe580db42638f6c763fe1324ff90aa147d45d4247f8052c68089"}, +{"name":"opentelemetry-instrumentation-active_model_serializers","version":"0.20.1","platform":"ruby","checksum":"8c47f859fc925c4c078d37f5a13c55f4ba9751f880aa64d0c9568f3f59a3efaa"}, +{"name":"opentelemetry-instrumentation-active_record","version":"0.7.0","platform":"ruby","checksum":"327ca53ebb74187b463ab05c1d89508552e9cd9122db0843ad1f27930ee91797"}, +{"name":"opentelemetry-instrumentation-active_support","version":"0.5.1","platform":"ruby","checksum":"03898327e8284410b8935a3d3b980bda56e2063eb5a7d30acf75487dd6934a66"}, +{"name":"opentelemetry-instrumentation-aws_sdk","version":"0.5.1","platform":"ruby","checksum":"496a8d13c59ff4d08dcd69b16db97c013398173295058593aa0c2f3ef3090cce"}, +{"name":"opentelemetry-instrumentation-base","version":"0.22.3","platform":"ruby","checksum":"f61c434f0406cdc878bc188f67e644f94dba4be553d2fd21b2d1faa82731605f"}, 
+{"name":"opentelemetry-instrumentation-concurrent_ruby","version":"0.21.2","platform":"ruby","checksum":"cc285f6c133c7f037cf3850213172f9441e1b6cd9f26015304835efa1e8bfd65"}, +{"name":"opentelemetry-instrumentation-ethon","version":"0.21.3","platform":"ruby","checksum":"178371bb861e787bc7cdcfd9b54efb388dcaf5f52582512183a24442092be699"}, +{"name":"opentelemetry-instrumentation-excon","version":"0.22.0","platform":"ruby","checksum":"fc58eab5563b4a114947e9ebc8da08166340cf40f44143c4c7db9c400bdc7e87"}, +{"name":"opentelemetry-instrumentation-faraday","version":"0.24.0","platform":"ruby","checksum":"e4f4df0ebca4dc11c96ac3c98aff76434bb6195f0a191b408f960bd125f85d60"}, +{"name":"opentelemetry-instrumentation-grape","version":"0.1.6","platform":"ruby","checksum":"d270d541fe7ca05cffea461e1f1e49a1310e3e4f7f5bf8574907bbbb74014173"}, +{"name":"opentelemetry-instrumentation-graphql","version":"0.28.0","platform":"ruby","checksum":"afcf06a3ab5c1574dd7b2c708329b812d4430961cc9a386a813a42c2472bdbf6"}, +{"name":"opentelemetry-instrumentation-http","version":"0.23.2","platform":"ruby","checksum":"1c548d2a242490ca018a253d1817c26422017f94558f59961bb1c9b00d779e3f"}, +{"name":"opentelemetry-instrumentation-http_client","version":"0.22.3","platform":"ruby","checksum":"34e4b83581aa2a39d98a25a2daa2a6d6cb710c00f30f353366153b0c5bb08178"}, +{"name":"opentelemetry-instrumentation-net_http","version":"0.22.4","platform":"ruby","checksum":"2878b5e6e456d74038c6ba487b0cbdf33c1c90d4bd867f7ef66b00646f2698ce"}, +{"name":"opentelemetry-instrumentation-pg","version":"0.27.1","platform":"ruby","checksum":"3918d1835c44feb60e0409623a8df89b618a64e73bbb738f0b9bb7ede6102a06"}, +{"name":"opentelemetry-instrumentation-rack","version":"0.24.0","platform":"ruby","checksum":"770c5043e5ccfc5f453ff26fafee76a8b3bea448d1cc78294ba4149c65dc0220"}, +{"name":"opentelemetry-instrumentation-rails","version":"0.30.0","platform":"ruby","checksum":"fe2bd2204f42b3c68a96b2f34b02e9a34269049204bc588a1c64c6eeee5b9c37"}, 
+{"name":"opentelemetry-instrumentation-rake","version":"0.2.1","platform":"ruby","checksum":"c46e1bf592a1c9b1697fe774b5b852e1910629713398ab7bfa16d0c696ed9278"}, +{"name":"opentelemetry-instrumentation-redis","version":"0.25.3","platform":"ruby","checksum":"2c649a6f1c1008bb0147a4689ecea2aef113a109c25d3a7369f93b4d2ffd2112"}, +{"name":"opentelemetry-instrumentation-sidekiq","version":"0.25.2","platform":"ruby","checksum":"8c5a2d0a896a95210d69aadcfd3463fa02970b1d8a9144207d6b6770270896cc"}, +{"name":"opentelemetry-registry","version":"0.3.0","platform":"ruby","checksum":"116ab6114a706340900718298c126f720e50b1ef3cfdbe5997611ff232fe6822"}, +{"name":"opentelemetry-sdk","version":"1.4.0","platform":"ruby","checksum":"2466922dfb98185f8edeae995e9334bfff6416ac9fb9c08b099c251ef133034a"}, +{"name":"opentelemetry-semantic_conventions","version":"1.10.0","platform":"ruby","checksum":"13d24c1071736004a6c09113ee9fe163a25daa0defe6ab279a42cac7b92b1b76"}, {"name":"opentracing","version":"0.5.0","platform":"ruby","checksum":"deb5d7abe6b0e7631d866d8cb5ee7bb9352650a504a32f61591302bc510b9286"}, {"name":"optimist","version":"3.0.1","platform":"ruby","checksum":"336b753676d6117cad9301fac7e91dab4228f747d4e7179891ad3a163c64e2ed"}, {"name":"org-ruby","version":"0.9.12","platform":"ruby","checksum":"93cbec3a4470cb9dca6a4a98dc276a6434ea9d9e7bc2d42ea33c3aedd5d1c974"}, diff --git a/Gemfile.lock b/Gemfile.lock index 9f4f63bba06247..8de8ea129f7e24 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1238,6 +1238,114 @@ GEM openssl (3.1.0) openssl-signature_algorithm (1.3.0) openssl (> 2.0) + opentelemetry-api (1.2.5) + opentelemetry-common (0.20.1) + opentelemetry-api (~> 1.0) + opentelemetry-exporter-otlp (0.26.3) + google-protobuf (~> 3.14) + googleapis-common-protos-types (~> 1.3) + opentelemetry-api (~> 1.1) + opentelemetry-common (~> 0.20) + opentelemetry-sdk (~> 1.2) + opentelemetry-semantic_conventions + opentelemetry-helpers-sql-obfuscation (0.1.0) + opentelemetry-common (~> 0.20) + 
opentelemetry-instrumentation-action_pack (0.9.0) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-rack (~> 0.21) + opentelemetry-instrumentation-action_view (0.7.0) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-active_support (~> 0.1) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-active_job (0.7.1) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-active_model_serializers (0.20.1) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-active_record (0.7.0) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + ruby2_keywords + opentelemetry-instrumentation-active_support (0.5.1) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-aws_sdk (0.5.1) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-base (0.22.3) + opentelemetry-api (~> 1.0) + opentelemetry-registry (~> 0.1) + opentelemetry-instrumentation-concurrent_ruby (0.21.2) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-ethon (0.21.3) + opentelemetry-api (~> 1.0) + opentelemetry-common (~> 0.20.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-excon (0.22.0) + opentelemetry-api (~> 1.0) + opentelemetry-common (~> 0.20.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-faraday (0.24.0) + opentelemetry-api (~> 1.0) + opentelemetry-common (~> 0.20.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-grape (0.1.6) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-rack (~> 0.21) + opentelemetry-instrumentation-graphql (0.28.0) + 
opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-http (0.23.2) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-http_client (0.22.3) + opentelemetry-api (~> 1.0) + opentelemetry-common (~> 0.20.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-net_http (0.22.4) + opentelemetry-api (~> 1.0) + opentelemetry-common (~> 0.20.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-pg (0.27.1) + opentelemetry-api (~> 1.0) + opentelemetry-helpers-sql-obfuscation + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-rack (0.24.0) + opentelemetry-api (~> 1.0) + opentelemetry-common (~> 0.20.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-rails (0.30.0) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-action_pack (~> 0.9.0) + opentelemetry-instrumentation-action_view (~> 0.7.0) + opentelemetry-instrumentation-active_job (~> 0.7.0) + opentelemetry-instrumentation-active_record (~> 0.7.0) + opentelemetry-instrumentation-active_support (~> 0.5.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-rake (0.2.1) + opentelemetry-api (~> 1.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-redis (0.25.3) + opentelemetry-api (~> 1.0) + opentelemetry-common (~> 0.20.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-instrumentation-sidekiq (0.25.2) + opentelemetry-api (~> 1.0) + opentelemetry-common (~> 0.20.0) + opentelemetry-instrumentation-base (~> 0.22.1) + opentelemetry-registry (0.3.0) + opentelemetry-api (~> 1.1) + opentelemetry-sdk (1.4.0) + opentelemetry-api (~> 1.1) + opentelemetry-common (~> 0.20) + opentelemetry-registry (~> 0.2) + opentelemetry-semantic_conventions + opentelemetry-semantic_conventions (1.10.0) + opentelemetry-api (~> 1.0) 
opentracing (0.5.0) optimist (3.0.1) org-ruby (0.9.12) @@ -2039,6 +2147,30 @@ DEPENDENCIES omniauth_openid_connect (~> 0.6.1) openid_connect (= 1.3.0) openssl (~> 3.0) + opentelemetry-exporter-otlp + opentelemetry-instrumentation-action_pack + opentelemetry-instrumentation-action_view + opentelemetry-instrumentation-active_job + opentelemetry-instrumentation-active_model_serializers + opentelemetry-instrumentation-active_record + opentelemetry-instrumentation-active_support + opentelemetry-instrumentation-aws_sdk + opentelemetry-instrumentation-concurrent_ruby + opentelemetry-instrumentation-ethon + opentelemetry-instrumentation-excon + opentelemetry-instrumentation-faraday + opentelemetry-instrumentation-grape + opentelemetry-instrumentation-graphql + opentelemetry-instrumentation-http + opentelemetry-instrumentation-http_client + opentelemetry-instrumentation-net_http + opentelemetry-instrumentation-pg + opentelemetry-instrumentation-rack + opentelemetry-instrumentation-rails + opentelemetry-instrumentation-rake + opentelemetry-instrumentation-redis + opentelemetry-instrumentation-sidekiq + opentelemetry-sdk org-ruby (~> 0.9.12) os (~> 1.1, >= 1.1.4) pact (~> 1.64) diff --git a/config/initializers/opentelemetry.rb b/config/initializers/opentelemetry.rb new file mode 100644 index 00000000000000..439356a7ede88a --- /dev/null +++ b/config/initializers/opentelemetry.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +# NOTE: We are using an ENV var 'GITLAB_ENABLE_OTEL_EXPORTERS' to enable instead of a feature flag or Settings module, +# because these may not yet be fully configured or usable by this point in the Rails initialization process. +if Gitlab::Utils.to_boolean(ENV['GITLAB_ENABLE_OTEL_EXPORTERS'], default: false) && + (::Gitlab.dev_or_test_env? || ::Gitlab.staging?) 
+ Bundler.require(:opentelemetry) + OpenTelemetry::SDK.configure(&:use_all) +end diff --git a/doc/development/stage_group_observability/gitlab_instrumentation_for_opentelemetry.md b/doc/development/stage_group_observability/gitlab_instrumentation_for_opentelemetry.md new file mode 100644 index 00000000000000..6149c641b2ced1 --- /dev/null +++ b/doc/development/stage_group_observability/gitlab_instrumentation_for_opentelemetry.md @@ -0,0 +1,66 @@ +--- +stage: Monitor +group: Observability +info: Any user with at least the Maintainer role can merge updates to this content. For details, see https://docs.gitlab.com/ee/development/development_processes.html#development-guidelines-review. +--- + +# GitLab instrumentation for OpenTelemetry + +## Enable OpenTelemetry tracing, metrics, and logs in GDK development + +NOTE: +Currently, the GDK environment is not set up by default to properly +collect and display OpenTelemetry data. Therefore, you should point the +`OTEL_EXPORTER_*_ENDPOINT` ENV vars to a GitLab project: + +1. Which has an Ultimate license +1. In which you have at least the Maintainer role +1. In which you have access to enable top-level root-group feature flags (or is under the `gitlab-org` or `gitlab-com` root groups which already have the flags enabled) + +Once you have a project identified to use: + +1. Note the ID of the project (from the three dots at upper right of main project page). +1. Note the ID of the top-level root group which contains the project. +1. When setting the environment variables for the following steps, add them to `env.runit` in the root of the `gitlab-development-kit` folder. +1. 
Follow instructions to [configure distributed tracing for a project](../../operations/tracing.md), with the following custom settings: + - For the `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` environment variable, use the following value: + + ```shell + export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="https://<gitlab-host>/v3/<gitlab-top-level-group-id>/<gitlab-project-id>/ingest/traces" + ``` + +1. Follow the instructions to [configure distributed metrics for a project](../../operations/metrics.md), with the following custom settings: + - For the `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` environment variable, use the following value: + + ```shell + export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="https://<gitlab-host>/v3/<gitlab-top-level-group-id>/<gitlab-project-id>/ingest/metrics" + ``` + +1. Follow the instructions to [configure distributed logs for a project](../../operations/logs.md), with the following custom settings: + - For the `OTEL_EXPORTER_OTLP_LOGS_ENDPOINT` environment variable, use the following value: + + ```shell + export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="https://<gitlab-host>/v3/<gitlab-top-level-group-id>/<gitlab-project-id>/ingest/logs" + ``` + +1. Also add the following to the `env.runit` file: + + ```shell + # GitLab-specific flag to enable the Rails initializer to set up OpenTelemetry exporters + export GITLAB_ENABLE_OTEL_EXPORTERS=true + ``` + +1. Run `gdk restart`. +1. Navigate to your project, and follow the instructions in the docs linked in the previous steps to enable and view the tracing, metrics, or logs. 
+ +## References + +- [Distributed Tracing](../../operations/tracing.md) +- [Metrics](../../operations/metrics.md) +- [Logs](../../operations/logs.md) + +## Related blueprints + +- [GitLab Observability in GitLab.com and Self-Managed GitLab Instances](../../architecture/blueprints/observability_for_self_managed/index.md) +- [GitLab Observability - Metrics](../../architecture/blueprints/observability_metrics/index.md) +- [GitLab Observability - Logging](../../architecture/blueprints/observability_logging/index.md) diff --git a/doc/development/stage_group_observability/index.md b/doc/development/stage_group_observability/index.md index d295a302c9bdd9..057a2ecc092f9d 100644 --- a/doc/development/stage_group_observability/index.md +++ b/doc/development/stage_group_observability/index.md @@ -172,3 +172,9 @@ Questions to answer: 1. Does the failure look related to a particular component? (database, Redis, ...) 1. Does the failure affect a specific endpoint? Or is it system-wide? 1. Does the failure appear caused by infrastructure incidents? + +## GitLab instrumentation for OpenTelemetry + +There is an ongoing effort to instrument the GitLab codebase for OpenTelemetry. + +For more specific information on this effort, see [GitLab instrumentation for OpenTelemetry](gitlab_instrumentation_for_opentelemetry.md). -- GitLab