Commit 37dd1993 authored by Grzegorz Bizon's avatar Grzegorz Bizon 🌡

Merge branch 'instrument-infra' into 'master'

Add Prometheus metrics endpoint and basic infrastructure to meter code

See merge request !11553
parents 19982333 1c59ba67
Pipeline #8830060 failed with stages
in 105 minutes and 25 seconds
......@@ -268,6 +268,9 @@ group :metrics do
gem 'allocations', '~> 1.0', require: false, platform: :mri
gem 'method_source', '~> 0.8', require: false
gem 'influxdb', '~> 0.2', require: false
# Prometheus
gem 'prometheus-client-mmap', '~>0.7.0.beta5'
end
group :development do
......
......@@ -457,6 +457,7 @@ GEM
mimemagic (0.3.0)
mini_portile2 (2.1.0)
minitest (5.7.0)
mmap2 (2.2.6)
mousetrap-rails (1.4.6)
multi_json (1.12.1)
multi_xml (0.6.0)
......@@ -560,6 +561,8 @@ GEM
premailer-rails (1.9.2)
actionmailer (>= 3, < 6)
premailer (~> 1.7, >= 1.7.9)
prometheus-client-mmap (0.7.0.beta5)
mmap2 (~> 2.2.6)
pry (0.10.4)
coderay (~> 1.1.0)
method_source (~> 0.8.1)
......@@ -995,6 +998,7 @@ DEPENDENCIES
pg (~> 0.18.2)
poltergeist (~> 1.9.0)
premailer-rails (~> 1.9.0)
prometheus-client-mmap (~> 0.7.0.beta5)
pry-byebug (~> 3.4.1)
pry-rails (~> 0.3.4)
rack-attack (~> 4.4.1)
......
......@@ -149,6 +149,7 @@ class Admin::ApplicationSettingsController < Admin::ApplicationController
:version_check_enabled,
:terminal_max_session_time,
:polling_interval_multiplier,
:prometheus_metrics_enabled,
:usage_ping_enabled,
disabled_oauth_sign_in_sources: [],
......
......@@ -20,25 +20,8 @@ class HealthController < ActionController::Base
render_check_results(results)
end
# Renders the metrics of every check in CHECKS as plain text, one formatted
# line per metric, using the Prometheus 0.0.4 text content type.
def metrics
  prom_lines = CHECKS.flat_map(&:metrics).map { |metric| metric_to_prom_line(metric) }

  render text: prom_lines.join("\n"), content_type: 'text/plain; version=0.0.4'
end
private
# Formats a single metric as one text line: `name value`, or
# `name{key="value",...} value` when the metric carries labels.
#
# metric - object responding to #name, #value and #labels (labels may be nil).
def metric_to_prom_line(metric)
  raw_labels = metric.labels
  label_pairs = raw_labels.nil? ? [] : raw_labels.map { |key, value| "#{key}=\"#{value}\"" }

  return "#{metric.name} #{metric.value}" if label_pairs.empty?

  "#{metric.name}{#{label_pairs.join(',')}} #{metric.value}"
end
def render_check_results(results)
flattened = results.flat_map do |name, result|
if result.is_a?(Gitlab::HealthChecks::Result)
......
# Serves the text produced by MetricsService at the metrics endpoint.
class MetricsController < ActionController::Base
  include RequiresHealthToken

  protect_from_forgery with: :exception

  before_action :validate_prometheus_metrics

  # Renders the combined metrics text using the Prometheus text exposition
  # content type (format version 0.0.4).
  def index
    render text: metrics_service.metrics_text, content_type: 'text/plain; version=0.0.4'
  end

  private

  # Memoized service that builds the response body.
  def metrics_service
    @metrics_service ||= MetricsService.new
  end

  # Responds with a 404 unless Prometheus metrics are enabled.
  # NOTE(review): `render_404` is not defined in this class; presumably it
  # comes from RequiresHealthToken — confirm.
  def validate_prometheus_metrics
    render_404 unless Gitlab::Metrics.prometheus_metrics_enabled?
  end
end
......@@ -47,6 +47,10 @@ class SessionsController < Devise::SessionsController
private
# Returns the memoized `user_session_logins` counter obtained from
# Gitlab::Metrics.
def login_counter
  return @login_counter if @login_counter

  @login_counter = Gitlab::Metrics.counter(:user_session_logins, 'User sign in count')
end
# Handle an "initial setup" state, where there's only one user, it's an admin,
# and they require a password change.
def check_initial_setup
......@@ -129,6 +133,7 @@ class SessionsController < Devise::SessionsController
end
# Increments the login counter, then records a 'login' activity for +user+.
# Returns whatever the activity service's #execute returns.
def log_user_activity(user)
  login_counter.increment

  activity_service = Users::ActivityService.new(user, 'login')
  activity_service.execute
end
......
require 'prometheus/client/formats/text'
# Builds the text served by the metrics endpoint: the health-check metrics
# followed by the metrics marshalled from the multiprocess metrics directory.
class MetricsService
  CHECKS = [
    Gitlab::HealthChecks::DbCheck,
    Gitlab::HealthChecks::RedisCheck,
    Gitlab::HealthChecks::FsShardsCheck
  ].freeze

  # Text produced by the Prometheus client for the multiprocess directory.
  def prometheus_metrics_text
    Prometheus::Client::Formats::Text.marshal_multiprocess(multiprocess_metrics_path)
  end

  # Metrics of every check in CHECKS, rendered by the health-check formatter.
  def health_metrics_text
    health_metrics = CHECKS.flat_map { |check| check.metrics }

    formatter.marshal(health_metrics)
  end

  # Health-check text immediately followed by the Prometheus client text.
  def metrics_text
    health_metrics_text.to_s + prometheus_metrics_text.to_s
  end

  private

  def formatter
    @formatter ||= Gitlab::HealthChecks::PrometheusTextFormat.new
  end

  # Frozen path built by joining the `prometheus_multiproc_dir` environment
  # value onto the Rails root.
  def multiprocess_metrics_path
    @multiprocess_metrics_path ||= begin
      configured_dir = ENV['prometheus_multiproc_dir']
      Rails.root.join(configured_dir).freeze
    end
  end
end
......@@ -232,7 +232,7 @@
= f.number_field :container_registry_token_expire_delay, class: 'form-control'
%fieldset
%legend Metrics
%legend Metrics - Influx
%p
Setup InfluxDB to measure a wide variety of statistics like the time spent
in running SQL queries. These settings require a
......@@ -296,6 +296,21 @@
The amount of points to store in a single UDP packet. More points
results in fewer but larger UDP packets being sent.
%fieldset
%legend Metrics - Prometheus
%p
Setup Prometheus to measure a variety of statistics that partially overlap and complement Influx based metrics.
This setting requires a
= link_to 'restart', help_page_path('administration/restart_gitlab')
to take effect.
= link_to icon('question-circle'), help_page_path('administration/monitoring/performance/introduction')
.form-group
.col-sm-offset-2.col-sm-10
.checkbox
= f.label :prometheus_metrics_enabled do
= f.check_box :prometheus_metrics_enabled
Enable Prometheus Metrics
%fieldset
%legend Background Jobs
%p
......
---
title: Add prometheus based metrics collection to gitlab webapp
merge_request:
author:
......@@ -15,6 +15,9 @@ if defined?(Unicorn)
end
end
# Set the default directory for multiprocess metrics gathering
ENV['prometheus_multiproc_dir'] ||= 'tmp/prometheus_multiproc_dir'
require ::File.expand_path('../config/environment', __FILE__)
map ENV['RAILS_RELATIVE_URL_ROOT'] || "/" do
......
......@@ -38,10 +38,10 @@ Rails.application.routes.draw do
# Health check
get 'health_check(/:checks)' => 'health_check#index', as: :health_check
scope path: '-', controller: 'health' do
get :liveness
get :readiness
get :metrics
scope path: '-' do
get 'liveness' => 'health#liveness'
get 'readiness' => 'health#readiness'
resources :metrics, only: [:index]
end
# Koding route
......
if ENV['GITLAB_SHARED_RUNNERS_REGISTRATION_TOKEN'].present?
settings = ApplicationSetting.current || ApplicationSetting.create_from_defaults
settings.set_runners_registration_token(ENV['GITLAB_SHARED_RUNNERS_REGISTRATION_TOKEN'])
# Persists +settings+ and reports the outcome on standard output.
#
# settings - object responding to #save; on failure its
#            #errors.full_messages are printed.
# topic    - human-readable name of what is being saved, used in the messages.
#
# Exits the process with status 1 when the save fails.
def save(settings, topic)
  if settings.save
    puts "Saved #{topic}".color(:green)
  else
    puts "Could not save #{topic}".color(:red)
    puts
    settings.errors.full_messages.each do |message|
      puts "--> #{message}".color(:red)
    end
    puts

    exit(1)
  end
end
# Store the shared runners registration token when one is supplied through
# the environment.
runners_token = ENV['GITLAB_SHARED_RUNNERS_REGISTRATION_TOKEN']

if runners_token.present?
  settings = Gitlab::CurrentSettings.current_application_settings
  settings.set_runners_registration_token(runners_token)
  save(settings, 'Runner Registration Token')
end

# Set the Prometheus metrics flag when it is supplied through the
# environment; a value that does not convert to a boolean is stored as false.
prometheus_flag = ENV['GITLAB_PROMETHEUS_METRICS_ENABLED']

if prometheus_flag.present?
  settings = Gitlab::CurrentSettings.current_application_settings
  settings.prometheus_metrics_enabled = Gitlab::Utils.to_boolean(prometheus_flag) || false
  save(settings, 'Prometheus metrics enabled flag')
end
# Adds the boolean `prometheus_metrics_enabled` column (default false, nulls
# not allowed) to the `application_settings` table.
class AddPrometheusSettingsToMetricsSettings < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  def up
    add_column_with_default(
      :application_settings,
      :prometheus_metrics_enabled,
      :boolean,
      default: false,
      allow_null: false
    )
  end

  def down
    remove_column :application_settings, :prometheus_metrics_enabled
  end
end
......@@ -123,6 +123,7 @@ ActiveRecord::Schema.define(version: 20170525174156) do
t.integer "cached_markdown_version"
t.boolean "clientside_sentry_enabled", default: false, null: false
t.string "clientside_sentry_dsn"
t.boolean "prometheus_metrics_enabled", default: false, null: false
end
create_table "audit_events", force: :cascade do |t|
......
......@@ -110,6 +110,7 @@ module API
optional :default_artifacts_expire_in, type: String, desc: "Set the default expiration time for each job's artifacts"
optional :max_pages_size, type: Integer, desc: 'Maximum size of pages in MB'
optional :container_registry_token_expire_delay, type: Integer, desc: 'Authorization token duration (minutes)'
optional :prometheus_metrics_enabled, type: Boolean, desc: 'Enable Prometheus metrics'
optional :metrics_enabled, type: Boolean, desc: 'Enable the InfluxDB metrics'
given metrics_enabled: ->(val) { val } do
requires :metrics_host, type: String, desc: 'The InfluxDB host'
......
module Gitlab
  module HealthChecks
    # Serializes health-check metrics into the Prometheus text exposition
    # format. Every metric is declared as a gauge.
    class PrometheusTextFormat
      # Characters that must be backslash-escaped inside a label value so the
      # surrounding double quotes and the one-sample-per-line layout stay intact.
      LABEL_VALUE_ESCAPES = { '\\' => '\\\\', '"' => '\\"', "\n" => '\\n' }.freeze
      private_constant :LABEL_VALUE_ESCAPES

      # Renders +metrics+ as newline-separated text with a trailing newline.
      #
      # metrics - enumerable of objects responding to #name, #value and
      #           #labels (labels may be nil).
      #
      # Returns a String.
      def marshal(metrics)
        "#{metrics_with_type_declarations(metrics).join("\n")}\n"
      end

      private

      # Returns the sample lines, each metric name preceded by a single
      # `# TYPE` line the first time that name is seen.
      def metrics_with_type_declarations(metrics)
        type_declaration_added = {}

        metrics.flat_map do |metric|
          metric_lines = []

          unless type_declaration_added.key?(metric.name)
            type_declaration_added[metric.name] = true
            metric_lines << metric_type_declaration(metric)
          end

          metric_lines << metric_text(metric)
        end
      end

      def metric_type_declaration(metric)
        "# TYPE #{metric.name} gauge"
      end

      # Formats one sample as `name value` or `name{key="value",...} value`.
      def metric_text(metric)
        labels = metric.labels&.map { |key, value| "#{key}=\"#{escape_label_value(value)}\"" }&.join(',') || ''

        if labels.empty?
          "#{metric.name} #{metric.value}"
        else
          "#{metric.name}{#{labels}} #{metric.value}"
        end
      end

      # Stringifies +value+ and escapes backslash, double quote and line feed.
      def escape_label_value(value)
        value.to_s.gsub(/[\\"\n]/) { |char| LABEL_VALUE_ESCAPES.fetch(char) }
      end
    end
  end
end
module Gitlab
# Entry point for metrics helpers.
#
# NOTE(review): this span looks like two revisions of the same file
# interleaved by a diff view: most methods below also appear in
# Gitlab::Metrics::InfluxDb, and the trailing `if enabled?` block contains the
# `influx_metrics_enabled? || prometheus_metrics_enabled?` expression —
# confirm against the real file before changing any code here.
module Metrics
extend Gitlab::CurrentSettings
RAILS_ROOT = Rails.root.to_s
METRICS_ROOT = Rails.root.join('lib', 'gitlab', 'metrics').to_s
PATH_REGEX = /^#{RAILS_ROOT}\/?/
# Memoized hash of metrics values read from the current application settings.
# `sample_interval` falls back to 15 and `packet_size` to 1 when unset.
def self.settings
@settings ||= {
enabled: current_application_settings[:metrics_enabled],
pool_size: current_application_settings[:metrics_pool_size],
timeout: current_application_settings[:metrics_timeout],
method_call_threshold: current_application_settings[:metrics_method_call_threshold],
host: current_application_settings[:metrics_host],
port: current_application_settings[:metrics_port],
sample_interval: current_application_settings[:metrics_sample_interval] || 15,
packet_size: current_application_settings[:metrics_packet_size] || 1
}
end
extend Gitlab::Metrics::InfluxDb
extend Gitlab::Metrics::Prometheus
# Returns settings[:enabled], or false when that value is nil/false.
def self.enabled?
settings[:enabled] || false
end
# True when RUBY_ENGINE is 'ruby'.
def self.mri?
RUBY_ENGINE == 'ruby'
end
def self.method_call_threshold
# This is memoized since this method is called for every instrumented
# method. Loading data from an external cache on every method call slows
# things down too much.
@method_call_threshold ||= settings[:method_call_threshold]
end
# Returns the connection pool assigned at the bottom of this module; the
# assignment only happens inside the `if enabled?` block.
def self.pool
@pool
end
# Prepares +metrics+ and writes them through a pooled connection in slices of
# settings[:packet_size]. An error raised by an individual write is swallowed;
# Errno::EADDRNOTAVAIL and SocketError are logged instead of raised.
def self.submit_metrics(metrics)
prepared = prepare_metrics(metrics)
pool.with do |connection|
prepared.each_slice(settings[:packet_size]) do |slice|
begin
connection.write_points(slice)
rescue StandardError
end
end
end
rescue Errno::EADDRNOTAVAIL, SocketError => ex
Gitlab::EnvironmentLogger.error('Cannot resolve InfluxDB address. GitLab Performance Monitoring will not work.')
Gitlab::EnvironmentLogger.error(ex)
end
# Returns a copy of each metric hash with symbolized keys, after deleting
# blank tags and escaping the remaining tag values in its :tags hash.
def self.prepare_metrics(metrics)
metrics.map do |hash|
new_hash = hash.symbolize_keys
new_hash[:tags].each do |key, value|
if value.blank?
new_hash[:tags].delete(key)
else
new_hash[:tags][key] = escape_value(value)
end
end
new_hash
end
end
# Converts +value+ to a String and prefixes every '=' with a backslash.
def self.escape_value(value)
value.to_s.gsub('=', '\\=')
end
# Measures the execution time of a block.
#
# Example:
#
# Gitlab::Metrics.measure(:find_by_username_duration) do
# User.find_by_username(some_username)
# end
#
# name - The name of the field to store the execution time in.
#
# Returns the value yielded by the supplied block.
def self.measure(name)
trans = current_transaction
# Without a current transaction the block runs unmeasured.
return yield unless trans
real_start = Time.now.to_f
cpu_start = System.cpu_time
retval = yield
cpu_stop = System.cpu_time
real_stop = Time.now.to_f
# Wall-clock difference converted from seconds to milliseconds.
real_time = (real_stop - real_start) * 1000.0
cpu_time = cpu_stop - cpu_start
trans.increment("#{name}_real_time", real_time)
trans.increment("#{name}_cpu_time", cpu_time)
trans.increment("#{name}_call_count", 1)
retval
end
# Adds a tag to the current transaction (if any)
#
# name - The name of the tag to add.
# value - The value of the tag.
def self.tag_transaction(name, value)
trans = current_transaction
trans&.add_tag(name, value)
end
# Sets the action of the current transaction (if any)
#
# action - The name of the action.
def self.action=(action)
trans = current_transaction
trans&.action = action
end
# Tracks an event.
#
# See `Gitlab::Metrics::Transaction#add_event` for more details.
def self.add_event(*args)
trans = current_transaction
trans&.add_event(*args)
end
# Returns the prefix to use for the name of a series.
def self.series_prefix
@series_prefix ||= Sidekiq.server? ? 'sidekiq_' : 'rails_'
end
# Allow access from other metrics related middlewares
def self.current_transaction
Transaction.current
end
# When enabled this should be set before being used as the usual pattern
# "@foo ||= bar" is _not_ thread-safe.
if enabled?
@pool = ConnectionPool.new(size: settings[:pool_size], timeout: settings[:timeout]) do
host = settings[:host]
port = settings[:port]
InfluxDB::Client.
new(udp: { host: host, port: port })
end
# NOTE(review): the result of this expression is unused at this position;
# it looks like the body of a newer `enabled?` definition — confirm.
influx_metrics_enabled? || prometheus_metrics_enabled?
end
end
end
module Gitlab
  module Metrics
    # Helpers for collecting metrics in transactions and submitting them to
    # InfluxDB over UDP through a shared connection pool.
    module InfluxDb
      extend Gitlab::CurrentSettings
      extend self

      # Guards the lazy creation of the connection pool.
      MUTEX = Mutex.new
      private_constant :MUTEX

      # Returns settings[:enabled], or false when that value is nil/false.
      def influx_metrics_enabled?
        settings[:enabled] || false
      end

      RAILS_ROOT = Rails.root.to_s
      METRICS_ROOT = Rails.root.join('lib', 'gitlab', 'metrics').to_s
      PATH_REGEX = /^#{RAILS_ROOT}\/?/

      # Memoized hash of metrics values read from the current application
      # settings. `sample_interval` falls back to 15 and `packet_size` to 1.
      def settings
        @settings ||= {
          enabled: current_application_settings[:metrics_enabled],
          pool_size: current_application_settings[:metrics_pool_size],
          timeout: current_application_settings[:metrics_timeout],
          method_call_threshold: current_application_settings[:metrics_method_call_threshold],
          host: current_application_settings[:metrics_host],
          port: current_application_settings[:metrics_port],
          sample_interval: current_application_settings[:metrics_sample_interval] || 15,
          packet_size: current_application_settings[:metrics_packet_size] || 1
        }
      end

      # True when RUBY_ENGINE is 'ruby'.
      def mri?
        RUBY_ENGINE == 'ruby'
      end

      def method_call_threshold
        # This is memoized since this method is called for every instrumented
        # method. Loading data from an external cache on every method call slows
        # things down too much.
        @method_call_threshold ||= settings[:method_call_threshold]
      end

      # Prepares +metrics+ and writes them through a pooled connection in
      # slices of settings[:packet_size]. Nothing is written when there is no
      # pool. Errno::EADDRNOTAVAIL and SocketError are logged, not raised.
      def submit_metrics(metrics)
        prepared = prepare_metrics(metrics)

        pool&.with do |connection|
          prepared.each_slice(settings[:packet_size]) do |slice|
            begin
              connection.write_points(slice)
            rescue StandardError
              # NOTE(review): write errors are swallowed here, presumably so a
              # failed metrics write never breaks the caller — confirm.
            end
          end
        end
      rescue Errno::EADDRNOTAVAIL, SocketError => ex
        Gitlab::EnvironmentLogger.error('Cannot resolve InfluxDB address. GitLab Performance Monitoring will not work.')
        Gitlab::EnvironmentLogger.error(ex)
      end

      # Returns a copy of each metric hash with symbolized keys, after
      # deleting blank tags and escaping the remaining tag values in its
      # :tags hash.
      def prepare_metrics(metrics)
        metrics.map do |hash|
          new_hash = hash.symbolize_keys

          new_hash[:tags].each do |key, value|
            if value.blank?
              new_hash[:tags].delete(key)
            else
              new_hash[:tags][key] = escape_value(value)
            end
          end

          new_hash
        end
      end

      # Converts +value+ to a String and prefixes every '=' with a backslash.
      def escape_value(value)
        value.to_s.gsub('=', '\\=')
      end

      # Measures the execution time of a block.
      #
      # Example:
      #
      #     Gitlab::Metrics.measure(:find_by_username_duration) do
      #       User.find_by_username(some_username)
      #     end
      #
      # name - The name of the field to store the execution time in.
      #
      # Returns the value yielded by the supplied block.
      def measure(name)
        trans = current_transaction
        # Without a current transaction the block runs unmeasured.
        return yield unless trans

        # The monotonic clock is used for the elapsed real time so that
        # wall-clock adjustments cannot produce negative or inflated durations.
        real_start = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
        cpu_start = System.cpu_time

        retval = yield

        cpu_stop = System.cpu_time
        real_stop = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)

        real_time = real_stop - real_start
        cpu_time = cpu_stop - cpu_start

        trans.increment("#{name}_real_time", real_time)
        trans.increment("#{name}_cpu_time", cpu_time)
        trans.increment("#{name}_call_count", 1)

        retval
      end

      # Adds a tag to the current transaction (if any)
      #
      # name - The name of the tag to add.
      # value - The value of the tag.
      def tag_transaction(name, value)
        trans = current_transaction

        trans&.add_tag(name, value)
      end

      # Sets the action of the current transaction (if any)
      #
      # action - The name of the action.
      def action=(action)
        trans = current_transaction

        trans&.action = action
      end

      # Tracks an event.
      #
      # See `Gitlab::Metrics::Transaction#add_event` for more details.
      def add_event(*args)
        trans = current_transaction

        trans&.add_event(*args)
      end

      # Returns the prefix to use for the name of a series.
      def series_prefix
        @series_prefix ||= Sidekiq.server? ? 'sidekiq_' : 'rails_'
      end

      # Allow access from other metrics related middlewares
      def current_transaction
        Transaction.current
      end

      # When enabled this should be set before being used as the usual pattern
      # "@foo ||= bar" is _not_ thread-safe.
      #
      # Returns the connection pool, creating it under MUTEX on first use, or
      # nil when InfluxDB metrics are disabled.
      def pool
        return unless influx_metrics_enabled?

        if @pool.nil?
          MUTEX.synchronize do
            @pool ||= ConnectionPool.new(size: settings[:pool_size], timeout: settings[:timeout]) do
              host = settings[:host]
              port = settings[:port]

              InfluxDB::Client.
                new(udp: { host: host, port: port })
            end
          end
        end

        @pool
      end
    end
  end
end
module Gitlab
  module Metrics
    # Mocks ::Prometheus::Client::Metric and all derived metrics
    #
    # Every message that is not otherwise defined is accepted and answered
    # with nil, so callers can use an instance without checking whether real
    # metrics are available.
    class NullMetric
      # Accepts any call and returns nil.
      def method_missing(_name, *_args, &_block)
        nil
      end

      # Keeps `respond_to?` and `method` consistent with method_missing, which
      # handles every message.
      def respond_to_missing?(_name, _include_private = false)
        true
      end
    end
  end
end
require 'prometheus/client'
module Gitlab
  module Metrics
    # Helpers for obtaining Prometheus metrics from the client registry. When
    # Prometheus metrics are disabled every helper returns a NullMetric.
    module Prometheus
      include Gitlab::CurrentSettings

      # Whether Prometheus metrics are enabled in the application settings.
      #
      # The answer is memoized with `defined?` so that a disabled (false)
      # result is cached too; `||=` would re-read the settings on every call
      # whenever the value is false.
      def prometheus_metrics_enabled?
        return @prometheus_metrics_enabled if defined?(@prometheus_metrics_enabled)

        @prometheus_metrics_enabled = current_application_settings[:prometheus_metrics_enabled] || false
      end

      # Memoized Prometheus client registry.
      def registry
        @registry ||= ::Prometheus::Client.registry
      end

      # Each helper below returns the already-provided metric for +name+ when
      # there is one, and registers a new metric otherwise.
      def counter(name, docstring, base_labels = {})
        provide_metric(name) || registry.counter(name, docstring, base_labels)
      end

      def summary(name, docstring, base_labels = {})
        provide_metric(name) || registry.summary(name, docstring, base_labels)
      end

      def gauge(name, docstring, base_labels = {})
        provide_metric(name) || registry.gauge(name, docstring, base_labels)
      end

      def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS)
        provide_metric(name) || registry.histogram(name, docstring, base_labels, buckets)
      end

      # Returns the result of `registry.get(name)` when metrics are enabled,
      # or a new NullMetric when they are disabled.
      def provide_metric(name)
        if prometheus_metrics_enabled?
          registry.get(name)
        else
          NullMetric.new
        end
      end
    end
  end
end
......@@ -54,43 +54,4 @@ describe HealthController do
end
end
end
describe '#metrics' do
context 'authorization token provided' do
before do
request.headers['TOKEN'] = token
end