From ca46a1a7100aa7b85e9b33fba37186f2023f4277 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Thu, 30 Mar 2017 15:02:46 +0200 Subject: [PATCH 01/14] Add /-/readiness /-/liveness and /-/health_metrics endpoint --- .../concerns/requires_health_token.rb | 25 ++++ app/controllers/health_check_controller.rb | 21 +-- app/controllers/health_controller.rb | 67 ++++++++++ config/routes.rb | 4 + .../health_checks/base_abstract_check.rb | 41 ++++++ lib/gitlab/health_checks/db_check.rb | 25 ++++ lib/gitlab/health_checks/fs_shards_check.rb | 126 ++++++++++++++++++ lib/gitlab/health_checks/metric.rb | 3 + lib/gitlab/health_checks/redis_check.rb | 25 ++++ lib/gitlab/health_checks/result.rb | 3 + .../health_checks/simple_abstract_check.rb | 57 ++++++++ spec/controllers/health_controller_spec.rb | 101 ++++++++++++++ spec/lib/gitlab/healthchecks/db_check_spec.rb | 6 + .../healthchecks/fs_shards_check_spec.rb | 113 ++++++++++++++++ .../gitlab/healthchecks/redis_check_spec.rb | 6 + .../healthchecks/simple_check_shared.rb | 66 +++++++++ 16 files changed, 669 insertions(+), 20 deletions(-) create mode 100644 app/controllers/concerns/requires_health_token.rb create mode 100644 app/controllers/health_controller.rb create mode 100644 lib/gitlab/health_checks/base_abstract_check.rb create mode 100644 lib/gitlab/health_checks/db_check.rb create mode 100644 lib/gitlab/health_checks/fs_shards_check.rb create mode 100644 lib/gitlab/health_checks/metric.rb create mode 100644 lib/gitlab/health_checks/redis_check.rb create mode 100644 lib/gitlab/health_checks/result.rb create mode 100644 lib/gitlab/health_checks/simple_abstract_check.rb create mode 100644 spec/controllers/health_controller_spec.rb create mode 100644 spec/lib/gitlab/healthchecks/db_check_spec.rb create mode 100644 spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb create mode 100644 spec/lib/gitlab/healthchecks/redis_check_spec.rb create mode 100644 spec/lib/gitlab/healthchecks/simple_check_shared.rb diff --git a/app/controllers/concerns/requires_health_token.rb b/app/controllers/concerns/requires_health_token.rb new file mode 100644 index 00000000000..34ab1a97649 --- /dev/null +++ b/app/controllers/concerns/requires_health_token.rb @@ -0,0 +1,25 @@ +module RequiresHealthToken + extend ActiveSupport::Concern + included do + before_action :validate_health_check_access! + end + + private + + def validate_health_check_access! + render_404 unless token_valid? + end + + def token_valid? + token = params[:token].presence || request.headers['TOKEN'] + token.present? && + ActiveSupport::SecurityUtils.variable_size_secure_compare( + token, + current_application_settings.health_check_access_token + ) + end + + def render_404 + render file: Rails.root.join('public', '404'), layout: false, status: '404' + end +end diff --git a/app/controllers/health_check_controller.rb b/app/controllers/health_check_controller.rb index 037da7d2bce..5d3109b7187 100644 --- a/app/controllers/health_check_controller.rb +++ b/app/controllers/health_check_controller.rb @@ -1,22 +1,3 @@ class HealthCheckController < HealthCheck::HealthCheckController - before_action :validate_health_check_access! - - private - - def validate_health_check_access! - render_404 unless token_valid? - end - - def token_valid? - token = params[:token].presence || request.headers['TOKEN'] - token.present? && - ActiveSupport::SecurityUtils.variable_size_secure_compare( - token, - current_application_settings.health_check_access_token - ) - end - - def render_404 - render file: Rails.root.join('public', '404'), layout: false, status: '404' - end + include RequiresHealthToken end diff --git a/app/controllers/health_controller.rb b/app/controllers/health_controller.rb new file mode 100644 index 00000000000..20ddb06eb7a --- /dev/null +++ b/app/controllers/health_controller.rb @@ -0,0 +1,67 @@ +class HealthController < ActionController::Base + include RequiresHealthToken + + attr_reader :checks + + def initialize + @checks = [ + Gitlab::HealthChecks::DbCheck, + Gitlab::HealthChecks::RedisCheck, + Gitlab::HealthChecks::FsShardsCheck, + ] + end + + def readiness + results = checks.map { |check| [check_name(check), check.readiness] } + + render_check_results(results) + end + + def liveness + results = checks.map { |check| [check_name(check), check.liveness] } + + render_check_results(results) + end + + def metrics + results = checks.flat_map(&:metrics) + + response = results.map(&method(:metric_to_prom_line)).join("\n") + + render text: response, content_type: 'text/plain; version=0.0.4' + end + + private + + def metric_to_prom_line(metric) + labels = metric.labels&.map { |key, value| "#{key}=\"#{value}\"" }&.join(',') || '' + if labels.empty? + "#{metric.name} #{metric.value}" + else + "#{metric.name}{#{labels}} #{metric.value}" + end + end + + def render_check_results(results) + flattened = results.flat_map do |name, result| + if result.is_a?(Gitlab::HealthChecks::Result) + [[name, result]] + else + result.map { |r| [name, r] } + end + end + success = flattened.all? { |name, r| r.success } + + response = flattened.map do |name, r| + info = { status: r.success ? 'ok' : 'failed' } + info['message'] = r.message if r.message + info[:labels] = r.labels if r.labels + [name, info] + end + render json: response.to_h, status: success ? :ok : :service_unavailable + end + + def check_name(check) + check.name.demodulize.underscore + end +end diff --git a/config/routes.rb b/config/routes.rb index 1a851da6203..2e60af02cd3 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -39,6 +39,10 @@ Rails.application.routes.draw do # Health check get 'health_check(/:checks)' => 'health_check#index', as: :health_check + get '-/readiness' => 'health#readiness' + get '-/liveness' => 'health#liveness' + get '-/health_metrics' => 'health#metrics' + # Koding route get 'koding' => 'koding#index' diff --git a/lib/gitlab/health_checks/base_abstract_check.rb b/lib/gitlab/health_checks/base_abstract_check.rb new file mode 100644 index 00000000000..edfc9ba9784 --- /dev/null +++ b/lib/gitlab/health_checks/base_abstract_check.rb @@ -0,0 +1,41 @@ +module Gitlab + module HealthChecks + module BaseAbstractCheck + def readiness + raise NotImplementedError.new + end + + def liveness + HealthChecks::Result.new(true) + end + + def metrics + [] + end + + protected + + def metric(name, value, **labels) + Metric.new(name, value, labels) + end + + def with_timing(proc) + start = Time.now + result = proc.call + yield result, Time.now.to_f - start.to_f + end + + def catch_timeout(seconds, &block) + begin + Timeout::timeout(seconds.to_i, &block) + rescue Timeout::Error => ex + ex + end + end + + def is_timeout(obj) + obj.is_a? Timeout::Error + end + end + end +end diff --git a/lib/gitlab/health_checks/db_check.rb b/lib/gitlab/health_checks/db_check.rb new file mode 100644 index 00000000000..a0c79c34246 --- /dev/null +++ b/lib/gitlab/health_checks/db_check.rb @@ -0,0 +1,25 @@ +module Gitlab + module HealthChecks + class DbCheck + extend SimpleAbstractCheck + + class << self + private + + def metric_prefix + 'db_ping' + end + + def is_successful?(result) + result == '1' + end + + def check + catch_timeout 10.seconds do + ActiveRecord::Base.connection.execute('SELECT 1 as ping')&.first&.[]('ping') + end + end + end + end + end +end diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb new file mode 100644 index 00000000000..dfb8c75b031 --- /dev/null +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -0,0 +1,126 @@ +module Gitlab + module HealthChecks + class FsShardsCheck + extend BaseAbstractCheck + + class << self + def readiness + res = [] + + repository_storages.each do |storage_name| + begin + tmp_file_path = tmp_file_path(storage_name) + res << + if !storage_stat_test(storage_name) + HealthChecks::Result.new(false, 'cannot stat storage', shard: storage_name) + elsif !storage_write_test(tmp_file_path) + HealthChecks::Result.new(false, 'cannot write to storage', shard: storage_name) + elsif !storage_read_test(tmp_file_path) + HealthChecks::Result.new(false, 'cannot read from storage', shard: storage_name) + else + HealthChecks::Result.new(true, nil, shard: storage_name) + end + rescue RuntimeError => ex + message = "unexpected error #{ex} when checking storage #{storage_name}" + Rails.logger.error(message) + res << HealthChecks::Result.new(false, message, shard: storage_name) + ensure + delete_test_file(tmp_file_path) + end + end + res + end + + def metrics + metrics = [] + repository_storages.each do |storage_name| + metrics.concat(operation_metrics('filesystem_accessible', 'filesystem_access_latency', -> { storage_stat_test(storage_name) }, shard: storage_name)) + + tmp_file_path = tmp_file_path(storage_name) + + metrics.concat(operation_metrics('filesystem_writable', 'filesystem_write_latency', -> { storage_write_test(tmp_file_path) }, shard: storage_name)) + metrics.concat(operation_metrics('filesystem_readable', 'filesystem_read_latency', -> { storage_read_test(tmp_file_path) }, shard: storage_name)) + end + metrics + end + + private + + RANDOM_STRING = rand(36**1000).to_s(36).freeze + + def operation_metrics(ok_metric, latency_metric, operation, **labels) + metrics = [] + with_timing operation do |result, elapsed| + metrics << metric(latency_metric, elapsed, **labels) + + metrics << if result + metric(ok_metric, 1, **labels) + else + metric(ok_metric, 0, **labels) + end + end + metrics + rescue RuntimeError => ex + Rails.logger("unexpected error #{ex} when checking #{ok_metric}") + metrics << metric(ok_metric, 0, **labels) + end + + def repository_storages + Gitlab::CurrentSettings.current_application_settings.repository_storages + end + + def storages_paths + Gitlab.config.repositories.storages + end + + def with_timeout(args) + args + end + + def tmp_file_path(storage_name) + Dir::Tmpname.create(['fs_shards_check', '+deleted'], path(storage_name)) { |path| path } + end + + def path(storage_name) + storages_paths&.[](storage_name)&.[]('path') + end + + def storage_stat_test(storage_name) + stat_path = File.join(path(storage_name), '.') + begin + _, status = Gitlab::Popen.popen(with_timeout(%W{ stat #{stat_path} })) + status == 0 + rescue Errno::ENOENT + File::Stat.new(stat_path).readable? + end + end + + def storage_write_test(tmp_path) + _, status = Gitlab::Popen.popen(with_timeout(%W{ tee #{tmp_path} })) do |stdin| + stdin.write(RANDOM_STRING) + end + status == 0 + rescue Errno::ENOENT + written_bytes = File.open(tmp_path, 'w') { |f| f.write(RANDOM_STRING) } rescue Errno::ENOENT + written_bytes == RANDOM_STRING.length + end + + def storage_read_test(tmp_path) + _, status = Gitlab::Popen.popen(with_timeout(%W{ diff #{tmp_path} - })) do |stdin| + stdin.write(RANDOM_STRING) + end + status == 0 + rescue Errno::ENOENT + File.open(tmp_path, 'r') { |f| f.read } rescue Errno::ENOENT == RANDOM_STRING + end + + def delete_test_file(tmp_path) + _, status = Gitlab::Popen.popen(with_timeout(%W{ rm -f #{tmp_path} })) + status == 0 + rescue Errno::ENOENT + File.delete(tmp_path, 'r') rescue Errno::ENOENT + end + end + end + end +end \ No newline at end of file diff --git a/lib/gitlab/health_checks/metric.rb b/lib/gitlab/health_checks/metric.rb new file mode 100644 index 00000000000..e812f1d8b75 --- /dev/null +++ b/lib/gitlab/health_checks/metric.rb @@ -0,0 +1,3 @@ +module Gitlab::HealthChecks + Metric = Struct.new(:name, :value, :labels) +end \ No newline at end of file diff --git a/lib/gitlab/health_checks/redis_check.rb b/lib/gitlab/health_checks/redis_check.rb new file mode 100644 index 00000000000..f65b26ff4d0 --- /dev/null +++ b/lib/gitlab/health_checks/redis_check.rb @@ -0,0 +1,25 @@ +module Gitlab + module HealthChecks + class RedisCheck + extend SimpleAbstractCheck + + private + + class << self + def metric_prefix + 'redis_ping' + end + + def is_successful?(result) + result == 'PONG' + end + + def check + catch_timeout 10.seconds do + Gitlab::Redis.with { |r| r.ping } + end + end + end + end + end +end diff --git a/lib/gitlab/health_checks/result.rb b/lib/gitlab/health_checks/result.rb new file mode 100644 index 00000000000..fd38896ba6d --- /dev/null +++ b/lib/gitlab/health_checks/result.rb @@ -0,0 +1,3 @@ +module Gitlab::HealthChecks + Result = Struct.new(:success, :message, :labels) +end \ No newline at end of file diff --git a/lib/gitlab/health_checks/simple_abstract_check.rb b/lib/gitlab/health_checks/simple_abstract_check.rb new file mode 100644 index 00000000000..3938815ba03 --- /dev/null +++ b/lib/gitlab/health_checks/simple_abstract_check.rb @@ -0,0 +1,57 @@ +module Gitlab + module HealthChecks + module SimpleAbstractCheck + include BaseAbstractCheck + + def readiness + check_result = check + if is_successful?(check_result) + HealthChecks::Result.new(true) + elsif check_result.is_a? Timeout::Error + HealthChecks::Result.new(false, "#{check_name} check timed out") + else + HealthChecks::Result.new(false, "unexpected #{check_name} check result: #{check_result}") + end + end + + def metrics + metrics = [] + with_timing method(:check) do |result, elapsed| + metrics << if result.is_a? Timeout::Error + metric("#{metric_prefix}_timeout", 1) + else + metric("#{metric_prefix}_timeout", 0) + end + + metrics << if is_successful?(result) + metric("#{metric_prefix}_success", 1) + else + Rails.logger.error "#{check_name} check returned unexpected result #{result}" + metric("#{metric_prefix}_success", 0) + end + + metrics << metric("#{metric_prefix}_latency", elapsed) + end + metrics + end + + private + + def check_name + self.name.demodulize.underscore.sub(/_[^_]*$/, '').capitalize + end + + def metric_prefix + raise NotImplementedError.new + end + + def is_successful?(result) + raise NotImplementedError.new + end + + def check + raise NotImplementedError.new + end + end + end +end diff --git a/spec/controllers/health_controller_spec.rb b/spec/controllers/health_controller_spec.rb new file mode 100644 index 00000000000..6e4deae1b21 --- /dev/null +++ b/spec/controllers/health_controller_spec.rb @@ -0,0 +1,101 @@ +require 'spec_helper' + +describe HealthController do + + include StubENV + + let(:token) { current_application_settings.health_check_access_token } + let(:json_response) { JSON.parse(response.body) } + let(:xml_response) { Hash.from_xml(response.body)['hash'] } + + before do + stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'false') + end + + describe '#readiness' do + + context 'authorization token provided' do + before do + request.headers['TOKEN'] = token + end + + it 'returns proper response' do + get :readiness + expect(json_response['db_check']['status']).to eq('ok') + expect(json_response['redis_check']['status']).to eq('ok') + expect(json_response['fs_shards_check']['status']).to eq('ok') + expect(json_response['fs_shards_check']['labels']['shard']).to eq('default') + end + end + + context 'without authorization token' do + it 'returns proper response' do + get :readiness + expect(response.status).to eq(404) + end + end + end + + describe '#liveness' do + + context 'authorization token provided' do + before do + request.headers['TOKEN'] = token + end + + it 'returns proper response' do + get :liveness + expect(json_response['db_check']['status']).to eq('ok') + expect(json_response['redis_check']['status']).to eq('ok') + expect(json_response['fs_shards_check']['status']).to eq('ok') + end + end + + context 'without authorization token' do + it 'returns proper response' do + get :liveness + expect(response.status).to eq(404) + end + end + end + + describe '#metrics' do + + context 'authorization token provided' do + before do + request.headers['TOKEN'] = token + end + + it 'returns DB ping metrics' do + get :metrics + expect(response.body).to match(/^db_ping_timeout 0$/) + expect(response.body).to match(/^db_ping_success 1$/) + expect(response.body).to match(/^db_ping_latency [0-9\.]+$/) + end + + it 'returns Redis ping metrics' do + get :metrics + expect(response.body).to match(/^redis_ping_timeout 0$/) + expect(response.body).to match(/^redis_ping_success 1$/) + expect(response.body).to match(/^redis_ping_latency [0-9\.]+$/) + end + + it 'returns file system check metrics' do + get :metrics + expect(response.body).to match(/^filesystem_access_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_accessible{shard="default"} 1$/) + expect(response.body).to match(/^filesystem_write_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_writable{shard="default"} 1$/) + expect(response.body).to match(/^filesystem_read_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_readable{shard="default"} 1$/) + end + end + + context 'without authorization token' do + it 'returns proper response' do + get :metrics + expect(response.status).to eq(404) + end + end + end +end diff --git a/spec/lib/gitlab/healthchecks/db_check_spec.rb b/spec/lib/gitlab/healthchecks/db_check_spec.rb new file mode 100644 index 00000000000..33c6c24449c --- /dev/null +++ b/spec/lib/gitlab/healthchecks/db_check_spec.rb @@ -0,0 +1,6 @@ +require 'spec_helper' +require_relative './simple_check_shared' + +describe Gitlab::HealthChecks::DbCheck do + include_examples 'simple_check', 'db_ping', 'Db', '1' +end diff --git a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb new file mode 100644 index 00000000000..edace1ca8fd --- /dev/null +++ b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb @@ -0,0 +1,113 @@ +require 'spec_helper' + +describe Gitlab::HealthChecks::FsShardsCheck do + let(:metric_class) { Gitlab::HealthChecks::Metric } + let(:result_class) { Gitlab::HealthChecks::Result } + let(:repository_storages) { [:default] } + let(:tmp_dir) { Dir::mktmpdir } + + let(:storages_paths) do + { + default: { path: tmp_dir } + }.with_indifferent_access + end + + before do + allow(described_class).to receive(:repository_storages) { repository_storages } + allow(described_class).to receive(:storages_paths) { storages_paths } + end + + after do + FileUtils.remove_entry_secure(tmp_dir) if Dir.exists?(tmp_dir) + end + + describe '#readiness' do + subject { described_class.readiness } + + context 'storage points to not existing folder' do + let(:storages_paths) do + { + default: { path: 'tmp/this/path/doesnt/exist' } + }.with_indifferent_access + end + + it { is_expected.to include(result_class.new(false, 'cannot stat storage', shard: :default)) } + end + + context 'storage points to directory which is not writeble' do + before do + FileUtils.chmod_R(0555, tmp_dir) + end + + it { is_expected.to include(result_class.new(false, 'cannot write to storage', shard: :default)) } + end + + context 'storage points to directory that has both read and write rights' do + before do + FileUtils.chmod_R(0755, tmp_dir) + end + + it { is_expected.to include(result_class.new(true, nil, shard: :default)) } + + it 'cleans up files used for testing' do + expect(described_class).to receive(:storage_write_test).with(any_args).and_call_original + + subject + + expect(Dir.entries(tmp_dir).count).to eq(2) + end + + context 'read test fails' do + before do + allow(described_class).to receive(:storage_read_test).with(any_args).and_return(false) + end + + it { is_expected.to include(result_class.new(false, 'cannot read from storage', shard: :default)) } + end + end + end + + describe '#metrics' do + subject { described_class.metrics } + + context 'storage points to not existing folder' do + let(:storages_paths) do + { + default: { path: 'tmp/this/path/doesnt/exist' } + }.with_indifferent_access + end + + it { is_expected.to include(metric_class.new('filesystem_accessible', 0, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_readable', 0, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_writable', 0, shard: :default)) } + + it { is_expected.to include(have_attributes(name: 'filesystem_access_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: 'filesystem_read_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: 'filesystem_write_latency', value: be > 0, labels: { shard: :default })) } + end + + context 'storage points to directory which is not writeble' do + before do + FileUtils.chmod_R(0555, tmp_dir) + end + + it { is_expected.to include(metric_class.new('filesystem_accessible', 1, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_readable', 0, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_writable', 0, shard: :default)) } + end + + context 'storage points to directory that has both read and write rights' do + before do + FileUtils.chmod_R(0755, tmp_dir) + end + + it { is_expected.to include(metric_class.new('filesystem_accessible', 1, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_readable', 1, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_writable', 1, shard: :default)) } + + it { is_expected.to include(have_attributes(name: 'filesystem_access_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: 'filesystem_read_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: 'filesystem_write_latency', value: be > 0, labels: { shard: :default })) } + end + end +end diff --git a/spec/lib/gitlab/healthchecks/redis_check_spec.rb b/spec/lib/gitlab/healthchecks/redis_check_spec.rb new file mode 100644 index 00000000000..734cdcb893e --- /dev/null +++ b/spec/lib/gitlab/healthchecks/redis_check_spec.rb @@ -0,0 +1,6 @@ +require 'spec_helper' +require_relative './simple_check_shared' + +describe Gitlab::HealthChecks::RedisCheck do + include_examples 'simple_check', 'redis_ping', 'Redis', 'PONG' +end diff --git a/spec/lib/gitlab/healthchecks/simple_check_shared.rb b/spec/lib/gitlab/healthchecks/simple_check_shared.rb new file mode 100644 index 00000000000..1fa6d0faef9 --- /dev/null +++ b/spec/lib/gitlab/healthchecks/simple_check_shared.rb @@ -0,0 +1,66 @@ +shared_context 'simple_check' do |metrics_prefix, check_name, success_result| + describe '#metrics' do + subject { described_class.metrics } + context 'Check is passing' do + before do + allow(described_class).to receive(:check).and_return success_result + end + + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 1)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be > 0)) } + end + + context 'Check is misbehaving' do + before do + allow(described_class).to receive(:check).and_return 'error!' + end + + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be > 0)) } + end + + context 'Check is timeouting' do + before do + allow(described_class).to receive(:check).and_return Timeout::Error.new + end + + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 1)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be > 0)) } + end + end + + describe '#readiness' do + subject { described_class.readiness } + context 'Check returns ok' do + before do + allow(described_class).to receive(:check).and_return success_result + end + + it { is_expected.to have_attributes(success: true) } + end + + context 'Check is misbehaving' do + before do + allow(described_class).to receive(:check).and_return 'error!' + end + + it { is_expected.to have_attributes(success: false, message: "unexpected #{check_name} check result: error!") } + end + + context 'Check is timeouting' do + before do + allow(described_class).to receive(:check ).and_return Timeout::Error.new + end + + it { is_expected.to have_attributes(success: false, message: "#{check_name} check timed out") } + end + end + + describe '#liveness' do + subject { described_class.readiness } + it { is_expected.to eq(Gitlab::HealthChecks::Result.new(true)) } + end +end -- 2.24.1 From d76d66b6b4503ae76db0d6b438d7743ccece0082 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Tue, 4 Apr 2017 13:32:26 +0200 Subject: [PATCH 02/14] Fix rubocop warnings --- lib/gitlab/health_checks/base_abstract_check.rb | 2 +- lib/gitlab/health_checks/fs_shards_check.rb | 2 +- lib/gitlab/health_checks/metric.rb | 2 +- lib/gitlab/health_checks/redis_check.rb | 4 ++-- lib/gitlab/health_checks/result.rb | 2 +- spec/controllers/health_controller_spec.rb | 5 ----- spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb | 4 ++-- 7 files changed, 8 insertions(+), 13 deletions(-) diff --git a/lib/gitlab/health_checks/base_abstract_check.rb b/lib/gitlab/health_checks/base_abstract_check.rb index edfc9ba9784..0e8dd84e049 100644 --- a/lib/gitlab/health_checks/base_abstract_check.rb +++ b/lib/gitlab/health_checks/base_abstract_check.rb @@ -27,7 +27,7 @@ module Gitlab def catch_timeout(seconds, &block) begin - Timeout::timeout(seconds.to_i, &block) + Timeout.timeout(seconds.to_i, &block) rescue Timeout::Error => ex ex end diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index dfb8c75b031..ead0b2ed281 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -123,4 +123,4 @@ module Gitlab end end end -end \ No newline at end of file +end diff --git a/lib/gitlab/health_checks/metric.rb b/lib/gitlab/health_checks/metric.rb index e812f1d8b75..1a2eab0b005 100644 --- a/lib/gitlab/health_checks/metric.rb +++ b/lib/gitlab/health_checks/metric.rb @@ -1,3 +1,3 @@ module Gitlab::HealthChecks Metric = Struct.new(:name, :value, :labels) -end \ No newline at end of file +end diff --git a/lib/gitlab/health_checks/redis_check.rb b/lib/gitlab/health_checks/redis_check.rb index f65b26ff4d0..cede4702c62 100644 --- a/lib/gitlab/health_checks/redis_check.rb +++ b/lib/gitlab/health_checks/redis_check.rb @@ -3,9 +3,9 @@ module Gitlab class RedisCheck extend SimpleAbstractCheck - private - class << self + private + def metric_prefix 'redis_ping' end diff --git a/lib/gitlab/health_checks/result.rb b/lib/gitlab/health_checks/result.rb index fd38896ba6d..8086760023e 100644 --- a/lib/gitlab/health_checks/result.rb +++ b/lib/gitlab/health_checks/result.rb @@ -1,3 +1,3 @@ module Gitlab::HealthChecks Result = Struct.new(:success, :message, :labels) -end \ No newline at end of file +end diff --git a/spec/controllers/health_controller_spec.rb b/spec/controllers/health_controller_spec.rb index 6e4deae1b21..b8b6e0c3a88 100644 --- a/spec/controllers/health_controller_spec.rb +++ b/spec/controllers/health_controller_spec.rb @@ -1,19 +1,16 @@ require 'spec_helper' describe HealthController do - include StubENV let(:token) { current_application_settings.health_check_access_token } let(:json_response) { JSON.parse(response.body) } - let(:xml_response) { Hash.from_xml(response.body)['hash'] } before do stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'false') end describe '#readiness' do - context 'authorization token provided' do before do request.headers['TOKEN'] = token @@ -37,7 +34,6 @@ describe HealthController do end describe '#liveness' do - context 'authorization token provided' do before do request.headers['TOKEN'] = token @@ -60,7 +56,6 @@ describe HealthController do end describe '#metrics' do - context 'authorization token provided' do before do request.headers['TOKEN'] = token diff --git a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb index edace1ca8fd..a29f340f32c 100644 --- a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb +++ b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb @@ -4,7 +4,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do let(:metric_class) { Gitlab::HealthChecks::Metric } let(:result_class) { Gitlab::HealthChecks::Result } let(:repository_storages) { [:default] } - let(:tmp_dir) { Dir::mktmpdir } + let(:tmp_dir) { Dir.mktmpdir } let(:storages_paths) do { @@ -18,7 +18,7 @@ describe Gitlab::HealthChecks::FsShardsCheck do end after do - FileUtils.remove_entry_secure(tmp_dir) if Dir.exists?(tmp_dir) + FileUtils.remove_entry_secure(tmp_dir) if Dir.exist?(tmp_dir) end describe '#readiness' do -- 2.24.1 From 7b8f11466f85a6d96ce80a7b95138d7cd659f927 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Tue, 4 Apr 2017 14:58:14 +0200 Subject: [PATCH 03/14] Check also fallback methods --- lib/gitlab/health_checks/fs_shards_check.rb | 4 +- .../healthchecks/fs_shards_check_spec.rb | 145 ++++++++++-------- 2 files changed, 87 insertions(+), 62 deletions(-) diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index ead0b2ed281..7756a85b835 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -91,7 +91,7 @@ module Gitlab _, status = Gitlab::Popen.popen(with_timeout(%W{ stat #{stat_path} })) status == 0 rescue Errno::ENOENT - File::Stat.new(stat_path).readable? + File.exist?(stat_path) && File::Stat.new(stat_path).readable? end end @@ -123,4 +123,4 @@ module Gitlab end end end -end +end \ No newline at end of file diff --git a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb index a29f340f32c..935277e6074 100644 --- a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb +++ b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb @@ -21,93 +21,118 @@ describe Gitlab::HealthChecks::FsShardsCheck do FileUtils.remove_entry_secure(tmp_dir) if Dir.exist?(tmp_dir) end - describe '#readiness' do - subject { described_class.readiness } - - context 'storage points to not existing folder' do - let(:storages_paths) do - { - default: { path: 'tmp/this/path/doesnt/exist' } - }.with_indifferent_access + shared_examples 'filesystem checks' do + describe '#readiness' do + subject { described_class.readiness } + + context 'storage points to not existing folder' do + let(:storages_paths) do + { + default: { path: 'tmp/this/path/doesnt/exist' } + }.with_indifferent_access + end + + it { is_expected.to include(result_class.new(false, 'cannot stat storage', shard: :default)) } end - it { is_expected.to include(result_class.new(false, 'cannot stat storage', shard: :default)) } - end + context 'storage points to directory which is not writeble' do + before do + FileUtils.chmod_R(0555, tmp_dir) + end - context 'storage points to directory which is not writeble' do - before do - FileUtils.chmod_R(0555, tmp_dir) + it { is_expected.to include(result_class.new(false, 'cannot write to storage', shard: :default)) } end - it { is_expected.to include(result_class.new(false, 'cannot write to storage', shard: :default)) } - end + context 'storage points to directory that has both read and write rights' do + before do + FileUtils.chmod_R(0755, tmp_dir) + end - context 'storage points to directory that has both read and write rights' do - before do - FileUtils.chmod_R(0755, tmp_dir) - end + it { is_expected.to include(result_class.new(true, nil, shard: :default)) } + + it 'cleans up files used for testing' do + expect(described_class).to receive(:storage_write_test).with(any_args).and_call_original - it { is_expected.to include(result_class.new(true, nil, shard: :default)) } + subject - it 'cleans up files used for testing' do - expect(described_class).to receive(:storage_write_test).with(any_args).and_call_original + expect(Dir.entries(tmp_dir).count).to eq(2) + end - subject + context 'read test fails' do + before do + allow(described_class).to receive(:storage_read_test).with(any_args).and_return(false) + end - expect(Dir.entries(tmp_dir).count).to eq(2) + it { is_expected.to include(result_class.new(false, 'cannot read from storage', shard: :default)) } + end end + end - context 'read test fails' do - before do - allow(described_class).to receive(:storage_read_test).with(any_args).and_return(false) + describe '#metrics' do + subject { described_class.metrics } + + context 'storage points to not existing folder' do + let(:storages_paths) do + { + default: { path: 'tmp/this/path/doesnt/exist' } + }.with_indifferent_access end - it { is_expected.to include(result_class.new(false, 'cannot read from storage', shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_accessible', 0, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_readable', 0, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_writable', 0, shard: :default)) } + + it { is_expected.to include(have_attributes(name: 'filesystem_access_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: 'filesystem_read_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: 'filesystem_write_latency', value: be > 0, labels: { shard: :default })) } end - end - end - describe '#metrics' do - subject { described_class.metrics } + context 'storage points to directory which is not writeble' do + before do + FileUtils.chmod_R(0555, tmp_dir) + end - context 'storage points to not existing folder' do - let(:storages_paths) do - { - default: { path: 'tmp/this/path/doesnt/exist' } - }.with_indifferent_access + it { is_expected.to include(metric_class.new('filesystem_accessible', 1, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_readable', 0, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_writable', 0, shard: :default)) } end - it { is_expected.to include(metric_class.new('filesystem_accessible', 0, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_readable', 0, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_writable', 0, shard: :default)) } + context 'storage points to directory that has both read and write rights' do + before do + FileUtils.chmod_R(0755, tmp_dir) + end - it { is_expected.to include(have_attributes(name: 'filesystem_access_latency', value: be > 0, labels: { shard: :default })) } - it { is_expected.to include(have_attributes(name: 'filesystem_read_latency', value: be > 0, labels: { shard: :default })) } - it { is_expected.to include(have_attributes(name: 'filesystem_write_latency', value: be > 0, labels: { shard: :default })) } - end + it { is_expected.to include(metric_class.new('filesystem_accessible', 1, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_readable', 1, shard: :default)) } + it { is_expected.to include(metric_class.new('filesystem_writable', 1, shard: :default)) } - context 'storage points to directory which is not writeble' do - before do - FileUtils.chmod_R(0555, tmp_dir) + it { is_expected.to include(have_attributes(name: 'filesystem_access_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: 'filesystem_read_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: 'filesystem_write_latency', value: be > 0, labels: { shard: :default })) } end - - it { is_expected.to include(metric_class.new('filesystem_accessible', 1, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_readable', 0, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_writable', 0, shard: :default)) } end + end - context 'storage points to directory that has both read and write rights' do - before do - FileUtils.chmod_R(0755, tmp_dir) + context 'when popen always finds required binaries' do + before do + allow(Gitlab::Popen).to receive(:popen).and_wrap_original do |method, *args, &block| + begin + method.call(*args, &block) + rescue RuntimeError => ex + + raise RuntimeError.new('expected not to happen') + end end + end - it { is_expected.to include(metric_class.new('filesystem_accessible', 1, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_readable', 1, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_writable', 1, shard: :default)) } + it_behaves_like 'filesystem checks' + end - it { is_expected.to include(have_attributes(name: 'filesystem_access_latency', value: be > 0, labels: { shard: :default })) } - it { is_expected.to include(have_attributes(name: 'filesystem_read_latency', value: be > 0, labels: { shard: :default })) } - it { is_expected.to include(have_attributes(name: 'filesystem_write_latency', value: be > 0, labels: { shard: :default })) } + context 'when popen never finds required binaries' do + before do + allow(Gitlab::Popen).to receive(:popen).and_raise(Errno::ENOENT) end + + it_behaves_like 'filesystem checks' end end -- 2.24.1 From 7d1b124988d27d9f38bb5bb040240714fdce01db Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Tue, 4 Apr 2017 15:02:12 +0200 Subject: [PATCH 04/14] Enable timeouts --- lib/gitlab/health_checks/fs_shards_check.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index 7756a85b835..0482db67ffd 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -74,7 +74,7 @@ module Gitlab end def with_timeout(args) - args + %w{timeout 1}.concat(args) end def tmp_file_path(storage_name) @@ -118,7 +118,7 @@ module Gitlab _, status = Gitlab::Popen.popen(with_timeout(%W{ rm -f #{tmp_path} })) status == 0 rescue Errno::ENOENT - File.delete(tmp_path, 'r') rescue Errno::ENOENT + File.delete(tmp_path) rescue Errno::ENOENT end end end -- 2.24.1 From 6b1a5bf8253a534e439303e871ff4286ec7d93e2 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Wed, 5 Apr 2017 10:30:14 +0200 Subject: [PATCH 05/14] Rubocop fixes --- lib/gitlab/health_checks/fs_shards_check.rb | 2 +- spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index 0482db67ffd..d9a42074e9b 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -123,4 +123,4 @@ module Gitlab end end end -end \ No newline at end of file +end diff --git a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb index 935277e6074..df132ddefce 100644 --- a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb +++ b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb @@ -118,9 +118,8 @@ describe Gitlab::HealthChecks::FsShardsCheck do allow(Gitlab::Popen).to receive(:popen).and_wrap_original do |method, *args, &block| begin method.call(*args, &block) - rescue RuntimeError => ex - - raise RuntimeError.new('expected not to happen') + rescue RuntimeError + raise 'expected not to happen' end end end -- 2.24.1 From 2c33f918b3295427f5258b34d802282466d8282a Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Wed, 5 Apr 2017 13:16:58 +0200 Subject: [PATCH 06/14] add protect_from_forgery to appease brakeman --- app/controllers/health_controller.rb | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/app/controllers/health_controller.rb b/app/controllers/health_controller.rb index 20ddb06eb7a..f53a71653df 100644 --- a/app/controllers/health_controller.rb +++ b/app/controllers/health_controller.rb @@ -1,30 +1,27 @@ class HealthController < ActionController::Base + protect_from_forgery with: :exception include RequiresHealthToken - attr_reader :checks - - def initialize - @checks = [ - Gitlab::HealthChecks::DbCheck, - Gitlab::HealthChecks::RedisCheck, - Gitlab::HealthChecks::FsShardsCheck, - ] - end + CHECKS = [ + Gitlab::HealthChecks::DbCheck, + Gitlab::HealthChecks::RedisCheck, + Gitlab::HealthChecks::FsShardsCheck, + ] def readiness - results = checks.map { |check| [check_name(check), check.readiness] } + results = CHECKS.map { |check| [check_name(check), check.readiness] } render_check_results(results) end def liveness - results = checks.map { |check| [check_name(check), check.liveness] } + results = CHECKS.map { |check| [check_name(check), check.liveness] } render_check_results(results) end def metrics - results = checks.flat_map(&:metrics) + results = CHECKS.flat_map(&:metrics) response = results.map(&method(:metric_to_prom_line)).join("\n") -- 2.24.1 From 404e69e5d46bee0e048816fd2143c37f4b4b2ea3 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Wed, 5 Apr 2017 13:51:19 +0200 Subject: [PATCH 07/14] Add Changelog --- changelogs/unreleased/24240-add-monitoring-endpoints.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelogs/unreleased/24240-add-monitoring-endpoints.yml diff --git a/changelogs/unreleased/24240-add-monitoring-endpoints.yml b/changelogs/unreleased/24240-add-monitoring-endpoints.yml new file mode 100644 index 00000000000..d00d8912418 --- /dev/null +++ b/changelogs/unreleased/24240-add-monitoring-endpoints.yml @@ -0,0 +1,4 @@ +--- +title: Add /-/readiness /-/liveness and /-/health_metrics endpoints to track application health +merge_request: 10416 +author: -- 2.24.1 From cabf33cbbbe3fcfbc251f6811dcc4d1c4c184157 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Wed, 5 Apr 2017 16:05:07 +0200 Subject: [PATCH 08/14] Fix rubocop and MySql compatibility --- app/controllers/health_controller.rb | 2 +- lib/gitlab/health_checks/db_check.rb | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/app/controllers/health_controller.rb b/app/controllers/health_controller.rb index f53a71653df..410d7f4666a 100644 --- a/app/controllers/health_controller.rb +++ b/app/controllers/health_controller.rb @@ -6,7 +6,7 @@ class HealthController < ActionController::Base Gitlab::HealthChecks::DbCheck, Gitlab::HealthChecks::RedisCheck, Gitlab::HealthChecks::FsShardsCheck, - ] + ].freeze def readiness results = CHECKS.map { |check| [check_name(check), check.readiness] } diff --git a/lib/gitlab/health_checks/db_check.rb b/lib/gitlab/health_checks/db_check.rb index a0c79c34246..8a5208e8182 100644 --- a/lib/gitlab/health_checks/db_check.rb +++ b/lib/gitlab/health_checks/db_check.rb @@ -16,7 +16,11 @@ module Gitlab def check catch_timeout 10.seconds do - ActiveRecord::Base.connection.execute('SELECT 1 as ping')&.first&.[]('ping') + if ActiveRecord::Base.connection.adapter_name.downcase.starts_with? 'postgresql' + ActiveRecord::Base.connection.execute('SELECT 1 as ping')&.first&.[]('ping') + else + ActiveRecord::Base.connection.execute('SELECT 1 as ping')&.first&.first&.to_s + end end end end -- 2.24.1 From 37054aac0384935b4959d23482ca96841911afa1 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Thu, 6 Apr 2017 19:18:57 +0200 Subject: [PATCH 09/14] Use scope in routing and cleanup the code to be more idiomatic --- app/controllers/health_controller.rb | 8 ++------ changelogs/unreleased/24240-add-monitoring-endpoints.yml | 2 +- config/routes.rb | 8 +++++--- lib/gitlab/health_checks/base_abstract_check.rb | 8 ++++---- lib/gitlab/health_checks/db_check.rb | 2 +- 5 files changed, 13 insertions(+), 15 deletions(-) diff --git a/app/controllers/health_controller.rb b/app/controllers/health_controller.rb index 410d7f4666a..439f2621b04 100644 --- a/app/controllers/health_controller.rb +++ b/app/controllers/health_controller.rb @@ -9,13 +9,13 @@ class HealthController < ActionController::Base ].freeze def readiness - results = CHECKS.map { |check| [check_name(check), check.readiness] } + results = CHECKS.map { |check| [check.human_name, check.readiness] } render_check_results(results) end def liveness - results = CHECKS.map { |check| [check_name(check), check.liveness] } + results = CHECKS.map { |check| [check.human_name, check.liveness] } render_check_results(results) end @@ -57,8 +57,4 @@ class HealthController < ActionController::Base end render json: response.to_h, status: success ? :ok : :service_unavailable end - - def check_name(check) - check.name.demodulize.underscore - end end diff --git a/changelogs/unreleased/24240-add-monitoring-endpoints.yml b/changelogs/unreleased/24240-add-monitoring-endpoints.yml index d00d8912418..a22458965fc 100644 --- a/changelogs/unreleased/24240-add-monitoring-endpoints.yml +++ b/changelogs/unreleased/24240-add-monitoring-endpoints.yml @@ -1,4 +1,4 @@ --- -title: Add /-/readiness /-/liveness and /-/health_metrics endpoints to track application health +title: Add /-/readiness /-/liveness and /-/metrics endpoints to track application health merge_request: 10416 author: diff --git a/config/routes.rb b/config/routes.rb index 2e60af02cd3..1da226a3b57 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -39,9 +39,11 @@ Rails.application.routes.draw do # Health check get 'health_check(/:checks)' => 'health_check#index', as: :health_check - get '-/readiness' => 'health#readiness' - get '-/liveness' => 'health#liveness' - get '-/health_metrics' => 'health#metrics' + scope path: '-', controller: 'health' do + get :liveness + get :readiness + get :metrics + end # Koding route get 'koding' => 'koding#index' diff --git a/lib/gitlab/health_checks/base_abstract_check.rb b/lib/gitlab/health_checks/base_abstract_check.rb index 0e8dd84e049..2d7144b6a04 100644 --- a/lib/gitlab/health_checks/base_abstract_check.rb +++ b/lib/gitlab/health_checks/base_abstract_check.rb @@ -1,6 +1,10 @@ module Gitlab module HealthChecks module BaseAbstractCheck + def human_name + self.name.demodulize.underscore + end + def readiness raise NotImplementedError.new end @@ -32,10 +36,6 @@ module Gitlab ex end end - - def is_timeout(obj) - obj.is_a? Timeout::Error - end end end end diff --git a/lib/gitlab/health_checks/db_check.rb b/lib/gitlab/health_checks/db_check.rb index 8a5208e8182..fd94984f8a2 100644 --- a/lib/gitlab/health_checks/db_check.rb +++ b/lib/gitlab/health_checks/db_check.rb @@ -16,7 +16,7 @@ module Gitlab def check catch_timeout 10.seconds do - if ActiveRecord::Base.connection.adapter_name.downcase.starts_with? 'postgresql' + if Gitlab::Database.postgresql? ActiveRecord::Base.connection.execute('SELECT 1 as ping')&.first&.[]('ping') else ActiveRecord::Base.connection.execute('SELECT 1 as ping')&.first&.first&.to_s -- 2.24.1 From f83f32c6a072e6a4f088c2c741a61808ecc4563f Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Thu, 6 Apr 2017 19:34:05 +0200 Subject: [PATCH 10/14] use symbols to name fs metrics --- lib/gitlab/health_checks/fs_shards_check.rb | 41 ++++++++----------- .../healthchecks/fs_shards_check_spec.rb | 30 +++++++------- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index d9a42074e9b..9ddf47c68f3 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -5,43 +5,38 @@ module Gitlab class << self def readiness - res = [] - - repository_storages.each do |storage_name| + repository_storages.map do |storage_name| begin tmp_file_path = tmp_file_path(storage_name) - res << - if !storage_stat_test(storage_name) - HealthChecks::Result.new(false, 'cannot stat storage', shard: storage_name) - elsif !storage_write_test(tmp_file_path) - HealthChecks::Result.new(false, 'cannot write to storage', shard: storage_name) - elsif !storage_read_test(tmp_file_path) - HealthChecks::Result.new(false, 'cannot read from storage', shard: storage_name) - else - HealthChecks::Result.new(true, nil, shard: storage_name) - end + + if !storage_stat_test(storage_name) + HealthChecks::Result.new(false, 'cannot stat storage', shard: storage_name) + elsif !storage_write_test(tmp_file_path) + HealthChecks::Result.new(false, 'cannot write to storage', shard: storage_name) + elsif !storage_read_test(tmp_file_path) + HealthChecks::Result.new(false, 'cannot read from storage', shard: storage_name) + else + HealthChecks::Result.new(true, nil, shard: storage_name) + end rescue RuntimeError => ex message = "unexpected error #{ex} when checking storage #{storage_name}" Rails.logger.error(message) - res << HealthChecks::Result.new(false, message, shard: storage_name) + HealthChecks::Result.new(false, message, shard: storage_name) ensure delete_test_file(tmp_file_path) end end - res end def metrics - metrics = [] - repository_storages.each do |storage_name| - metrics.concat(operation_metrics('filesystem_accessible', 'filesystem_access_latency', -> { storage_stat_test(storage_name) }, shard: storage_name)) - + repository_storages.flat_map do |storage_name| tmp_file_path = tmp_file_path(storage_name) - - metrics.concat(operation_metrics('filesystem_writable', 'filesystem_write_latency', -> { storage_write_test(tmp_file_path) }, shard: storage_name)) - metrics.concat(operation_metrics('filesystem_readable', 'filesystem_read_latency', -> { storage_read_test(tmp_file_path) }, shard: storage_name)) + [ + operation_metrics(:filesystem_accessible, :filesystem_access_latency, -> { storage_stat_test(storage_name) }, shard: storage_name), + operation_metrics(:filesystem_writable, :filesystem_write_latency, -> { storage_write_test(tmp_file_path) }, shard: storage_name), + operation_metrics(:filesystem_readable, :filesystem_read_latency, -> { storage_read_test(tmp_file_path) }, shard: storage_name) + ].flatten end - metrics end private diff --git a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb index df132ddefce..ba1cea1703c 100644 --- a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb +++ b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb @@ -78,13 +78,13 @@ describe Gitlab::HealthChecks::FsShardsCheck do }.with_indifferent_access end - it { is_expected.to include(metric_class.new('filesystem_accessible', 0, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_readable', 0, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_writable', 0, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_accessible, 0, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_readable, 0, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_writable, 0, shard: :default)) } - it { is_expected.to include(have_attributes(name: 'filesystem_access_latency', value: be > 0, labels: { shard: :default })) } - it { is_expected.to include(have_attributes(name: 'filesystem_read_latency', value: be > 0, labels: { shard: :default })) } - it { is_expected.to include(have_attributes(name: 'filesystem_write_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: :filesystem_access_latency, value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: :filesystem_read_latency, value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: :filesystem_write_latency, value: be > 0, labels: { shard: :default })) } end context 'storage points to directory which is not writeble' do @@ -92,9 +92,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do FileUtils.chmod_R(0555, tmp_dir) end - it { is_expected.to include(metric_class.new('filesystem_accessible', 1, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_readable', 0, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_writable', 0, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_accessible, 1, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_readable, 0, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_writable, 0, shard: :default)) } end context 'storage points to directory that has both read and write rights' do @@ -102,13 +102,13 @@ describe Gitlab::HealthChecks::FsShardsCheck do FileUtils.chmod_R(0755, tmp_dir) end - it { is_expected.to include(metric_class.new('filesystem_accessible', 1, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_readable', 1, shard: :default)) } - it { is_expected.to include(metric_class.new('filesystem_writable', 1, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_accessible, 1, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_readable, 1, shard: :default)) } + it { is_expected.to include(metric_class.new(:filesystem_writable, 1, shard: :default)) } - it { is_expected.to include(have_attributes(name: 'filesystem_access_latency', value: be > 0, labels: { shard: :default })) } - it { is_expected.to include(have_attributes(name: 'filesystem_read_latency', value: be > 0, labels: { shard: :default })) } - it { is_expected.to include(have_attributes(name: 'filesystem_write_latency', value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: :filesystem_access_latency, value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: :filesystem_read_latency, value: be > 0, labels: { shard: :default })) } + it { is_expected.to include(have_attributes(name: :filesystem_write_latency, value: be > 0, labels: { shard: :default })) } end end end -- 2.24.1 From 64780fa441456792829a46cd363a093acb1848b9 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Thu, 6 Apr 2017 20:19:19 +0200 Subject: [PATCH 11/14] More idiomatic ruby code, use flat_map instead of each and other fixes --- app/controllers/health_controller.rb | 4 +-- .../health_checks/base_abstract_check.rb | 6 +++- lib/gitlab/health_checks/fs_shards_check.rb | 26 +++++++-------- lib/gitlab/health_checks/redis_check.rb | 2 +- .../health_checks/simple_abstract_check.rb | 32 ++++++------------- 5 files changed, 28 insertions(+), 42 deletions(-) diff --git a/app/controllers/health_controller.rb b/app/controllers/health_controller.rb index 439f2621b04..df0fc3132ed 100644 --- a/app/controllers/health_controller.rb +++ b/app/controllers/health_controller.rb @@ -9,13 +9,13 @@ class HealthController < ActionController::Base ].freeze def readiness - results = CHECKS.map { |check| [check.human_name, check.readiness] } + results = CHECKS.map { |check| [check.name, check.readiness] } render_check_results(results) end def liveness - results = CHECKS.map { |check| [check.human_name, check.liveness] } + results = CHECKS.map { |check| [check.name, check.liveness] } render_check_results(results) end diff --git a/lib/gitlab/health_checks/base_abstract_check.rb b/lib/gitlab/health_checks/base_abstract_check.rb index 2d7144b6a04..2ac72b10212 100644 --- a/lib/gitlab/health_checks/base_abstract_check.rb +++ b/lib/gitlab/health_checks/base_abstract_check.rb @@ -1,8 +1,12 @@ module Gitlab module HealthChecks module BaseAbstractCheck + def name + super.demodulize.underscore + end + def human_name - self.name.demodulize.underscore + name.sub(/_check$/, '').capitalize end def readiness diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index 9ddf47c68f3..ebd380d7a70 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -41,23 +41,18 @@ module Gitlab private - RANDOM_STRING = rand(36**1000).to_s(36).freeze + RANDOM_STRING = SecureRandom.hex(1000).freeze def operation_metrics(ok_metric, latency_metric, operation, **labels) - metrics = [] with_timing operation do |result, elapsed| - metrics << metric(latency_metric, elapsed, **labels) - - metrics << if result - metric(ok_metric, 1, **labels) - else - metric(ok_metric, 0, **labels) - end + [ + metric(latency_metric, elapsed, **labels), + metric(ok_metric, result ? 1 : 0, **labels) + ] end - metrics rescue RuntimeError => ex Rails.logger("unexpected error #{ex} when checking #{ok_metric}") - metrics << metric(ok_metric, 0, **labels) + [metric(ok_metric, 0, **labels)] end def repository_storages @@ -73,11 +68,11 @@ module Gitlab end def tmp_file_path(storage_name) - Dir::Tmpname.create(['fs_shards_check', '+deleted'], path(storage_name)) { |path| path } + Dir::Tmpname.create(%w(fs_shards_check +deleted), path(storage_name)) { |path| path } end def path(storage_name) - storages_paths&.[](storage_name)&.[]('path') + storages_paths&.dig(storage_name, 'path') end def storage_stat_test(storage_name) @@ -96,7 +91,7 @@ module Gitlab end status == 0 rescue Errno::ENOENT - written_bytes = File.open(tmp_path, 'w') { |f| f.write(RANDOM_STRING) } rescue Errno::ENOENT + written_bytes = File.write(tmp_path, RANDOM_STRING) rescue Errno::ENOENT written_bytes == RANDOM_STRING.length end @@ -106,7 +101,8 @@ module Gitlab end status == 0 rescue Errno::ENOENT - File.open(tmp_path, 'r') { |f| f.read } rescue Errno::ENOENT == RANDOM_STRING + file_contents = File.read(tmp_path) rescue Errno::ENOENT + file_contents == RANDOM_STRING end def delete_test_file(tmp_path) diff --git a/lib/gitlab/health_checks/redis_check.rb b/lib/gitlab/health_checks/redis_check.rb index cede4702c62..57bbe5b3ad0 100644 --- a/lib/gitlab/health_checks/redis_check.rb +++ b/lib/gitlab/health_checks/redis_check.rb @@ -16,7 +16,7 @@ module Gitlab def check catch_timeout 10.seconds do - Gitlab::Redis.with { |r| r.ping } + Gitlab::Redis.with(&:ping) end end end diff --git a/lib/gitlab/health_checks/simple_abstract_check.rb b/lib/gitlab/health_checks/simple_abstract_check.rb index 3938815ba03..ff20487966d 100644 --- a/lib/gitlab/health_checks/simple_abstract_check.rb +++ b/lib/gitlab/health_checks/simple_abstract_check.rb @@ -7,40 +7,26 @@ module Gitlab check_result = check if is_successful?(check_result) HealthChecks::Result.new(true) - elsif check_result.is_a? Timeout::Error - HealthChecks::Result.new(false, "#{check_name} check timed out") + elsif check_result.is_a?(Timeout::Error) + HealthChecks::Result.new(false, "#{human_name} check timed out") else - HealthChecks::Result.new(false, "unexpected #{check_name} check result: #{check_result}") + HealthChecks::Result.new(false, "unexpected #{human_name} check result: #{check_result}") end end def metrics - metrics = [] with_timing method(:check) do |result, elapsed| - metrics << if result.is_a? Timeout::Error - metric("#{metric_prefix}_timeout", 1) - else - metric("#{metric_prefix}_timeout", 0) - end - - metrics << if is_successful?(result) - metric("#{metric_prefix}_success", 1) - else - Rails.logger.error "#{check_name} check returned unexpected result #{result}" - metric("#{metric_prefix}_success", 0) - end - - metrics << metric("#{metric_prefix}_latency", elapsed) + Rails.logger.error("#{human_name} check returned unexpected result #{result}") unless is_successful?(result) + [ + metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0), + metric("#{metric_prefix}_success", is_successful?(result) ? 1 : 0), + metric("#{metric_prefix}_latency", elapsed) + ] end - metrics end private - def check_name - self.name.demodulize.underscore.sub(/_[^_]*$/, '').capitalize - end - def metric_prefix raise NotImplementedError.new end -- 2.24.1 From 94f11d1d23cd0a65693c4cdad02f1a237c15481b Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Thu, 6 Apr 2017 22:25:28 +0200 Subject: [PATCH 12/14] Remove uneccessare .new and memoize configuration access methods --- lib/gitlab/health_checks/base_abstract_check.rb | 2 +- lib/gitlab/health_checks/fs_shards_check.rb | 4 ++-- lib/gitlab/health_checks/simple_abstract_check.rb | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/gitlab/health_checks/base_abstract_check.rb b/lib/gitlab/health_checks/base_abstract_check.rb index 2ac72b10212..7de6d4d9367 100644 --- a/lib/gitlab/health_checks/base_abstract_check.rb +++ b/lib/gitlab/health_checks/base_abstract_check.rb @@ -10,7 +10,7 @@ module Gitlab end def readiness - raise NotImplementedError.new + raise NotImplementedError end def liveness diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index ebd380d7a70..df962d203b7 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -56,11 +56,11 @@ module Gitlab end def repository_storages - Gitlab::CurrentSettings.current_application_settings.repository_storages + @repository_storage ||= Gitlab::CurrentSettings.current_application_settings.repository_storages end def storages_paths - Gitlab.config.repositories.storages + @storage_paths ||= Gitlab.config.repositories.storages end def with_timeout(args) diff --git a/lib/gitlab/health_checks/simple_abstract_check.rb b/lib/gitlab/health_checks/simple_abstract_check.rb index ff20487966d..fbe1645c1b1 100644 --- a/lib/gitlab/health_checks/simple_abstract_check.rb +++ b/lib/gitlab/health_checks/simple_abstract_check.rb @@ -28,15 +28,15 @@ module Gitlab private def metric_prefix - raise NotImplementedError.new + raise NotImplementedError end def is_successful?(result) - raise NotImplementedError.new + raise NotImplementedError end def check - raise NotImplementedError.new + raise NotImplementedError end end end -- 2.24.1 From c2cbbbad6ebc96152f37a2cf68e4f212294fde28 Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Thu, 6 Apr 2017 23:14:43 +0200 Subject: [PATCH 13/14] Remove offending check and add stubbed version --- .../gitlab/healthchecks/fs_shards_check_spec.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb index ba1cea1703c..6c018402f88 100644 --- a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb +++ b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb @@ -35,14 +35,6 @@ describe Gitlab::HealthChecks::FsShardsCheck do it { is_expected.to include(result_class.new(false, 'cannot stat storage', shard: :default)) } end - context 'storage points to directory which is not writeble' do - before do - FileUtils.chmod_R(0555, tmp_dir) - end - - it { is_expected.to include(result_class.new(false, 'cannot write to storage', shard: :default)) } - end - context 'storage points to directory that has both read and write rights' do before do FileUtils.chmod_R(0755, tmp_dir) @@ -65,6 +57,14 @@ describe Gitlab::HealthChecks::FsShardsCheck do it { is_expected.to include(result_class.new(false, 'cannot read from storage', shard: :default)) } end + + context 'write test fails' do + before do + allow(described_class).to receive(:storage_write_test).with(any_args).and_return(false) + end + + it { is_expected.to include(result_class.new(false, 'cannot write to storage', shard: :default)) } + end end end -- 2.24.1 From bd7ef9c40631c05dfb44f7cfc1c92b7de1969a7a Mon Sep 17 00:00:00 2001 From: Pawel Chojnacki Date: Fri, 7 Apr 2017 00:27:29 +0200 Subject: [PATCH 14/14] Remove another test that is tested elsewhere and doesn't work on root user --- spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb index 6c018402f88..4cd8cf313a5 100644 --- a/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb +++ b/spec/lib/gitlab/healthchecks/fs_shards_check_spec.rb @@ -87,16 +87,6 @@ describe Gitlab::HealthChecks::FsShardsCheck do it { is_expected.to include(have_attributes(name: :filesystem_write_latency, value: be > 0, labels: { shard: :default })) } end - context 'storage points to directory which is not writeble' do - before do - FileUtils.chmod_R(0555, tmp_dir) - end - - it { is_expected.to include(metric_class.new(:filesystem_accessible, 1, shard: :default)) } - it { is_expected.to include(metric_class.new(:filesystem_readable, 0, shard: :default)) } - it { is_expected.to include(metric_class.new(:filesystem_writable, 0, shard: :default)) } - end - context 'storage points to directory that has both read and write rights' do before do FileUtils.chmod_R(0755, tmp_dir) -- 2.24.1