Commit ed8aa954 authored by Tomasz Maczukin

Rename internal Prometheus metrics

parent f1b350db
Pipeline #22791397 passed with stages
in 39 minutes and 1 second
......@@ -12,21 +12,21 @@ import (
)
var numBuildsDesc = prometheus.NewDesc(
"ci_runner_builds",
"gitlab_runner_jobs",
"The current number of running builds.",
[]string{"runner", "state", "stage", "executor_stage"},
nil,
)
var requestConcurrencyDesc = prometheus.NewDesc(
"ci_runner_request_concurrency",
"gitlab_runner_request_concurrency",
"The current number of concurrent requests for a new job",
[]string{"runner"},
nil,
)
var requestConcurrencyExceededDesc = prometheus.NewDesc(
"ci_runner_request_concurrency_exceeded_total",
"gitlab_runner_request_concurrency_exceeded_total",
"Counter tracking exceeding of request concurrency",
[]string{"runner"},
nil,
......
......@@ -81,7 +81,7 @@ func (v *AppVersionInfo) NewMetricsCollector() *prometheus.GaugeVec {
buildInfo := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "ci_runner_version_info",
Name: "gitlab_runner_version_info",
Help: "A metric with a constant '1' value labeled by different build stats fields.",
},
labelNames,
......
......@@ -14,155 +14,19 @@ system or accessed with any other HTTP client.
The exposed information includes:
- Runner business logic metrics (e.g., the number of currently running builds)
- Runner business logic metrics (e.g., the number of currently running jobs)
- Go-specific process metrics (garbage collection stats, goroutines, memstats, etc.)
- General process metrics (memory usage, CPU usage, file descriptor usage, etc.)
- Build version information
The following is an example of the metrics output in Prometheus'
text-based metrics exposition format:
```
# HELP ci_docker_machines The total number of machines created.
# TYPE ci_docker_machines counter
ci_docker_machines{type="created"} 0
ci_docker_machines{type="removed"} 0
ci_docker_machines{type="used"} 0
# HELP ci_docker_machines_provider The current number of machines in given state.
# TYPE ci_docker_machines_provider gauge
ci_docker_machines_provider{state="acquired"} 0
ci_docker_machines_provider{state="creating"} 0
ci_docker_machines_provider{state="idle"} 0
ci_docker_machines_provider{state="removing"} 0
ci_docker_machines_provider{state="used"} 0
# HELP ci_runner_builds The current number of running builds.
# TYPE ci_runner_builds gauge
ci_runner_builds{stage="prepare_script",state="running"} 1
# HELP ci_runner_version_info A metric with a constant '1' value labeled by different build stats fields.
# TYPE ci_runner_version_info gauge
ci_runner_version_info{architecture="amd64",branch="rename-to-gitlab-runner",built_at="2017-09-11 15:30:31 +0000 +0000",go_version="go1.8.3",name="gitlab-runner",os="linux",revision="35e724fa",version="10.0.0~beta.28.g35e724fa"} 1
# HELP ci_ssh_docker_machines The total number of machines created.
# TYPE ci_ssh_docker_machines counter
ci_ssh_docker_machines{type="created"} 0
ci_ssh_docker_machines{type="removed"} 0
ci_ssh_docker_machines{type="used"} 0
# HELP ci_ssh_docker_machines_provider The current number of machines in given state.
# TYPE ci_ssh_docker_machines_provider gauge
ci_ssh_docker_machines_provider{state="acquired"} 0
ci_ssh_docker_machines_provider{state="creating"} 0
ci_ssh_docker_machines_provider{state="idle"} 0
ci_ssh_docker_machines_provider{state="removing"} 0
ci_ssh_docker_machines_provider{state="used"} 0
# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 0.00030304800000000004
go_gc_duration_seconds{quantile="0.25"} 0.00038177500000000005
go_gc_duration_seconds{quantile="0.5"} 0.0009022510000000001
go_gc_duration_seconds{quantile="0.75"} 0.006189937
go_gc_duration_seconds{quantile="1"} 0.00880617
go_gc_duration_seconds_sum 0.016583181000000002
go_gc_duration_seconds_count 5
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 16
# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.
# TYPE go_memstats_alloc_bytes gauge
go_memstats_alloc_bytes 2.8288e+06
# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed.
# TYPE go_memstats_alloc_bytes_total counter
go_memstats_alloc_bytes_total 7.973392e+06
# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table.
# TYPE go_memstats_buck_hash_sys_bytes gauge
go_memstats_buck_hash_sys_bytes 1.444932e+06
# HELP go_memstats_frees_total Total number of frees.
# TYPE go_memstats_frees_total counter
go_memstats_frees_total 73317
# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata.
# TYPE go_memstats_gc_sys_bytes gauge
go_memstats_gc_sys_bytes 423936
# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use.
# TYPE go_memstats_heap_alloc_bytes gauge
go_memstats_heap_alloc_bytes 2.8288e+06
# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used.
# TYPE go_memstats_heap_idle_bytes gauge
go_memstats_heap_idle_bytes 1.39264e+06
# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use.
# TYPE go_memstats_heap_inuse_bytes gauge
go_memstats_heap_inuse_bytes 4.407296e+06
# HELP go_memstats_heap_objects Number of allocated objects.
# TYPE go_memstats_heap_objects gauge
go_memstats_heap_objects 23532
# HELP go_memstats_heap_released_bytes_total Total number of heap bytes released to OS.
# TYPE go_memstats_heap_released_bytes_total counter
go_memstats_heap_released_bytes_total 0
# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system.
# TYPE go_memstats_heap_sys_bytes gauge
go_memstats_heap_sys_bytes 5.799936e+06
# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection.
# TYPE go_memstats_last_gc_time_seconds gauge
go_memstats_last_gc_time_seconds 1.4768981425195277e+09
# HELP go_memstats_lookups_total Total number of pointer lookups.
# TYPE go_memstats_lookups_total counter
go_memstats_lookups_total 42
# HELP go_memstats_mallocs_total Total number of mallocs.
# TYPE go_memstats_mallocs_total counter
go_memstats_mallocs_total 96849
# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures.
# TYPE go_memstats_mcache_inuse_bytes gauge
go_memstats_mcache_inuse_bytes 4800
# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system.
# TYPE go_memstats_mcache_sys_bytes gauge
go_memstats_mcache_sys_bytes 16384
# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures.
# TYPE go_memstats_mspan_inuse_bytes gauge
go_memstats_mspan_inuse_bytes 72320
# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system.
# TYPE go_memstats_mspan_sys_bytes gauge
go_memstats_mspan_sys_bytes 98304
# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place.
# TYPE go_memstats_next_gc_bytes gauge
go_memstats_next_gc_bytes 5.274438e+06
# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations.
# TYPE go_memstats_other_sys_bytes gauge
go_memstats_other_sys_bytes 1.2341e+06
# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator.
# TYPE go_memstats_stack_inuse_bytes gauge
go_memstats_stack_inuse_bytes 491520
# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator.
# TYPE go_memstats_stack_sys_bytes gauge
go_memstats_stack_sys_bytes 491520
# HELP go_memstats_sys_bytes Number of bytes obtained by system. Sum of all system allocations.
# TYPE go_memstats_sys_bytes gauge
go_memstats_sys_bytes 9.509112e+06
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 0.18
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 1024
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 8
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 2.3191552e+07
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.47689813837e+09
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 3.39746816e+08
```
Note that the lines starting with `# HELP` document the meaning of each exposed
metric. This metrics format is documented in Prometheus'
The metrics format is documented in Prometheus'
[Exposition formats](https://prometheus.io/docs/instrumenting/exposition_formats/)
specification.
These metrics are meant as a way for operators to monitor and gain insight into
GitLab Runners. For example, you may want to know whether a load average increase
on your runner's host is related to an increase of processed builds or not. Or
you are running a cluster of machines to be used for the builds and you want to
on your runner's host is related to an increase of processed jobs or not. Or
you are running a cluster of machines to be used for the jobs and you want to
track job trends to plan changes in your infrastructure.
### Learning more about Prometheus
......
......@@ -136,6 +136,6 @@ func (e *machineExecutor) SetCurrentStage(stage common.ExecutorStage) {
}
func init() {
common.RegisterExecutor("docker+machine", newMachineProvider("docker_machines", "docker"))
common.RegisterExecutor("docker-ssh+machine", newMachineProvider("ssh_docker_machines", "docker-ssh"))
common.RegisterExecutor("docker+machine", newMachineProvider("docker+machine", "docker"))
common.RegisterExecutor("docker-ssh+machine", newMachineProvider("docker-ssh+machine", "docker-ssh"))
}
......@@ -428,22 +428,30 @@ func newMachineProvider(name, executor string) *machineProvider {
provider: provider,
totalActions: prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "ci_" + name + "_provider_actions_total",
Name: "gitlab_runner_autoscaling_actions_total",
Help: "The total number of actions executed by the provider.",
ConstLabels: prometheus.Labels{
"executor": name,
},
},
[]string{"action"},
),
currentStatesDesc: prometheus.NewDesc(
"ci_"+name+"_provider_machine_states",
"gitlab_runner_autoscaling_machine_states",
"The current number of machines per state in this provider.",
[]string{"state"},
nil,
prometheus.Labels{
"executor": name,
},
),
creationHistogram: prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "ci_" + name + "_provider_machine_creation_duration_seconds",
Name: "gitlab_runner_autoscaling_machine_creation_duration_seconds",
Help: "Histogram of machine creation time.",
Buckets: prometheus.ExponentialBuckets(30, 1.25, 10),
ConstLabels: prometheus.Labels{
"executor": name,
},
},
),
}
......
......@@ -248,7 +248,7 @@ func testMachineProvider(machine ...string) (*machineProvider, *testMachine) {
Removed: make(chan bool, 10),
Stopped: make(chan bool, 10),
}
p := newMachineProvider("docker_machines", "docker")
p := newMachineProvider("docker+machine", "docker")
p.machine = t
return p, t
}
......
......@@ -9,7 +9,7 @@ import (
)
var numJobFailuresDesc = prometheus.NewDesc(
"ci_runner_failed_jobs_total",
"gitlab_runner_failed_jobs_total",
"Total number of failed jobs",
[]string{"runner", "failure_reason"},
nil,
......
......@@ -7,7 +7,7 @@ import (
"github.com/sirupsen/logrus"
)
var numErrorsDesc = prometheus.NewDesc("ci_runner_errors", "The number of catched errors.", []string{"level"}, nil)
var numErrorsDesc = prometheus.NewDesc("gitlab_runner_errors_total", "The number of catched errors.", []string{"level"}, nil)
type LogHook struct {
errorsNumber map[logrus.Level]*int64
......
......@@ -24,7 +24,7 @@ import (
const clientError = -100
var apiRequestStatuses = prometheus.NewDesc(
"ci_runner_api_request_statuses_total",
"gitlab_runner_api_request_statuses_total",
"The total number of api requests, partitioned by runner, endpoint and status.",
[]string{"runner", "endpoint", "status"},
nil,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment