Commit 9d84de37 authored by Tomasz Maczukin

Add new metrics related to jobs requesting and API usage

parent a3b49866
Pipeline #21184915 failed with stages
in 33 minutes and 54 seconds
......@@ -6,6 +6,7 @@ import (
"sync"
"gitlab.com/gitlab-org/gitlab-runner/common"
"gitlab.com/gitlab-org/gitlab-runner/helpers"
"github.com/prometheus/client_golang/prometheus"
)
......@@ -17,6 +18,20 @@ var numBuildsDesc = prometheus.NewDesc(
nil,
)
// requestConcurrencyDesc describes the per-runner gauge that reports how many
// requests for a new job are currently in progress.
var requestConcurrencyDesc = prometheus.NewDesc(
	"ci_runner_request_concurrency",
	"The current number of concurrent requests for a new job",
	[]string{"runner"}, // variable labels
	nil,                // no constant labels
)
// requestConcurrencyExceededDesc describes the per-runner counter of job
// requests that were refused because the runner's request concurrency limit
// had already been reached.
var requestConcurrencyExceededDesc = prometheus.NewDesc(
	"ci_runner_request_concurrency_exceeded_total",
	"Counter tracking exceeding of request concurrency",
	[]string{"runner"}, // variable labels
	nil,                // no constant labels
)
type statePermutation struct {
runner string
buildState common.BuildRuntimeState
......@@ -36,6 +51,8 @@ func newStatePermutationFromBuild(build *common.Build) statePermutation {
// runnerCounter holds the per-runner bookkeeping kept by buildsHelper.
type runnerCounter struct {
	// builds — NOTE(review): presumably the number of builds currently
	// attributed to the runner; its use is not visible here, confirm.
	builds int

	// requests is compared against the runner's request concurrency limit
	// when a new job request is acquired, and is exported as the
	// ci_runner_request_concurrency gauge.
	requests int

	// requestConcurrencyExceeded counts how many times a job request was
	// refused because requests had already reached that limit. It is
	// exported as ci_runner_request_concurrency_exceeded_total.
	requestConcurrencyExceeded int
}
type buildsHelper struct {
......@@ -91,6 +108,8 @@ func (b *buildsHelper) acquireRequest(runner *common.RunnerConfig) bool {
counter := b.getRunnerCounter(runner)
if counter.requests >= runner.GetRequestConcurrency() {
counter.requestConcurrencyExceeded++
return false
}
......@@ -183,16 +202,29 @@ func (b *buildsHelper) statesAndStages() map[statePermutation]int {
return data
}
// runnersCounters returns a snapshot of the per-runner counters, keyed by the
// shortened runner token.
//
// Each counter is copied while b.lock is held. The returned pointers therefore
// refer to private copies whose values stay stable after the lock is released,
// instead of pointing into b.counters where other goroutines may still be
// updating them (for example acquireRequest incrementing
// requestConcurrencyExceeded).
func (b *buildsHelper) runnersCounters() map[string]*runnerCounter {
	b.lock.Lock()
	defer b.lock.Unlock()

	data := make(map[string]*runnerCounter, len(b.counters))
	for token, counter := range b.counters {
		if counter == nil {
			// Nothing to report for this runner; skipping also keeps the
			// copy below from dereferencing a nil pointer.
			continue
		}

		snapshot := *counter
		data[helpers.ShortenToken(token)] = &snapshot
	}

	return data
}
// Describe implements prometheus.Collector by sending the descriptor of every
// metric this helper exposes.
func (b *buildsHelper) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		numBuildsDesc,
		requestConcurrencyDesc,
		requestConcurrencyExceededDesc,
	}

	for _, desc := range descs {
		ch <- desc
	}
}
// Collect implements prometheus.Collector.
func (b *buildsHelper) Collect(ch chan<- prometheus.Metric) {
data := b.statesAndStages()
for state, count := range data {
builds := b.statesAndStages()
for state, count := range builds {
ch <- prometheus.MustNewConstMetric(
numBuildsDesc,
prometheus.GaugeValue,
......@@ -203,6 +235,23 @@ func (b *buildsHelper) Collect(ch chan<- prometheus.Metric) {
string(state.executorStage),
)
}
counters := b.runnersCounters()
for runner, counter := range counters {
ch <- prometheus.MustNewConstMetric(
requestConcurrencyDesc,
prometheus.GaugeValue,
float64(counter.requests),
runner,
)
ch <- prometheus.MustNewConstMetric(
requestConcurrencyExceededDesc,
prometheus.CounterValue,
float64(counter.requestConcurrencyExceeded),
runner,
)
}
}
func (b *buildsHelper) ListJobsHandler(w http.ResponseWriter, r *http.Request) {
......
......@@ -350,6 +350,8 @@ func (mr *RunCommand) serveMetrics() {
registry := prometheus.NewRegistry()
// Metrics about the runner's business logic.
registry.MustRegister(&mr.buildsHelper)
// Metrics about API connections
registry.MustRegister(mr.network)
// Metrics about jobs failures
registry.MustRegister(mr.failuresCollector)
// Metrics about caught errors
......
......@@ -5,6 +5,8 @@ import (
"fmt"
"io"
"github.com/prometheus/client_golang/prometheus"
"gitlab.com/gitlab-org/gitlab-runner/helpers/url"
)
......@@ -340,6 +342,8 @@ type JobTracePatch interface {
}
type Network interface {
prometheus.Collector
RegisterRunner(config RunnerCredentials, parameters RegisterRunnerParameters) *RegisterRunnerResponse
VerifyRunner(config RunnerCredentials) bool
UnregisterRunner(config RunnerCredentials) bool
......
......@@ -14,16 +14,74 @@ import (
"strconv"
"sync"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitlab-runner/common"
"gitlab.com/gitlab-org/gitlab-runner/helpers"
)
// clientError is a negative status value, so it cannot collide with a real
// HTTP status code.
// NOTE(review): presumably reported when a request fails on the client side
// before any HTTP response is received — confirm at its use sites.
const clientError = -100
// apiRequestStatuses describes the counter of API requests. One time series is
// exported per (runner, endpoint, status) combination.
var apiRequestStatuses = prometheus.NewDesc(
	"ci_runner_api_request_statuses_total",
	"The total number of api requests, partitioned by runner, endpoint and status.",
	[]string{
		"runner",
		"endpoint",
		"status",
	},
	nil, // no constant labels
)
// APIEndpoint names an API endpoint. Its string value is used as the
// "endpoint" label of the ci_runner_api_request_statuses_total metric.
type APIEndpoint string

// Endpoints whose request statuses are recorded.
const (
	// APIEndpointRequestJob labels requests issued by RequestJob.
	APIEndpointRequestJob APIEndpoint = "request_job"

	// APIEndpointUpdateJob labels requests issued by UpdateJob.
	APIEndpointUpdateJob APIEndpoint = "update_job"

	// APIEndpointPatchTrace labels requests issued by PatchTrace.
	APIEndpointPatchTrace APIEndpoint = "patch_trace"
)
// apiRequestStatusPermutation identifies one combination of runner, endpoint
// and request status. It is used as the key under which requests are counted.
type apiRequestStatusPermutation struct {
	// runnerID is the runner identifier exported as the "runner" label.
	runnerID string

	// endpoint is the API endpoint the request was sent to.
	endpoint APIEndpoint

	// status is the numeric result of the request, exported in decimal form
	// as the "status" label.
	status int
}
// apiRequestStatusesMap counts API requests per permutation of runner,
// endpoint and status. Its append and read methods take the lock themselves.
type apiRequestStatusesMap struct {
	// internal maps each observed permutation to the number of times it
	// has been recorded.
	internal map[apiRequestStatusPermutation]int

	// lock guards internal: append takes it for writing, read for reading.
	lock sync.RWMutex
}
// append records one more request for the given runner, endpoint and status,
// incrementing the count stored for that permutation.
//
// The write lock is held for the whole update, so append may be called from
// several goroutines.
func (arspm *apiRequestStatusesMap) append(runnerID string, endpoint APIEndpoint, status int) {
	arspm.lock.Lock()
	defer arspm.lock.Unlock()

	// Allocate lazily so that a zero-value apiRequestStatusesMap is usable;
	// writing to a nil map would panic.
	if arspm.internal == nil {
		arspm.internal = make(map[apiRequestStatusPermutation]int)
	}

	permutation := apiRequestStatusPermutation{runnerID: runnerID, endpoint: endpoint, status: status}

	// A missing key reads as 0, so the increment alone covers the first
	// occurrence of a permutation; no separate existence check is needed.
	arspm.internal[permutation]++
}
// read invokes handler once for every recorded permutation, passing the
// permutation and its current count. Map iteration order is unspecified.
//
// The read lock is held until all handler calls have returned, so handler
// must not call append on the same map.
func (arspm *apiRequestStatusesMap) read(handler func(apiRequestStatusPermutation, int)) {
	arspm.lock.RLock()
	defer arspm.lock.RUnlock()

	for key, total := range arspm.internal {
		handler(key, total)
	}
}
// newAPIRequestStatusesMap returns an empty apiRequestStatusesMap whose
// internal map is already allocated.
func newAPIRequestStatusesMap() *apiRequestStatusesMap {
	statuses := new(apiRequestStatusesMap)
	statuses.internal = make(map[apiRequestStatusPermutation]int)

	return statuses
}
// GitLabClient sends job requests, job updates and trace patches to the
// coordinator API and counts the status of those requests, which it exposes
// through its Describe and Collect methods.
type GitLabClient struct {
	// clients holds the underlying API clients.
	// NOTE(review): the meaning of the string key is not visible here —
	// confirm against getClient.
	clients map[string]*client

	// lock — NOTE(review): presumably guards clients; confirm in getClient.
	lock sync.Mutex

	// requestsStatusesMap counts API requests per runner, endpoint and
	// status; Collect turns its content into metrics.
	requestsStatusesMap *apiRequestStatusesMap
}
func (n *GitLabClient) getClient(credentials requestCredentials) (c *client, err error) {
......@@ -196,6 +254,8 @@ func (n *GitLabClient) RequestJob(config common.RunnerConfig) (*common.JobRespon
var response common.JobResponse
result, statusText, tlsData := n.doJSON(&config.RunnerCredentials, "POST", "jobs/request", http.StatusCreated, &request, &response)
n.requestsStatusesMap.append(config.RunnerCredentials.ShortDescription(), APIEndpointRequestJob, result)
switch result {
case http.StatusCreated:
config.Log().WithFields(logrus.Fields{
......@@ -231,6 +291,9 @@ func (n *GitLabClient) UpdateJob(config common.RunnerConfig, jobCredentials *com
log := config.Log().WithField("job", jobInfo.ID)
result, statusText, _ := n.doJSON(&config.RunnerCredentials, "PUT", fmt.Sprintf("jobs/%d", jobInfo.ID), http.StatusOK, &request, nil)
n.requestsStatusesMap.append(config.RunnerCredentials.ShortDescription(), APIEndpointUpdateJob, result)
switch result {
case http.StatusOK:
log.Debugln("Submitting job to coordinator...", "ok")
......@@ -262,6 +325,9 @@ func (n *GitLabClient) PatchTrace(config common.RunnerConfig, jobCredentials *co
request := bytes.NewReader(tracePatch.Patch())
response, err := n.doRaw(&config.RunnerCredentials, "PATCH", uri, request, "text/plain", headers)
n.requestsStatusesMap.append(config.RunnerCredentials.ShortDescription(), APIEndpointPatchTrace, response.StatusCode)
if err != nil {
config.Log().Errorln("Appending trace to coordinator...", "error", err.Error())
return common.UpdateFailed
......@@ -447,6 +513,26 @@ func (n *GitLabClient) DownloadArtifacts(config common.JobCredentials, artifacts
}
}
// Describe implements prometheus.Collector. The client exposes a single
// metric, so only the apiRequestStatuses descriptor is sent.
func (n *GitLabClient) Describe(ch chan<- *prometheus.Desc) {
	ch <- apiRequestStatuses
}
// Collect implements prometheus.Collector. It emits one counter sample for
// every recorded permutation of runner, endpoint and status.
func (n *GitLabClient) Collect(ch chan<- prometheus.Metric) {
	// emit converts a single permutation and its count into a metric; the
	// label values follow the order declared in apiRequestStatuses.
	emit := func(key apiRequestStatusPermutation, total int) {
		labelValues := []string{
			key.runnerID,
			string(key.endpoint),
			strconv.Itoa(key.status),
		}

		ch <- prometheus.MustNewConstMetric(
			apiRequestStatuses,
			prometheus.CounterValue,
			float64(total),
			labelValues...,
		)
	}

	n.requestsStatusesMap.read(emit)
}
func (n *GitLabClient) ProcessJob(config common.RunnerConfig, jobCredentials *common.JobCredentials) common.JobTrace {
trace := newJobTrace(n, config, jobCredentials)
trace.start()
......@@ -454,5 +540,7 @@ func (n *GitLabClient) ProcessJob(config common.RunnerConfig, jobCredentials *co
}
// NewGitLabClient returns a GitLabClient whose request status map is already
// allocated, so requests can be counted from the first call on.
//
// The leftover bare `return &GitLabClient{}` was removed. It made the
// initialising return unreachable and handed back a client with a nil
// requestsStatusesMap, which would panic on the first recorded request.
func NewGitLabClient() *GitLabClient {
	return &GitLabClient{
		requestsStatusesMap: newAPIRequestStatusesMap(),
	}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment