Skip to content
Snippets Groups Projects
Verified Commit 168beed5 authored by Taliesin Millhouse's avatar Taliesin Millhouse
Browse files

feat: add utilization rates panel

parent 46ea91a6
No related branches found
No related tags found
No related merge requests found
This commit is part of merge request !8489. Comments created here will be created in the context of that merge request.
......@@ -2,7 +2,7 @@ local aggregationSets = (import '../../metrics-catalog/gitlab-metrics-config.lib
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local layout = import 'grafana/layout.libsonnet';
local singleMetricRow = import '../key-metric-panels/single-metric-row.libsonnet';
local utilizationRatesPanel = import 'key-metric-panels/utilization-rates-panel.libsonnet';
local utilizationRatesPanel = import '../key-metric-panels/utilization-rates-panel.libsonnet';
local metricsCatalog = import 'servicemetrics/metrics-catalog.libsonnet';
local row = grafana.row;
......@@ -110,7 +110,7 @@ local getColumnWidths(
(
if showSaturationCell then
[[
utilizationRatesPanel.panel(
utilizationRatesPanel.timeSeriesPanel(
serviceType,
selectorHash=selectorHashWithShard,
compact=compact,
......
......@@ -185,6 +185,7 @@ local sliDetailErrorRatePanel(
legendFormat='%(sliName)s errors',
intervalFactor=1,
withoutLabels=[],
panelType='graph',
) =
basic.timeseries(
......@@ -290,7 +291,8 @@ local sliDetailErrorRatePanel(
sliName,
selectorHash,
aggregationSets,
minLatency=0.01
minLatency=0.01,
panelType='graph',
)::
local service = metricsCatalog.getService(serviceType);
local sli = service.serviceLevelIndicators[sliName];
......@@ -312,6 +314,7 @@ local sliDetailErrorRatePanel(
layout.singleRow(
std.prune(
[
// todo
if sli.hasHistogramApdex() then
sliDetailLatencyPanel(
title='Estimated %(percentile_humanized)s ' + sliName + ' Latency - ' + aggregationSet.title,
......@@ -325,6 +328,7 @@ local sliDetailErrorRatePanel(
else
null,
// todo
if misc.isPresent(aggregationSet.aggregationLabels) && sli.hasApdex() && std.objectHasAll(sli.apdex, 'apdexAttribution') then
basic.percentageTimeseries(
title='Apdex attribution for ' + sliName + ' Latency - ' + aggregationSet.title,
......@@ -352,6 +356,7 @@ local sliDetailErrorRatePanel(
legendFormat=aggregationSet.legendFormat,
aggregationLabels=aggregationSet.aggregationLabels,
selector=filteredSelectorHash + aggregationSet.selector,
panelType=panelType,
)
else
null,
......
local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
// Alias for grafonnet's row panel builders.
// This must NOT be called `row`: Jsonnet `local` bindings are recursive, so
// inside a function named `row` the identifier `row` refers to that function
// itself. The original code therefore evaluated `row.new(...)` against the
// function instead of this library object and failed as soon as it was called.
local rowPanel = g.panel.row;

// Builds a dashboard row panel.
//
// Parameters:
//   title    - row title, passed to grafonnet's `row.new`.
//   collapse - passed to grafonnet's `row.withCollapsed`; defaults to false.
//
// Returns the grafonnet row object extended with a hidden `addPanels(panels)`
// method, which returns the row with `panels` mixed in via `row.withPanelsMixin`.
local row(
  title,
  collapse=false,
) =
  rowPanel.new(title) +
  rowPanel.withCollapsed(collapse) +
  {
    addPanels(panels)::
      self +
      rowPanel.withPanelsMixin(panels),
  };

{
  row: row,
}
......@@ -18,15 +18,6 @@ local basic(
pointradius=5,
lines=true,
unit=null,
// bars: This doesn't appear to be used in the Timeseries panel.
// decimals: This doesn't look as nice as just leaving it as auto.
// fill: Not sure what this relates to yet.
// legend_hideEmpty: This doesn't appear to be used in the Timeseries panel.
// legend_values: This doesn't appear to be used in the Timeseries panel.
// sort: This doesn't appear to be in the Graph panel JSON output.
// stableId: This doesn't appear to be used in the Timeseries panel.
// stack: This doesn't appear to be used in the Timeseries panel.
// thresholds: This isn't used in the Graph panel tested, but it is in the Timeseries panel; it may be a default that needs to be removed.
) =
local datasourceType =
if dataSource == '$PROMETHEUS_DS' then
......@@ -67,96 +58,100 @@ local basic(
ts.panelOptions.withDescription(description) +
ts.standardOptions.withUnit(unit) +
{
addYaxis(min=null, max=null, label=null, show=true):: self {
addYaxis(min=null, max=null, label=null, show=true)::
local axisPlacement = if show then
'left'
else
'hidden',
'hidden';
fieldConfig+: {
defaults+: {
custom+: {
axisColorMode: 'text',
axisLabel: label,
axisPlacement: axisPlacement,
},
min: min,
max: max,
},
},
},
addDataLink(link):: self {
fieldConfig+: {
defaults+: {
links+: [link],
},
},
},
addSeriesOverride(override):: self {
self +
ts.fieldConfig.defaults.custom.withAxisColorMode('text') +
ts.fieldConfig.defaults.custom.withAxisPlacement(axisPlacement) +
(
if min != null then
ts.standardOptions.withMin(min)
else
{}
) +
(
if max != null then
ts.standardOptions.withMax(max)
else
{}
) +
(
if label != null then
ts.fieldConfig.defaults.custom.withAxisLabel(label)
else
{}
),
addDataLink(link)::
self +
ts.standardOptions.withLinksMixin(link),
addSeriesOverride(override)::
self +
local matcherId =
if std.startsWith(override.alias, '/') && std.endsWith(override.alias, '/') then
'byRegexp'
else
'byName',
'byName';
fieldConfig+: {
overrides+: [
{
matcher: {
id: matcherId,
options: override.alias,
ts.standardOptions.withOverridesMixin({
matcher: {
id: matcherId,
options: override.alias,
},
properties: [
if std.objectHas(override, 'dashes') && override.dashes then
{
id: 'custom.lineStyle',
value: {
dash: [override.dashLength],
fill: 'dash',
},
},
if std.objectHas(override, 'color') then
{
id: 'color',
value: {
fixedColor: override.color,
mode: 'fixed',
},
},
if std.objectHas(override, 'fillBelowTo') then
{
id: 'custom.fillBelowTo',
value: override.fillBelowTo,
},
if std.objectHas(override, 'fillBelowTo') then
{
id: 'custom.fillOpacity',
value: 30,
},
if std.objectHas(override, 'legend') then
{
id: 'custom.hideFrom',
value: {
legend: !override.legend,
tooltip: false,
viz: false,
},
},
if std.objectHas(override, 'linewidth') then
{
id: 'custom.lineWidth',
value: override.linewidth,
},
if std.objectHas(override, 'nullPointMode') && override.nullPointMode == 'connected' then
{
id: 'custom.spanNulls',
value: true,
},
properties: [
if std.objectHas(override, 'dashes') && override.dashes then
{
id: 'custom.lineStyle',
value: {
dash: [override.dashLength],
fill: 'dash',
},
},
if std.objectHas(override, 'color') then
{
id: 'color',
value: {
fixedColor: override.color,
mode: 'fixed',
},
},
if std.objectHas(override, 'fillBelowTo') then
{
id: 'custom.fillBelowTo',
value: override.fillBelowTo,
},
if std.objectHas(override, 'fillBelowTo') then
{
id: 'custom.fillOpacity',
value: 30,
},
if std.objectHas(override, 'legend') then
{
id: 'custom.hideFrom',
value: {
legend: !override.legend,
tooltip: false,
viz: false,
},
},
if std.objectHas(override, 'linewidth') then
{
id: 'custom.lineWidth',
value: override.linewidth,
},
if std.objectHas(override, 'nullPointMode') && override.nullPointMode == 'connected' then
{
id: 'custom.spanNulls',
value: true,
},
],
},
],
},
},
}),
addTarget(target):: self {
targets+: [target],
},
......
local platformLinks = import '../gitlab-dashboards/platform_links.libsonnet';
local panel = import '../grafana/time-series/panel.libsonnet';
local target = import '../grafana/time-series/target.libsonnet';
local basic = import 'grafana/basic.libsonnet';
local promQuery = import 'grafana/prom_query.libsonnet';
local selectors = import 'promql/selectors.libsonnet';
......@@ -56,9 +58,66 @@ local utilizationRatesPanel(
)
+ {
links+: platformLinks.saturationDetails(serviceType),
}
;
};
// Builds the "Saturation" panel for a service using the Timeseries-panel
// helpers (`panel.basic` and `target.prometheus`).
//
// Parameters:
//   serviceType  - service type; merged into the selector as the `type` label
//                  and passed to `platformLinks.saturationDetails` for the
//                  panel links.
//   selectorHash - selector hash object. When it has a `shard` key, series are
//                  additionally aggregated by, and labelled with, the shard.
//   compact      - when true, hides the legend, blanks the Y-axis label and
//                  makes `linewidth` default to 1 instead of 2.
//   stableId     - currently unused (see the TODO below). Defaults to null:
//                  the previous default, `stableId=stableId`, referred to
//                  itself and would have recursed without end if it had ever
//                  been evaluated.
//   linewidth    - line width passed to `panel.basic`.
local utilizationRatesTimeSeriesPanel(
  serviceType,
  selectorHash,
  compact=false,
  stableId=null,
  linewidth=if compact then 1 else 2,
) =
  local hasShardSelector = std.objectHas(selectorHash, 'shard');
  local aggregationLabels = if !hasShardSelector then ['component'] else ['component', 'shard'];
  local legendFormat = if !hasShardSelector then
    '{{ component }} component'
  else
    '{{ component }} component - {{ shard }} shard';

  // Values substituted into the PromQL template below.
  local formatConfig = {
    serviceType: serviceType,
    selector: selectors.serializeHash(selectorHash { type: serviceType }),
    aggregationLabels: std.join(', ', aggregationLabels),
  };

  panel.basic(
    title='Saturation',
    description='Saturation is a measure of what ratio of a finite resource is currently being utilized. Lower is better.',
    // TODO: not passed to the Timeseries panel yet: sort='decreasing'
    legend_show=!compact,
    linewidth=linewidth,
    // TODO: not passed to the Timeseries panel yet: stableId=stableId
    unit='percentunit',
  )
  .addTarget(  // Primary metric
    target.prometheus(
      |||
        max(
          max_over_time(
            gitlab_component_saturation:ratio{%(selector)s}[$__interval]
          )
        ) by (%(aggregationLabels)s)
      ||| % formatConfig,
      legendFormat=legendFormat,
    )
  )
  // TODO: a `.resetYaxes()` call is commented out here, as is the
  // `format='percentunit'` argument to `addYaxis`.
  .addYaxis(
    max=1,
    label=if compact then '' else 'Saturation %',
  )
  // TODO: what is the point of a second axis where show is false?
  // .addYaxis(
  //   format='short',
  //   max=1,
  //   min=0,
  //   show=false,
  // )
  + {
    links+: platformLinks.saturationDetails(serviceType),
  };
// Exported object of this library. Both panel builders are exposed as hidden
// (`::`) fields, so they are callable by importers but omitted from any
// manifested JSON output.
{
panel:: utilizationRatesPanel,
timeSeriesPanel:: utilizationRatesTimeSeriesPanel,
}
{
"datasource": {
"type": "prometheus",
"uid": "$PROMETHEUS_DS"
},
"description": "Apdex is a measure of requests that complete within a tolerable period of time for the service. Higher is better.",
"fieldConfig": {
"defaults": {
"custom": {
"axisColorMode": "text",
"axisGridShow": true,
"axisLabel": "Apdex %",
"axisPlacement": "left",
"lineWidth": 2,
"pointSize": 5,
"showPoints": "never"
},
"links": [
{
"targetBlank": true,
"title": "sentry Service Apdex SLO Analysis",
"url": "/d/alerts-service_slo_apdex?${__url_time_range}&${__all_variables}&var-type=sentry&var-stage=$stage"
}
],
"max": 1,
"unit": "percentunit"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/6h Degradation SLO \\(5% of monthly error budget\\).*/"
},
"properties": [
{
"id": "custom.lineStyle",
"value": {
"dash": [
4
],
"fill": "dash"
}
},
{
"id": "color",
"value": {
"fixedColor": "#FF4500",
"mode": "fixed"
}
},
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": false
}
},
{
"id": "custom.lineWidth",
"value": 2
},
{
"id": "custom.spanNulls",
"value": true
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "/1h Outage SLO \\(2% of monthly error budget\\).*/"
},
"properties": [
{
"id": "custom.lineStyle",
"value": {
"dash": [
4
],
"fill": "dash"
}
},
{
"id": "color",
"value": {
"fixedColor": "#F2495C",
"mode": "fixed"
}
},
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": false
}
},
{
"id": "custom.lineWidth",
"value": 4
},
{
"id": "custom.spanNulls",
"value": true
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "/.*shard (apdex|error ratio|RPS)$/"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E7D551",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "sentry apdex avg"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#5794F2",
"mode": "fixed"
}
},
{
"id": "custom.fillBelowTo",
"value": "sentry apdex"
},
{
"id": "custom.fillOpacity",
"value": 30
},
{
"id": "custom.lineWidth",
"value": 1
}
]
},
{
"matcher": {
"id": "byName",
"options": "last week"
},
"properties": [
{
"id": "custom.lineStyle",
"value": {
"dash": [
4
],
"fill": "dash"
}
},
{
"id": "color",
"value": {
"fixedColor": "#dddddd80",
"mode": "fixed"
}
},
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": false
}
},
{
"id": "custom.lineWidth",
"value": 1
},
{
"id": "custom.spanNulls",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "sentry apdex"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#E7D551",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 9,
"w": 6,
"x": 0,
"y": 1
},
"id": 3,
"options": {
"legend": {
"calcs": [
"min",
"max",
"mean",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
}
},
"pluginVersion": "v11.4.0",
"targets": [
{
"expr": "min_over_time(gitlab_service_apdex:ratio_5m{env=\"ops\",environment=\"ops\",monitor=\"global\",stage=\"$stage\",type=\"sentry\"}[$__interval])\n",
"format": "time_series",
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "sentry apdex"
},
{
"expr": "(1 - 6 * (1 - avg(slo:min:events:gitlab_service_apdex:ratio{component=\"\",monitor=\"global\",type=\"sentry\"})\n))\n",
"format": "time_series",
"interval": "5m",
"intervalFactor": 1,
"legendFormat": "6h Degradation SLO (5% of monthly error budget)"
},
{
"expr": "(1 - 14.4 * (1 - avg(slo:min:events:gitlab_service_apdex:ratio{component=\"\",monitor=\"global\",type=\"sentry\"})\n))\n",
"format": "time_series",
"interval": "5m",
"intervalFactor": 1,
"legendFormat": "1h Outage SLO (2% of monthly error budget)"
},
{
"expr": "avg_over_time(gitlab_service_apdex:ratio_5m{env=\"ops\",environment=\"ops\",monitor=\"global\",stage=\"$stage\",type=\"sentry\"}[$__interval])\n",
"format": "time_series",
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "sentry apdex avg"
},
{
"expr": "clamp_min(\n gitlab_service_apdex:ratio_5m{env=\"ops\",environment=\"ops\",monitor=\"global\",stage=\"$stage\",type=\"sentry\"} offset 1w\n,\n scalar(min((1 - 14.4 * (1 - avg(slo:min:events:gitlab_service_apdex:ratio{component=\"\",monitor=\"global\",type=\"sentry\"})\n))\n))\n)\n",
"format": "time_series",
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "last week"
}
],
"title": "sentry Service Apdex",
"type": "timeseries"
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment