Skip to content
Snippets Groups Projects
Commit 312c867b authored by Andrew Newdigate's avatar Andrew Newdigate :speech_balloon:
Browse files
parent 3c60f2b1
No related branches found
No related tags found
1 merge request: !1328 RCA Dashboard for https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/7543
......@@ -220,4 +220,48 @@ local seriesOverrides = import 'series_overrides.libsonnet';
show=false,
),
// Builds a graph panel with one "send" and one "receive" series.
//
//   title, description       - panel heading and description text.
//   sendQuery, receiveQuery  - query expressions handed to promQuery.target;
//                              neither has a default, so callers must pass both.
//   legendFormat             - legend template; each series prepends its
//                              direction ('send ' / 'receive ') to it.
//   legend_show              - whether the legend is displayed.
networkTrafficGraph(
  title="Node Network Utilization",
  description="Network utilization",
  sendQuery,
  legendFormat='{{ fqdn }}',
  receiveQuery,
  legend_show=true
)::
  // Both directions use the same target settings and differ only in the
  // query and the legend prefix.
  local directionTarget(prefix, query) = promQuery.target(
    query,
    legendFormat=prefix + legendFormat,
    intervalFactor=5,
  );

  local panel = graphPanel.new(
    title,
    linewidth=1,
    fill=0,
    description=description,
    datasource="$PROMETHEUS_DS",
    decimals=2,
    sort="decreasing",
    legend_show=legend_show,
    legend_values=false,
    legend_alignAsTable=false,
    legend_hideEmpty=true,
  );

  panel
  .addSeriesOverride(seriesOverrides.networkReceive)
  .addTarget(directionTarget('send ', sendQuery))
  .addTarget(directionTarget('receive ', receiveQuery))
  // Drop the default axes, then declare a visible bytes-per-second axis and
  // a second axis that is not shown.
  .resetYaxes()
  .addYaxis(
    format='Bps',
    label="Network utilization",
  )
  .addYaxis(
    format='short',
    max=1,
    min=0,
    show=false,
  )
}
local grafana = import 'grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
// local seriesOverrides = import 'series_overrides.libsonnet';
local commonAnnotations = import 'common_annotations.libsonnet';
// local promQuery = import 'prom_query.libsonnet';
local templates = import 'templates.libsonnet';
// local colors = import 'colors.libsonnet';
// local platformLinks = import 'platform_links.libsonnet';
// local capacityPlanning = import 'capacity_planning.libsonnet';
local layout = import 'layout.libsonnet';
local basic = import 'basic.libsonnet';
// local redisCommon = import 'redis_common_graphs.libsonnet';
// local nodeMetrics = import 'node_metrics.libsonnet';
// local keyMetrics = import 'key_metrics.libsonnet';
// local serviceCatalog = import 'service_catalog.libsonnet';
// local row = grafana.row;
// local template = grafana.template;
// local graphPanel = grafana.graphPanel;
// local annotation = grafana.annotation;
local text = grafana.text;
// RCA dashboard for the long degradation that followed the postgres failover
// on 2019-08-14. It shows a short explanatory text panel next to a graph of
// loopback ("lo") traffic on patroni-01, and overlays deployment, Pagerduty
// and hand-written "Key Events" annotations.
dashboard.new(
  // The incident date must agree with the Key Events annotation below, which
  // is dated 2019-08-14; a 2018 window would never contain it.
  '2019-08-14 long degradation after postgres failover',
  schemaVersion=16,
  tags=['rca'],
  timezone='UTC',
  graphTooltip='shared_crosshair',
  time_from='2019-08-14 06:00:00',
  time_to='2019-08-14 22:00:00',
)
.addAnnotation(commonAnnotations.deploymentsForEnvironment)
.addAnnotation(commonAnnotations.deploymentsForEnvironmentCanary)
.addTemplate(templates.ds)
.addTemplate(templates.environment)
.addPanels(layout.grid([
  text.new(
    title='Loopback traffic on patroni-01',
    mode='markdown',
    // A ||| text block strips the common leading indentation, so the markdown
    // body does not pick up the source file's indentation.
    content=|||
      Many of the logs on patroni-01 report losing connectivity to services running on localhost. This includes Patroni talking to Postgres and Patroni talking to Consul.
    |||
  ),
  basic.networkTrafficGraph(
    title="Loopback traffic on patroni-01",
    sendQuery=|||
      rate(node_network_transmit_bytes_total{device="lo",fqdn="patroni-01-db-gprd.c.gitlab-production.internal"}[$__interval])
    |||,
    receiveQuery=|||
      rate(node_network_receive_bytes_total{device="lo",fqdn="patroni-01-db-gprd.c.gitlab-production.internal"}[$__interval])
    |||,
  ),
], cols=2, rowHeight=10, startRow=1))
+ {
  // `annotations+:` merges into the annotations object the dashboard already
  // carries instead of replacing it. With a plain `annotations:` the nested
  // `list+:` would have nothing to extend, and the entries registered through
  // addAnnotation above would be lost (assuming grafonnet renders them under
  // annotations.list -- confirm against the vendored grafonnet version).
  annotations+: {
    list+: [
      {
        "datasource": "Pagerduty",
        "enable": true,
        "hide": false,
        "iconColor": "#F2495C",
        "limit": 100,
        "name": "GitLab Production Pagerduty",
        "serviceId": "PATDFCE",
        "showIn": 0,
        "tags": [],
        "type": "tags",
        "urgency": "high"
      },
      {
        "datasource": "Pagerduty",
        "enable": true,
        "hide": false,
        "iconColor": "#C4162A",
        "limit": 100,
        "name": "GitLab Production SLO",
        "serviceId": "P7Q44DU",
        "showIn": 0,
        "tags": [],
        "type": "tags",
        "urgency": "high"
      },
      {
        "datasource": "Simple Annotations",
        "enable": true,
        "hide": false,
        "iconColor": "#5794F2",
        "limit": 100,
        "name": "Key Events",
        // To be completed...
        "queries": [
          { date: "2019-08-14T08:25Z", text: "The patroni postgres cluster manager on the primary database instance (pg01) reports 'ERROR: get_cluster'" },
        ],
        "showIn": 0,
        "tags": [],
        "type": "tags"
      },
    ],
  },
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment