Commit ea53ddc4 authored by Ben Kochie

Add new issue alerts

Split stale chef client and ssl expiring alerts into low and high
severity to open issues for appropriate problems.
parent c52915fe
......@@ -20,6 +20,17 @@ groups:
grafana_dashboard_id: "000000231/chef-client"
runbook: troubleshooting/chef.md
title: Chef client hasn't run for longer than expected
# Lower-severity (s2) stale Chef client rule: matches when the last recorded
# Chef run is more than three days old, and carries the `pager: issue` label.
- alert: ChefClientStale
  # 3 * 86400 seconds = 3 days since chef_client_last_run_timestamp_seconds.
  expr: time() - chef_client_last_run_timestamp_seconds > 3 * 86400
  # The condition must hold for a full hour before the alert fires.
  for: 1h
  annotations:
    title: Chef client hasn't run for longer than expected
    # Folded scalar: the two lines below join with a single space.
    description: >-
      Last Chef run for {{ $labels.fqdn }} was over
      {{ $value | humanizeDuration }} ago
    runbook: troubleshooting/chef.md
    grafana_dashboard_id: '000000231/chef-client'
  labels:
    pager: issue
    severity: s2
- alert: ChefClientErrorCritical
expr: avg(chef_client_error) by (type) * 100 > 10
for: 1h
......
......@@ -2,8 +2,7 @@ groups:
- name: ssl-certificate-expiration.rules
rules:
- alert: SSLCertExpiresSoon
expr: probe_ssl_earliest_cert_expiry{job="blackbox"} - time() < 7 * 60 * 60
* 24
expr: probe_ssl_earliest_cert_expiry{job="blackbox"} - time() < 1 * 86400
for: 30m
labels:
severity: s1
......@@ -13,3 +12,14 @@ groups:
runbook: troubleshooting/ssl_cert.md
title: SSL certificate for {{ $labels.instance }} expires in {{ $value | humanizeDuration
}}
# Lower-severity (s2) certificate expiry rule: matches when the earliest
# certificate expiry reported for job="blackbox" is less than seven days away,
# and carries the `pager: issue` label.
- alert: SSLCertExpiresSoon
  # 7 * 86400 seconds = 7 days of remaining validity.
  expr: probe_ssl_earliest_cert_expiry{job="blackbox"} - time() < 7 * 86400
  # The condition must hold for 30 minutes before the alert fires.
  for: 30m
  annotations:
    # Folded scalar: the two lines below join with a single space.
    title: >-
      SSL certificate for {{ $labels.instance }} expires in
      {{ $value | humanizeDuration }}
    description: Check SSL for specified nodes and consider reissuing certificate.
    runbook: troubleshooting/ssl_cert.md
  labels:
    pager: issue
    severity: s2
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment