From e248ffb220d3a3371877188eba0687a2da841e61 Mon Sep 17 00:00:00 2001
From: Tom Wilkie
Date: Tue, 12 Feb 2019 15:22:58 +0000
Subject: [PATCH] Add alert for WAL remote write falling behind.

Signed-off-by: Tom Wilkie
---
 documentation/prometheus-mixin/alerts.libsonnet | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

Reviewer notes (text in this region, after the three-dash line and before the
diff header, is commentary and is not part of the commit):

* Adds one rule, PrometheusRemoteWriteBehind, to alerts.libsonnet. It is
  inserted after the rule whose message is 'Prometheus failed to send ...
  samples' and before PrometheusRuleFailures.
* Expression: prometheus_remote_storage_highest_timestamp_in minus
  prometheus_remote_storage_queue_highest_sent_timestamp, matched on
  (job, instance) with group_right, so the right-hand (sent-timestamp) side
  may contribute several series per job/instance pair.
* Both selectors contain the %(prometheusSelector)s placeholder, which is
  filled by applying the `%` format operator with $._config to the text block.
* The rule uses severity 'critical' and 'for': '15m', with a threshold of
  "> 60" on the difference. The annotation prints the value with an "s" suffix.
* NOTE(review): presumably both metrics are timestamps in seconds (newest
  sample ingested vs. newest sample sent per queue) -- confirm against the
  metric definitions; neither the names nor the units can be verified from
  this patch alone.
* NOTE(review): line breaks and indentation here were reconstructed from a
  whitespace-collapsed copy of the patch. The indentation of the context and
  added lines is a best guess -- TODO confirm against the target file (the
  context lines may need `git apply --ignore-whitespace` to match).

diff --git a/documentation/prometheus-mixin/alerts.libsonnet b/documentation/prometheus-mixin/alerts.libsonnet
index 4c66c4a9b..08b43b4ab 100644
--- a/documentation/prometheus-mixin/alerts.libsonnet
+++ b/documentation/prometheus-mixin/alerts.libsonnet
@@ -156,6 +156,22 @@
               message: 'Prometheus failed to send {{ printf "%.1f" $value }}% samples',
             },
           },
+          {
+            alert: 'PrometheusRemoteWriteBehind',
+            expr: |||
+              prometheus_remote_storage_highest_timestamp_in{%(prometheusSelector)s}
+              - on(job, instance) group_right
+              prometheus_remote_storage_queue_highest_sent_timestamp{%(prometheusSelector)s}
+              > 60
+            ||| % $._config,
+            'for': '15m',
+            labels: {
+              severity: 'critical',
+            },
+            annotations: {
+              message: 'Prometheus remote write is {{ printf "%.1f" $value }}s behind.',
+            },
+          },
           {
             alert: 'PrometheusRuleFailures',
             'for': '15m',