From 3b756148922775906b8746aa93ffc30e7efad254 Mon Sep 17 00:00:00 2001
From: Callum Styan
Date: Fri, 2 Aug 2019 17:39:32 -0700
Subject: [PATCH] Add a warning alert, since the remote write behind alert will
 probably already be going off, about desired shards being higher than max
 shards.

Signed-off-by: Callum Styan
---
 .../prometheus-mixin/alerts.libsonnet         | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/documentation/prometheus-mixin/alerts.libsonnet b/documentation/prometheus-mixin/alerts.libsonnet
index 06c527457..0cb52901d 100644
--- a/documentation/prometheus-mixin/alerts.libsonnet
+++ b/documentation/prometheus-mixin/alerts.libsonnet
@@ -225,6 +225,27 @@
               description: 'Prometheus %(prometheusName)s remote write is {{ printf "%%.1f" $value }}s behind for queue {{$labels.queue}}.' % $._config,
             },
           },
+          {
+            alert: 'PrometheusRemoteWriteDesiredShards',
+            expr: |||
+              # Without max_over_time, failed scrapes could create false negatives, see
+              # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+              # The filtering comparison keeps the left-hand sample, so the alert value is the desired shard count.
+              (
+                max_over_time(prometheus_remote_storage_shards_desired{%(prometheusSelector)s}[5m])
+              >
+                max_over_time(prometheus_remote_storage_shards_max{%(prometheusSelector)s}[5m])
+              )
+            ||| % $._config,
+            'for': '15m',
+            labels: {
+              severity: 'warning',
+            },
+            annotations: {
+              summary: 'Prometheus remote write desired shards calculation wants to run more than configured max shards.',
+              description: 'Prometheus %(prometheusName)s remote write desired shards calculation wants to run {{ $value }} shards for queue {{$labels.queue}}, which is more than the configured max shards.' % $._config,
+            },
+          },
           {
             alert: 'PrometheusRuleFailures',
             expr: |||