mirror of https://github.com/prometheus/prometheus
add alert for sd refresh failure (#12410)
* add alert for sd refresh failure Due to a config error or the SD service being down, Prometheus may fail to refresh SD resources, which may lead to scrape failures or irrelevant metrics. Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com> * apply suggestions Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com> --------- Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com> pull/12446/head
parent
ac8abdaacd
commit
4268feb9d7
|
@ -20,6 +20,20 @@
|
|||
description: 'Prometheus %(prometheusName)s has failed to reload its configuration.' % $._config,
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusSDRefreshFailure',
|
||||
expr: |||
|
||||
increase(prometheus_sd_refresh_failures_total{%(prometheusSelector)s}[10m]) > 0
|
||||
||| % $._config,
|
||||
'for': '20m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
summary: 'Failed Prometheus SD refresh.',
|
||||
description: 'Prometheus %(prometheusName)s has failed to refresh SD with mechanism {{$labels.mechanism}}.' % $._config,
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusNotificationQueueRunningFull',
|
||||
expr: |||
|
||||
|
|
Loading…
Reference in New Issue