Increase time range for PrometheusHAGroupCrashlooping alert

Signed-off-by: Niko Smeds <nikosmeds@gmail.com>
2021-11-08 15:05:15 -08:00 · 2021-11-08 15:05:15 -08:00 · fdcd423dfe
parent 5e06527190
commit fdcd423dfe
1 changed file with 3 additions and 3 deletions
--- a/documentation/prometheus-mixin/alerts.libsonnet
+++ b/documentation/prometheus-mixin/alerts.libsonnet
@@ -391,7 +391,7 @@
                and
                  ( 
                    count by (%(prometheusHAGroupLabels)s) (
-                      changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 1
+                      changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 1
                    ) 
                    / 
                    count by (%(prometheusHAGroupLabels)s) (
@@ -403,7 +403,7 @@
              or
              (
                count by (%(prometheusHAGroupLabels)s) (
-                  changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 4
+                  changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 4
                )
              /
                count by (%(prometheusHAGroupLabels)s) (
@@ -418,7 +418,7 @@
            },
            annotations: {
              summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.',
-              description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 30m.' % $._config,
+              description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 1h.' % $._config,
            },
          },
        ],