summary: 'Prometheus encounters more than 3% errors sending alerts to any Alertmanager.',
description: '{{ printf "%%.1f" $value }}%% minimum errors while sending alerts from Prometheus %(prometheusName)s to any Alertmanager.' % $._config,
},
},
{
alert: 'PrometheusNotConnectedToAlertmanagers',
expr: |||
@ -281,6 +261,123 @@
description: 'Prometheus %(prometheusName)s has dropped {{ printf "%%.0f" $value }} targets because the number of targets exceeded the configured target_limit.' % $._config,
},
},
] + if $._config.prometheusHAGroupLabels == '' then self.rulesWithoutHA else self.rulesWithHA,
summary: 'Each Prometheus server in an HA group encounters more than 3% errors sending alerts to any Alertmanager.',
description: '{{ printf "%%.1f" $value }}%% minimum errors while sending alerts from any Prometheus server in HA group %(prometheusHAGroupName)s to any Alertmanager.' % $._config,
summary: 'More than half of the Prometheus instances within the same HA group are down.',
description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have been up for less than half of the last 5m.' % $._config,
summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.',
description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have restarted at least 5 times in the last 30m.' % $._config,