diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index cf25622c9..5b3048cb1 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -174,7 +174,7 @@ func main() { a.Flag("rules.alert.for-grace-period", "Minimum duration between alert and restored 'for' state. This is maintained only for alerts with configured 'for' time greater than grace period."). Default("10m").SetValue(&cfg.forGracePeriod) - a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager. Must be lower than resolve_timeout in Alertmanager"). + a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager."). Default("1m").SetValue(&cfg.resendDelay) a.Flag("alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications."). @@ -700,6 +700,8 @@ func sendAlerts(n *notifier.Manager, externalURL string) rules.NotifyFunc { } if !alert.ResolvedAt.IsZero() { a.EndsAt = alert.ResolvedAt + } else { + a.EndsAt = alert.ValidUntil } res = append(res, a) } diff --git a/rules/alerting.go b/rules/alerting.go index a542f89e7..a337936e0 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -89,6 +89,7 @@ type Alert struct { FiredAt time.Time ResolvedAt time.Time LastSentAt time.Time + ValidUntil time.Time } func (a *Alert) needsSending(ts time.Time, resendDelay time.Duration) bool { @@ -440,11 +441,17 @@ func (r *AlertingRule) ForEachActiveAlert(f func(*Alert)) { } } -func (r *AlertingRule) sendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, notifyFunc NotifyFunc) { +func (r *AlertingRule) sendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) { alerts := make([]*Alert, 0) r.ForEachActiveAlert(func(alert *Alert) { if alert.needsSending(ts, resendDelay) { alert.LastSentAt = ts + // Allow for a couple Eval or Alertmanager send failures + delta := resendDelay + if interval > resendDelay { + delta = interval + } + alert.ValidUntil = ts.Add(3 * delta) anew := *alert alerts = append(alerts, &anew) } diff --git a/rules/manager.go b/rules/manager.go index 2a9ac3397..baf18c959 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -393,7 +393,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { } if ar, ok := rule.(*AlertingRule); ok { - ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.opts.NotifyFunc) + ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.interval, g.opts.NotifyFunc) } var ( numOutOfOrder = 0 diff --git a/rules/manager_test.go b/rules/manager_test.go index 66c33fe64..429fcbb05 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -689,6 +689,7 @@ func TestNotify(t *testing.T) { // Alert sent right away group.Eval(ctx, time.Unix(1, 0)) testutil.Equals(t, 1, len(lastNotified)) + testutil.Assert(t, !lastNotified[0].ValidUntil.IsZero(), "ValidUntil should not be zero") // Alert is not sent 1s later group.Eval(ctx, time.Unix(2, 0))