
Merge pull request #1373 from prometheus/fix-flapping-alert-detection

Fix detection of flapping alerts
Fabian Reinartz, 9 years ago (commit facabe254f)

Changed files:
  1. rules/alerting.go (4 changed lines)
  2. rules/manager_test.go (33 changed lines)

rules/alerting.go (4 changed lines)

@@ -39,7 +39,7 @@ const (
 type AlertState int

 const (
-    // StateInactive is the state of an alert that is either firing nor pending.
+    // StateInactive is the state of an alert that is neither firing nor pending.
     StateInactive AlertState = iota
     // StatePending is the state of an alert that has been active for less than
     // the configured threshold duration.

@@ -159,7 +159,7 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
     fp := smpl.Metric.Fingerprint()
     resultFPs[fp] = struct{}{}

-    if alert, ok := r.active[fp]; ok {
+    if alert, ok := r.active[fp]; ok && alert.State != StateInactive {
         alert.Value = smpl.Value
         continue
     }
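Why this guard is needed: the new condition implies that a resolved alert can remain in r.active for a while after it clears, so it can still be reported as resolved. Before this change, a series that started violating the rule again during that window merely had its value updated on the stale, inactive entry and never re-entered the pending state, so a flapping alert was detected only once. Below is a minimal, self-contained Go sketch of that state machine, not the Prometheus code: the < 100 threshold, the 5-minute step, the one-step hold duration, and the names (alert, holdSteps, stateName) are assumptions chosen to match the test series introduced further down.

// Sketch only: models the reuse-or-recreate decision this change fixes,
// against the canary instance "0" series from the updated test.
package main

import "fmt"

type AlertState int

const (
	StateInactive AlertState = iota
	StatePending
	StateFiring
)

type alert struct {
	state    AlertState
	activeAt int // step at which the current activation began
}

func stateName(s AlertState) string {
	switch s {
	case StatePending:
		return "pending"
	case StateFiring:
		return "firing"
	default:
		return "inactive"
	}
}

func main() {
	const holdSteps = 1 // condition must hold this many steps before firing (the "for" clause)

	// One value per 5-minute evaluation step, as in the test data below.
	series := []float64{75, 85, 95, 105, 105, 95, 85}
	threshold := 100.0

	active := map[string]*alert{}

	for step, v := range series {
		if v < threshold {
			a, ok := active["instance0"]
			// The fixed condition: only reuse an existing entry if it is not a
			// retained, already-resolved alert. Dropping the state check here
			// reproduces the bug: after t=15m the entry stays inactive forever.
			if !(ok && a.state != StateInactive) {
				a = &alert{state: StatePending, activeAt: step}
				active["instance0"] = a
			}
			if a.state == StatePending && step-a.activeAt >= holdSteps {
				a.state = StateFiring
			}
		} else if a, ok := active["instance0"]; ok {
			// The series no longer violates the threshold: mark the alert
			// resolved but keep the entry, mimicking resolved-alert retention.
			a.state = StateInactive
		}

		state := "none"
		if a, ok := active["instance0"]; ok {
			state = stateName(a.state)
		}
		fmt.Printf("t=%2dm value=%3.0f state=%s\n", step*5, v, state)
	}
}

Running the sketch prints pending, firing, firing, inactive, inactive, pending, firing for t = 0m through 30m, which is the timeline the updated test asserts; removing the state check leaves the entry inactive from t=15m onward.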

rules/manager_test.go (33 changed lines)

@@ -27,14 +27,8 @@ import (
 func TestAlertingRule(t *testing.T) {
     suite, err := promql.NewTest(t, `
         load 5m
-            http_requests{job="api-server", instance="0", group="production"} 0+10x10
-            http_requests{job="api-server", instance="1", group="production"} 0+20x10
-            http_requests{job="api-server", instance="0", group="canary"}     0+30x10
-            http_requests{job="api-server", instance="1", group="canary"}     0+40x10
-            http_requests{job="app-server", instance="0", group="production"} 0+50x10
-            http_requests{job="app-server", instance="1", group="production"} 0+60x10
-            http_requests{job="app-server", instance="0", group="canary"}     0+70x10
-            http_requests{job="app-server", instance="1", group="canary"}     0+80x10
+            http_requests{job="app-server", instance="0", group="canary"}     75 85 95 105 105 95 85
+            http_requests{job="app-server", instance="1", group="canary"}     80 90 100 110 120 130 140
     `)
     if err != nil {
         t.Fatal(err)

@@ -79,17 +73,32 @@ func TestAlertingRule(t *testing.T) {
     }, {
         time: 10 * time.Minute,
         result: []string{
+            `ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
             `ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
-            `ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
         },
     },
     {
         time: 15 * time.Minute,
-        result: nil,
+        result: []string{
+            `ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
+        },
     },
     {
         time: 20 * time.Minute,
-        result: nil,
+        result: []string{},
+    },
+    {
+        time: 25 * time.Minute,
+        result: []string{
+            `ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
+        },
+    },
+    {
+        time: 30 * time.Minute,
+        result: []string{
+            `ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
+            `ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
+        },
     },
 }
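Read against the replaced series (75 85 95 105 105 95 85 for instance "0"), the new expectations trace a full flap: still firing at 10m, reported as resolved at 15m, silent at 20m while the resolved alert is retained, then, thanks to the StateInactive check, pending again at 25m and firing again at 30m. Instance "1" (80 90 100 110 ...) stops matching at 10m and never returns, which is why it disappears from the later cases.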
