mirror of https://github.com/prometheus/prometheus
Merge pull request #11827 from roidelapluie/stabilize
Add 'keep_firing_for' field to alerting rulespull/11889/head
commit
aeecf6854f
|
@ -123,6 +123,10 @@ expr: <string>
|
|||
# Alerts which have not yet fired for long enough are considered pending.
|
||||
[ for: <duration> | default = 0s ]
|
||||
|
||||
# How long an alert will continue firing after the condition that triggered it
|
||||
# has cleared.
|
||||
[ keep_firing_for: <duration> | default = 0s ]
|
||||
|
||||
# Labels to add or overwrite for each alert.
|
||||
labels:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
|
|
|
@ -143,22 +143,24 @@ type RuleGroup struct {
|
|||
|
||||
// Rule describes an alerting or recording rule.
|
||||
type Rule struct {
|
||||
Record string `yaml:"record,omitempty"`
|
||||
Alert string `yaml:"alert,omitempty"`
|
||||
Expr string `yaml:"expr"`
|
||||
For model.Duration `yaml:"for,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
Record string `yaml:"record,omitempty"`
|
||||
Alert string `yaml:"alert,omitempty"`
|
||||
Expr string `yaml:"expr"`
|
||||
For model.Duration `yaml:"for,omitempty"`
|
||||
KeepFiringFor model.Duration `yaml:"keep_firing_for,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
}
|
||||
|
||||
// RuleNode adds yaml.v3 layer to support line and column outputs for invalid rules.
|
||||
type RuleNode struct {
|
||||
Record yaml.Node `yaml:"record,omitempty"`
|
||||
Alert yaml.Node `yaml:"alert,omitempty"`
|
||||
Expr yaml.Node `yaml:"expr"`
|
||||
For model.Duration `yaml:"for,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
Record yaml.Node `yaml:"record,omitempty"`
|
||||
Alert yaml.Node `yaml:"alert,omitempty"`
|
||||
Expr yaml.Node `yaml:"expr"`
|
||||
For model.Duration `yaml:"for,omitempty"`
|
||||
KeepFiringFor model.Duration `yaml:"keep_firing_for,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Annotations map[string]string `yaml:"annotations,omitempty"`
|
||||
}
|
||||
|
||||
// Validate the rule and return a list of encountered errors.
|
||||
|
@ -208,6 +210,12 @@ func (r *RuleNode) Validate() (nodes []WrappedError) {
|
|||
node: &r.Record,
|
||||
})
|
||||
}
|
||||
if r.KeepFiringFor != 0 {
|
||||
nodes = append(nodes, WrappedError{
|
||||
err: fmt.Errorf("invalid field 'keep_firing_for' in recording rule"),
|
||||
node: &r.Record,
|
||||
})
|
||||
}
|
||||
if !model.IsValidMetricName(model.LabelValue(r.Record.Value)) {
|
||||
nodes = append(nodes, WrappedError{
|
||||
err: fmt.Errorf("invalid recording rule name: %s", r.Record.Value),
|
||||
|
|
|
@ -73,12 +73,21 @@ func TestParseFileFailure(t *testing.T) {
|
|||
filename: "invalid_label_name.bad.yaml",
|
||||
errMsg: "invalid label name",
|
||||
},
|
||||
{
|
||||
filename: "record_and_for.bad.yaml",
|
||||
errMsg: "invalid field 'for' in recording rule",
|
||||
},
|
||||
{
|
||||
filename: "record_and_keep_firing_for.bad.yaml",
|
||||
errMsg: "invalid field 'keep_firing_for' in recording rule",
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range table {
|
||||
_, errs := ParseFile(filepath.Join("testdata", c.filename))
|
||||
require.NotNil(t, errs, "Expected error parsing %s but got none", c.filename)
|
||||
require.Error(t, errs[0], c.errMsg, "Expected error for %s.", c.filename)
|
||||
require.Error(t, errs[0])
|
||||
require.Containsf(t, errs[0].Error(), c.errMsg, "Expected error for %s.", c.filename)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
groups:
|
||||
- name: yolo
|
||||
rules:
|
||||
- record: Hello
|
||||
expr: 1
|
||||
for: 1m
|
|
@ -0,0 +1,6 @@
|
|||
groups:
|
||||
- name: yolo
|
||||
rules:
|
||||
- record: Hello
|
||||
expr: 1
|
||||
keep_firing_for: 1m
|
|
@ -83,11 +83,12 @@ type Alert struct {
|
|||
Value float64
|
||||
// The interval during which the condition of this alert held true.
|
||||
// ResolvedAt will be 0 to indicate a still active alert.
|
||||
ActiveAt time.Time
|
||||
FiredAt time.Time
|
||||
ResolvedAt time.Time
|
||||
LastSentAt time.Time
|
||||
ValidUntil time.Time
|
||||
ActiveAt time.Time
|
||||
FiredAt time.Time
|
||||
ResolvedAt time.Time
|
||||
LastSentAt time.Time
|
||||
ValidUntil time.Time
|
||||
KeepFiringSince time.Time
|
||||
}
|
||||
|
||||
func (a *Alert) needsSending(ts time.Time, resendDelay time.Duration) bool {
|
||||
|
@ -112,6 +113,9 @@ type AlertingRule struct {
|
|||
// The duration for which a labelset needs to persist in the expression
|
||||
// output vector before an alert transitions from Pending to Firing state.
|
||||
holdDuration time.Duration
|
||||
// The amount of time that the alert should remain firing after the
|
||||
// resolution.
|
||||
keepFiringFor time.Duration
|
||||
// Extra labels to attach to the resulting alert sample vectors.
|
||||
labels labels.Labels
|
||||
// Non-identifying key/value pairs.
|
||||
|
@ -142,7 +146,7 @@ type AlertingRule struct {
|
|||
|
||||
// NewAlertingRule constructs a new AlertingRule.
|
||||
func NewAlertingRule(
|
||||
name string, vec parser.Expr, hold time.Duration,
|
||||
name string, vec parser.Expr, hold, keepFiringFor time.Duration,
|
||||
labels, annotations, externalLabels labels.Labels, externalURL string,
|
||||
restored bool, logger log.Logger,
|
||||
) *AlertingRule {
|
||||
|
@ -152,6 +156,7 @@ func NewAlertingRule(
|
|||
name: name,
|
||||
vector: vec,
|
||||
holdDuration: hold,
|
||||
keepFiringFor: keepFiringFor,
|
||||
labels: labels,
|
||||
annotations: annotations,
|
||||
externalLabels: el,
|
||||
|
@ -201,6 +206,12 @@ func (r *AlertingRule) HoldDuration() time.Duration {
|
|||
return r.holdDuration
|
||||
}
|
||||
|
||||
// KeepFiringFor returns the duration an alerting rule should keep firing for
|
||||
// after resolution.
|
||||
func (r *AlertingRule) KeepFiringFor() time.Duration {
|
||||
return r.keepFiringFor
|
||||
}
|
||||
|
||||
// Labels returns the labels of the alerting rule.
|
||||
func (r *AlertingRule) Labels() labels.Labels {
|
||||
return r.labels
|
||||
|
@ -404,16 +415,36 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc,
|
|||
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
|
||||
for fp, a := range r.active {
|
||||
if _, ok := resultFPs[fp]; !ok {
|
||||
// There is no firing alerts for this fingerprint. The alert is no
|
||||
// longer firing.
|
||||
|
||||
// Use keepFiringFor value to determine if the alert should keep
|
||||
// firing.
|
||||
var keepFiring bool
|
||||
if a.State == StateFiring && r.keepFiringFor > 0 {
|
||||
if a.KeepFiringSince.IsZero() {
|
||||
a.KeepFiringSince = ts
|
||||
}
|
||||
if ts.Sub(a.KeepFiringSince) < r.keepFiringFor {
|
||||
keepFiring = true
|
||||
}
|
||||
}
|
||||
|
||||
// If the alert was previously firing, keep it around for a given
|
||||
// retention time so it is reported as resolved to the AlertManager.
|
||||
if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) {
|
||||
delete(r.active, fp)
|
||||
}
|
||||
if a.State != StateInactive {
|
||||
if a.State != StateInactive && !keepFiring {
|
||||
a.State = StateInactive
|
||||
a.ResolvedAt = ts
|
||||
}
|
||||
continue
|
||||
if !keepFiring {
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
// The alert is firing, reset keepFiringSince.
|
||||
a.KeepFiringSince = time.Time{}
|
||||
}
|
||||
numActivePending++
|
||||
|
||||
|
@ -512,11 +543,12 @@ func (r *AlertingRule) sendAlerts(ctx context.Context, ts time.Time, resendDelay
|
|||
|
||||
func (r *AlertingRule) String() string {
|
||||
ar := rulefmt.Rule{
|
||||
Alert: r.name,
|
||||
Expr: r.vector.String(),
|
||||
For: model.Duration(r.holdDuration),
|
||||
Labels: r.labels.Map(),
|
||||
Annotations: r.annotations.Map(),
|
||||
Alert: r.name,
|
||||
Expr: r.vector.String(),
|
||||
For: model.Duration(r.holdDuration),
|
||||
KeepFiringFor: model.Duration(r.keepFiringFor),
|
||||
Labels: r.labels.Map(),
|
||||
Annotations: r.annotations.Map(),
|
||||
}
|
||||
|
||||
byt, err := yaml.Marshal(ar)
|
||||
|
|
|
@ -66,7 +66,7 @@ func TestAlertingRuleState(t *testing.T) {
|
|||
}
|
||||
|
||||
for i, test := range tests {
|
||||
rule := NewAlertingRule(test.name, nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil)
|
||||
rule := NewAlertingRule(test.name, nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil)
|
||||
rule.active = test.active
|
||||
got := rule.State()
|
||||
require.Equal(t, test.want, got, "test case %d unexpected AlertState, want:%d got:%d", i, test.want, got)
|
||||
|
@ -76,7 +76,7 @@ func TestAlertingRuleState(t *testing.T) {
|
|||
func TestAlertingRuleLabelsUpdate(t *testing.T) {
|
||||
suite, err := promql.NewTest(t, `
|
||||
load 1m
|
||||
http_requests{job="app-server", instance="0"} 75 85 70 70
|
||||
http_requests{job="app-server", instance="0"} 75 85 70 70 stale
|
||||
`)
|
||||
require.NoError(t, err)
|
||||
defer suite.Close()
|
||||
|
@ -90,6 +90,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) {
|
|||
"HTTPRequestRateLow",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
// Basing alerting rule labels off of a value that can change is a very bad idea.
|
||||
// If an alert is going back and forth between two label values it will never fire.
|
||||
// Instead, you should write two alerts with constant labels.
|
||||
|
@ -173,6 +174,10 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) {
|
|||
|
||||
require.Equal(t, result, filteredRes)
|
||||
}
|
||||
evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute)
|
||||
res, err := rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, len(res))
|
||||
}
|
||||
|
||||
func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) {
|
||||
|
@ -192,6 +197,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) {
|
|||
"ExternalLabelDoesNotExist",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("templated_label", "There are {{ len $externalLabels }} external Labels, of which foo is {{ $externalLabels.foo }}."),
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(),
|
||||
|
@ -202,6 +208,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) {
|
|||
"ExternalLabelExists",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("templated_label", "There are {{ len $externalLabels }} external Labels, of which foo is {{ $externalLabels.foo }}."),
|
||||
labels.EmptyLabels(),
|
||||
labels.FromStrings("foo", "bar", "dings", "bums"),
|
||||
|
@ -286,6 +293,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) {
|
|||
"ExternalURLDoesNotExist",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("templated_label", "The external URL is {{ $externalURL }}."),
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(),
|
||||
|
@ -296,6 +304,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) {
|
|||
"ExternalURLExists",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("templated_label", "The external URL is {{ $externalURL }}."),
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(),
|
||||
|
@ -380,6 +389,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) {
|
|||
"EmptyLabel",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("empty_label", ""),
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(),
|
||||
|
@ -436,6 +446,7 @@ func TestAlertingRuleQueryInTemplate(t *testing.T) {
|
|||
"ruleWithQueryInTemplate",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("label", "value"),
|
||||
labels.FromStrings("templated_label", `{{- with "sort(sum(http_requests) by (instance))" | query -}}
|
||||
{{- range $i,$v := . -}}
|
||||
|
@ -480,7 +491,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }};
|
|||
|
||||
func BenchmarkAlertingRuleAtomicField(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
rule := NewAlertingRule("bench", nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil)
|
||||
rule := NewAlertingRule("bench", nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil)
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
for i := 0; i < b.N; i++ {
|
||||
|
@ -518,6 +529,7 @@ func TestAlertingRuleDuplicate(t *testing.T) {
|
|||
"foo",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("test", "test"),
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(),
|
||||
|
@ -564,6 +576,7 @@ func TestAlertingRuleLimit(t *testing.T) {
|
|||
"foo",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("test", "test"),
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(),
|
||||
|
@ -636,6 +649,7 @@ func TestQueryForStateSeries(t *testing.T) {
|
|||
"TestRule",
|
||||
nil,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("severity", "critical"),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
|
||||
)
|
||||
|
@ -669,6 +683,7 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) {
|
|||
"TestRule",
|
||||
nil,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("severity", "critical"),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
|
||||
)
|
||||
|
@ -712,3 +727,171 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) {
|
|||
// But the labels with the AlertingRule should not be changed.
|
||||
require.Equal(t, labels.FromStrings("a1", "1"), rule.active[h].Labels)
|
||||
}
|
||||
|
||||
func TestKeepFiringFor(t *testing.T) {
|
||||
suite, err := promql.NewTest(t, `
|
||||
load 1m
|
||||
http_requests{job="app-server", instance="0"} 75 85 70 70 10x5
|
||||
`)
|
||||
require.NoError(t, err)
|
||||
defer suite.Close()
|
||||
|
||||
require.NoError(t, suite.Run())
|
||||
|
||||
expr, err := parser.ParseExpr(`http_requests > 50`)
|
||||
require.NoError(t, err)
|
||||
|
||||
rule := NewAlertingRule(
|
||||
"HTTPRequestRateHigh",
|
||||
expr,
|
||||
time.Minute,
|
||||
time.Minute,
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
|
||||
)
|
||||
|
||||
results := []promql.Vector{
|
||||
{
|
||||
promql.Sample{
|
||||
Metric: labels.FromStrings(
|
||||
"__name__", "ALERTS",
|
||||
"alertname", "HTTPRequestRateHigh",
|
||||
"alertstate", "pending",
|
||||
"instance", "0",
|
||||
"job", "app-server",
|
||||
),
|
||||
Point: promql.Point{V: 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
promql.Sample{
|
||||
Metric: labels.FromStrings(
|
||||
"__name__", "ALERTS",
|
||||
"alertname", "HTTPRequestRateHigh",
|
||||
"alertstate", "firing",
|
||||
"instance", "0",
|
||||
"job", "app-server",
|
||||
),
|
||||
Point: promql.Point{V: 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
promql.Sample{
|
||||
Metric: labels.FromStrings(
|
||||
"__name__", "ALERTS",
|
||||
"alertname", "HTTPRequestRateHigh",
|
||||
"alertstate", "firing",
|
||||
"instance", "0",
|
||||
"job", "app-server",
|
||||
),
|
||||
Point: promql.Point{V: 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
promql.Sample{
|
||||
Metric: labels.FromStrings(
|
||||
"__name__", "ALERTS",
|
||||
"alertname", "HTTPRequestRateHigh",
|
||||
"alertstate", "firing",
|
||||
"instance", "0",
|
||||
"job", "app-server",
|
||||
),
|
||||
Point: promql.Point{V: 1},
|
||||
},
|
||||
},
|
||||
// From now on the alert should keep firing.
|
||||
{
|
||||
promql.Sample{
|
||||
Metric: labels.FromStrings(
|
||||
"__name__", "ALERTS",
|
||||
"alertname", "HTTPRequestRateHigh",
|
||||
"alertstate", "firing",
|
||||
"instance", "0",
|
||||
"job", "app-server",
|
||||
),
|
||||
Point: promql.Point{V: 1},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
baseTime := time.Unix(0, 0)
|
||||
for i, result := range results {
|
||||
t.Logf("case %d", i)
|
||||
evalTime := baseTime.Add(time.Duration(i) * time.Minute)
|
||||
result[0].Point.T = timestamp.FromTime(evalTime)
|
||||
res, err := rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
|
||||
require.NoError(t, err)
|
||||
|
||||
var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples.
|
||||
for _, smpl := range res {
|
||||
smplName := smpl.Metric.Get("__name__")
|
||||
if smplName == "ALERTS" {
|
||||
filteredRes = append(filteredRes, smpl)
|
||||
} else {
|
||||
// If not 'ALERTS', it has to be 'ALERTS_FOR_STATE'.
|
||||
require.Equal(t, "ALERTS_FOR_STATE", smplName)
|
||||
}
|
||||
}
|
||||
|
||||
require.Equal(t, result, filteredRes)
|
||||
}
|
||||
evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute)
|
||||
res, err := rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, len(res))
|
||||
}
|
||||
|
||||
func TestPendingAndKeepFiringFor(t *testing.T) {
|
||||
suite, err := promql.NewTest(t, `
|
||||
load 1m
|
||||
http_requests{job="app-server", instance="0"} 75 10x10
|
||||
`)
|
||||
require.NoError(t, err)
|
||||
defer suite.Close()
|
||||
|
||||
require.NoError(t, suite.Run())
|
||||
|
||||
expr, err := parser.ParseExpr(`http_requests > 50`)
|
||||
require.NoError(t, err)
|
||||
|
||||
rule := NewAlertingRule(
|
||||
"HTTPRequestRateHigh",
|
||||
expr,
|
||||
time.Minute,
|
||||
time.Minute,
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
|
||||
)
|
||||
|
||||
result := promql.Sample{
|
||||
Metric: labels.FromStrings(
|
||||
"__name__", "ALERTS",
|
||||
"alertname", "HTTPRequestRateHigh",
|
||||
"alertstate", "pending",
|
||||
"instance", "0",
|
||||
"job", "app-server",
|
||||
),
|
||||
Point: promql.Point{V: 1},
|
||||
}
|
||||
|
||||
baseTime := time.Unix(0, 0)
|
||||
result.Point.T = timestamp.FromTime(baseTime)
|
||||
res, err := rule.Eval(suite.Context(), baseTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Len(t, res, 2)
|
||||
for _, smpl := range res {
|
||||
smplName := smpl.Metric.Get("__name__")
|
||||
if smplName == "ALERTS" {
|
||||
require.Equal(t, result, smpl)
|
||||
} else {
|
||||
// If not 'ALERTS', it has to be 'ALERTS_FOR_STATE'.
|
||||
require.Equal(t, "ALERTS_FOR_STATE", smplName)
|
||||
}
|
||||
}
|
||||
|
||||
evalTime := baseTime.Add(time.Minute)
|
||||
res, err = rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, len(res))
|
||||
}
|
||||
|
|
|
@ -1119,6 +1119,7 @@ func (m *Manager) LoadGroups(
|
|||
r.Alert.Value,
|
||||
expr,
|
||||
time.Duration(r.For),
|
||||
time.Duration(r.KeepFiringFor),
|
||||
labels.FromMap(r.Labels),
|
||||
labels.FromMap(r.Annotations),
|
||||
externalLabels,
|
||||
|
|
|
@ -66,6 +66,7 @@ func TestAlertingRule(t *testing.T) {
|
|||
"HTTPRequestRateLow",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("severity", "{{\"c\"}}ritical"),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
|
||||
)
|
||||
|
@ -209,6 +210,7 @@ func TestForStateAddSamples(t *testing.T) {
|
|||
"HTTPRequestRateLow",
|
||||
expr,
|
||||
time.Minute,
|
||||
0,
|
||||
labels.FromStrings("severity", "{{\"c\"}}ritical"),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
|
||||
)
|
||||
|
@ -383,6 +385,7 @@ func TestForStateRestore(t *testing.T) {
|
|||
"HTTPRequestRateLow",
|
||||
expr,
|
||||
alertForDuration,
|
||||
0,
|
||||
labels.FromStrings("severity", "critical"),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
|
||||
)
|
||||
|
@ -449,6 +452,7 @@ func TestForStateRestore(t *testing.T) {
|
|||
"HTTPRequestRateLow",
|
||||
expr,
|
||||
alertForDuration,
|
||||
0,
|
||||
labels.FromStrings("severity", "critical"),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil,
|
||||
)
|
||||
|
@ -615,13 +619,13 @@ func readSeriesSet(ss storage.SeriesSet) (map[string][]promql.Point, error) {
|
|||
func TestCopyState(t *testing.T) {
|
||||
oldGroup := &Group{
|
||||
rules: []Rule{
|
||||
NewAlertingRule("alert", nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewAlertingRule("alert", nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewRecordingRule("rule1", nil, labels.EmptyLabels()),
|
||||
NewRecordingRule("rule2", nil, labels.EmptyLabels()),
|
||||
NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v1")),
|
||||
NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v2")),
|
||||
NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v3")),
|
||||
NewAlertingRule("alert2", nil, 0, labels.FromStrings("l2", "v1"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewAlertingRule("alert2", nil, 0, 0, labels.FromStrings("l2", "v1"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
},
|
||||
seriesInPreviousEval: []map[string]labels.Labels{
|
||||
{},
|
||||
|
@ -640,10 +644,10 @@ func TestCopyState(t *testing.T) {
|
|||
NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v0")),
|
||||
NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v1")),
|
||||
NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v2")),
|
||||
NewAlertingRule("alert", nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewAlertingRule("alert", nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewRecordingRule("rule1", nil, labels.EmptyLabels()),
|
||||
NewAlertingRule("alert2", nil, 0, labels.FromStrings("l2", "v0"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewAlertingRule("alert2", nil, 0, labels.FromStrings("l2", "v1"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewAlertingRule("alert2", nil, 0, 0, labels.FromStrings("l2", "v0"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewAlertingRule("alert2", nil, 0, 0, labels.FromStrings("l2", "v1"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewRecordingRule("rule4", nil, labels.EmptyLabels()),
|
||||
},
|
||||
seriesInPreviousEval: make([]map[string]labels.Labels, 8),
|
||||
|
@ -875,7 +879,7 @@ func TestNotify(t *testing.T) {
|
|||
|
||||
expr, err := parser.ParseExpr("a > 1")
|
||||
require.NoError(t, err)
|
||||
rule := NewAlertingRule("aTooHigh", expr, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger())
|
||||
rule := NewAlertingRule("aTooHigh", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger())
|
||||
group := NewGroup(GroupOptions{
|
||||
Name: "alert",
|
||||
Interval: time.Second,
|
||||
|
@ -1147,7 +1151,7 @@ func TestGroupHasAlertingRules(t *testing.T) {
|
|||
group: &Group{
|
||||
name: "HasAlertingRule",
|
||||
rules: []Rule{
|
||||
NewAlertingRule("alert", nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewAlertingRule("alert", nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
|
||||
NewRecordingRule("record", nil, labels.EmptyLabels()),
|
||||
},
|
||||
},
|
||||
|
|
|
@ -1111,11 +1111,12 @@ type AlertDiscovery struct {
|
|||
|
||||
// Alert has info for an alert.
|
||||
type Alert struct {
|
||||
Labels labels.Labels `json:"labels"`
|
||||
Annotations labels.Labels `json:"annotations"`
|
||||
State string `json:"state"`
|
||||
ActiveAt *time.Time `json:"activeAt,omitempty"`
|
||||
Value string `json:"value"`
|
||||
Labels labels.Labels `json:"labels"`
|
||||
Annotations labels.Labels `json:"annotations"`
|
||||
State string `json:"state"`
|
||||
ActiveAt *time.Time `json:"activeAt,omitempty"`
|
||||
KeepFiringSince *time.Time `json:"keepFiringSince,omitempty"`
|
||||
Value string `json:"value"`
|
||||
}
|
||||
|
||||
func (api *API) alerts(r *http.Request) apiFuncResult {
|
||||
|
@ -1144,6 +1145,9 @@ func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert {
|
|||
ActiveAt: &ruleAlert.ActiveAt,
|
||||
Value: strconv.FormatFloat(ruleAlert.Value, 'e', -1, 64),
|
||||
}
|
||||
if !ruleAlert.KeepFiringSince.IsZero() {
|
||||
apiAlerts[i].KeepFiringSince = &ruleAlert.KeepFiringSince
|
||||
}
|
||||
}
|
||||
|
||||
return apiAlerts
|
||||
|
@ -1241,6 +1245,7 @@ type AlertingRule struct {
|
|||
Name string `json:"name"`
|
||||
Query string `json:"query"`
|
||||
Duration float64 `json:"duration"`
|
||||
KeepFiringFor float64 `json:"keepFiringFor"`
|
||||
Labels labels.Labels `json:"labels"`
|
||||
Annotations labels.Labels `json:"annotations"`
|
||||
Alerts []*Alert `json:"alerts"`
|
||||
|
@ -1303,6 +1308,7 @@ func (api *API) rules(r *http.Request) apiFuncResult {
|
|||
Name: rule.Name(),
|
||||
Query: rule.Query().String(),
|
||||
Duration: rule.HoldDuration().Seconds(),
|
||||
KeepFiringFor: rule.KeepFiringFor().Seconds(),
|
||||
Labels: rule.Labels(),
|
||||
Annotations: rule.Annotations(),
|
||||
Alerts: rulesAlertsToAPIAlerts(rule.ActiveAlerts()),
|
||||
|
|
|
@ -209,6 +209,7 @@ func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule {
|
|||
"test_metric3",
|
||||
expr1,
|
||||
time.Second,
|
||||
0,
|
||||
labels.Labels{},
|
||||
labels.Labels{},
|
||||
labels.Labels{},
|
||||
|
@ -220,6 +221,7 @@ func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule {
|
|||
"test_metric4",
|
||||
expr2,
|
||||
time.Second,
|
||||
0,
|
||||
labels.Labels{},
|
||||
labels.Labels{},
|
||||
labels.Labels{},
|
||||
|
|
|
@ -29,6 +29,7 @@ export interface Alert {
|
|||
value: string;
|
||||
annotations: Record<string, string>;
|
||||
activeAt: string;
|
||||
keepFiringSince: string;
|
||||
}
|
||||
|
||||
interface RuleGroup {
|
||||
|
|
|
@ -43,6 +43,11 @@ const CollapsibleAlertPanel: FC<CollapsibleAlertPanelProps> = ({ rule, showAnnot
|
|||
<div>for: {formatDuration(rule.duration * 1000)}</div>
|
||||
</div>
|
||||
)}
|
||||
{rule.keepFiringFor > 0 && (
|
||||
<div>
|
||||
<div>keep_firing_for: {formatDuration(rule.keepFiringFor * 1000)}</div>
|
||||
</div>
|
||||
)}
|
||||
{rule.labels && Object.keys(rule.labels).length > 0 && (
|
||||
<div>
|
||||
<div>labels:</div>
|
||||
|
@ -91,9 +96,14 @@ const CollapsibleAlertPanel: FC<CollapsibleAlertPanelProps> = ({ rule, showAnnot
|
|||
</td>
|
||||
<td>
|
||||
<h5 className="m-0">
|
||||
<Badge color={alertColors[alert.state] + ' text-uppercase'} className="px-3">
|
||||
<Badge color={alertColors[alert.state] + ' text-uppercase'} className="px-3 mr-1">
|
||||
{alert.state}
|
||||
</Badge>
|
||||
{alert.keepFiringSince && (
|
||||
<Badge color="secondary" className="px-3">
|
||||
Stabilizing
|
||||
</Badge>
|
||||
)}
|
||||
</h5>
|
||||
</td>
|
||||
<td>{alert.activeAt}</td>
|
||||
|
|
|
@ -96,6 +96,11 @@ export const RulesContent: FC<RulesContentProps> = ({ response }) => {
|
|||
<strong>for:</strong> {formatDuration(r.duration * 1000)}
|
||||
</div>
|
||||
)}
|
||||
{r.keepFiringFor > 0 && (
|
||||
<div>
|
||||
<strong>keep_firing_for:</strong> {formatDuration(r.keepFiringFor * 1000)}
|
||||
</div>
|
||||
)}
|
||||
{r.labels && Object.keys(r.labels).length > 0 && (
|
||||
<div>
|
||||
<strong>labels:</strong>
|
||||
|
|
|
@ -26,6 +26,7 @@ export type Rule = {
|
|||
alerts: Alert[];
|
||||
annotations: Record<string, string>;
|
||||
duration: number;
|
||||
keepFiringFor: number;
|
||||
evaluationTime: string;
|
||||
health: string;
|
||||
labels: Record<string, string>;
|
||||
|
|
Loading…
Reference in New Issue