mirror of https://github.com/prometheus/prometheus
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
726 lines
20 KiB
726 lines
20 KiB
// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
package rules |
|
|
|
import ( |
|
"context" |
|
"math" |
|
"sort" |
|
"testing" |
|
"time" |
|
|
|
"github.com/go-kit/kit/log" |
|
"github.com/prometheus/common/model" |
|
|
|
"github.com/prometheus/prometheus/pkg/labels" |
|
"github.com/prometheus/prometheus/pkg/timestamp" |
|
"github.com/prometheus/prometheus/pkg/value" |
|
"github.com/prometheus/prometheus/promql" |
|
"github.com/prometheus/prometheus/storage" |
|
"github.com/prometheus/prometheus/util/testutil" |
|
) |
|
|
|
func TestAlertingRule(t *testing.T) { |
|
suite, err := promql.NewTest(t, ` |
|
load 5m |
|
http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 |
|
http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 |
|
`) |
|
testutil.Ok(t, err) |
|
defer suite.Close() |
|
|
|
err = suite.Run() |
|
testutil.Ok(t, err) |
|
|
|
expr, err := promql.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) |
|
testutil.Ok(t, err) |
|
|
|
rule := NewAlertingRule( |
|
"HTTPRequestRateLow", |
|
expr, |
|
time.Minute, |
|
labels.FromStrings("severity", "{{\"c\"}}ritical"), |
|
nil, true, nil, |
|
) |
|
result := promql.Vector{ |
|
{ |
|
Metric: labels.FromStrings( |
|
"__name__", "ALERTS", |
|
"alertname", "HTTPRequestRateLow", |
|
"alertstate", "pending", |
|
"group", "canary", |
|
"instance", "0", |
|
"job", "app-server", |
|
"severity", "critical", |
|
), |
|
Point: promql.Point{V: 1}, |
|
}, |
|
{ |
|
Metric: labels.FromStrings( |
|
"__name__", "ALERTS", |
|
"alertname", "HTTPRequestRateLow", |
|
"alertstate", "pending", |
|
"group", "canary", |
|
"instance", "1", |
|
"job", "app-server", |
|
"severity", "critical", |
|
), |
|
Point: promql.Point{V: 1}, |
|
}, |
|
{ |
|
Metric: labels.FromStrings( |
|
"__name__", "ALERTS", |
|
"alertname", "HTTPRequestRateLow", |
|
"alertstate", "firing", |
|
"group", "canary", |
|
"instance", "0", |
|
"job", "app-server", |
|
"severity", "critical", |
|
), |
|
Point: promql.Point{V: 1}, |
|
}, |
|
{ |
|
Metric: labels.FromStrings( |
|
"__name__", "ALERTS", |
|
"alertname", "HTTPRequestRateLow", |
|
"alertstate", "firing", |
|
"group", "canary", |
|
"instance", "1", |
|
"job", "app-server", |
|
"severity", "critical", |
|
), |
|
Point: promql.Point{V: 1}, |
|
}, |
|
} |
|
|
|
baseTime := time.Unix(0, 0) |
|
|
|
var tests = []struct { |
|
time time.Duration |
|
result promql.Vector |
|
}{ |
|
{ |
|
time: 0, |
|
result: result[:2], |
|
}, { |
|
time: 5 * time.Minute, |
|
result: result[2:], |
|
}, { |
|
time: 10 * time.Minute, |
|
result: result[2:3], |
|
}, |
|
{ |
|
time: 15 * time.Minute, |
|
result: nil, |
|
}, |
|
{ |
|
time: 20 * time.Minute, |
|
result: nil, |
|
}, |
|
{ |
|
time: 25 * time.Minute, |
|
result: result[:1], |
|
}, |
|
{ |
|
time: 30 * time.Minute, |
|
result: result[2:3], |
|
}, |
|
} |
|
|
|
for i, test := range tests { |
|
t.Logf("case %d", i) |
|
|
|
evalTime := baseTime.Add(test.time) |
|
|
|
res, err := rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil) |
|
testutil.Ok(t, err) |
|
|
|
var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. |
|
for _, smpl := range res { |
|
smplName := smpl.Metric.Get("__name__") |
|
if smplName == "ALERTS" { |
|
filteredRes = append(filteredRes, smpl) |
|
} else { |
|
// If not 'ALERTS', it has to be 'ALERTS_FOR_STATE'. |
|
testutil.Equals(t, smplName, "ALERTS_FOR_STATE") |
|
} |
|
} |
|
for i := range test.result { |
|
test.result[i].T = timestamp.FromTime(evalTime) |
|
} |
|
testutil.Assert(t, len(test.result) == len(filteredRes), "%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(test.result), len(res)) |
|
|
|
sort.Slice(filteredRes, func(i, j int) bool { |
|
return labels.Compare(filteredRes[i].Metric, filteredRes[j].Metric) < 0 |
|
}) |
|
testutil.Equals(t, test.result, filteredRes) |
|
|
|
for _, aa := range rule.ActiveAlerts() { |
|
testutil.Assert(t, aa.Labels.Get(model.MetricNameLabel) == "", "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) |
|
} |
|
} |
|
} |
|
|
|
func TestForStateAddSamples(t *testing.T) { |
|
suite, err := promql.NewTest(t, ` |
|
load 5m |
|
http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 |
|
http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 |
|
`) |
|
testutil.Ok(t, err) |
|
defer suite.Close() |
|
|
|
err = suite.Run() |
|
testutil.Ok(t, err) |
|
|
|
expr, err := promql.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) |
|
testutil.Ok(t, err) |
|
|
|
rule := NewAlertingRule( |
|
"HTTPRequestRateLow", |
|
expr, |
|
time.Minute, |
|
labels.FromStrings("severity", "{{\"c\"}}ritical"), |
|
nil, true, nil, |
|
) |
|
result := promql.Vector{ |
|
{ |
|
Metric: labels.FromStrings( |
|
"__name__", "ALERTS_FOR_STATE", |
|
"alertname", "HTTPRequestRateLow", |
|
"group", "canary", |
|
"instance", "0", |
|
"job", "app-server", |
|
"severity", "critical", |
|
), |
|
Point: promql.Point{V: 1}, |
|
}, |
|
{ |
|
Metric: labels.FromStrings( |
|
"__name__", "ALERTS_FOR_STATE", |
|
"alertname", "HTTPRequestRateLow", |
|
"group", "canary", |
|
"instance", "1", |
|
"job", "app-server", |
|
"severity", "critical", |
|
), |
|
Point: promql.Point{V: 1}, |
|
}, |
|
{ |
|
Metric: labels.FromStrings( |
|
"__name__", "ALERTS_FOR_STATE", |
|
"alertname", "HTTPRequestRateLow", |
|
"group", "canary", |
|
"instance", "0", |
|
"job", "app-server", |
|
"severity", "critical", |
|
), |
|
Point: promql.Point{V: 1}, |
|
}, |
|
{ |
|
Metric: labels.FromStrings( |
|
"__name__", "ALERTS_FOR_STATE", |
|
"alertname", "HTTPRequestRateLow", |
|
"group", "canary", |
|
"instance", "1", |
|
"job", "app-server", |
|
"severity", "critical", |
|
), |
|
Point: promql.Point{V: 1}, |
|
}, |
|
} |
|
|
|
baseTime := time.Unix(0, 0) |
|
|
|
var tests = []struct { |
|
time time.Duration |
|
result promql.Vector |
|
persistThisTime bool // If true, it means this 'time' is persisted for 'for'. |
|
}{ |
|
{ |
|
time: 0, |
|
result: append(promql.Vector{}, result[:2]...), |
|
persistThisTime: true, |
|
}, |
|
{ |
|
time: 5 * time.Minute, |
|
result: append(promql.Vector{}, result[2:]...), |
|
}, |
|
{ |
|
time: 10 * time.Minute, |
|
result: append(promql.Vector{}, result[2:3]...), |
|
}, |
|
{ |
|
time: 15 * time.Minute, |
|
result: nil, |
|
}, |
|
{ |
|
time: 20 * time.Minute, |
|
result: nil, |
|
}, |
|
{ |
|
time: 25 * time.Minute, |
|
result: append(promql.Vector{}, result[:1]...), |
|
persistThisTime: true, |
|
}, |
|
{ |
|
time: 30 * time.Minute, |
|
result: append(promql.Vector{}, result[2:3]...), |
|
}, |
|
} |
|
|
|
var forState float64 |
|
for i, test := range tests { |
|
t.Logf("case %d", i) |
|
evalTime := baseTime.Add(test.time) |
|
|
|
if test.persistThisTime { |
|
forState = float64(evalTime.Unix()) |
|
} |
|
if test.result == nil { |
|
forState = float64(value.StaleNaN) |
|
} |
|
|
|
res, err := rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil) |
|
testutil.Ok(t, err) |
|
|
|
var filteredRes promql.Vector // After removing 'ALERTS' samples. |
|
for _, smpl := range res { |
|
smplName := smpl.Metric.Get("__name__") |
|
if smplName == "ALERTS_FOR_STATE" { |
|
filteredRes = append(filteredRes, smpl) |
|
} else { |
|
// If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'. |
|
testutil.Equals(t, smplName, "ALERTS") |
|
} |
|
} |
|
for i := range test.result { |
|
test.result[i].T = timestamp.FromTime(evalTime) |
|
// Updating the expected 'for' state. |
|
if test.result[i].V >= 0 { |
|
test.result[i].V = forState |
|
} |
|
} |
|
testutil.Assert(t, len(test.result) == len(filteredRes), "%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(test.result), len(res)) |
|
|
|
sort.Slice(filteredRes, func(i, j int) bool { |
|
return labels.Compare(filteredRes[i].Metric, filteredRes[j].Metric) < 0 |
|
}) |
|
testutil.Equals(t, test.result, filteredRes) |
|
|
|
for _, aa := range rule.ActiveAlerts() { |
|
testutil.Assert(t, aa.Labels.Get(model.MetricNameLabel) == "", "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) |
|
} |
|
|
|
} |
|
} |
|
|
|
// sortAlerts sorts `[]*Alert` w.r.t. the Labels. |
|
func sortAlerts(items []*Alert) { |
|
sort.Slice(items, func(i, j int) bool { |
|
return labels.Compare(items[i].Labels, items[j].Labels) <= 0 |
|
}) |
|
} |
|
|
|
func TestForStateRestore(t *testing.T) { |
|
suite, err := promql.NewTest(t, ` |
|
load 5m |
|
http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 50 0 0 25 0 0 40 0 120 |
|
http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 125 90 60 0 0 25 0 0 40 0 130 |
|
`) |
|
testutil.Ok(t, err) |
|
defer suite.Close() |
|
|
|
err = suite.Run() |
|
testutil.Ok(t, err) |
|
|
|
expr, err := promql.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) |
|
testutil.Ok(t, err) |
|
|
|
opts := &ManagerOptions{ |
|
QueryFunc: EngineQueryFunc(suite.QueryEngine(), suite.Storage()), |
|
Appendable: suite.Storage(), |
|
TSDB: suite.Storage(), |
|
Context: context.Background(), |
|
Logger: log.NewNopLogger(), |
|
NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, |
|
OutageTolerance: 30 * time.Minute, |
|
ForGracePeriod: 10 * time.Minute, |
|
} |
|
|
|
alertForDuration := 25 * time.Minute |
|
// Initial run before prometheus goes down. |
|
rule := NewAlertingRule( |
|
"HTTPRequestRateLow", |
|
expr, |
|
alertForDuration, |
|
labels.FromStrings("severity", "critical"), |
|
nil, true, nil, |
|
) |
|
|
|
group := NewGroup("default", "", time.Second, []Rule{rule}, true, opts) |
|
groups := make(map[string]*Group) |
|
groups["default;"] = group |
|
|
|
initialRuns := []time.Duration{0, 5 * time.Minute} |
|
|
|
baseTime := time.Unix(0, 0) |
|
for _, duration := range initialRuns { |
|
evalTime := baseTime.Add(duration) |
|
group.Eval(suite.Context(), evalTime) |
|
} |
|
|
|
exp := rule.ActiveAlerts() |
|
for _, aa := range exp { |
|
testutil.Assert(t, aa.Labels.Get(model.MetricNameLabel) == "", "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) |
|
} |
|
sort.Slice(exp, func(i, j int) bool { |
|
return labels.Compare(exp[i].Labels, exp[j].Labels) < 0 |
|
}) |
|
|
|
// Prometheus goes down here. We create new rules and groups. |
|
type testInput struct { |
|
restoreDuration time.Duration |
|
alerts []*Alert |
|
|
|
num int |
|
noRestore bool |
|
gracePeriod bool |
|
downDuration time.Duration |
|
} |
|
|
|
tests := []testInput{ |
|
{ |
|
// Normal restore (alerts were not firing). |
|
restoreDuration: 15 * time.Minute, |
|
alerts: rule.ActiveAlerts(), |
|
downDuration: 10 * time.Minute, |
|
}, |
|
{ |
|
// Testing Outage Tolerance. |
|
restoreDuration: 40 * time.Minute, |
|
noRestore: true, |
|
num: 2, |
|
}, |
|
{ |
|
// No active alerts. |
|
restoreDuration: 50 * time.Minute, |
|
alerts: []*Alert{}, |
|
}, |
|
} |
|
|
|
testFunc := func(tst testInput) { |
|
newRule := NewAlertingRule( |
|
"HTTPRequestRateLow", |
|
expr, |
|
alertForDuration, |
|
labels.FromStrings("severity", "critical"), |
|
nil, false, nil, |
|
) |
|
newGroup := NewGroup("default", "", time.Second, []Rule{newRule}, true, opts) |
|
|
|
newGroups := make(map[string]*Group) |
|
newGroups["default;"] = newGroup |
|
|
|
restoreTime := baseTime.Add(tst.restoreDuration) |
|
// First eval before restoration. |
|
newGroup.Eval(suite.Context(), restoreTime) |
|
// Restore happens here. |
|
newGroup.RestoreForState(restoreTime) |
|
|
|
got := newRule.ActiveAlerts() |
|
for _, aa := range got { |
|
testutil.Assert(t, aa.Labels.Get(model.MetricNameLabel) == "", "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) |
|
} |
|
sort.Slice(got, func(i, j int) bool { |
|
return labels.Compare(got[i].Labels, got[j].Labels) < 0 |
|
}) |
|
|
|
// Checking if we have restored it correctly. |
|
if tst.noRestore { |
|
testutil.Equals(t, tst.num, len(got)) |
|
for _, e := range got { |
|
testutil.Equals(t, e.ActiveAt, restoreTime) |
|
} |
|
} else if tst.gracePeriod { |
|
testutil.Equals(t, tst.num, len(got)) |
|
for _, e := range got { |
|
testutil.Equals(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) |
|
} |
|
} else { |
|
exp := tst.alerts |
|
testutil.Equals(t, len(exp), len(got)) |
|
sortAlerts(exp) |
|
sortAlerts(got) |
|
for i, e := range exp { |
|
testutil.Equals(t, e.Labels, got[i].Labels) |
|
|
|
// Difference in time should be within 1e6 ns, i.e. 1ms |
|
// (due to conversion between ns & ms, float64 & int64). |
|
activeAtDiff := float64(e.ActiveAt.Unix() + int64(tst.downDuration/time.Second) - got[i].ActiveAt.Unix()) |
|
testutil.Assert(t, math.Abs(activeAtDiff) == 0, "'for' state restored time is wrong") |
|
} |
|
} |
|
} |
|
|
|
for _, tst := range tests { |
|
testFunc(tst) |
|
} |
|
|
|
// Testing the grace period. |
|
for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { |
|
evalTime := baseTime.Add(duration) |
|
group.Eval(suite.Context(), evalTime) |
|
} |
|
testFunc(testInput{ |
|
restoreDuration: 25 * time.Minute, |
|
alerts: []*Alert{}, |
|
gracePeriod: true, |
|
num: 2, |
|
}) |
|
} |
|
|
|
func TestStaleness(t *testing.T) { |
|
storage := testutil.NewStorage(t) |
|
defer storage.Close() |
|
engineOpts := promql.EngineOpts{ |
|
Logger: nil, |
|
Reg: nil, |
|
MaxConcurrent: 10, |
|
MaxSamples: 10, |
|
Timeout: 10 * time.Second, |
|
} |
|
engine := promql.NewEngine(engineOpts) |
|
opts := &ManagerOptions{ |
|
QueryFunc: EngineQueryFunc(engine, storage), |
|
Appendable: storage, |
|
TSDB: storage, |
|
Context: context.Background(), |
|
Logger: log.NewNopLogger(), |
|
} |
|
|
|
expr, err := promql.ParseExpr("a + 1") |
|
testutil.Ok(t, err) |
|
rule := NewRecordingRule("a_plus_one", expr, labels.Labels{}) |
|
group := NewGroup("default", "", time.Second, []Rule{rule}, true, opts) |
|
|
|
// A time series that has two samples and then goes stale. |
|
app, _ := storage.Appender() |
|
app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 0, 1) |
|
app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2) |
|
app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 2000, math.Float64frombits(value.StaleNaN)) |
|
|
|
err = app.Commit() |
|
testutil.Ok(t, err) |
|
|
|
ctx := context.Background() |
|
|
|
// Execute 3 times, 1 second apart. |
|
group.Eval(ctx, time.Unix(0, 0)) |
|
group.Eval(ctx, time.Unix(1, 0)) |
|
group.Eval(ctx, time.Unix(2, 0)) |
|
|
|
querier, err := storage.Querier(context.Background(), 0, 2000) |
|
testutil.Ok(t, err) |
|
defer querier.Close() |
|
|
|
matcher, err := labels.NewMatcher(labels.MatchEqual, model.MetricNameLabel, "a_plus_one") |
|
testutil.Ok(t, err) |
|
|
|
set, err := querier.Select(nil, matcher) |
|
testutil.Ok(t, err) |
|
|
|
samples, err := readSeriesSet(set) |
|
testutil.Ok(t, err) |
|
|
|
metric := labels.FromStrings(model.MetricNameLabel, "a_plus_one").String() |
|
metricSample, ok := samples[metric] |
|
|
|
testutil.Assert(t, ok, "Series %s not returned.", metric) |
|
testutil.Assert(t, value.IsStaleNaN(metricSample[2].V), "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(metricSample[2].V)) |
|
metricSample[2].V = 42 // reflect.DeepEqual cannot handle NaN. |
|
|
|
want := map[string][]promql.Point{ |
|
metric: []promql.Point{{0, 2}, {1000, 3}, {2000, 42}}, |
|
} |
|
|
|
testutil.Equals(t, want, samples) |
|
} |
|
|
|
// Convert a SeriesSet into a form useable with reflect.DeepEqual. |
|
func readSeriesSet(ss storage.SeriesSet) (map[string][]promql.Point, error) { |
|
result := map[string][]promql.Point{} |
|
|
|
for ss.Next() { |
|
series := ss.At() |
|
|
|
points := []promql.Point{} |
|
it := series.Iterator() |
|
for it.Next() { |
|
t, v := it.At() |
|
points = append(points, promql.Point{T: t, V: v}) |
|
} |
|
|
|
name := series.Labels().String() |
|
result[name] = points |
|
} |
|
return result, ss.Err() |
|
} |
|
|
|
func TestCopyState(t *testing.T) { |
|
oldGroup := &Group{ |
|
rules: []Rule{ |
|
NewAlertingRule("alert", nil, 0, nil, nil, true, nil), |
|
NewRecordingRule("rule1", nil, nil), |
|
NewRecordingRule("rule2", nil, nil), |
|
NewRecordingRule("rule3", nil, nil), |
|
NewRecordingRule("rule3", nil, nil), |
|
}, |
|
seriesInPreviousEval: []map[string]labels.Labels{ |
|
map[string]labels.Labels{"a": nil}, |
|
map[string]labels.Labels{"r1": nil}, |
|
map[string]labels.Labels{"r2": nil}, |
|
map[string]labels.Labels{"r3a": nil}, |
|
map[string]labels.Labels{"r3b": nil}, |
|
}, |
|
evaluationDuration: time.Second, |
|
} |
|
oldGroup.rules[0].(*AlertingRule).active[42] = nil |
|
newGroup := &Group{ |
|
rules: []Rule{ |
|
NewRecordingRule("rule3", nil, nil), |
|
NewRecordingRule("rule3", nil, nil), |
|
NewRecordingRule("rule3", nil, nil), |
|
NewAlertingRule("alert", nil, 0, nil, nil, true, nil), |
|
NewRecordingRule("rule1", nil, nil), |
|
NewRecordingRule("rule4", nil, nil), |
|
}, |
|
seriesInPreviousEval: make([]map[string]labels.Labels, 6), |
|
} |
|
newGroup.CopyState(oldGroup) |
|
|
|
want := []map[string]labels.Labels{ |
|
map[string]labels.Labels{"r3a": nil}, |
|
map[string]labels.Labels{"r3b": nil}, |
|
nil, |
|
map[string]labels.Labels{"a": nil}, |
|
map[string]labels.Labels{"r1": nil}, |
|
nil, |
|
} |
|
testutil.Equals(t, want, newGroup.seriesInPreviousEval) |
|
testutil.Equals(t, oldGroup.rules[0], newGroup.rules[3]) |
|
testutil.Equals(t, oldGroup.evaluationDuration, newGroup.evaluationDuration) |
|
} |
|
|
|
func TestUpdate(t *testing.T) { |
|
files := []string{"fixtures/rules.yaml"} |
|
expected := map[string]labels.Labels{ |
|
"test": labels.FromStrings("name", "value"), |
|
} |
|
storage := testutil.NewStorage(t) |
|
defer storage.Close() |
|
opts := promql.EngineOpts{ |
|
Logger: nil, |
|
Reg: nil, |
|
MaxConcurrent: 10, |
|
MaxSamples: 10, |
|
Timeout: 10 * time.Second, |
|
} |
|
engine := promql.NewEngine(opts) |
|
ruleManager := NewManager(&ManagerOptions{ |
|
Appendable: storage, |
|
TSDB: storage, |
|
QueryFunc: EngineQueryFunc(engine, storage), |
|
Context: context.Background(), |
|
Logger: log.NewNopLogger(), |
|
}) |
|
ruleManager.Run() |
|
defer ruleManager.Stop() |
|
|
|
err := ruleManager.Update(10*time.Second, files) |
|
testutil.Ok(t, err) |
|
testutil.Assert(t, len(ruleManager.groups) > 0, "expected non-empty rule groups") |
|
for _, g := range ruleManager.groups { |
|
g.seriesInPreviousEval = []map[string]labels.Labels{ |
|
expected, |
|
} |
|
} |
|
|
|
err = ruleManager.Update(10*time.Second, files) |
|
testutil.Ok(t, err) |
|
for _, g := range ruleManager.groups { |
|
for _, actual := range g.seriesInPreviousEval { |
|
testutil.Equals(t, expected, actual) |
|
} |
|
} |
|
} |
|
|
|
func TestNotify(t *testing.T) { |
|
storage := testutil.NewStorage(t) |
|
defer storage.Close() |
|
engineOpts := promql.EngineOpts{ |
|
Logger: nil, |
|
Reg: nil, |
|
MaxConcurrent: 10, |
|
MaxSamples: 10, |
|
Timeout: 10 * time.Second, |
|
} |
|
engine := promql.NewEngine(engineOpts) |
|
var lastNotified []*Alert |
|
notifyFunc := func(ctx context.Context, expr string, alerts ...*Alert) { |
|
lastNotified = alerts |
|
} |
|
opts := &ManagerOptions{ |
|
QueryFunc: EngineQueryFunc(engine, storage), |
|
Appendable: storage, |
|
TSDB: storage, |
|
Context: context.Background(), |
|
Logger: log.NewNopLogger(), |
|
NotifyFunc: notifyFunc, |
|
ResendDelay: 2 * time.Second, |
|
} |
|
|
|
expr, err := promql.ParseExpr("a > 1") |
|
testutil.Ok(t, err) |
|
rule := NewAlertingRule("aTooHigh", expr, 0, labels.Labels{}, labels.Labels{}, true, log.NewNopLogger()) |
|
group := NewGroup("alert", "", time.Second, []Rule{rule}, true, opts) |
|
|
|
app, _ := storage.Appender() |
|
app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2) |
|
app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 2000, 3) |
|
app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 5000, 3) |
|
app.Add(labels.FromStrings(model.MetricNameLabel, "a"), 6000, 0) |
|
|
|
err = app.Commit() |
|
testutil.Ok(t, err) |
|
|
|
ctx := context.Background() |
|
|
|
// Alert sent right away |
|
group.Eval(ctx, time.Unix(1, 0)) |
|
testutil.Equals(t, 1, len(lastNotified)) |
|
testutil.Assert(t, !lastNotified[0].ValidUntil.IsZero(), "ValidUntil should not be zero") |
|
|
|
// Alert is not sent 1s later |
|
group.Eval(ctx, time.Unix(2, 0)) |
|
testutil.Equals(t, 0, len(lastNotified)) |
|
|
|
// Alert is resent at t=5s |
|
group.Eval(ctx, time.Unix(5, 0)) |
|
testutil.Equals(t, 1, len(lastNotified)) |
|
|
|
// Resolution alert sent right away |
|
group.Eval(ctx, time.Unix(6, 0)) |
|
testutil.Equals(t, 1, len(lastNotified)) |
|
}
|
|
|