@@ -15,6 +15,7 @@ package rules
 
 import (
 	"fmt"
+	"reflect"
 	"strings"
 	"testing"
 	"time"
@@ -22,202 +23,161 @@ import (
 	clientmodel "github.com/prometheus/client_golang/model"
 
 	"github.com/prometheus/prometheus/promql"
-	"github.com/prometheus/prometheus/storage/local"
-	"github.com/prometheus/prometheus/storage/metric"
 )
 
-var (
-	testSampleInterval = time.Duration(5) * time.Minute
-	testStartTime      = clientmodel.Timestamp(0)
-)
-
-func getTestValueStream(startVal clientmodel.SampleValue, endVal clientmodel.SampleValue, stepVal clientmodel.SampleValue, startTime clientmodel.Timestamp) (resultValues metric.Values) {
-	currentTime := startTime
-	for currentVal := startVal; currentVal <= endVal; currentVal += stepVal {
-		sample := metric.SamplePair{
-			Value:     currentVal,
-			Timestamp: currentTime,
-		}
-		resultValues = append(resultValues, sample)
-		currentTime = currentTime.Add(testSampleInterval)
-	}
-	return resultValues
-}
-
-func getTestVectorFromTestMatrix(matrix promql.Matrix) promql.Vector {
-	vector := promql.Vector{}
-	for _, sampleStream := range matrix {
-		lastSample := sampleStream.Values[len(sampleStream.Values)-1]
-		vector = append(vector, &promql.Sample{
-			Metric:    sampleStream.Metric,
-			Value:     lastSample.Value,
-			Timestamp: lastSample.Timestamp,
-		})
+func TestAlertingRule(t *testing.T) {
+	suite, err := promql.NewTest(t, `
+		load 5m
+			http_requests{job="api-server", instance="0", group="production"}	0+10x10
+			http_requests{job="api-server", instance="1", group="production"}	0+20x10
+			http_requests{job="api-server", instance="0", group="canary"}		0+30x10
+			http_requests{job="api-server", instance="1", group="canary"}		0+40x10
+			http_requests{job="app-server", instance="0", group="production"}	0+50x10
+			http_requests{job="app-server", instance="1", group="production"}	0+60x10
+			http_requests{job="app-server", instance="0", group="canary"}		0+70x10
+			http_requests{job="app-server", instance="1", group="canary"}		0+80x10
+	`)
+	if err != nil {
+		t.Fatal(err)
 	}
-	return vector
-}
+	defer suite.Close()
 
-func storeMatrix(storage local.Storage, matrix promql.Matrix) {
-	pendingSamples := clientmodel.Samples{}
-	for _, sampleStream := range matrix {
-		for _, sample := range sampleStream.Values {
-			pendingSamples = append(pendingSamples, &clientmodel.Sample{
-				Metric:    sampleStream.Metric.Metric,
-				Value:     sample.Value,
-				Timestamp: sample.Timestamp,
-			})
-		}
-	}
-	for _, s := range pendingSamples {
-		storage.Append(s)
+	if err := suite.Run(); err != nil {
+		t.Fatal(err)
 	}
-	storage.WaitForIndexing()
-}
 
-func vectorComparisonString(expected []string, actual []string) string {
-	separator := "\n--------------\n"
-	return fmt.Sprintf("Expected:%v%v%v\nActual:%v%v%v",
-		separator,
-		strings.Join(expected, "\n"),
-		separator,
-		separator,
-		strings.Join(actual, "\n"),
-		separator)
-}
-
-func annotateWithTime(lines []string, timestamp clientmodel.Timestamp) []string {
-	annotatedLines := []string{}
-	for _, line := range lines {
-		annotatedLines = append(annotatedLines, fmt.Sprintf(line, timestamp))
+	expr, err := promql.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`)
+	if err != nil {
+		t.Fatalf("Unable to parse alert expression: %s", err)
 	}
-	return annotatedLines
-}
 
-var testMatrix = promql.Matrix{
-	{
-		Metric: clientmodel.COWMetric{
-			Metric: clientmodel.Metric{
-				clientmodel.MetricNameLabel: "http_requests",
-				clientmodel.JobLabel:        "api-server",
-				"instance":                  "0",
-				"group":                     "canary",
-			},
-		},
-		Values: getTestValueStream(0, 300, 30, testStartTime),
-	},
-	{
-		Metric: clientmodel.COWMetric{
-			Metric: clientmodel.Metric{
-				clientmodel.MetricNameLabel: "http_requests",
-				clientmodel.JobLabel:        "api-server",
-				"instance":                  "1",
-				"group":                     "canary",
+	rule := NewAlertingRule(
+		"HTTPRequestRateLow",
+		expr,
+		time.Minute,
+		clientmodel.LabelSet{"severity": "critical"},
+		"summary", "description", "runbook",
+	)
+
+	var tests = []struct {
+		time   time.Duration
+		result []string
+	}{
+		{
+			time: 0,
+			result: []string{
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
 			},
-		},
-		Values: getTestValueStream(0, 400, 40, testStartTime),
-	},
-	{
-		Metric: clientmodel.COWMetric{
-			Metric: clientmodel.Metric{
-				clientmodel.MetricNameLabel: "http_requests",
-				clientmodel.JobLabel:        "app-server",
-				"instance":                  "0",
-				"group":                     "canary",
+		}, {
+			time: 5 * time.Minute,
+			result: []string{
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
 			},
-		},
-		Values: getTestValueStream(0, 700, 70, testStartTime),
-	},
-	{
-		Metric: clientmodel.COWMetric{
-			Metric: clientmodel.Metric{
-				clientmodel.MetricNameLabel: "http_requests",
-				clientmodel.JobLabel:        "app-server",
-				"instance":                  "1",
-				"group":                     "canary",
+		}, {
+			time: 10 * time.Minute,
+			result: []string{
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
+				`ALERTS{alertname="HTTPRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
 			},
 		},
-		Values: getTestValueStream(0, 800, 80, testStartTime),
-	},
-}
-
-func TestAlertingRule(t *testing.T) {
-	// Labels in expected output need to be alphabetically sorted.
-	var evalOutputs = [][]string{
 		{
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
+			time:   15 * time.Minute,
+			result: nil,
 		},
 		{
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 1 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="pending", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 1 @[%v]`,
+			time:   20 * time.Minute,
+			result: nil,
 		},
-		{
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="1", job="app-server", severity="critical"} => 0 @[%v]`,
-			`ALERTS{alertname="HttpRequestRateLow", alertstate="firing", group="canary", instance="0", job="app-server", severity="critical"} => 0 @[%v]`,
-		},
-		{
-			/* empty */
-		},
-		{
-			/* empty */
-		},
 	}
 
-	storage, closer := local.NewTestStorage(t, 1)
-	defer closer.Close()
-
-	storeMatrix(storage, testMatrix)
-
-	engine := promql.NewEngine(storage, nil)
-	defer engine.Stop()
-
-	expr, err := promql.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`)
-	if err != nil {
-		t.Fatalf("Unable to parse alert expression: %s", err)
-	}
-
-	alertLabels := clientmodel.LabelSet{
-		"severity": "critical",
-	}
-	rule := NewAlertingRule("HttpRequestRateLow", expr, time.Minute, alertLabels, "summary", "description", "runbook")
-
-	for i, expectedLines := range evalOutputs {
-		evalTime := testStartTime.Add(testSampleInterval * time.Duration(i))
+	for i, test := range tests {
+		evalTime := clientmodel.Timestamp(0).Add(test.time)
 
-		res, err := rule.eval(evalTime, engine)
+		res, err := rule.eval(evalTime, suite.QueryEngine())
 		if err != nil {
 			t.Fatalf("Error during alerting rule evaluation: %s", err)
 		}
 
-		actualLines := strings.Split(res.String(), "\n")
-		expectedLines := annotateWithTime(expectedLines, evalTime)
-		if actualLines[0] == "" {
-			actualLines = []string{}
+		actual := strings.Split(res.String(), "\n")
+		expected := annotateWithTime(test.result, evalTime)
+		if actual[0] == "" {
+			actual = []string{}
 		}
 
-		failed := false
-		if len(actualLines) != len(expectedLines) {
-			t.Errorf("%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(expectedLines), len(actualLines))
-			failed = true
+		if len(actual) != len(expected) {
+			t.Errorf("%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(expected), len(actual))
 		}
 
-		for j, expectedSample := range expectedLines {
+		for j, expectedSample := range expected {
 			found := false
-			for _, actualSample := range actualLines {
+			for _, actualSample := range actual {
 				if actualSample == expectedSample {
 					found = true
 				}
 			}
 			if !found {
 				t.Errorf("%d.%d. Couldn't find expected sample in output: '%v'", i, j, expectedSample)
-				failed = true
 			}
 		}
 
-		if failed {
-			t.Fatalf("%d. Expected and actual outputs don't match:\n%v", i, vectorComparisonString(expectedLines, actualLines))
+		if t.Failed() {
+			t.Errorf("%d. Expected and actual outputs don't match:", i)
+			t.Fatalf("Expected:\n%v\n----\nActual:\n%v", strings.Join(expected, "\n"), strings.Join(actual, "\n"))
 		}
 	}
 }
+
+func annotateWithTime(lines []string, timestamp clientmodel.Timestamp) []string {
+	annotatedLines := []string{}
+	for _, line := range lines {
+		annotatedLines = append(annotatedLines, fmt.Sprintf(line, timestamp))
+	}
+	return annotatedLines
+}
+
+func TestTransferAlertState(t *testing.T) {
+	m := NewManager(&ManagerOptions{})
+
+	alert := &Alert{
+		Name:  "testalert",
+		State: StateFiring,
+	}
+
+	arule := AlertingRule{
+		name:         "test",
+		activeAlerts: map[clientmodel.Fingerprint]*Alert{},
+	}
+	aruleCopy := arule
+
+	m.rules = append(m.rules, &arule)
+
+	// Set an alert.
+	arule.activeAlerts[0] = alert
+
+	// Save state and get the restore function.
+	restore := m.transferAlertState()
+
+	// Remove arule from the rule list and add an unrelated rule and the
+	// stateless copy of arule.
+	m.rules = []Rule{
+		&AlertingRule{
+			name:         "test_other",
+			activeAlerts: map[clientmodel.Fingerprint]*Alert{},
+		},
+		&aruleCopy,
+	}
+
+	// Apply the restore function.
+	restore()
+
+	if ar := m.rules[0].(*AlertingRule); len(ar.activeAlerts) != 0 {
+		t.Fatalf("unexpected alert for unrelated alerting rule")
+	}
+	if ar := m.rules[1].(*AlertingRule); !reflect.DeepEqual(ar.activeAlerts[0], alert) {
+		t.Fatalf("alert state was not restored")
+	}
+}
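Note on the series notation in the new `load` block: the correspondence with the deleted helper is visible in the diff itself. The old testMatrix entry for {job="api-server", instance="0", group="canary"} was built with getTestValueStream(0, 300, 30, testStartTime), i.e. eleven samples 0, 30, ..., 300 spaced testSampleInterval (5m) apart, and its replacement in the load block is `0+30x10` under `load 5m`. The sketch below illustrates that expansion; the `sample` type and `expandSeries` function are hypothetical stand-ins (the real expansion happens inside the promql test harness):

package main

import (
	"fmt"
	"time"
)

// sample stands in for the deleted metric.SamplePair: a time offset from the
// test start plus a value.
type sample struct {
	offset time.Duration
	value  float64
}

// expandSeries mimics the deleted getTestValueStream helper: start+stepxN
// under "load <interval>" denotes N+1 samples, beginning at start and
// growing by step once per interval.
func expandSeries(start, step float64, n int, interval time.Duration) []sample {
	samples := make([]sample, 0, n+1)
	for i := 0; i <= n; i++ {
		samples = append(samples, sample{
			offset: time.Duration(i) * interval,
			value:  start + float64(i)*step,
		})
	}
	return samples
}

func main() {
	// 0+30x10 with a 5m interval: values 0, 30, ..., 300 at offsets
	// 0m, 5m, ..., 50m -- the same stream the old testMatrix entry built.
	for _, s := range expandSeries(0, 30, 10, 5*time.Minute) {
		fmt.Printf("%v => %g\n", s.offset, s.value)
	}
}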