Browse Source

Merge pull request #1807 from prometheus/am-label

Expand alert templates at eval time.
pull/1634/head
Fabian Reinartz 8 years ago committed by GitHub
parent
commit
9c3129746c
  1. 78
      rules/alerting.go
  2. 54
      rules/manager.go
  3. 8
      rules/manager_test.go
  4. 2
      rules/recording.go
  5. 2
      rules/recording_test.go

78
rules/alerting.go

@ -15,13 +15,16 @@ package rules
import (
"fmt"
"html/template"
"sync"
"time"
html_template "html/template"
"github.com/prometheus/common/log"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/template"
"github.com/prometheus/prometheus/util/strutil"
)
@ -63,8 +66,9 @@ func (s AlertState) String() string {
// Alert is the user-level representation of a single instance of an alerting rule.
type Alert struct {
State AlertState
Labels model.LabelSet
State AlertState
Labels model.LabelSet
Annotations model.LabelSet
// The value at the last evaluation of the alerting expression.
Value model.SampleValue
// The interval during which the condition of this alert held true.
@ -142,7 +146,7 @@ const resolvedRetention = 15 * time.Minute
// eval evaluates the rule expression and then creates pending alerts and fires
// or removes previously pending alerts accordingly.
func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector, error) {
func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine, externalURLPath string) (model.Vector, error) {
query, err := engine.NewInstantQuery(r.vector.String(), ts)
if err != nil {
return nil, err
@ -160,6 +164,53 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
resultFPs := map[model.Fingerprint]struct{}{}
for _, smpl := range res {
// Provide the alert information to the template.
l := make(map[string]string, len(smpl.Metric))
for k, v := range smpl.Metric {
l[string(k)] = string(v)
}
tmplData := struct {
Labels map[string]string
Value float64
}{
Labels: l,
Value: float64(smpl.Value),
}
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
defs := "{{$labels := .Labels}}{{$value := .Value}}"
expand := func(text model.LabelValue) model.LabelValue {
tmpl := template.NewTemplateExpander(
defs+string(text),
"__alert_"+r.Name(),
tmplData,
ts,
engine,
externalURLPath,
)
result, err := tmpl.Expand()
if err != nil {
result = fmt.Sprintf("<error expanding template: %s>", err)
log.Warnf("Error expanding alert template %v with data '%v': %s", r.Name(), tmplData, err)
}
return model.LabelValue(result)
}
labels := make(model.LabelSet, len(smpl.Metric)+len(r.labels)+1)
for ln, lv := range smpl.Metric {
labels[ln] = lv
}
for ln, lv := range r.labels {
labels[ln] = expand(lv)
}
labels[model.AlertNameLabel] = model.LabelValue(r.Name())
annotations := make(model.LabelSet, len(r.annotations))
for an, av := range r.annotations {
annotations[an] = expand(av)
}
fp := smpl.Metric.Fingerprint()
resultFPs[fp] = struct{}{}
@ -171,10 +222,11 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
delete(smpl.Metric, model.MetricNameLabel)
r.active[fp] = &Alert{
Labels: model.LabelSet(smpl.Metric),
ActiveAt: ts,
State: StatePending,
Value: smpl.Value,
Labels: labels,
Annotations: annotations,
ActiveAt: ts,
State: StatePending,
Value: smpl.Value,
}
}
@ -243,13 +295,7 @@ func (r *AlertingRule) currentAlerts() []*Alert {
alerts := make([]*Alert, 0, len(r.active))
for _, a := range r.active {
labels := r.labels.Clone()
for ln, lv := range a.Labels {
labels[ln] = lv
}
anew := *a
anew.Labels = labels
alerts = append(alerts, &anew)
}
return alerts
@ -273,7 +319,7 @@ func (r *AlertingRule) String() string {
// HTMLSnippet returns an HTML snippet representing this alerting rule. The
// resulting snippet is expected to be presented in a <pre> element, so that
// line breaks and other returned whitespace are respected.
func (r *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML {
func (r *AlertingRule) HTMLSnippet(pathPrefix string) html_template.HTML {
alertMetric := model.Metric{
model.MetricNameLabel: alertMetricName,
alertNameLabel: model.LabelValue(r.name),
@ -289,5 +335,5 @@ func (r *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML {
if len(r.annotations) > 0 {
s += fmt.Sprintf("\n ANNOTATIONS %s", r.annotations)
}
return template.HTML(s)
return html_template.HTML(s)
}

54
rules/manager.go

@ -32,7 +32,6 @@ import (
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/storage/local"
"github.com/prometheus/prometheus/template"
"github.com/prometheus/prometheus/util/strutil"
)
@ -106,7 +105,7 @@ const (
type Rule interface {
Name() string
// eval evaluates the rule, including any associated recording or alerting actions.
eval(model.Time, *promql.Engine) (model.Vector, error)
eval(model.Time, *promql.Engine, string) (model.Vector, error)
// String returns a human-readable string representation of the rule.
String() string
// HTMLSnippet returns a human-readable string representation of the rule,
@ -257,7 +256,7 @@ func (g *Group) eval() {
evalTotal.WithLabelValues(rtyp).Inc()
vector, err := rule.eval(now, g.opts.QueryEngine)
vector, err := rule.eval(now, g.opts.QueryEngine, g.opts.ExternalURL.Path)
if err != nil {
// Canceled queries are intentional terminations of queries. This normally
// happens on shutdown and thus we skip logging of any errors here.
@ -310,55 +309,10 @@ func (g *Group) sendAlerts(rule *AlertingRule, timestamp model.Time) error {
continue
}
// Provide the alert information to the template.
l := make(map[string]string, len(alert.Labels))
for k, v := range alert.Labels {
l[string(k)] = string(v)
}
tmplData := struct {
Labels map[string]string
Value float64
}{
Labels: l,
Value: float64(alert.Value),
}
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
defs := "{{$labels := .Labels}}{{$value := .Value}}"
expand := func(text model.LabelValue) model.LabelValue {
tmpl := template.NewTemplateExpander(
defs+string(text),
"__alert_"+rule.Name(),
tmplData,
timestamp,
g.opts.QueryEngine,
g.opts.ExternalURL.Path,
)
result, err := tmpl.Expand()
if err != nil {
result = fmt.Sprintf("<error expanding template: %s>", err)
log.Warnf("Error expanding alert template %v with data '%v': %s", rule.Name(), tmplData, err)
}
return model.LabelValue(result)
}
labels := make(model.LabelSet, len(alert.Labels)+1)
for ln, lv := range alert.Labels {
labels[ln] = expand(lv)
}
labels[model.AlertNameLabel] = model.LabelValue(rule.Name())
annotations := make(model.LabelSet, len(rule.annotations))
for an, av := range rule.annotations {
annotations[an] = expand(av)
}
a := &model.Alert{
StartsAt: alert.ActiveAt.Add(rule.holdDuration).Time(),
Labels: labels,
Annotations: annotations,
Labels: alert.Labels,
Annotations: alert.Annotations,
GeneratorURL: g.opts.ExternalURL.String() + strutil.GraphLinkForExpression(rule.vector.String()),
}
if alert.ResolvedAt != 0 {

8
rules/manager_test.go

@ -27,8 +27,8 @@ import (
func TestAlertingRule(t *testing.T) {
suite, err := promql.NewTest(t, `
load 5m
http_requests{job="app-server", instance="0", group="canary"} 75 85 95 105 105 95 85
http_requests{job="app-server", instance="1", group="canary"} 80 90 100 110 120 130 140
http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85
http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140
`)
if err != nil {
t.Fatal(err)
@ -48,7 +48,7 @@ func TestAlertingRule(t *testing.T) {
"HTTPRequestRateLow",
expr,
time.Minute,
model.LabelSet{"severity": "critical"},
model.LabelSet{"severity": "{{\"c\"}}ritical"},
model.LabelSet{},
)
@ -105,7 +105,7 @@ func TestAlertingRule(t *testing.T) {
for i, test := range tests {
evalTime := model.Time(0).Add(test.time)
res, err := rule.eval(evalTime, suite.QueryEngine())
res, err := rule.eval(evalTime, suite.QueryEngine(), "")
if err != nil {
t.Fatalf("Error during alerting rule evaluation: %s", err)
}

2
rules/recording.go

@ -45,7 +45,7 @@ func (rule RecordingRule) Name() string {
}
// eval evaluates the rule and then overrides the metric names and labels accordingly.
func (rule RecordingRule) eval(timestamp model.Time, engine *promql.Engine) (model.Vector, error) {
func (rule RecordingRule) eval(timestamp model.Time, engine *promql.Engine, _ string) (model.Vector, error) {
query, err := engine.NewInstantQuery(rule.vector.String(), timestamp)
if err != nil {
return nil, err

2
rules/recording_test.go

@ -59,7 +59,7 @@ func TestRuleEval(t *testing.T) {
for _, test := range suite {
rule := NewRecordingRule(test.name, test.expr, test.labels)
result, err := rule.eval(now, engine)
result, err := rule.eval(now, engine, "")
if err != nil {
t.Fatalf("Error evaluating %s", test.name)
}

Loading…
Cancel
Save