Rules: set otel status to ERROR when a rule fails (#10745)

Signed-off-by: Julien Pivotto <roidelapluie@o11y.eu>
pull/10744/head
Julien Pivotto 3 years ago committed by GitHub
parent 8f0e5b475f
commit 3a56817a30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -28,6 +28,7 @@ import (
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"go.opentelemetry.io/otel" "go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt" "github.com/prometheus/prometheus/model/rulefmt"
@@ -624,6 +625,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
if err != nil { if err != nil {
rule.SetHealth(HealthBad) rule.SetHealth(HealthBad)
rule.SetLastError(err) rule.SetLastError(err)
sp.SetStatus(codes.Error, err.Error())
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
// Canceled queries are intentional termination of queries. This normally // Canceled queries are intentional termination of queries. This normally
@@ -651,6 +653,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
if err := app.Commit(); err != nil { if err := app.Commit(); err != nil {
rule.SetHealth(HealthBad) rule.SetHealth(HealthBad)
rule.SetLastError(err) rule.SetLastError(err)
sp.SetStatus(codes.Error, err.Error())
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule sample appending failed", "err", err) level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule sample appending failed", "err", err)
@@ -663,6 +666,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
if _, err := app.Append(0, s.Metric, s.T, s.V); err != nil { if _, err := app.Append(0, s.Metric, s.T, s.V); err != nil {
rule.SetHealth(HealthBad) rule.SetHealth(HealthBad)
rule.SetLastError(err) rule.SetLastError(err)
sp.SetStatus(codes.Error, err.Error())
switch errors.Cause(err) { switch errors.Cause(err) {
case storage.ErrOutOfOrderSample: case storage.ErrOutOfOrderSample:

Loading…
Cancel
Save