mirror of https://github.com/prometheus/prometheus
Increase evaluation failures on Commit() (#8770)
I think we should increment the metric here, since we're setting the rule health anyway. This means that even if the "evaluation" succeeded, none of the samples made it to storage. This is a simplified solution to: https://github.com/prometheus/prometheus/pull/8410/ Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com> pull/8773/head
parent
9d7d818629
commit
2efdf660b1
|
@ -595,13 +595,13 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
|||
if err != nil {
|
||||
rule.SetHealth(HealthBad)
|
||||
rule.SetLastError(err)
|
||||
g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
|
||||
// Canceled queries are intentional termination of queries. This normally
|
||||
// happens on shutdown and thus we skip logging of any errors here.
|
||||
if _, ok := err.(promql.ErrQueryCanceled); !ok {
|
||||
level.Warn(g.logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
|
||||
}
|
||||
g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
return
|
||||
}
|
||||
samplesTotal += float64(len(vector))
|
||||
|
@ -620,6 +620,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
|||
if err := app.Commit(); err != nil {
|
||||
rule.SetHealth(HealthBad)
|
||||
rule.SetLastError(err)
|
||||
g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||
|
||||
level.Warn(g.logger).Log("msg", "Rule sample appending failed", "err", err)
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue