diff --git a/rules/manager.go b/rules/manager.go index 110b64d55..0b38ec187 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -53,37 +53,37 @@ const namespace = "prometheus" // Metrics for rule evaluation. type Metrics struct { - evalDuration prometheus.Summary - iterationDuration prometheus.Summary - iterationsMissed *prometheus.CounterVec - iterationsScheduled *prometheus.CounterVec - evalTotal *prometheus.CounterVec - evalFailures *prometheus.CounterVec - groupInterval *prometheus.GaugeVec - groupLastEvalTime *prometheus.GaugeVec - groupLastDuration *prometheus.GaugeVec - groupRules *prometheus.GaugeVec - groupSamples *prometheus.GaugeVec + EvalDuration prometheus.Summary + IterationDuration prometheus.Summary + IterationsMissed *prometheus.CounterVec + IterationsScheduled *prometheus.CounterVec + EvalTotal *prometheus.CounterVec + EvalFailures *prometheus.CounterVec + GroupInterval *prometheus.GaugeVec + GroupLastEvalTime *prometheus.GaugeVec + GroupLastDuration *prometheus.GaugeVec + GroupRules *prometheus.GaugeVec + GroupSamples *prometheus.GaugeVec } // NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer, // if not nil. func NewGroupMetrics(reg prometheus.Registerer) *Metrics { m := &Metrics{ - evalDuration: prometheus.NewSummary( + EvalDuration: prometheus.NewSummary( prometheus.SummaryOpts{ Namespace: namespace, Name: "rule_evaluation_duration_seconds", Help: "The duration for a rule to execute.", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }), - iterationDuration: prometheus.NewSummary(prometheus.SummaryOpts{ + IterationDuration: prometheus.NewSummary(prometheus.SummaryOpts{ Namespace: namespace, Name: "rule_group_duration_seconds", Help: "The duration of rule group evaluations.", Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001}, }), - iterationsMissed: prometheus.NewCounterVec( + IterationsMissed: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Name: "rule_group_iterations_missed_total", @@ -91,7 +91,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), - iterationsScheduled: prometheus.NewCounterVec( + IterationsScheduled: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Name: "rule_group_iterations_total", @@ -99,7 +99,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), - evalTotal: prometheus.NewCounterVec( + EvalTotal: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Name: "rule_evaluations_total", @@ -107,7 +107,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), - evalFailures: prometheus.NewCounterVec( + EvalFailures: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Name: "rule_evaluation_failures_total", @@ -115,7 +115,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), - groupInterval: prometheus.NewGaugeVec( + GroupInterval: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, Name: "rule_group_interval_seconds", @@ -123,7 +123,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), - groupLastEvalTime: prometheus.NewGaugeVec( + GroupLastEvalTime: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, Name: "rule_group_last_evaluation_timestamp_seconds", @@ -131,7 +131,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), - groupLastDuration: prometheus.NewGaugeVec( + GroupLastDuration: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, Name: "rule_group_last_duration_seconds", @@ -139,7 +139,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), - groupRules: prometheus.NewGaugeVec( + GroupRules: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, Name: "rule_group_rules", @@ -147,7 +147,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), - groupSamples: prometheus.NewGaugeVec( + GroupSamples: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, Name: "rule_group_last_evaluation_samples", @@ -159,17 +159,17 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { if reg != nil { reg.MustRegister( - m.evalDuration, - m.iterationDuration, - m.iterationsMissed, - m.iterationsScheduled, - m.evalTotal, - m.evalFailures, - m.groupInterval, - m.groupLastEvalTime, - m.groupLastDuration, - m.groupRules, - m.groupSamples, + m.EvalDuration, + m.IterationDuration, + m.IterationsMissed, + m.IterationsScheduled, + m.EvalTotal, + m.EvalFailures, + m.GroupInterval, + m.GroupLastEvalTime, + m.GroupLastDuration, + m.GroupRules, + m.GroupSamples, ) } @@ -281,15 +281,15 @@ func NewGroup(o GroupOptions) *Group { } key := GroupKey(o.File, o.Name) - metrics.iterationsMissed.WithLabelValues(key) - metrics.iterationsScheduled.WithLabelValues(key) - metrics.evalTotal.WithLabelValues(key) - metrics.evalFailures.WithLabelValues(key) - metrics.groupLastEvalTime.WithLabelValues(key) - metrics.groupLastDuration.WithLabelValues(key) - metrics.groupRules.WithLabelValues(key).Set(float64(len(o.Rules))) - metrics.groupSamples.WithLabelValues(key) - metrics.groupInterval.WithLabelValues(key).Set(o.Interval.Seconds()) + metrics.IterationsMissed.WithLabelValues(key) + metrics.IterationsScheduled.WithLabelValues(key) + metrics.EvalTotal.WithLabelValues(key) + metrics.EvalFailures.WithLabelValues(key) + metrics.GroupLastEvalTime.WithLabelValues(key) + metrics.GroupLastDuration.WithLabelValues(key) + metrics.GroupRules.WithLabelValues(key).Set(float64(len(o.Rules))) + metrics.GroupSamples.WithLabelValues(key) + metrics.GroupInterval.WithLabelValues(key).Set(o.Interval.Seconds()) return &Group{ name: o.Name, @@ -338,13 +338,13 @@ func (g *Group) run(ctx context.Context) { }) iter := func() { - g.metrics.iterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Inc() + g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Inc() start := time.Now() g.Eval(ctx, evalTimestamp) timeSinceStart := time.Since(start) - g.metrics.iterationDuration.Observe(timeSinceStart.Seconds()) + g.metrics.IterationDuration.Observe(timeSinceStart.Seconds()) g.setEvaluationTime(timeSinceStart) g.setLastEvaluation(start) } @@ -390,8 +390,8 @@ func (g *Group) run(ctx context.Context) { case <-tick.C: missed := (time.Since(evalTimestamp) / g.interval) - 1 if missed > 0 { - g.metrics.iterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed)) - g.metrics.iterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed)) + g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed)) + g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed)) } evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval) iter() @@ -412,8 +412,8 @@ func (g *Group) run(ctx context.Context) { case <-tick.C: missed := (time.Since(evalTimestamp) / g.interval) - 1 if missed > 0 { - g.metrics.iterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed)) - g.metrics.iterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed)) + g.metrics.IterationsMissed.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed)) + g.metrics.IterationsScheduled.WithLabelValues(GroupKey(g.file, g.name)).Add(float64(missed)) } evalTimestamp = evalTimestamp.Add((missed + 1) * g.interval) iter() @@ -476,7 +476,7 @@ func (g *Group) GetEvaluationTime() time.Duration { // setEvaluationTime sets the time in seconds the last evaluation took. func (g *Group) setEvaluationTime(dur time.Duration) { - g.metrics.groupLastDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(dur.Seconds()) + g.metrics.GroupLastDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(dur.Seconds()) g.mtx.Lock() defer g.mtx.Unlock() @@ -492,7 +492,7 @@ func (g *Group) GetLastEvaluation() time.Time { // setLastEvaluation updates evaluationTimestamp to the timestamp of when the rule group was last evaluated. func (g *Group) setLastEvaluation(ts time.Time) { - g.metrics.groupLastEvalTime.WithLabelValues(GroupKey(g.file, g.name)).Set(float64(ts.UnixNano()) / 1e9) + g.metrics.GroupLastEvalTime.WithLabelValues(GroupKey(g.file, g.name)).Set(float64(ts.UnixNano()) / 1e9) g.mtx.Lock() defer g.mtx.Unlock() @@ -584,18 +584,18 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { sp.Finish() since := time.Since(t) - g.metrics.evalDuration.Observe(since.Seconds()) + g.metrics.EvalDuration.Observe(since.Seconds()) rule.SetEvaluationDuration(since) rule.SetEvaluationTimestamp(t) }(time.Now()) - g.metrics.evalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() + g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL) if err != nil { rule.SetHealth(HealthBad) rule.SetLastError(err) - g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() + g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() // Canceled queries are intentional termination of queries. This normally // happens on shutdown and thus we skip logging of any errors here. @@ -620,7 +620,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { if err := app.Commit(); err != nil { rule.SetHealth(HealthBad) rule.SetLastError(err) - g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() + g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() level.Warn(g.logger).Log("msg", "Rule sample appending failed", "err", err) return @@ -671,7 +671,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { }(i, rule) } if g.metrics != nil { - g.metrics.groupSamples.WithLabelValues(GroupKey(g.File(), g.Name())).Set(samplesTotal) + g.metrics.GroupSamples.WithLabelValues(GroupKey(g.File(), g.Name())).Set(samplesTotal) } g.cleanupStaleSeries(ctx, ts) } @@ -996,15 +996,15 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels g.markStale = true g.stop() if m := g.metrics; m != nil { - m.iterationsMissed.DeleteLabelValues(n) - m.iterationsScheduled.DeleteLabelValues(n) - m.evalTotal.DeleteLabelValues(n) - m.evalFailures.DeleteLabelValues(n) - m.groupInterval.DeleteLabelValues(n) - m.groupLastEvalTime.DeleteLabelValues(n) - m.groupLastDuration.DeleteLabelValues(n) - m.groupRules.DeleteLabelValues(n) - m.groupSamples.DeleteLabelValues((n)) + m.IterationsMissed.DeleteLabelValues(n) + m.IterationsScheduled.DeleteLabelValues(n) + m.EvalTotal.DeleteLabelValues(n) + m.EvalFailures.DeleteLabelValues(n) + m.GroupInterval.DeleteLabelValues(n) + m.GroupLastEvalTime.DeleteLabelValues(n) + m.GroupLastDuration.DeleteLabelValues(n) + m.GroupRules.DeleteLabelValues(n) + m.GroupSamples.DeleteLabelValues((n)) } wg.Done() }(n, oldg)