From aaaba57184848b7038d3b8221b3da732982650fa Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Sun, 2 Apr 2017 20:03:28 -0300 Subject: [PATCH] Export number of missed rule evaluations In case the execution of all rules takes longer than the configured rule evaluation interval, one or more iterations will be skipped. This needs to be visible to the operator. --- rules/manager.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/rules/manager.go b/rules/manager.go index ca5d4185a..7e9094e5a 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -75,10 +75,15 @@ var ( Name: "evaluator_iterations_skipped_total", Help: "The total number of rule group evaluations skipped due to throttled metric storage.", }) + iterationsMissed = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Name: "evaluator_iterations_missed_total", + Help: "The total number of rule group evaluations missed due to slow rule group evaluation.", + }) iterationsScheduled = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Name: "evaluator_iterations_total", - Help: "The total number of scheduled rule group evaluations, whether skipped or executed.", + Help: "The total number of scheduled rule group evaluations, whether executed, missed or skipped.", }) ) @@ -90,6 +95,7 @@ func init() { prometheus.MustRegister(iterationDuration) prometheus.MustRegister(iterationsSkipped) + prometheus.MustRegister(iterationsMissed) prometheus.MustRegister(evalFailures) prometheus.MustRegister(evalDuration) } @@ -158,6 +164,7 @@ func (g *Group) run() { iterationDuration.Observe(time.Since(start).Seconds()) } + lastTriggered := time.Now() iter() tick := time.NewTicker(g.interval) @@ -172,6 +179,12 @@ func (g *Group) run() { case <-g.done: return case <-tick.C: + missed := (time.Since(lastTriggered).Nanoseconds() / g.interval.Nanoseconds()) - 1 + if missed > 0 { + iterationsMissed.Add(float64(missed)) + iterationsScheduled.Add(float64(missed)) + } +
lastTriggered = time.Now() iter() } }