From 7e1b39c682c27b10400f200cecf68a4dd2e4f169 Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Mon, 18 Jan 2016 16:47:31 +0100 Subject: [PATCH 1/2] Fix startup/teardown order, add documentation --- cmd/prometheus/main.go | 13 +++++++++---- notification/notification.go | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 30e8869d3..53d820c83 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -132,7 +132,8 @@ func Main() int { } }() - // Start all components. + // Start all components. The order is NOT arbitrary. + if err := memStorage.Start(); err != nil { log.Errorln("Error opening memory series storage:", err) return 1 @@ -155,15 +156,19 @@ func Main() int { prometheus.MustRegister(configSuccess) prometheus.MustRegister(configSuccessTime) - go ruleManager.Run() - defer ruleManager.Stop() - + // The notification handler is a dependency of the rule manager. It has to be + // started before it and torn down after it. go notificationHandler.Run() defer notificationHandler.Stop() + go ruleManager.Run() + defer ruleManager.Stop() + go targetManager.Run() defer targetManager.Stop() + // Shutting down the query engine before the rule manager will cause pending queries + // to be canceled and ensures a quick shutdown of the rule manager. defer queryEngine.Stop() go webHandler.Run() diff --git a/notification/notification.go b/notification/notification.go index acf3b49d7..0060047e4 100644 --- a/notification/notification.go +++ b/notification/notification.go @@ -200,6 +200,7 @@ func (n *Handler) Run() { } // SubmitReqs queues the given notification requests for processing. +// Panics if called on a handler that is not running. 
func (n *Handler) Send(alerts ...*model.Alert) { n.mtx.Lock() defer n.mtx.Unlock() From a8c38c3ac5f9d41cd99e7a45c55fcf04c14321c8 Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Mon, 18 Jan 2016 16:53:37 +0100 Subject: [PATCH 2/2] Don't log rule evaluation failure on shutdown --- rules/manager.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rules/manager.go b/rules/manager.go index 212c9f9e5..d779979b7 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -220,8 +220,12 @@ func (g *Group) eval() { vector, err := rule.eval(now, g.opts.QueryEngine) if err != nil { + // Canceled queries are intentionally terminated queries. This normally + // happens on shutdown and thus we skip logging of any errors here. + if _, ok := err.(promql.ErrQueryCanceled); !ok { + log.Warnf("Error while evaluating rule %q: %s", rule, err) + } evalFailures.Inc() - log.Warnf("Error while evaluating rule %q: %s", rule, err) } var rtyp ruleType