diff --git a/main.go b/main.go
index 1325c09ab..47d94b46c 100644
--- a/main.go
+++ b/main.go
@@ -92,13 +92,6 @@ type prometheus struct {
 // NewPrometheus creates a new prometheus object based on flag values.
 // Call Serve() to start serving and Close() for clean shutdown.
 func NewPrometheus() *prometheus {
-	conf, err := config.LoadFromFile(*configFile)
-	if err != nil {
-		glog.Errorf("Couldn't load configuration (-config.file=%s): %v", *configFile, err)
-		glog.Errorf("Note: The configuration format has changed with version 0.14, please check the documentation.")
-		os.Exit(2)
-	}
-
 	notificationHandler := notification.NewNotificationHandler(*alertmanagerURL, *notificationQueueCapacity)
 
 	var syncStrategy local.SyncStrategy
@@ -155,26 +148,17 @@ func NewPrometheus() *prometheus {
 		sampleAppender = fanout
 	}
 
-	targetManager, err := retrieval.NewTargetManager(conf, sampleAppender)
-	if err != nil {
-		glog.Errorf("Error creating target manager: %s", err)
-		os.Exit(1)
-	}
+	targetManager := retrieval.NewTargetManager(sampleAppender)
 
 	queryEngine := promql.NewEngine(memStorage)
 
 	ruleManager := rules.NewManager(&rules.ManagerOptions{
 		SampleAppender:      sampleAppender,
 		NotificationHandler: notificationHandler,
-		EvaluationInterval:  time.Duration(conf.GlobalConfig.EvaluationInterval),
 		QueryEngine:         queryEngine,
 		PrometheusURL:       web.MustBuildServerURL(*pathPrefix),
 		PathPrefix:          *pathPrefix,
 	})
-	if err := ruleManager.LoadRuleFiles(conf.RuleFiles...); err != nil {
-		glog.Errorf("Error loading rule files: %s", err)
-		os.Exit(1)
-	}
 
 	flags := map[string]string{}
 	flag.VisitAll(func(f *flag.Flag) {
@@ -182,7 +166,6 @@ func NewPrometheus() *prometheus {
 	})
 	prometheusStatus := &web.PrometheusStatusHandler{
 		BuildInfo:   BuildInfo,
-		Config:      conf.String(),
 		RuleManager: ruleManager,
 		TargetPools: targetManager.Pools,
 		Flags:       flags,
@@ -229,9 +212,27 @@ func NewPrometheus() *prometheus {
 		webService:          webService,
 	}
 	webService.QuitChan = make(chan struct{})
+
+	p.reloadConfig()
+
 	return p
 }
 
+func (p *prometheus) reloadConfig() {
+	glog.Infof("Loading configuration file %s", *configFile)
+
+	conf, err := config.LoadFromFile(*configFile)
+	if err != nil {
+		glog.Errorf("Couldn't load configuration (-config.file=%s): %v", *configFile, err)
+		glog.Errorf("Note: The configuration format has changed with version 0.14, please check the documentation.")
+		return
+	}
+
+	p.webService.StatusHandler.ApplyConfig(conf)
+	p.targetManager.ApplyConfig(conf)
+	p.ruleManager.ApplyConfig(conf)
+}
+
 // Serve starts the Prometheus server. It returns after the server has been shut
 // down. The method installs an interrupt handler, allowing to trigger a
 // shutdown by sending SIGTERM to the process.
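Before the Serve() hunk below, it is worth spelling out the shape this gives main.go: NewPrometheus no longer parses the configuration itself; reloadConfig parses the file once and pushes the resulting *config.Config into the status handler, the target manager, and the rule manager. The following is a minimal, self-contained sketch of that fan-out, using hypothetical Config and Reloadable stand-ins rather than the actual Prometheus types:

package main

import (
	"log"
	"time"
)

// Config is a stand-in for config.Config; it carries only what the sketch needs.
type Config struct {
	EvaluationInterval time.Duration
	RuleFiles          []string
}

// Reloadable is a hypothetical name for the contract reloadConfig relies on:
// each component accepts a freshly parsed configuration.
type Reloadable interface {
	ApplyConfig(*Config)
}

type ruleManager struct{ interval time.Duration }

func (m *ruleManager) ApplyConfig(c *Config) { m.interval = c.EvaluationInterval }

type statusHandler struct{ ruleFiles int }

func (h *statusHandler) ApplyConfig(c *Config) { h.ruleFiles = len(c.RuleFiles) }

func main() {
	components := []Reloadable{&ruleManager{}, &statusHandler{}}

	conf := &Config{EvaluationInterval: 15 * time.Second, RuleFiles: []string{"alerts.rules"}}

	// Mirror of reloadConfig: parse once, hand the same config to every
	// component; on a parse error none of them would be touched.
	for _, c := range components {
		c.ApplyConfig(conf)
	}
	log.Printf("applied configuration to %d components", len(components))
}

Because parsing happens before any ApplyConfig call, a malformed file leaves every component on its previous configuration.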
@@ -252,15 +253,25 @@ func (p *prometheus) Serve() {
 		}
 	}()
 
-	notifier := make(chan os.Signal)
-	signal.Notify(notifier, os.Interrupt, syscall.SIGTERM)
+	hup := make(chan os.Signal)
+	signal.Notify(hup, syscall.SIGHUP)
+	go func() {
+		for range hup {
+			p.reloadConfig()
+		}
+	}()
+
+	term := make(chan os.Signal)
+	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
 	select {
-	case <-notifier:
+	case <-term:
 		glog.Warning("Received SIGTERM, exiting gracefully...")
 	case <-p.webService.QuitChan:
 		glog.Warning("Received termination request via web service, exiting gracefully...")
 	}
 
+	close(hup)
+
 	p.targetManager.Stop()
 	p.ruleManager.Stop()
 	p.queryEngine.Stop()
diff --git a/retrieval/target.go b/retrieval/target.go
index bd1db8dc6..73ce5faf7 100644
--- a/retrieval/target.go
+++ b/retrieval/target.go
@@ -285,6 +285,7 @@ func (t *target) RunScraper(sampleAppender storage.SampleAppender) {
 			// On changed scrape interval the new interval becomes effective
 			// after the next scrape.
 			if lastScrapeInterval != t.scrapeInterval {
+				ticker.Stop()
 				ticker = time.NewTicker(t.scrapeInterval)
 				lastScrapeInterval = t.scrapeInterval
 			}
diff --git a/retrieval/targetmanager.go b/retrieval/targetmanager.go
index beebe7bd6..43d9d165a 100644
--- a/retrieval/targetmanager.go
+++ b/retrieval/targetmanager.go
@@ -62,16 +62,13 @@ type TargetManager struct {
 	providers map[*config.ScrapeConfig][]TargetProvider
 }
 
-// NewTargetManager creates a new TargetManager based on the given config.
-func NewTargetManager(cfg *config.Config, sampleAppender storage.SampleAppender) (*TargetManager, error) {
+// NewTargetManager creates a new TargetManager.
+func NewTargetManager(sampleAppender storage.SampleAppender) *TargetManager {
 	tm := &TargetManager{
 		sampleAppender: sampleAppender,
 		targets:        make(map[string][]Target),
 	}
-	if err := tm.applyConfig(cfg); err != nil {
-		return nil, err
-	}
-	return tm, nil
+	return tm
 }
 
 // Run starts background processing to handle target updates.
@@ -129,19 +126,17 @@ func fullSource(cfg *config.ScrapeConfig, src string) string {
 
 // Stop all background processing.
 func (tm *TargetManager) Stop() {
-	tm.stop(true)
+	tm.m.Lock()
+	defer tm.m.Unlock()
+
+	if tm.running {
+		tm.stop(true)
+	}
 }
 
 // stop background processing of the target manager. If removeTargets is true,
 // existing targets will be stopped and removed.
 func (tm *TargetManager) stop(removeTargets bool) {
-	tm.m.Lock()
-	defer tm.m.Unlock()
-
-	if !tm.running {
-		return
-	}
-
 	glog.Info("Stopping target manager...")
 	defer glog.Info("Target manager stopped.")
 
@@ -273,35 +268,23 @@ func (tm *TargetManager) Pools() map[string][]Target {
 
 // ApplyConfig resets the manager's target providers and job configurations as defined
 // by the new cfg. The state of targets that are valid in the new configuration remains unchanged.
-func (tm *TargetManager) ApplyConfig(cfg *config.Config) error {
-	tm.stop(false)
-	// Even if updating the config failed, we want to continue rather than stop scraping anything.
-	defer tm.Run()
-
-	if err := tm.applyConfig(cfg); err != nil {
-		glog.Warningf("Error updating config, changes not applied: %s", err)
-		return err
-	}
-	return nil
-}
-
-func (tm *TargetManager) applyConfig(cfg *config.Config) error {
-	// Only apply changes if everything was successful.
-	providers := map[*config.ScrapeConfig][]TargetProvider{}
-
-	for _, scfg := range cfg.ScrapeConfigs {
-		provs, err := ProvidersFromConfig(scfg)
-		if err != nil {
-			return err
-		}
-		providers[scfg] = provs
-	}
+func (tm *TargetManager) ApplyConfig(cfg *config.Config) {
 	tm.m.Lock()
 	defer tm.m.Unlock()
 
+	if tm.running {
+		tm.stop(false)
+		// Even if updating the config failed, we want to continue rather than stop scraping anything.
+		defer tm.Run()
+	}
+	providers := map[*config.ScrapeConfig][]TargetProvider{}
+
+	for _, scfg := range cfg.ScrapeConfigs {
+		providers[scfg] = ProvidersFromConfig(scfg)
+	}
+
 	tm.globalLabels = cfg.GlobalConfig.Labels
 	tm.providers = providers
-	return nil
 }
 
 // targetsFromGroup builds targets based on the given TargetGroup and config.
@@ -335,7 +318,7 @@ func (tm *TargetManager) targetsFromGroup(tg *config.TargetGroup, cfg *config.Sc
 
 		labels, err := Relabel(labels, cfg.RelabelConfigs...)
 		if err != nil {
-			return nil, fmt.Errorf("error while relabelling instance %d in target group %s: %s", i, tg, err)
+			return nil, fmt.Errorf("error while relabeling instance %d in target group %s: %s", i, tg, err)
 		}
 		// Check if the target was dropped.
 		if labels == nil {
@@ -357,7 +340,7 @@ func (tm *TargetManager) targetsFromGroup(tg *config.TargetGroup, cfg *config.Sc
 }
 
 // ProvidersFromConfig returns all TargetProviders configured in cfg.
-func ProvidersFromConfig(cfg *config.ScrapeConfig) ([]TargetProvider, error) {
+func ProvidersFromConfig(cfg *config.ScrapeConfig) []TargetProvider {
 	var providers []TargetProvider
 
 	for _, dnscfg := range cfg.DNSSDConfigs {
@@ -367,7 +350,7 @@ func ProvidersFromConfig(cfg *config.ScrapeConfig) ([]TargetProvider, error) {
 	if len(cfg.TargetGroups) > 0 {
 		providers = append(providers, NewStaticProvider(cfg.TargetGroups))
 	}
-	return providers, nil
+	return providers
 }
 
 // StaticProvider holds a list of target groups that never change.
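The RunScraper change above and the rules manager change further down share one idiom: when a reload changes an interval, the old time.Ticker is stopped before a new one is created, since a ticker that is never stopped keeps its underlying timer alive. A standalone sketch of that swap (the intervals and the tick count here are arbitrary):

package main

import (
	"fmt"
	"time"
)

func main() {
	interval := 50 * time.Millisecond

	ticker := time.NewTicker(interval)
	// Stop whichever ticker is current when we exit.
	defer func() { ticker.Stop() }()

	for i := 0; i < 5; i++ {
		<-ticker.C
		fmt.Println("tick", i, "interval", interval)

		// Pretend a reload changed the interval after the second tick.
		newInterval := interval
		if i == 1 {
			newInterval = 20 * time.Millisecond
		}

		// The idiom from the diff: stop the old ticker so its timer is not
		// leaked, then create one with the new interval.
		if newInterval != interval {
			ticker.Stop()
			ticker = time.NewTicker(newInterval)
			interval = newInterval
		}
	}
}

As the comment in RunScraper notes, the new interval only becomes effective after the next tick fires.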
diff --git a/retrieval/targetmanager_test.go b/retrieval/targetmanager_test.go
index 6becfb221..f6aea2a40 100644
--- a/retrieval/targetmanager_test.go
+++ b/retrieval/targetmanager_test.go
@@ -277,19 +277,15 @@ func TestTargetManagerConfigUpdate(t *testing.T) {
 	}
 
 	conf := &config.Config{DefaultedConfig: config.DefaultConfig}
 
-	targetManager, err := NewTargetManager(conf, nopAppender{})
-	if err != nil {
-		t.Fatal(err)
-	}
+	targetManager := NewTargetManager(nopAppender{})
+	targetManager.ApplyConfig(conf)
+
 	targetManager.Run()
 	defer targetManager.Stop()
 
 	for i, step := range sequence {
 		conf.ScrapeConfigs = step.scrapeConfigs
-		err := targetManager.ApplyConfig(conf)
-		if err != nil {
-			t.Fatal(err)
-		}
+		targetManager.ApplyConfig(conf)
 
 		<-time.After(1 * time.Millisecond)
diff --git a/rules/manager.go b/rules/manager.go
index fe6b12b00..917b07c76 100644
--- a/rules/manager.go
+++ b/rules/manager.go
@@ -24,6 +24,7 @@ import (
 
 	clientmodel "github.com/prometheus/client_golang/model"
 
+	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/notification"
 	"github.com/prometheus/prometheus/promql"
 	"github.com/prometheus/prometheus/storage"
@@ -120,7 +121,11 @@ func NewManager(o *ManagerOptions) *Manager {
 func (m *Manager) Run() {
 	defer glog.Info("Rule manager stopped.")
 
-	ticker := time.NewTicker(m.interval)
+	m.Lock()
+	lastInterval := m.interval
+	m.Unlock()
+
+	ticker := time.NewTicker(lastInterval)
 	defer ticker.Stop()
 
 	for {
@@ -137,6 +142,14 @@
 			start := time.Now()
 			m.runIteration()
 			iterationDuration.Observe(float64(time.Since(start) / time.Millisecond))
+
+			m.Lock()
+			if lastInterval != m.interval {
+				ticker.Stop()
+				ticker = time.NewTicker(m.interval)
+				lastInterval = m.interval
+			}
+			m.Unlock()
 		case <-m.done:
 			return
 		}
 	}
@@ -255,11 +268,27 @@ func (m *Manager) runIteration() {
 	wg.Wait()
 }
 
-// LoadRuleFiles loads alerting and recording rules from the given files.
-func (m *Manager) LoadRuleFiles(filenames ...string) error {
+// ApplyConfig updates the rule manager's state as the config requires. If
+// loading the new rules failed the old rule set is restored.
+func (m *Manager) ApplyConfig(conf *config.Config) {
 	m.Lock()
 	defer m.Unlock()
 
+	m.interval = time.Duration(conf.GlobalConfig.EvaluationInterval)
+
+	rulesSnapshot := make([]Rule, len(m.rules))
+	copy(rulesSnapshot, m.rules)
+	m.rules = m.rules[:0]
+
+	if err := m.loadRuleFiles(conf.RuleFiles...); err != nil {
+		// If loading the new rules failed, restore the old rule set.
+		m.rules = rulesSnapshot
+		glog.Errorf("Error loading rules, previous rule set restored: %s", err)
+	}
+}
+
+// loadRuleFiles loads alerting and recording rules from the given files.
+func (m *Manager) loadRuleFiles(filenames ...string) error {
 	for _, fn := range filenames {
 		content, err := ioutil.ReadFile(fn)
 		if err != nil {
diff --git a/web/status.go b/web/status.go
index 7008c8dfe..6840505b9 100644
--- a/web/status.go
+++ b/web/status.go
@@ -18,6 +18,7 @@ import (
 	"sync"
 	"time"
 
+	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/retrieval"
 	"github.com/prometheus/prometheus/rules"
 )
@@ -47,5 +48,14 @@ func (h *PrometheusStatusHandler) TargetStateToClass() map[retrieval.TargetState
 }
 
 func (h *PrometheusStatusHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	h.mu.RLock()
 	executeTemplate(w, "status", h, h.PathPrefix)
+	h.mu.RUnlock()
+}
+
+// ApplyConfig updates the status handler's state as the new config requires.
+func (h *PrometheusStatusHandler) ApplyConfig(conf *config.Config) {
+	h.mu.Lock()
+	h.Config = conf.String()
+	h.mu.Unlock()
 }
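Taken together, the patch makes the configuration reloadable at runtime: SIGHUP re-runs reloadConfig, and a file that fails to parse is logged and ignored, leaving the previous configuration active. The sketch below isolates the signal wiring in a standalone program; the reloadConfig stub is a stand-in, and the buffered channel plus signal.Stop before close are conventional hardening choices of the sketch, not something this diff adds:

package main

import (
	"log"
	"os"
	"os/signal"
	"syscall"
)

// reloadConfig is a stand-in for prometheus.reloadConfig above: re-read the
// configuration file and fan it out to every component's ApplyConfig.
func reloadConfig() {
	log.Println("reloading configuration")
}

func main() {
	// SIGHUP triggers a live reload, as in Serve().
	hup := make(chan os.Signal, 1)
	signal.Notify(hup, syscall.SIGHUP)
	go func() {
		for range hup {
			reloadConfig()
		}
	}()

	// SIGTERM or an interrupt still shuts the process down.
	term := make(chan os.Signal, 1)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)
	<-term

	// Stop signal delivery before closing the channel so a late SIGHUP
	// cannot be sent on a closed channel.
	signal.Stop(hup)
	close(hup)
	log.Println("exiting gracefully")
}

With this wiring, kill -HUP <pid> re-reads the configuration file without restarting the process, while SIGTERM or Ctrl-C ends it as before.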