discovery manager ApplyConfig now takes a direct ServiceDiscoveryConfig so that it can be used for the notify manager

reimplement the service discovery for the notify manager

Signed-off-by: Krasi Georgiev <krasi.root@gmail.com>
pull/3638/head
Krasi Georgiev 7 years ago
parent b20a1b1b1b
commit d12e6f29fc

@ -44,6 +44,7 @@ import (
promlogflag "github.com/prometheus/common/promlog/flag" promlogflag "github.com/prometheus/common/promlog/flag"
"github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery"
sd_config "github.com/prometheus/prometheus/discovery/config"
"github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/retrieval" "github.com/prometheus/prometheus/retrieval"
@ -234,11 +235,12 @@ func main() {
ctxWeb, cancelWeb = context.WithCancel(context.Background()) ctxWeb, cancelWeb = context.WithCancel(context.Background())
ctxRule = context.Background() ctxRule = context.Background()
notifier = notifier.New(&cfg.notifier, log.With(logger, "component", "notifier")) notifier = notifier.New(&cfg.notifier, log.With(logger, "component", "notifier"))
discoveryManager = discovery.NewManager(log.With(logger, "component", "discovery manager")) discoveryManagerScrape = discovery.NewManager(log.With(logger, "component", "discovery manager scrape"))
scrapeManager = retrieval.NewScrapeManager(log.With(logger, "component", "scrape manager"), fanoutStorage) discoveryManagerNotify = discovery.NewManager(log.With(logger, "component", "discovery manager notify"))
queryEngine = promql.NewEngine(fanoutStorage, &cfg.queryEngine) scrapeManager = retrieval.NewScrapeManager(log.With(logger, "component", "scrape manager"), fanoutStorage)
ruleManager = rules.NewManager(&rules.ManagerOptions{ queryEngine = promql.NewEngine(fanoutStorage, &cfg.queryEngine)
ruleManager = rules.NewManager(&rules.ManagerOptions{
Appendable: fanoutStorage, Appendable: fanoutStorage,
QueryFunc: rules.EngineQueryFunc(queryEngine), QueryFunc: rules.EngineQueryFunc(queryEngine),
NotifyFunc: sendAlerts(notifier, cfg.web.ExternalURL.String()), NotifyFunc: sendAlerts(notifier, cfg.web.ExternalURL.String()),
@ -283,7 +285,21 @@ func main() {
remoteStorage.ApplyConfig, remoteStorage.ApplyConfig,
webHandler.ApplyConfig, webHandler.ApplyConfig,
notifier.ApplyConfig, notifier.ApplyConfig,
discoveryManager.ApplyConfig, func(cfg *config.Config) error {
c := make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfg.ScrapeConfigs {
c[v.JobName] = v.ServiceDiscoveryConfig
}
return discoveryManagerScrape.ApplyConfig(c)
},
func(cfg *config.Config) error {
c := make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfg.AlertingConfig.AlertmanagerConfigs {
// AlertmanagerConfigs doesn't hold a unique identifier so we use the config pointer as the identifier.
c[fmt.Sprintf("%p", v)] = v.ServiceDiscoveryConfig
}
return discoveryManagerNotify.ApplyConfig(c)
},
scrapeManager.ApplyConfig, scrapeManager.ApplyConfig,
func(cfg *config.Config) error { func(cfg *config.Config) error {
// Get all rule files matching the configuration paths. // Get all rule files matching the configuration paths.
@ -332,23 +348,37 @@ func main() {
) )
} }
{ {
ctxDiscovery, cancelDiscovery := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
g.Add(
func() error {
err := discoveryManagerScrape.Run(ctx)
level.Info(logger).Log("msg", "Scrape discovery manager stopped")
return err
},
func(err error) {
level.Info(logger).Log("msg", "Stopping scrape discovery manager...")
cancel()
},
)
}
{
ctx, cancel := context.WithCancel(context.Background())
g.Add( g.Add(
func() error { func() error {
err := discoveryManager.Run(ctxDiscovery) err := discoveryManagerNotify.Run(ctx)
level.Info(logger).Log("msg", "Discovery manager stopped") level.Info(logger).Log("msg", "Notify discovery manager stopped")
return err return err
}, },
func(err error) { func(err error) {
level.Info(logger).Log("msg", "Stopping discovery manager...") level.Info(logger).Log("msg", "Stopping notify discovery manager...")
cancelDiscovery() cancel()
}, },
) )
} }
{ {
g.Add( g.Add(
func() error { func() error {
err := scrapeManager.Run(discoveryManager.SyncCh()) err := scrapeManager.Run(discoveryManagerScrape.SyncCh())
level.Info(logger).Log("msg", "Scrape manager stopped") level.Info(logger).Log("msg", "Scrape manager stopped")
return err return err
}, },
@ -493,7 +523,7 @@ func main() {
// so keep this interrupt after the ruleManager.Stop(). // so keep this interrupt after the ruleManager.Stop().
g.Add( g.Add(
func() error { func() error {
notifier.Run() notifier.Run(discoveryManagerNotify.SyncCh())
return nil return nil
}, },
func(err error) { func(err error) {

@ -20,7 +20,6 @@ import (
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/prometheus/prometheus/config"
sd_config "github.com/prometheus/prometheus/discovery/config" sd_config "github.com/prometheus/prometheus/discovery/config"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
@ -101,13 +100,13 @@ func (m *Manager) SyncCh() <-chan map[string][]*targetgroup.Group {
} }
// ApplyConfig removes all running discovery providers and starts new ones using the provided config. // ApplyConfig removes all running discovery providers and starts new ones using the provided config.
func (m *Manager) ApplyConfig(cfg *config.Config) error { func (m *Manager) ApplyConfig(cfg map[string]sd_config.ServiceDiscoveryConfig) error {
err := make(chan error) err := make(chan error)
m.actionCh <- func(ctx context.Context) { m.actionCh <- func(ctx context.Context) {
m.cancelDiscoverers() m.cancelDiscoverers()
for _, scfg := range cfg.ScrapeConfigs { for name, scfg := range cfg {
for provName, prov := range m.providersFromConfig(scfg.ServiceDiscoveryConfig) { for provName, prov := range m.providersFromConfig(scfg) {
m.startProvider(ctx, poolKey{setName: scfg.JobName, provider: provName}, prov) m.startProvider(ctx, poolKey{setName: name, provider: provName}, prov)
} }
} }
close(err) close(err)

@ -113,9 +113,8 @@ type Notifier struct {
ctx context.Context ctx context.Context
cancel func() cancel func()
alertmanagers []*alertmanagerSet alertmanagers map[string]*alertmanagerSet
cancelDiscovery func() logger log.Logger
logger log.Logger
} }
// Options are the configurable parameters of a Handler. // Options are the configurable parameters of a Handler.
@ -247,7 +246,7 @@ func (n *Notifier) ApplyConfig(conf *config.Config) error {
n.opts.ExternalLabels = conf.GlobalConfig.ExternalLabels n.opts.ExternalLabels = conf.GlobalConfig.ExternalLabels
n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs
amSets := []*alertmanagerSet{} amSets := make(map[string]*alertmanagerSet)
for _, cfg := range conf.AlertingConfig.AlertmanagerConfigs { for _, cfg := range conf.AlertingConfig.AlertmanagerConfigs {
ams, err := newAlertmanagerSet(cfg, n.logger) ams, err := newAlertmanagerSet(cfg, n.logger)
@ -257,7 +256,8 @@ func (n *Notifier) ApplyConfig(conf *config.Config) error {
ams.metrics = n.metrics ams.metrics = n.metrics
amSets = append(amSets, ams) // The config pointer is used for the map lookup identifier.
amSets[fmt.Sprintf("%p", cfg)] = ams
} }
n.alertmanagers = amSets n.alertmanagers = amSets
@ -292,11 +292,14 @@ func (n *Notifier) nextBatch() []*Alert {
} }
// Run dispatches notifications continuously. // Run dispatches notifications continuously.
func (n *Notifier) Run() { func (n *Notifier) Run(tsets <-chan map[string][]*targetgroup.Group) {
for { for {
select { select {
case <-n.ctx.Done(): case <-n.ctx.Done():
return return
case ts := <-tsets:
n.reload(ts)
case <-n.more: case <-n.more:
} }
alerts := n.nextBatch() alerts := n.nextBatch()
@ -311,6 +314,20 @@ func (n *Notifier) Run() {
} }
} }
// reload syncs the Alertmanager sets with the latest target group
// updates delivered by the discovery manager over Run's tsets channel.
// Each map key is the identifier assigned in ApplyConfig (the
// AlertmanagerConfig pointer formatted with %p); an id with no
// registered set is logged and skipped so a single stale entry
// cannot block updates for the remaining sets.
func (n *Notifier) reload(tgs map[string][]*targetgroup.Group) {
// Hold the mutex for the whole sync so ApplyConfig cannot swap
// n.alertmanagers out from under the iteration.
n.mtx.Lock()
defer n.mtx.Unlock()
for id, tgroup := range tgs {
am, ok := n.alertmanagers[id]
if !ok {
level.Error(n.logger).Log("msg", "couldn't sync alert manager set", "err", fmt.Sprintf("invalid id:%v", id))
continue
}
am.sync(tgroup)
}
}
// Send queues the given notification requests for processing. // Send queues the given notification requests for processing.
// Panics if called on a handler that is not running. // Panics if called on a handler that is not running.
func (n *Notifier) Send(alerts ...*Alert) { func (n *Notifier) Send(alerts ...*Alert) {
@ -515,9 +532,9 @@ func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger) (*ale
return s, nil return s, nil
} }
// Sync extracts a deduplicated set of Alertmanager endpoints from a list // sync extracts a deduplicated set of Alertmanager endpoints from a list
// of target groups definitions. // of target groups definitions.
func (s *alertmanagerSet) Sync(tgs []*targetgroup.Group) { func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
all := []alertmanager{} all := []alertmanager{}
for _, tg := range tgs { for _, tg := range tgs {

@ -62,9 +62,7 @@ func (m *ScrapeManager) Run(tsets <-chan map[string][]*targetgroup.Group) error
case f := <-m.actionCh: case f := <-m.actionCh:
f() f()
case ts := <-tsets: case ts := <-tsets:
if err := m.reload(ts); err != nil { m.reload(ts)
level.Error(m.logger).Log("msg", "error reloading the scrape manager", "err", err)
}
case <-m.graceShut: case <-m.graceShut:
return nil return nil
} }
@ -129,11 +127,12 @@ func (m *ScrapeManager) Targets() []*Target {
return <-targets return <-targets
} }
func (m *ScrapeManager) reload(t map[string][]*targetgroup.Group) error { func (m *ScrapeManager) reload(t map[string][]*targetgroup.Group) {
for tsetName, tgroup := range t { for tsetName, tgroup := range t {
scrapeConfig, ok := m.scrapeConfigs[tsetName] scrapeConfig, ok := m.scrapeConfigs[tsetName]
if !ok { if !ok {
return fmt.Errorf("target set '%v' doesn't have valid config", tsetName) level.Error(m.logger).Log("msg", "error reloading target set", "err", fmt.Sprintf("invalid config id:%v", tsetName))
continue
} }
// Scrape pool doesn't exist so start a new one. // Scrape pool doesn't exist so start a new one.
@ -155,6 +154,4 @@ func (m *ScrapeManager) reload(t map[string][]*targetgroup.Group) error {
delete(m.scrapePools, name) delete(m.scrapePools, name)
} }
} }
return nil
} }

Loading…
Cancel
Save