From 0b619b46d678f586f84b89cdd95e1fa10b024adf Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Sat, 25 Apr 2015 12:59:05 +0200 Subject: [PATCH] Change JobConfig to ScrapeConfig. This commit changes the configuration interface from job configs to scrape configs. This includes allowing multiple ways of target definition at once and moving DNS SD to its own config message. DNS SD can now contain multiple DNS names per configured discovery. --- .../client_golang/model/labelname.go | 4 + config/config.go | 158 +++++++++++------- config/config.proto | 32 ++-- config/config_test.go | 7 +- config/fixtures/invalid_job_name.conf.input | 4 +- config/fixtures/minimal.conf.input | 4 +- .../mixing_sd_and_manual_targets.conf.input | 7 - config/fixtures/repeated_job_name.conf.input | 12 +- config/fixtures/sample.conf.input | 8 +- config/fixtures/sd_targets.conf.input | 8 +- config/generated/config.pb.go | 112 +++++++------ config/load.go | 6 +- retrieval/discovery/dns.go | 44 +++-- retrieval/target.go | 37 +++- retrieval/target_test.go | 8 +- retrieval/targetmanager.go | 81 ++++----- retrieval/targetmanager_test.go | 39 ++--- 17 files changed, 328 insertions(+), 243 deletions(-) delete mode 100644 config/fixtures/mixing_sd_and_manual_targets.conf.input diff --git a/Godeps/_workspace/src/github.com/prometheus/client_golang/model/labelname.go b/Godeps/_workspace/src/github.com/prometheus/client_golang/model/labelname.go index cebc14de3..5ea4258aa 100644 --- a/Godeps/_workspace/src/github.com/prometheus/client_golang/model/labelname.go +++ b/Godeps/_workspace/src/github.com/prometheus/client_golang/model/labelname.go @@ -30,6 +30,10 @@ const ( // a scrape target. AddressLabel LabelName = "__address__" + // MetricsPathLabel is the name of the label that holds the path on which to + // scrape a target. + MetricsPathLabel LabelName = "__metrics_path__" + // ReservedLabelPrefix is a prefix which is not legal in user-supplied // label names. ReservedLabelPrefix = "__" diff --git a/config/config.go b/config/config.go index 1e582e36d..844bd79e2 100644 --- a/config/config.go +++ b/config/config.go @@ -39,12 +39,12 @@ type Config struct { } // String returns an ASCII serialization of the loaded configuration protobuf. -func (c Config) String() string { +func (c *Config) String() string { return proto.MarshalTextString(&c.PrometheusConfig) } // validateLabels validates whether label names have the correct format. -func (c Config) validateLabels(labels *pb.LabelPairs) error { +func validateLabels(labels *pb.LabelPairs) error { if labels == nil { return nil } @@ -57,7 +57,7 @@ func (c Config) validateLabels(labels *pb.LabelPairs) error { } // validateHosts validates whether a target group contains valid hosts. -func (c Config) validateHosts(hosts []string) error { +func validateHosts(hosts []string) error { if hosts == nil { return nil } @@ -72,7 +72,7 @@ func (c Config) validateHosts(hosts []string) error { } // Validate checks an entire parsed Config for the validity of its fields. -func (c Config) Validate() error { +func (c *Config) Validate() error { // Check the global configuration section for validity. global := c.Global if _, err := utility.StringToDuration(global.GetScrapeInterval()); err != nil { @@ -81,58 +81,30 @@ func (c Config) Validate() error { if _, err := utility.StringToDuration(global.GetEvaluationInterval()); err != nil { return fmt.Errorf("invalid rule evaluation interval: %s", err) } - if err := c.validateLabels(global.Labels); err != nil { + if err := validateLabels(global.Labels); err != nil { return fmt.Errorf("invalid global labels: %s", err) } - // Check each job configuration for validity. - jobNames := map[string]bool{} - for _, job := range c.Job { - if jobNames[job.GetName()] { - return fmt.Errorf("found multiple jobs configured with the same name: '%s'", job.GetName()) - } - jobNames[job.GetName()] = true + // Check each scrape configuration for validity. + jobNames := map[string]struct{}{} + for _, sc := range c.ScrapeConfigs() { + name := sc.GetJobName() - if !jobNameRE.MatchString(job.GetName()) { - return fmt.Errorf("invalid job name '%s'", job.GetName()) + if _, ok := jobNames[name]; ok { + return fmt.Errorf("found multiple scrape configs configured with the same job name: %q", name) } - if _, err := utility.StringToDuration(job.GetScrapeInterval()); err != nil { - return fmt.Errorf("invalid scrape interval for job '%s': %s", job.GetName(), err) - } - if _, err := utility.StringToDuration(job.GetSdRefreshInterval()); err != nil { - return fmt.Errorf("invalid SD refresh interval for job '%s': %s", job.GetName(), err) - } - if _, err := utility.StringToDuration(job.GetScrapeTimeout()); err != nil { - return fmt.Errorf("invalid scrape timeout for job '%s': %s", job.GetName(), err) - } - for _, targetGroup := range job.TargetGroup { - if err := c.validateLabels(targetGroup.Labels); err != nil { - return fmt.Errorf("invalid labels for job '%s': %s", job.GetName(), err) - } - if err := c.validateHosts(targetGroup.Target); err != nil { - return fmt.Errorf("invalid targets for job '%s': %s", job.GetName(), err) - } - } - if job.SdName != nil && len(job.TargetGroup) > 0 { - return fmt.Errorf("specified both DNS-SD name and target group for job: %s", job.GetName()) + jobNames[name] = struct{}{} + + if err := sc.Validate(); err != nil { + return fmt.Errorf("error in scrape config %q: %s", name, err) } } return nil } -// GetJobByName finds a job by its name in a Config object. -func (c Config) GetJobByName(name string) *JobConfig { - for _, job := range c.Job { - if job.GetName() == name { - return &JobConfig{*job} - } - } - return nil -} - // GlobalLabels returns the global labels as a LabelSet. -func (c Config) GlobalLabels() clientmodel.LabelSet { +func (c *Config) GlobalLabels() clientmodel.LabelSet { labels := clientmodel.LabelSet{} if c.Global != nil && c.Global.Labels != nil { for _, label := range c.Global.Labels.Label { @@ -142,10 +114,10 @@ func (c Config) GlobalLabels() clientmodel.LabelSet { return labels } -// Jobs returns all the jobs in a Config object. -func (c Config) Jobs() (jobs []JobConfig) { - for _, job := range c.Job { - jobs = append(jobs, JobConfig{*job}) +// ScrapeConfigs returns all scrape configurations. +func (c *Config) ScrapeConfigs() (cfgs []*ScrapeConfig) { + for _, sc := range c.GetScrapeConfig() { + cfgs = append(cfgs, &ScrapeConfig{*sc}) } return } @@ -160,36 +132,96 @@ func stringToDuration(intervalStr string) time.Duration { } // ScrapeInterval gets the default scrape interval for a Config. -func (c Config) ScrapeInterval() time.Duration { +func (c *Config) ScrapeInterval() time.Duration { return stringToDuration(c.Global.GetScrapeInterval()) } // EvaluationInterval gets the default evaluation interval for a Config. -func (c Config) EvaluationInterval() time.Duration { +func (c *Config) EvaluationInterval() time.Duration { return stringToDuration(c.Global.GetEvaluationInterval()) } -// JobConfig encapsulates the configuration of a single job. It wraps the raw -// job protocol buffer to be able to add custom methods to it. -type JobConfig struct { - pb.JobConfig +// ScrapeConfig encapsulates a protobuf scrape configuration. +type ScrapeConfig struct { + pb.ScrapeConfig } -// SDRefreshInterval gets the the SD refresh interval for a job. -func (c JobConfig) SDRefreshInterval() time.Duration { - return stringToDuration(c.GetSdRefreshInterval()) -} - -// ScrapeInterval gets the scrape interval for a job. -func (c JobConfig) ScrapeInterval() time.Duration { +// ScrapeInterval gets the scrape interval for the scrape config. +func (c *ScrapeConfig) ScrapeInterval() time.Duration { return stringToDuration(c.GetScrapeInterval()) } -// ScrapeTimeout gets the scrape timeout for a job. -func (c JobConfig) ScrapeTimeout() time.Duration { +// ScrapeTimeout gets the scrape timeout for the scrape config. +func (c *ScrapeConfig) ScrapeTimeout() time.Duration { return stringToDuration(c.GetScrapeTimeout()) } +// Labels returns a label set for the targets that is implied by the scrape config. +func (c *ScrapeConfig) Labels() clientmodel.LabelSet { + return clientmodel.LabelSet{ + clientmodel.MetricsPathLabel: clientmodel.LabelValue(c.GetMetricsPath()), + clientmodel.JobLabel: clientmodel.LabelValue(c.GetJobName()), + } +} + +// Validate checks the ScrapeConfig for the validity of its fields +func (c *ScrapeConfig) Validate() error { + name := c.GetJobName() + + if !jobNameRE.MatchString(name) { + return fmt.Errorf("invalid job name %q", name) + } + if _, err := utility.StringToDuration(c.GetScrapeInterval()); err != nil { + return fmt.Errorf("invalid scrape interval: %s", err) + } + if _, err := utility.StringToDuration(c.GetScrapeTimeout()); err != nil { + return fmt.Errorf("invalid scrape timeout: %s", err) + } + for _, tgroup := range c.GetTargetGroup() { + if err := validateLabels(tgroup.Labels); err != nil { + return fmt.Errorf("invalid labels: %s", err) + } + if err := validateHosts(tgroup.Target); err != nil { + return fmt.Errorf("invalid targets: %s", err) + } + } + for _, dnscfg := range c.DNSConfigs() { + if err := dnscfg.Validate(); err != nil { + return fmt.Errorf("invalid DNS config: %s", err) + } + } + return nil +} + +// DNSConfigs returns the list of DNS service discovery configurations +// for the scrape config. +func (c *ScrapeConfig) DNSConfigs() []*DNSConfig { + var dnscfgs []*DNSConfig + for _, dc := range c.GetDnsConfig() { + dnscfgs = append(dnscfgs, &DNSConfig{*dc}) + } + return dnscfgs +} + +// DNSConfig encapsulates the protobuf configuration object for DNS based +// service discovery. +type DNSConfig struct { + pb.DNSConfig +} + +// Validate checks the DNSConfig for the validity of its fields. +func (c *DNSConfig) Validate() error { + if _, err := utility.StringToDuration(c.GetRefreshInterval()); err != nil { + return fmt.Errorf("invalid refresh interval: %s", err) + } + return nil +} + +// SDRefreshInterval gets the the SD refresh interval for the scrape config. +func (c *DNSConfig) RefreshInterval() time.Duration { + return stringToDuration(c.GetRefreshInterval()) +} + // TargetGroup is derived from a protobuf TargetGroup and attaches a source to it // that identifies the origin of the group. type TargetGroup struct { diff --git a/config/config.proto b/config/config.proto index d2f741491..3de910005 100644 --- a/config/config.proto +++ b/config/config.proto @@ -48,12 +48,22 @@ message TargetGroup { optional LabelPairs labels = 2; } +// The configuration for DNS based service discovery. +message DNSConfig { + // The list of DNS-SD service names pointing to SRV records + // containing endpoint information. + repeated string name = 1; + // Discovery refresh period when using DNS-SD to discover targets. Must be a + // valid Prometheus duration string in the form "[0-9]+[smhdwy]". + optional string refresh_interval = 2 [default = "30s"]; +} + // The configuration for a Prometheus job to scrape. // -// The next field no. is 8. -message JobConfig { +// The next field no. is 10. +message ScrapeConfig { // The job name. Must adhere to the regex "[a-zA-Z_][a-zA-Z0-9_-]*". - required string name = 1; + required string job_name = 1; // How frequently to scrape targets from this job. Overrides the global // default. Must be a valid Prometheus duration string in the form // "[0-9]+[smhdwy]". @@ -61,15 +71,9 @@ message JobConfig { // Per-target timeout when scraping this job. Must be a valid Prometheus // duration string in the form "[0-9]+[smhdwy]". optional string scrape_timeout = 7 [default = "10s"]; - // The DNS-SD service name pointing to SRV records containing endpoint - // information for a job. When this field is provided, no target_group - // elements may be set. - optional string sd_name = 3; - // Discovery refresh period when using DNS-SD to discover targets. Must be a - // valid Prometheus duration string in the form "[0-9]+[smhdwy]". - optional string sd_refresh_interval = 4 [default = "30s"]; - // List of labeled target groups for this job. Only legal when DNS-SD isn't - // used for a job. + // List of DNS service discovery configurations. + repeated DNSConfig dns_config = 9; + // List of labeled target groups for this job. repeated TargetGroup target_group = 5; // The HTTP resource path on which to fetch metrics from targets. optional string metrics_path = 6 [default = "/metrics"]; @@ -83,6 +87,6 @@ message PrometheusConfig { // configuration with default values (see GlobalConfig definition) will be // created. optional GlobalConfig global = 1; - // The list of jobs to scrape. - repeated JobConfig job = 2; + // The list of scrape configs. + repeated ScrapeConfig scrape_config = 3; } diff --git a/config/config_test.go b/config/config_test.go index acc94b50d..9955dc99f 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -55,15 +55,10 @@ var configTests = []struct { shouldFail: true, errContains: "invalid label name", }, - { - inputFile: "mixing_sd_and_manual_targets.conf.input", - shouldFail: true, - errContains: "specified both DNS-SD name and target group", - }, { inputFile: "repeated_job_name.conf.input", shouldFail: true, - errContains: "found multiple jobs configured with the same name: 'testjob1'", + errContains: "found multiple scrape configs configured with the same job name: \"testjob1\"", }, } diff --git a/config/fixtures/invalid_job_name.conf.input b/config/fixtures/invalid_job_name.conf.input index e3923ac72..dcebbccb4 100644 --- a/config/fixtures/invalid_job_name.conf.input +++ b/config/fixtures/invalid_job_name.conf.input @@ -1,3 +1,3 @@ -job: < - name: "1testjob" +scrape_config: < + job_name: "1testjob" > diff --git a/config/fixtures/minimal.conf.input b/config/fixtures/minimal.conf.input index 637d95e50..9a436411f 100644 --- a/config/fixtures/minimal.conf.input +++ b/config/fixtures/minimal.conf.input @@ -10,8 +10,8 @@ global < rule_file: "prometheus.rules" > -job: < - name: "prometheus" +scrape_config: < + job_name: "prometheus" scrape_interval: "15s" metrics_path: "/metrics" scheme: "http" diff --git a/config/fixtures/mixing_sd_and_manual_targets.conf.input b/config/fixtures/mixing_sd_and_manual_targets.conf.input deleted file mode 100644 index 0d564234e..000000000 --- a/config/fixtures/mixing_sd_and_manual_targets.conf.input +++ /dev/null @@ -1,7 +0,0 @@ -job: < - name: "testjob" - sd_name: "sd_name" - target_group: < - target: "sampletarget:8080" - > -> diff --git a/config/fixtures/repeated_job_name.conf.input b/config/fixtures/repeated_job_name.conf.input index c59486219..3ca4fa468 100644 --- a/config/fixtures/repeated_job_name.conf.input +++ b/config/fixtures/repeated_job_name.conf.input @@ -1,11 +1,11 @@ -job: < - name: "testjob1" +scrape_config: < + job_name: "testjob1" > -job: < - name: "testjob2" +scrape_config: < + job_name: "testjob2" > -job: < - name: "testjob1" +scrape_config: < + job_name: "testjob1" > diff --git a/config/fixtures/sample.conf.input b/config/fixtures/sample.conf.input index 8ea3a069d..6bd42873f 100644 --- a/config/fixtures/sample.conf.input +++ b/config/fixtures/sample.conf.input @@ -10,8 +10,8 @@ global < rule_file: "prometheus.rules" > -job: < - name: "prometheus" +scrape_config: < + job_name: "prometheus" scrape_interval: "15s" target_group: < @@ -25,8 +25,8 @@ job: < > > -job: < - name: "random" +scrape_config: < + job_name: "random" scrape_interval: "30s" target_group: < diff --git a/config/fixtures/sd_targets.conf.input b/config/fixtures/sd_targets.conf.input index ffded895f..7b694db21 100644 --- a/config/fixtures/sd_targets.conf.input +++ b/config/fixtures/sd_targets.conf.input @@ -1,4 +1,6 @@ -job: < - name: "testjob" - sd_name: "sd_name" +scrape_config: < + job_name: "testjob" + dns_config: < + name: "sd_name" + > > diff --git a/config/generated/config.pb.go b/config/generated/config.pb.go index 089f9053b..c44247ab5 100644 --- a/config/generated/config.pb.go +++ b/config/generated/config.pb.go @@ -13,7 +13,8 @@ It has these top-level messages: LabelPairs GlobalConfig TargetGroup - JobConfig + DNSConfig + ScrapeConfig PrometheusConfig */ package io_prometheus @@ -146,12 +147,43 @@ func (m *TargetGroup) GetLabels() *LabelPairs { return nil } +// The configuration for DNS based service discovery. +type DNSConfig struct { + // The list of DNS-SD service names pointing to SRV records + // containing endpoint information. + Name []string `protobuf:"bytes,1,rep,name=name" json:"name,omitempty"` + // Discovery refresh period when using DNS-SD to discover targets. Must be a + // valid Prometheus duration string in the form "[0-9]+[smhdwy]". + RefreshInterval *string `protobuf:"bytes,2,opt,name=refresh_interval,def=30s" json:"refresh_interval,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *DNSConfig) Reset() { *m = DNSConfig{} } +func (m *DNSConfig) String() string { return proto.CompactTextString(m) } +func (*DNSConfig) ProtoMessage() {} + +const Default_DNSConfig_RefreshInterval string = "30s" + +func (m *DNSConfig) GetName() []string { + if m != nil { + return m.Name + } + return nil +} + +func (m *DNSConfig) GetRefreshInterval() string { + if m != nil && m.RefreshInterval != nil { + return *m.RefreshInterval + } + return Default_DNSConfig_RefreshInterval +} + // The configuration for a Prometheus job to scrape. // -// The next field no. is 8. -type JobConfig struct { +// The next field no. is 10. +type ScrapeConfig struct { // The job name. Must adhere to the regex "[a-zA-Z_][a-zA-Z0-9_-]*". - Name *string `protobuf:"bytes,1,req,name=name" json:"name,omitempty"` + JobName *string `protobuf:"bytes,1,req,name=job_name" json:"job_name,omitempty"` // How frequently to scrape targets from this job. Overrides the global // default. Must be a valid Prometheus duration string in the form // "[0-9]+[smhdwy]". @@ -159,15 +191,9 @@ type JobConfig struct { // Per-target timeout when scraping this job. Must be a valid Prometheus // duration string in the form "[0-9]+[smhdwy]". ScrapeTimeout *string `protobuf:"bytes,7,opt,name=scrape_timeout,def=10s" json:"scrape_timeout,omitempty"` - // The DNS-SD service name pointing to SRV records containing endpoint - // information for a job. When this field is provided, no target_group - // elements may be set. - SdName *string `protobuf:"bytes,3,opt,name=sd_name" json:"sd_name,omitempty"` - // Discovery refresh period when using DNS-SD to discover targets. Must be a - // valid Prometheus duration string in the form "[0-9]+[smhdwy]". - SdRefreshInterval *string `protobuf:"bytes,4,opt,name=sd_refresh_interval,def=30s" json:"sd_refresh_interval,omitempty"` - // List of labeled target groups for this job. Only legal when DNS-SD isn't - // used for a job. + // List of DNS service discovery configurations. + DnsConfig []*DNSConfig `protobuf:"bytes,9,rep,name=dns_config" json:"dns_config,omitempty"` + // List of labeled target groups for this job. TargetGroup []*TargetGroup `protobuf:"bytes,5,rep,name=target_group" json:"target_group,omitempty"` // The HTTP resource path on which to fetch metrics from targets. MetricsPath *string `protobuf:"bytes,6,opt,name=metrics_path,def=/metrics" json:"metrics_path,omitempty"` @@ -176,69 +202,61 @@ type JobConfig struct { XXX_unrecognized []byte `json:"-"` } -func (m *JobConfig) Reset() { *m = JobConfig{} } -func (m *JobConfig) String() string { return proto.CompactTextString(m) } -func (*JobConfig) ProtoMessage() {} +func (m *ScrapeConfig) Reset() { *m = ScrapeConfig{} } +func (m *ScrapeConfig) String() string { return proto.CompactTextString(m) } +func (*ScrapeConfig) ProtoMessage() {} -const Default_JobConfig_ScrapeTimeout string = "10s" -const Default_JobConfig_SdRefreshInterval string = "30s" -const Default_JobConfig_MetricsPath string = "/metrics" -const Default_JobConfig_Scheme string = "http" +const Default_ScrapeConfig_ScrapeTimeout string = "10s" +const Default_ScrapeConfig_MetricsPath string = "/metrics" +const Default_ScrapeConfig_Scheme string = "http" -func (m *JobConfig) GetName() string { - if m != nil && m.Name != nil { - return *m.Name +func (m *ScrapeConfig) GetJobName() string { + if m != nil && m.JobName != nil { + return *m.JobName } return "" } -func (m *JobConfig) GetScrapeInterval() string { +func (m *ScrapeConfig) GetScrapeInterval() string { if m != nil && m.ScrapeInterval != nil { return *m.ScrapeInterval } return "" } -func (m *JobConfig) GetScrapeTimeout() string { +func (m *ScrapeConfig) GetScrapeTimeout() string { if m != nil && m.ScrapeTimeout != nil { return *m.ScrapeTimeout } - return Default_JobConfig_ScrapeTimeout + return Default_ScrapeConfig_ScrapeTimeout } -func (m *JobConfig) GetSdName() string { - if m != nil && m.SdName != nil { - return *m.SdName +func (m *ScrapeConfig) GetDnsConfig() []*DNSConfig { + if m != nil { + return m.DnsConfig } - return "" + return nil } -func (m *JobConfig) GetSdRefreshInterval() string { - if m != nil && m.SdRefreshInterval != nil { - return *m.SdRefreshInterval - } - return Default_JobConfig_SdRefreshInterval -} - -func (m *JobConfig) GetTargetGroup() []*TargetGroup { +func (m *ScrapeConfig) GetTargetGroup() []*TargetGroup { if m != nil { return m.TargetGroup } return nil } -func (m *JobConfig) GetMetricsPath() string { +func (m *ScrapeConfig) GetMetricsPath() string { if m != nil && m.MetricsPath != nil { return *m.MetricsPath } - return Default_JobConfig_MetricsPath + return Default_ScrapeConfig_MetricsPath } -func (m *JobConfig) GetScheme() string { +func (m *ScrapeConfig) GetScheme() string { if m != nil && m.Scheme != nil { return *m.Scheme } - return Default_JobConfig_Scheme + return Default_ScrapeConfig_Scheme } // The top-level Prometheus configuration. @@ -247,9 +265,9 @@ type PrometheusConfig struct { // configuration with default values (see GlobalConfig definition) will be // created. Global *GlobalConfig `protobuf:"bytes,1,opt,name=global" json:"global,omitempty"` - // The list of jobs to scrape. - Job []*JobConfig `protobuf:"bytes,2,rep,name=job" json:"job,omitempty"` - XXX_unrecognized []byte `json:"-"` + // The list of scrape configs. + ScrapeConfig []*ScrapeConfig `protobuf:"bytes,3,rep,name=scrape_config" json:"scrape_config,omitempty"` + XXX_unrecognized []byte `json:"-"` } func (m *PrometheusConfig) Reset() { *m = PrometheusConfig{} } @@ -263,9 +281,9 @@ func (m *PrometheusConfig) GetGlobal() *GlobalConfig { return nil } -func (m *PrometheusConfig) GetJob() []*JobConfig { +func (m *PrometheusConfig) GetScrapeConfig() []*ScrapeConfig { if m != nil { - return m.Job + return m.ScrapeConfig } return nil } diff --git a/config/load.go b/config/load.go index 195226aee..75b6b03f8 100644 --- a/config/load.go +++ b/config/load.go @@ -30,9 +30,9 @@ func LoadFromString(configStr string) (Config, error) { if configProto.Global == nil { configProto.Global = &pb.GlobalConfig{} } - for _, job := range configProto.Job { - if job.ScrapeInterval == nil { - job.ScrapeInterval = proto.String(configProto.Global.GetScrapeInterval()) + for _, scfg := range configProto.GetScrapeConfig() { + if scfg.ScrapeInterval == nil { + scfg.ScrapeInterval = proto.String(configProto.Global.GetScrapeInterval()) } } diff --git a/retrieval/discovery/dns.go b/retrieval/discovery/dns.go index 6b71dc0d1..70cf3a738 100644 --- a/retrieval/discovery/dns.go +++ b/retrieval/discovery/dns.go @@ -61,7 +61,7 @@ func init() { // DNSDiscovery periodically performs DNS-SD requests. It implements // the TargetProvider interface. type DNSDiscovery struct { - name string + names []string done chan struct{} ticker *time.Ticker @@ -69,9 +69,9 @@ type DNSDiscovery struct { } // NewDNSDiscovery returns a new DNSDiscovery which periodically refreshes its targets. -func NewDNSDiscovery(name string, refreshInterval time.Duration) *DNSDiscovery { +func NewDNSDiscovery(names []string, refreshInterval time.Duration) *DNSDiscovery { return &DNSDiscovery{ - name: name, + names: names, done: make(chan struct{}), ticker: time.NewTicker(refreshInterval), } @@ -82,16 +82,12 @@ func (dd *DNSDiscovery) Run(ch chan<- *config.TargetGroup) { defer close(ch) // Get an initial set right away. - if err := dd.refresh(ch); err != nil { - glog.Errorf("Error refreshing DNS targets: %s", err) - } + dd.refreshAll(ch) for { select { case <-dd.ticker.C: - if err := dd.refresh(ch); err != nil { - glog.Errorf("Error refreshing DNS targets: %s", err) - } + dd.refreshAll(ch) case <-dd.done: return } @@ -100,21 +96,39 @@ func (dd *DNSDiscovery) Run(ch chan<- *config.TargetGroup) { // Stop implements the TargetProvider interface. func (dd *DNSDiscovery) Stop() { - glog.V(1).Info("Stopping DNS discovery for %s...", dd.name) + glog.V(1).Info("Stopping DNS discovery for %s...", dd.names) dd.ticker.Stop() dd.done <- struct{}{} - glog.V(1).Info("DNS discovery for %s stopped.", dd.name) + glog.V(1).Info("DNS discovery for %s stopped.", dd.names) } // Sources implements the TargetProvider interface. func (dd *DNSDiscovery) Sources() []string { - return []string{dnsSourcePrefix + ":" + dd.name} + var srcs []string + for _, name := range dd.names { + srcs = append(srcs, dnsSourcePrefix+":"+name) + } + return srcs } -func (dd *DNSDiscovery) refresh(ch chan<- *config.TargetGroup) error { - response, err := lookupSRV(dd.name) +func (dd *DNSDiscovery) refreshAll(ch chan<- *config.TargetGroup) { + var wg sync.WaitGroup + wg.Add(len(dd.names)) + for _, name := range dd.names { + go func(n string) { + if err := dd.refresh(n, ch); err != nil { + glog.Errorf("Error refreshing DNS targets: %s", err) + } + wg.Done() + }(name) + } + wg.Wait() +} + +func (dd *DNSDiscovery) refresh(name string, ch chan<- *config.TargetGroup) error { + response, err := lookupSRV(name) dnsSDLookupsCount.Inc() if err != nil { dnsSDLookupFailuresCount.Inc() @@ -137,7 +151,7 @@ func (dd *DNSDiscovery) refresh(ch chan<- *config.TargetGroup) error { }) } - tg.Source = dnsSourcePrefix + ":" + dd.name + tg.Source = dnsSourcePrefix + ":" + name ch <- tg return nil diff --git a/retrieval/target.go b/retrieval/target.go index a87a5ddb4..d09dbeecb 100644 --- a/retrieval/target.go +++ b/retrieval/target.go @@ -125,6 +125,9 @@ type Target interface { // The URL as seen from other hosts. References to localhost are resolved // to the address of the prometheus server. GlobalURL() string + // Return the labels describing the targets. These are the base labels + // as well as internal labels. + Labels() clientmodel.LabelSet // Return the target's base labels. BaseLabels() clientmodel.LabelSet // Return the target's base labels without job and instance label. That's @@ -135,7 +138,7 @@ type Target interface { // Stop scraping, synchronous. StopScraper() // Update the target's state. - Update(config.JobConfig, clientmodel.LabelSet) + Update(*config.ScrapeConfig, clientmodel.LabelSet) } // target is a Target that refers to a singular HTTP or HTTPS endpoint. @@ -169,7 +172,7 @@ type target struct { } // NewTarget creates a reasonably configured target for querying. -func NewTarget(address string, cfg config.JobConfig, baseLabels clientmodel.LabelSet) Target { +func NewTarget(address string, cfg *config.ScrapeConfig, baseLabels clientmodel.LabelSet) Target { t := &target{ url: &url.URL{ Host: address, @@ -183,12 +186,12 @@ func NewTarget(address string, cfg config.JobConfig, baseLabels clientmodel.Labe // Update overwrites settings in the target that are derived from the job config // it belongs to. -func (t *target) Update(cfg config.JobConfig, baseLabels clientmodel.LabelSet) { +func (t *target) Update(cfg *config.ScrapeConfig, baseLabels clientmodel.LabelSet) { t.Lock() defer t.Unlock() t.url.Scheme = cfg.GetScheme() - t.url.Path = cfg.GetMetricsPath() + t.url.Path = string(baseLabels[clientmodel.MetricsPathLabel]) t.scrapeInterval = cfg.ScrapeInterval() t.deadline = cfg.ScrapeTimeout() @@ -197,8 +200,12 @@ func (t *target) Update(cfg config.JobConfig, baseLabels clientmodel.LabelSet) { t.baseLabels = clientmodel.LabelSet{ clientmodel.InstanceLabel: clientmodel.LabelValue(t.InstanceIdentifier()), } + + // All remaining internal labels will not be part of the label set. for name, val := range baseLabels { - t.baseLabels[name] = val + if !strings.HasPrefix(string(name), clientmodel.ReservedLabelPrefix) { + t.baseLabels[name] = val + } } } @@ -425,6 +432,19 @@ func (t *target) GlobalURL() string { return url } +// Labels implements Target. +func (t *target) Labels() clientmodel.LabelSet { + t.RLock() + defer t.RUnlock() + ls := clientmodel.LabelSet{} + for ln, lv := range t.baseLabels { + ls[ln] = lv + } + ls[clientmodel.MetricsPathLabel] = clientmodel.LabelValue(t.url.Path) + ls[clientmodel.AddressLabel] = clientmodel.LabelValue(t.url.Host) + return ls +} + // BaseLabels implements Target. func (t *target) BaseLabels() clientmodel.LabelSet { t.RLock() @@ -433,9 +453,14 @@ func (t *target) BaseLabels() clientmodel.LabelSet { } // BaseLabelsWithoutJobAndInstance implements Target. +// +// TODO(fabxc): This method does not have to be part of the interface. Implement this +// as a template filter func for the single use case. func (t *target) BaseLabelsWithoutJobAndInstance() clientmodel.LabelSet { + t.RLock() + defer t.RUnlock() ls := clientmodel.LabelSet{} - for ln, lv := range t.BaseLabels() { + for ln, lv := range t.baseLabels { if ln != clientmodel.JobLabel && ln != clientmodel.InstanceLabel { ls[ln] = lv } diff --git a/retrieval/target_test.go b/retrieval/target_test.go index 6ab80050b..20b0fcee3 100644 --- a/retrieval/target_test.go +++ b/retrieval/target_test.go @@ -91,8 +91,8 @@ func TestTargetScrapeWithFullChannel(t *testing.T) { } func TestTargetRecordScrapeHealth(t *testing.T) { - jcfg := config.JobConfig{} - proto.SetDefaults(&jcfg.JobConfig) + scfg := &config.ScrapeConfig{} + proto.SetDefaults(&scfg.ScrapeConfig) testTarget := newTestTarget("example.url", 0, clientmodel.LabelSet{clientmodel.JobLabel: "testjob"}) @@ -150,8 +150,8 @@ func TestTargetScrapeTimeout(t *testing.T) { ) defer server.Close() - jcfg := config.JobConfig{} - proto.SetDefaults(&jcfg.JobConfig) + scfg := &config.ScrapeConfig{} + proto.SetDefaults(&scfg.ScrapeConfig) var testTarget Target = newTestTarget(server.URL, 10*time.Millisecond, clientmodel.LabelSet{}) diff --git a/retrieval/targetmanager.go b/retrieval/targetmanager.go index 2a3e3b325..2427dc2cf 100644 --- a/retrieval/targetmanager.go +++ b/retrieval/targetmanager.go @@ -30,7 +30,7 @@ import ( // A TargetProvider provides information about target groups. It maintains a set // of sources from which TargetGroups can originate. Whenever a target provider -// detects a potential change it sends the TargetGroup through its provided channel. +// detects a potential change, it sends the TargetGroup through its provided channel. // // The TargetProvider does not have to guarantee that an actual change happened. // It does guarantee that it sends the new TargetGroup whenever a change happens. @@ -58,10 +58,8 @@ type TargetManager struct { // Targets by their source ID. targets map[string][]Target - // Providers and configs by their job name. - // TODO(fabxc): turn this into map[*ScrapeConfig][]TargetProvider eventually. - providers map[string][]TargetProvider - configs map[string]config.JobConfig + // Providers by the scrape configs they are derived from. + providers map[*config.ScrapeConfig][]TargetProvider } // NewTargetManager creates a new TargetManager based on the given config. @@ -82,15 +80,13 @@ func (tm *TargetManager) Run() { sources := map[string]struct{}{} - for name, provs := range tm.providers { + for scfg, provs := range tm.providers { for _, p := range provs { - jcfg := tm.configs[name] - ch := make(chan *config.TargetGroup) - go tm.handleTargetUpdates(tm.configs[name], ch) + go tm.handleTargetUpdates(scfg, ch) for _, src := range p.Sources() { - src = fullSource(jcfg, src) + src = fullSource(scfg, src) sources[src] = struct{}{} } @@ -113,7 +109,7 @@ func (tm *TargetManager) Run() { // handleTargetUpdates receives target group updates and handles them in the // context of the given job config. -func (tm *TargetManager) handleTargetUpdates(cfg config.JobConfig, ch <-chan *config.TargetGroup) { +func (tm *TargetManager) handleTargetUpdates(cfg *config.ScrapeConfig, ch <-chan *config.TargetGroup) { for tg := range ch { glog.V(1).Infof("Received potential update for target group %q", tg.Source) @@ -127,8 +123,8 @@ func (tm *TargetManager) handleTargetUpdates(cfg config.JobConfig, ch <-chan *co // // Thus, oscilliating label sets for targets with the same source, // but providers from different configs, are prevented. -func fullSource(cfg config.JobConfig, src string) string { - return cfg.GetName() + ":" + src +func fullSource(cfg *config.ScrapeConfig, src string) string { + return cfg.GetJobName() + ":" + src } // Stop all background processing. @@ -187,7 +183,7 @@ func (tm *TargetManager) removeTargets(f func(string) bool) { // updateTargetGroup creates new targets for the group and replaces the old targets // for the source ID. -func (tm *TargetManager) updateTargetGroup(tgroup *config.TargetGroup, cfg config.JobConfig) error { +func (tm *TargetManager) updateTargetGroup(tgroup *config.TargetGroup, cfg *config.ScrapeConfig) error { newTargets, err := tm.targetsFromGroup(tgroup, cfg) if err != nil { return err @@ -197,6 +193,10 @@ func (tm *TargetManager) updateTargetGroup(tgroup *config.TargetGroup, cfg confi tm.m.Lock() defer tm.m.Unlock() + if !tm.running { + return nil + } + oldTargets, ok := tm.targets[src] if ok { var wg sync.WaitGroup @@ -221,7 +221,7 @@ func (tm *TargetManager) updateTargetGroup(tgroup *config.TargetGroup, cfg confi // to build up. wg.Add(1) go func(t Target) { - match.Update(cfg, t.BaseLabels()) + match.Update(cfg, t.Labels()) wg.Done() }(tnew) newTargets[i] = match @@ -287,71 +287,72 @@ func (tm *TargetManager) ApplyConfig(cfg config.Config) error { func (tm *TargetManager) applyConfig(cfg config.Config) error { // Only apply changes if everything was successful. - providers := map[string][]TargetProvider{} - configs := map[string]config.JobConfig{} + providers := map[*config.ScrapeConfig][]TargetProvider{} - for _, jcfg := range cfg.Jobs() { - provs, err := ProvidersFromConfig(jcfg) + for _, scfg := range cfg.ScrapeConfigs() { + provs, err := ProvidersFromConfig(scfg) if err != nil { return err } - configs[jcfg.GetName()] = jcfg - providers[jcfg.GetName()] = provs + providers[scfg] = provs } tm.m.Lock() defer tm.m.Unlock() tm.globalLabels = cfg.GlobalLabels() tm.providers = providers - tm.configs = configs return nil } // targetsFromGroup builds targets based on the given TargetGroup and config. -func (tm *TargetManager) targetsFromGroup(tg *config.TargetGroup, cfg config.JobConfig) ([]Target, error) { +func (tm *TargetManager) targetsFromGroup(tg *config.TargetGroup, cfg *config.ScrapeConfig) ([]Target, error) { tm.m.RLock() defer tm.m.RUnlock() targets := make([]Target, 0, len(tg.Targets)) for i, labels := range tg.Targets { - for ln, lv := range tg.Labels { - if _, ok := labels[ln]; !ok { - labels[ln] = lv - } - } - for ln, lv := range tm.globalLabels { - if _, ok := labels[ln]; !ok { - labels[ln] = lv + // Copy labels into the labelset for the target if they are not + // set already. Apply the labelsets in order of decreasing precedence. + labelsets := []clientmodel.LabelSet{ + tg.Labels, + cfg.Labels(), + tm.globalLabels, + } + for _, lset := range labelsets { + for ln, lv := range lset { + if _, ok := labels[ln]; !ok { + labels[ln] = lv + } } } + address, ok := labels[clientmodel.AddressLabel] if !ok { return nil, fmt.Errorf("Instance %d in target group %s has no address", i, tg) } - if _, ok := labels[clientmodel.JobLabel]; !ok { - labels[clientmodel.JobLabel] = clientmodel.LabelValue(cfg.GetName()) - } for ln := range labels { - // There are currently no internal labels we want to take over to time series. - if strings.HasPrefix(string(ln), clientmodel.ReservedLabelPrefix) { + // Meta labels are deleted after relabelling. Other internal labels propagate to + // the target which decides whether they will be part of their label set. + if strings.HasPrefix(string(ln), clientmodel.MetaLabelPrefix) { delete(labels, ln) } } targets = append(targets, NewTarget(string(address), cfg, labels)) + } + return targets, nil } // ProvidersFromConfig returns all TargetProviders configured in cfg. -func ProvidersFromConfig(cfg config.JobConfig) ([]TargetProvider, error) { +func ProvidersFromConfig(cfg *config.ScrapeConfig) ([]TargetProvider, error) { var providers []TargetProvider - if name := cfg.GetSdName(); name != "" { - dnsSD := discovery.NewDNSDiscovery(name, cfg.SDRefreshInterval()) + for _, dnscfg := range cfg.DNSConfigs() { + dnsSD := discovery.NewDNSDiscovery(dnscfg.GetName(), dnscfg.RefreshInterval()) providers = append(providers, dnsSD) } - if tgs := cfg.GetTargetGroup(); tgs != nil { static := NewStaticProvider(tgs) providers = append(providers, static) diff --git a/retrieval/targetmanager_test.go b/retrieval/targetmanager_test.go index ed1b1cd00..1d8315ca2 100644 --- a/retrieval/targetmanager_test.go +++ b/retrieval/targetmanager_test.go @@ -27,12 +27,12 @@ import ( ) func TestTargetManagerChan(t *testing.T) { - testJob1 := pb.JobConfig{ - Name: proto.String("test_job1"), + testJob1 := &config.ScrapeConfig{pb.ScrapeConfig{ + JobName: proto.String("test_job1"), ScrapeInterval: proto.String("1m"), TargetGroup: []*pb.TargetGroup{ {Target: []string{"example.org:80", "example.com:80"}}, - }, + }}, } prov1 := &fakeTargetProvider{ sources: []string{"src1", "src2"}, @@ -41,11 +41,8 @@ func TestTargetManagerChan(t *testing.T) { targetManager := &TargetManager{ sampleAppender: nopAppender{}, - providers: map[string][]TargetProvider{ - *testJob1.Name: []TargetProvider{prov1}, - }, - configs: map[string]config.JobConfig{ - *testJob1.Name: config.JobConfig{testJob1}, + providers: map[*config.ScrapeConfig][]TargetProvider{ + testJob1: []TargetProvider{prov1}, }, targets: make(map[string][]Target), } @@ -156,15 +153,15 @@ func TestTargetManagerChan(t *testing.T) { } func TestTargetManagerConfigUpdate(t *testing.T) { - testJob1 := &pb.JobConfig{ - Name: proto.String("test_job1"), + testJob1 := &pb.ScrapeConfig{ + JobName: proto.String("test_job1"), ScrapeInterval: proto.String("1m"), TargetGroup: []*pb.TargetGroup{ {Target: []string{"example.org:80", "example.com:80"}}, }, } - testJob2 := &pb.JobConfig{ - Name: proto.String("test_job2"), + testJob2 := &pb.ScrapeConfig{ + JobName: proto.String("test_job2"), ScrapeInterval: proto.String("1m"), TargetGroup: []*pb.TargetGroup{ {Target: []string{"example.org:8080", "example.com:8081"}}, @@ -173,11 +170,11 @@ func TestTargetManagerConfigUpdate(t *testing.T) { } sequence := []struct { - jobConfigs []*pb.JobConfig - expected map[string][]clientmodel.LabelSet + scrapeConfigs []*pb.ScrapeConfig + expected map[string][]clientmodel.LabelSet }{ { - jobConfigs: []*pb.JobConfig{testJob1}, + scrapeConfigs: []*pb.ScrapeConfig{testJob1}, expected: map[string][]clientmodel.LabelSet{ "test_job1:static:0": { {clientmodel.JobLabel: "test_job1", clientmodel.InstanceLabel: "example.org:80"}, @@ -185,7 +182,7 @@ func TestTargetManagerConfigUpdate(t *testing.T) { }, }, }, { - jobConfigs: []*pb.JobConfig{testJob1}, + scrapeConfigs: []*pb.ScrapeConfig{testJob1}, expected: map[string][]clientmodel.LabelSet{ "test_job1:static:0": { {clientmodel.JobLabel: "test_job1", clientmodel.InstanceLabel: "example.org:80"}, @@ -193,7 +190,7 @@ func TestTargetManagerConfigUpdate(t *testing.T) { }, }, }, { - jobConfigs: []*pb.JobConfig{testJob1, testJob2}, + scrapeConfigs: []*pb.ScrapeConfig{testJob1, testJob2}, expected: map[string][]clientmodel.LabelSet{ "test_job1:static:0": { {clientmodel.JobLabel: "test_job1", clientmodel.InstanceLabel: "example.org:80"}, @@ -208,10 +205,10 @@ func TestTargetManagerConfigUpdate(t *testing.T) { }, }, }, { - jobConfigs: []*pb.JobConfig{}, - expected: map[string][]clientmodel.LabelSet{}, + scrapeConfigs: []*pb.ScrapeConfig{}, + expected: map[string][]clientmodel.LabelSet{}, }, { - jobConfigs: []*pb.JobConfig{testJob2}, + scrapeConfigs: []*pb.ScrapeConfig{testJob2}, expected: map[string][]clientmodel.LabelSet{ "test_job2:static:0": { {clientmodel.JobLabel: "test_job2", clientmodel.InstanceLabel: "example.org:8080"}, @@ -233,7 +230,7 @@ func TestTargetManagerConfigUpdate(t *testing.T) { for i, step := range sequence { cfg := pb.PrometheusConfig{ - Job: step.jobConfigs, + ScrapeConfig: step.scrapeConfigs, } err := targetManager.ApplyConfig(config.Config{cfg}) if err != nil {