Merge branch 'master' into marathon_tls

pull/2045/head
bekbulatov 2016-10-24 10:37:32 +01:00
commit c689b35858
20 changed files with 275 additions and 45 deletions

View File

@ -1,3 +1,16 @@
## 1.3.0-beta.0 / 2016-10-18
This is a breaking change to the Kubernetes service discovery.
* [CHANGE] Rework Kubernetes SD.
* [FEATURE] Add support for interpolating `target_label`.
* [FEATURE] Add GCE metadata as Prometheus meta labels.
* [ENHANCEMENT] Add EC2 SD metrics.
* [ENHANCEMENT] Add Azure SD metrics.
* [ENHANCEMENT] Add fuzzy search to `/graph` textarea.
* [ENHANCEMENT] Always show instance labels on target page.
* [BUGFIX] Correctly handle on() in alerts.
## 1.2.1 / 2016-10-10 ## 1.2.1 / 2016-10-10
* [BUGFIX] Count chunk evictions properly so that the server doesn't * [BUGFIX] Count chunk evictions properly so that the server doesn't

View File

@ -1 +1 @@
1.2.1 1.3.0-beta.0

3
code-of-conduct.md Normal file
View File

@ -0,0 +1,3 @@
## Prometheus Community Code of Conduct
Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md).

View File

@ -31,6 +31,7 @@ var (
patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`) patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`)
patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`)
patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key|client_secret):\s+)(".+"|'.+'|[^\s]+)`) patAuthLine = regexp.MustCompile(`((?:password|bearer_token|secret_key|client_secret):\s+)(".+"|'.+'|[^\s]+)`)
relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$(?:\{\w+\}|\w+))+\w*)+$`)
) )
// Load parses the YAML input s into a Config. // Load parses the YAML input s into a Config.
@ -362,7 +363,6 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
gc.EvaluationInterval = DefaultGlobalConfig.EvaluationInterval gc.EvaluationInterval = DefaultGlobalConfig.EvaluationInterval
} }
*c = *gc *c = *gc
return nil return nil
} }
@ -907,6 +907,7 @@ type EC2SDConfig struct {
SecretKey string `yaml:"secret_key,omitempty"` SecretKey string `yaml:"secret_key,omitempty"`
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
Port int `yaml:"port"` Port int `yaml:"port"`
// Catches all undefined fields and must be empty after parsing. // Catches all undefined fields and must be empty after parsing.
XXX map[string]interface{} `yaml:",inline"` XXX map[string]interface{} `yaml:",inline"`
} }
@ -936,6 +937,7 @@ type AzureSDConfig struct {
ClientID string `yaml:"client_id,omitempty"` ClientID string `yaml:"client_id,omitempty"`
ClientSecret string `yaml:"client_secret,omitempty"` ClientSecret string `yaml:"client_secret,omitempty"`
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
// Catches all undefined fields and must be empty after parsing. // Catches all undefined fields and must be empty after parsing.
XXX map[string]interface{} `yaml:",inline"` XXX map[string]interface{} `yaml:",inline"`
} }
@ -993,8 +995,9 @@ type RelabelConfig struct {
Regex Regexp `yaml:"regex,omitempty"` Regex Regexp `yaml:"regex,omitempty"`
// Modulus to take of the hash of concatenated values from the source labels. // Modulus to take of the hash of concatenated values from the source labels.
Modulus uint64 `yaml:"modulus,omitempty"` Modulus uint64 `yaml:"modulus,omitempty"`
// The label to which the resulting string is written in a replacement. // TargetLabel is the label to which the resulting string is written in a replacement.
TargetLabel model.LabelName `yaml:"target_label,omitempty"` // Regexp interpolation is allowed for the replace action.
TargetLabel string `yaml:"target_label,omitempty"`
// Replacement is the regex replacement pattern to be used. // Replacement is the regex replacement pattern to be used.
Replacement string `yaml:"replacement,omitempty"` Replacement string `yaml:"replacement,omitempty"`
// Action is the action to be performed for the relabeling. // Action is the action to be performed for the relabeling.
@ -1020,6 +1023,12 @@ func (c *RelabelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if (c.Action == RelabelReplace || c.Action == RelabelHashMod) && c.TargetLabel == "" { if (c.Action == RelabelReplace || c.Action == RelabelHashMod) && c.TargetLabel == "" {
return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action) return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action)
} }
if c.Action == RelabelReplace && !relabelTarget.MatchString(c.TargetLabel) {
return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action)
}
if c.Action == RelabelHashMod && !model.LabelName(c.TargetLabel).IsValid() {
return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action)
}
return nil return nil
} }

View File

@ -207,6 +207,17 @@ var expectedConf = &Config{
Scheme: DefaultConsulSDConfig.Scheme, Scheme: DefaultConsulSDConfig.Scheme,
}, },
}, },
RelabelConfigs: []*RelabelConfig{
{
SourceLabels: model.LabelNames{"__meta_sd_consul_tags"},
Regex: MustNewRegexp("label:([^=]+)=([^,]+)"),
Separator: ",",
TargetLabel: "${1}",
Replacement: "${2}",
Action: RelabelReplace,
},
},
}, },
{ {
JobName: "service-z", JobName: "service-z",
@ -364,7 +375,7 @@ func TestLoadConfig(t *testing.T) {
// Parse a valid file that sets a global scrape timeout. This tests whether parsing // Parse a valid file that sets a global scrape timeout. This tests whether parsing
// an overwritten default field in the global config permanently changes the default. // an overwritten default field in the global config permanently changes the default.
if _, err := LoadFile("testdata/global_timeout.good.yml"); err != nil { if _, err := LoadFile("testdata/global_timeout.good.yml"); err != nil {
t.Errorf("Error parsing %s: %s", "testdata/conf.good.yml", err) t.Errorf("Error parsing %s: %s", "testdata/global_timeout.good.yml", err)
} }
c, err := LoadFile("testdata/conf.good.yml") c, err := LoadFile("testdata/conf.good.yml")
@ -505,6 +516,34 @@ func TestEmptyGlobalBlock(t *testing.T) {
} }
} }
// TestTargetLabelValidity checks which strings the relabelTarget pattern
// accepts as a 'target_label' for the replace action. Plain label names and
// names containing $N / ${name} regexp references are expected to match;
// names with characters such as '-' or with unterminated / empty references
// are expected to be rejected.
func TestTargetLabelValidity(t *testing.T) {
	tests := []struct {
		str   string
		valid bool
	}{
		{"-label", false},
		{"label", true},
		{"label${1}", true},
		{"${1}label", true},
		{"${1}", true},
		{"${", false},
		{"$", false},
		{"${}", false},
		{"foo${", false},
		{"$1", true},
		{"asd$2asd", true},
		{"-foo${1}bar-", false},
		{"_${1}_", true},
		{"foo${bar}foo", true},
	}
	for _, test := range tests {
		// MatchString mirrors the call used when validating a RelabelConfig.
		// Errorf (not Fatalf) lets every failing case be reported in one run.
		if relabelTarget.MatchString(test.str) != test.valid {
			t.Errorf("Expected %q to be %v", test.str, test.valid)
		}
	}
}
func kubernetesSDHostURL() URL { func kubernetesSDHostURL() URL {
tURL, _ := url.Parse("https://localhost:1234") tURL, _ := url.Parse("https://localhost:1234")
return URL{URL: tURL} return URL{URL: tURL}

View File

@ -104,6 +104,13 @@ scrape_configs:
- server: 'localhost:1234' - server: 'localhost:1234'
services: ['nginx', 'cache', 'mysql'] services: ['nginx', 'cache', 'mysql']
relabel_configs:
- source_labels: [__meta_sd_consul_tags]
separator: ','
regex: label:([^=]+)=([^,]+)
target_label: ${1}
replacement: ${2}
- job_name: service-z - job_name: service-z
tls_config: tls_config:

View File

@ -290,6 +290,7 @@ func (n *Notifier) sendAll(alerts ...*model.Alert) int {
} }
defer resp.Body.Close() defer resp.Body.Close()
// Any HTTP status 2xx is OK.
if resp.StatusCode/100 != 2 { if resp.StatusCode/100 != 2 {
return fmt.Errorf("bad response status %v", resp.Status) return fmt.Errorf("bad response status %v", resp.Status)
} }

View File

@ -163,7 +163,7 @@ func (node *BinaryExpr) String() string {
matching := "" matching := ""
vm := node.VectorMatching vm := node.VectorMatching
if vm != nil && len(vm.MatchingLabels) > 0 { if vm != nil && (len(vm.MatchingLabels) > 0 || vm.On) {
if vm.On { if vm.On {
matching = fmt.Sprintf(" ON(%s)", vm.MatchingLabels) matching = fmt.Sprintf(" ON(%s)", vm.MatchingLabels)
} else { } else {

View File

@ -59,6 +59,10 @@ func TestExprString(t *testing.T) {
inputs := []struct { inputs := []struct {
in, out string in, out string
}{ }{
{
in: `sum(task:errors:rate10s{job="s"}) BY ()`,
out: `sum(task:errors:rate10s{job="s"})`,
},
{ {
in: `sum(task:errors:rate10s{job="s"}) BY (code)`, in: `sum(task:errors:rate10s{job="s"}) BY (code)`,
}, },
@ -77,6 +81,9 @@ func TestExprString(t *testing.T) {
{ {
in: `count_values("value", task:errors:rate10s{job="s"})`, in: `count_values("value", task:errors:rate10s{job="s"})`,
}, },
{
in: `a - ON() c`,
},
{ {
in: `a - ON(b) c`, in: `a - ON(b) c`,
}, },
@ -92,6 +99,10 @@ func TestExprString(t *testing.T) {
{ {
in: `a - IGNORING(b) c`, in: `a - IGNORING(b) c`,
}, },
{
in: `a - IGNORING() c`,
out: `a - c`,
},
{ {
in: `up > BOOL 0`, in: `up > BOOL 0`,
}, },

View File

@ -61,20 +61,20 @@ func relabel(labels model.LabelSet, cfg *config.RelabelConfig) model.LabelSet {
if indexes == nil { if indexes == nil {
break break
} }
target := model.LabelName(cfg.Regex.ExpandString([]byte{}, string(cfg.TargetLabel), val, indexes)) target := model.LabelName(cfg.Regex.ExpandString([]byte{}, cfg.TargetLabel, val, indexes))
if !target.IsValid() { if !target.IsValid() {
delete(labels, cfg.TargetLabel) delete(labels, model.LabelName(cfg.TargetLabel))
break break
} }
res := cfg.Regex.ExpandString([]byte{}, cfg.Replacement, val, indexes) res := cfg.Regex.ExpandString([]byte{}, cfg.Replacement, val, indexes)
if len(res) == 0 { if len(res) == 0 {
delete(labels, cfg.TargetLabel) delete(labels, model.LabelName(cfg.TargetLabel))
break break
} }
labels[target] = model.LabelValue(res) labels[target] = model.LabelValue(res)
case config.RelabelHashMod: case config.RelabelHashMod:
mod := sum64(md5.Sum([]byte(val))) % cfg.Modulus mod := sum64(md5.Sum([]byte(val))) % cfg.Modulus
labels[cfg.TargetLabel] = model.LabelValue(fmt.Sprintf("%d", mod)) labels[model.LabelName(cfg.TargetLabel)] = model.LabelValue(fmt.Sprintf("%d", mod))
case config.RelabelLabelMap: case config.RelabelLabelMap:
out := make(model.LabelSet, len(labels)) out := make(model.LabelSet, len(labels))
// Take a copy to avoid infinite loops. // Take a copy to avoid infinite loops.

View File

@ -38,7 +38,7 @@ func TestRelabel(t *testing.T) {
{ {
SourceLabels: model.LabelNames{"a"}, SourceLabels: model.LabelNames{"a"},
Regex: config.MustNewRegexp("f(.*)"), Regex: config.MustNewRegexp("f(.*)"),
TargetLabel: model.LabelName("d"), TargetLabel: "d",
Separator: ";", Separator: ";",
Replacement: "ch${1}-ch${1}", Replacement: "ch${1}-ch${1}",
Action: config.RelabelReplace, Action: config.RelabelReplace,
@ -61,7 +61,7 @@ func TestRelabel(t *testing.T) {
{ {
SourceLabels: model.LabelNames{"a", "b"}, SourceLabels: model.LabelNames{"a", "b"},
Regex: config.MustNewRegexp("f(.*);(.*)r"), Regex: config.MustNewRegexp("f(.*);(.*)r"),
TargetLabel: model.LabelName("a"), TargetLabel: "a",
Separator: ";", Separator: ";",
Replacement: "b${1}${2}m", // boobam Replacement: "b${1}${2}m", // boobam
Action: config.RelabelReplace, Action: config.RelabelReplace,
@ -69,7 +69,7 @@ func TestRelabel(t *testing.T) {
{ {
SourceLabels: model.LabelNames{"c", "a"}, SourceLabels: model.LabelNames{"c", "a"},
Regex: config.MustNewRegexp("(b).*b(.*)ba(.*)"), Regex: config.MustNewRegexp("(b).*b(.*)ba(.*)"),
TargetLabel: model.LabelName("d"), TargetLabel: "d",
Separator: ";", Separator: ";",
Replacement: "$1$2$2$3", Replacement: "$1$2$2$3",
Action: config.RelabelReplace, Action: config.RelabelReplace,
@ -94,7 +94,7 @@ func TestRelabel(t *testing.T) {
}, { }, {
SourceLabels: model.LabelNames{"a"}, SourceLabels: model.LabelNames{"a"},
Regex: config.MustNewRegexp("f(.*)"), Regex: config.MustNewRegexp("f(.*)"),
TargetLabel: model.LabelName("d"), TargetLabel: "d",
Separator: ";", Separator: ";",
Replacement: "ch$1-ch$1", Replacement: "ch$1-ch$1",
Action: config.RelabelReplace, Action: config.RelabelReplace,
@ -124,7 +124,7 @@ func TestRelabel(t *testing.T) {
{ {
SourceLabels: model.LabelNames{"a"}, SourceLabels: model.LabelNames{"a"},
Regex: config.MustNewRegexp(".*(b).*"), Regex: config.MustNewRegexp(".*(b).*"),
TargetLabel: model.LabelName("d"), TargetLabel: "d",
Separator: ";", Separator: ";",
Replacement: "$1", Replacement: "$1",
Action: config.RelabelReplace, Action: config.RelabelReplace,
@ -202,7 +202,7 @@ func TestRelabel(t *testing.T) {
{ {
SourceLabels: model.LabelNames{"a"}, SourceLabels: model.LabelNames{"a"},
Regex: config.MustNewRegexp("f"), Regex: config.MustNewRegexp("f"),
TargetLabel: model.LabelName("b"), TargetLabel: "b",
Replacement: "bar", Replacement: "bar",
Action: config.RelabelReplace, Action: config.RelabelReplace,
}, },
@ -220,7 +220,7 @@ func TestRelabel(t *testing.T) {
relabel: []*config.RelabelConfig{ relabel: []*config.RelabelConfig{
{ {
SourceLabels: model.LabelNames{"c"}, SourceLabels: model.LabelNames{"c"},
TargetLabel: model.LabelName("d"), TargetLabel: "d",
Separator: ";", Separator: ";",
Action: config.RelabelHashMod, Action: config.RelabelHashMod,
Modulus: 1000, Modulus: 1000,
@ -287,7 +287,7 @@ func TestRelabel(t *testing.T) {
Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"),
Action: config.RelabelReplace, Action: config.RelabelReplace,
Replacement: "${2}", Replacement: "${2}",
TargetLabel: model.LabelName("${1}"), TargetLabel: "${1}",
}, },
}, },
output: model.LabelSet{ output: model.LabelSet{
@ -305,7 +305,7 @@ func TestRelabel(t *testing.T) {
Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"),
Action: config.RelabelReplace, Action: config.RelabelReplace,
Replacement: "${3}", Replacement: "${3}",
TargetLabel: model.LabelName("${1}"), TargetLabel: "${1}",
}, },
}, },
output: model.LabelSet{ output: model.LabelSet{
@ -322,21 +322,21 @@ func TestRelabel(t *testing.T) {
Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"),
Action: config.RelabelReplace, Action: config.RelabelReplace,
Replacement: "${1}", Replacement: "${1}",
TargetLabel: model.LabelName("${3}"), TargetLabel: "${3}",
}, },
{ {
SourceLabels: model.LabelNames{"a"}, SourceLabels: model.LabelNames{"a"},
Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"),
Action: config.RelabelReplace, Action: config.RelabelReplace,
Replacement: "${1}", Replacement: "${1}",
TargetLabel: model.LabelName("0${3}"), TargetLabel: "0${3}",
}, },
{ {
SourceLabels: model.LabelNames{"a"}, SourceLabels: model.LabelNames{"a"},
Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"), Regex: config.MustNewRegexp("some-([^-]+)-([^,]+)"),
Action: config.RelabelReplace, Action: config.RelabelReplace,
Replacement: "${1}", Replacement: "${1}",
TargetLabel: model.LabelName("-${3}"), TargetLabel: "-${3}",
}, },
}, },
output: model.LabelSet{ output: model.LabelSet{
@ -353,21 +353,21 @@ func TestRelabel(t *testing.T) {
Regex: config.MustNewRegexp("(?:.+,|^)path:(/[^,]+).*"), Regex: config.MustNewRegexp("(?:.+,|^)path:(/[^,]+).*"),
Action: config.RelabelReplace, Action: config.RelabelReplace,
Replacement: "${1}", Replacement: "${1}",
TargetLabel: model.LabelName("__metrics_path__"), TargetLabel: "__metrics_path__",
}, },
{ {
SourceLabels: model.LabelNames{"__meta_sd_tags"}, SourceLabels: model.LabelNames{"__meta_sd_tags"},
Regex: config.MustNewRegexp("(?:.+,|^)job:([^,]+).*"), Regex: config.MustNewRegexp("(?:.+,|^)job:([^,]+).*"),
Action: config.RelabelReplace, Action: config.RelabelReplace,
Replacement: "${1}", Replacement: "${1}",
TargetLabel: model.LabelName("job"), TargetLabel: "job",
}, },
{ {
SourceLabels: model.LabelNames{"__meta_sd_tags"}, SourceLabels: model.LabelNames{"__meta_sd_tags"},
Regex: config.MustNewRegexp("(?:.+,|^)label:([^=]+)=([^,]+).*"), Regex: config.MustNewRegexp("(?:.+,|^)label:([^=]+)=([^,]+).*"),
Action: config.RelabelReplace, Action: config.RelabelReplace,
Replacement: "${2}", Replacement: "${2}",
TargetLabel: model.LabelName("${1}"), TargetLabel: "${1}",
}, },
}, },
output: model.LabelSet{ output: model.LabelSet{

View File

@ -23,6 +23,7 @@ import (
"github.com/Azure/azure-sdk-for-go/arm/network" "github.com/Azure/azure-sdk-for-go/arm/network"
"github.com/Azure/go-autorest/autorest/azure" "github.com/Azure/go-autorest/autorest/azure"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log" "github.com/prometheus/common/log"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"golang.org/x/net/context" "golang.org/x/net/context"
@ -41,6 +42,26 @@ const (
azureLabelMachineTag = azureLabel + "machine_tag_" azureLabelMachineTag = azureLabel + "machine_tag_"
) )
// Instrumentation for Azure service discovery. Both metrics are updated by
// (*AzureDiscovery).refresh: the duration is observed on every call and the
// failure counter is incremented when refresh returns a non-nil error.
// NOTE(review): `namespace` is not declared in the visible part of this file;
// it is presumably a package-level constant shared with the other SD metrics.
var (
// azureSDScrapeFailuresCount counts refresh calls that ended with an error.
azureSDScrapeFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_azure_scrape_failures_total",
Help: "Number of Azure-SD scrape failures.",
})
// azureSDScrapeDuration records how long each refresh call took, in seconds.
azureSDScrapeDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_azure_scrape_duration_seconds",
Help: "The duration of a Azure-SD scrape in seconds.",
})
)
// init registers the Azure SD metrics via prometheus.MustRegister at package
// load time.
func init() {
prometheus.MustRegister(azureSDScrapeDuration)
prometheus.MustRegister(azureSDScrapeFailuresCount)
}
// AzureDiscovery periodically performs Azure-SD requests. It implements // AzureDiscovery periodically performs Azure-SD requests. It implements
// the TargetProvider interface. // the TargetProvider interface.
type AzureDiscovery struct { type AzureDiscovery struct {
@ -135,8 +156,15 @@ func newAzureResourceFromID(id string) (azureResource, error) {
}, nil }, nil
} }
func (ad *AzureDiscovery) refresh() (*config.TargetGroup, error) { func (ad *AzureDiscovery) refresh() (tg *config.TargetGroup, err error) {
tg := &config.TargetGroup{} t0 := time.Now()
defer func() {
azureSDScrapeDuration.Observe(time.Since(t0).Seconds())
if err != nil {
azureSDScrapeFailuresCount.Inc()
}
}()
tg = &config.TargetGroup{}
client, err := createAzureClient(*ad.cfg) client, err := createAzureClient(*ad.cfg)
if err != nil { if err != nil {
return tg, fmt.Errorf("could not create Azure client: %s", err) return tg, fmt.Errorf("could not create Azure client: %s", err)

View File

@ -21,6 +21,7 @@ import (
"time" "time"
consul "github.com/hashicorp/consul/api" consul "github.com/hashicorp/consul/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log" "github.com/prometheus/common/log"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"golang.org/x/net/context" "golang.org/x/net/context"
@ -48,8 +49,37 @@ const (
datacenterLabel = model.MetaLabelPrefix + "consul_dc" datacenterLabel = model.MetaLabelPrefix + "consul_dc"
// serviceIDLabel is the name of the label containing the service ID. // serviceIDLabel is the name of the label containing the service ID.
serviceIDLabel = model.MetaLabelPrefix + "consul_service_id" serviceIDLabel = model.MetaLabelPrefix + "consul_service_id"
// Constants for instrumentation.
namespace = "prometheus"
) )
// Instrumentation for Consul service discovery.
var (
// rpcFailuresCount is incremented whenever a catalog.Services or
// catalog.Service call returns an error.
rpcFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_consul_rpc_failures_total",
Help: "The number of Consul RPC call failures.",
})
// rpcDuration records the wall-clock time of each Consul catalog call in
// seconds, partitioned by the "endpoint" and "call" labels.
rpcDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_consul_rpc_duration_seconds",
Help: "The duration of a Consul RPC call in seconds.",
},
[]string{"endpoint", "call"},
)
)
// init registers the Consul SD metrics via prometheus.MustRegister at package
// load time.
func init() {
prometheus.MustRegister(rpcFailuresCount)
prometheus.MustRegister(rpcDuration)
// Initialize metric vectors: touch the two label combinations used by this
// package up front (presumably so both series exist before the first RPC
// completes — confirm against client_golang semantics).
rpcDuration.WithLabelValues("catalog", "service")
rpcDuration.WithLabelValues("catalog", "services")
}
// Discovery retrieves target information from a Consul server // Discovery retrieves target information from a Consul server
// and updates them via watches. // and updates them via watches.
type Discovery struct { type Discovery struct {
@ -110,10 +140,12 @@ func (cd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
var lastIndex uint64 var lastIndex uint64
for { for {
catalog := cd.client.Catalog() catalog := cd.client.Catalog()
t0 := time.Now()
srvs, meta, err := catalog.Services(&consul.QueryOptions{ srvs, meta, err := catalog.Services(&consul.QueryOptions{
WaitIndex: lastIndex, WaitIndex: lastIndex,
WaitTime: watchTimeout, WaitTime: watchTimeout,
}) })
rpcDuration.WithLabelValues("catalog", "services").Observe(time.Since(t0).Seconds())
// We have to check the context at least once. The checks during channel sends // We have to check the context at least once. The checks during channel sends
// do not guarantee that. // do not guarantee that.
@ -125,6 +157,7 @@ func (cd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
if err != nil { if err != nil {
log.Errorf("Error refreshing service list: %s", err) log.Errorf("Error refreshing service list: %s", err)
rpcFailuresCount.Inc()
time.Sleep(retryInterval) time.Sleep(retryInterval)
continue continue
} }
@ -202,10 +235,13 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*config.TargetG
lastIndex := uint64(0) lastIndex := uint64(0)
for { for {
t0 := time.Now()
nodes, meta, err := catalog.Service(srv.name, "", &consul.QueryOptions{ nodes, meta, err := catalog.Service(srv.name, "", &consul.QueryOptions{
WaitIndex: lastIndex, WaitIndex: lastIndex,
WaitTime: watchTimeout, WaitTime: watchTimeout,
}) })
rpcDuration.WithLabelValues("catalog", "service").Observe(time.Since(t0).Seconds())
// Check the context before potentially falling in a continue-loop. // Check the context before potentially falling in a continue-loop.
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -216,6 +252,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*config.TargetG
if err != nil { if err != nil {
log.Errorf("Error refreshing service %s: %s", srv.name, err) log.Errorf("Error refreshing service %s: %s", srv.name, err)
rpcFailuresCount.Inc()
time.Sleep(retryInterval) time.Sleep(retryInterval)
continue continue
} }

View File

@ -42,13 +42,13 @@ var (
dnsSDLookupsCount = prometheus.NewCounter( dnsSDLookupsCount = prometheus.NewCounter(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: namespace, Namespace: namespace,
Name: "dns_sd_lookups_total", Name: "sd_dns_lookups_total",
Help: "The number of DNS-SD lookups.", Help: "The number of DNS-SD lookups.",
}) })
dnsSDLookupFailuresCount = prometheus.NewCounter( dnsSDLookupFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: namespace, Namespace: namespace,
Name: "dns_sd_lookup_failures_total", Name: "sd_dns_lookup_failures_total",
Help: "The number of DNS-SD lookup failures.", Help: "The number of DNS-SD lookup failures.",
}) })
) )

View File

@ -22,6 +22,7 @@ import (
"github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/defaults" "github.com/aws/aws-sdk-go/aws/defaults"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log" "github.com/prometheus/common/log"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"golang.org/x/net/context" "golang.org/x/net/context"
@ -36,6 +37,7 @@ const (
ec2LabelAZ = ec2Label + "availability_zone" ec2LabelAZ = ec2Label + "availability_zone"
ec2LabelInstanceID = ec2Label + "instance_id" ec2LabelInstanceID = ec2Label + "instance_id"
ec2LabelInstanceState = ec2Label + "instance_state" ec2LabelInstanceState = ec2Label + "instance_state"
ec2LabelInstanceType = ec2Label + "instance_type"
ec2LabelPublicDNS = ec2Label + "public_dns_name" ec2LabelPublicDNS = ec2Label + "public_dns_name"
ec2LabelPublicIP = ec2Label + "public_ip" ec2LabelPublicIP = ec2Label + "public_ip"
ec2LabelPrivateIP = ec2Label + "private_ip" ec2LabelPrivateIP = ec2Label + "private_ip"
@ -45,6 +47,26 @@ const (
subnetSeparator = "," subnetSeparator = ","
) )
// Instrumentation for EC2 service discovery. Both metrics are updated by
// (*EC2Discovery).refresh: the duration is observed on every call and the
// failure counter is incremented when refresh returns a non-nil error.
// NOTE(review): `namespace` is not declared in the visible part of this file;
// it is presumably a package-level constant shared with the other SD metrics.
var (
// ec2SDScrapeFailuresCount counts refresh calls that ended with an error.
ec2SDScrapeFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_ec2_scrape_failures_total",
Help: "The number of EC2-SD scrape failures.",
})
// ec2SDScrapeDuration records how long each refresh call took, in seconds.
ec2SDScrapeDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_ec2_scrape_duration_seconds",
Help: "The duration of a EC2-SD scrape in seconds.",
})
)
// init registers the EC2 SD metrics via prometheus.MustRegister at package
// load time.
func init() {
prometheus.MustRegister(ec2SDScrapeFailuresCount)
prometheus.MustRegister(ec2SDScrapeDuration)
}
// EC2Discovery periodically performs EC2-SD requests. It implements // EC2Discovery periodically performs EC2-SD requests. It implements
// the TargetProvider interface. // the TargetProvider interface.
type EC2Discovery struct { type EC2Discovery struct {
@ -99,12 +121,20 @@ func (ed *EC2Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup
} }
} }
func (ed *EC2Discovery) refresh() (*config.TargetGroup, error) { func (ed *EC2Discovery) refresh() (tg *config.TargetGroup, err error) {
t0 := time.Now()
defer func() {
ec2SDScrapeDuration.Observe(time.Since(t0).Seconds())
if err != nil {
ec2SDScrapeFailuresCount.Inc()
}
}()
ec2s := ec2.New(ed.aws) ec2s := ec2.New(ed.aws)
tg := &config.TargetGroup{ tg = &config.TargetGroup{
Source: *ed.aws.Region, Source: *ed.aws.Region,
} }
if err := ec2s.DescribeInstancesPages(nil, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool { if err = ec2s.DescribeInstancesPages(nil, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
for _, r := range p.Reservations { for _, r := range p.Reservations {
for _, inst := range r.Instances { for _, inst := range r.Instances {
if inst.PrivateIpAddress == nil { if inst.PrivateIpAddress == nil {
@ -124,6 +154,7 @@ func (ed *EC2Discovery) refresh() (*config.TargetGroup, error) {
labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone) labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone)
labels[ec2LabelInstanceState] = model.LabelValue(*inst.State.Name) labels[ec2LabelInstanceState] = model.LabelValue(*inst.State.Name)
labels[ec2LabelInstanceType] = model.LabelValue(*inst.InstanceType)
if inst.VpcId != nil { if inst.VpcId != nil {
labels[ec2LabelVPCID] = model.LabelValue(*inst.VpcId) labels[ec2LabelVPCID] = model.LabelValue(*inst.VpcId)

View File

@ -21,6 +21,7 @@ import (
"strings" "strings"
"time" "time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log" "github.com/prometheus/common/log"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"golang.org/x/net/context" "golang.org/x/net/context"
@ -32,6 +33,26 @@ import (
const fileSDFilepathLabel = model.MetaLabelPrefix + "filepath" const fileSDFilepathLabel = model.MetaLabelPrefix + "filepath"
// Instrumentation for file-based service discovery.
// NOTE(review): `namespace` is not declared in the visible part of this file;
// it is presumably a package-level constant shared with the other SD metrics.
var (
// fileSDScanDuration records how long each (*FileDiscovery).refresh call
// took, in seconds.
fileSDScanDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_file_scan_duration_seconds",
Help: "The duration of the File-SD scan in seconds.",
})
// fileSDReadErrorsCount is incremented once per file for which readFile
// returns an error during a refresh.
fileSDReadErrorsCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_file_read_errors_total",
Help: "The number of File-SD read errors.",
})
)
// init registers the File SD metrics via prometheus.MustRegister at package
// load time.
func init() {
prometheus.MustRegister(fileSDScanDuration)
prometheus.MustRegister(fileSDReadErrorsCount)
}
// FileDiscovery provides service discovery functionality based // FileDiscovery provides service discovery functionality based
// on files that contain target groups in JSON or YAML format. Refreshing // on files that contain target groups in JSON or YAML format. Refreshing
// happens using file watches and periodic refreshes. // happens using file watches and periodic refreshes.
@ -173,10 +194,16 @@ func (fd *FileDiscovery) stop() {
// refresh reads all files matching the discovery's patterns and sends the respective // refresh reads all files matching the discovery's patterns and sends the respective
// updated target groups through the channel. // updated target groups through the channel.
func (fd *FileDiscovery) refresh(ch chan<- []*config.TargetGroup) { func (fd *FileDiscovery) refresh(ch chan<- []*config.TargetGroup) {
t0 := time.Now()
defer func() {
fileSDScanDuration.Observe(time.Since(t0).Seconds())
}()
ref := map[string]int{} ref := map[string]int{}
for _, p := range fd.listFiles() { for _, p := range fd.listFiles() {
tgroups, err := readFile(p) tgroups, err := readFile(p)
if err != nil { if err != nil {
fileSDReadErrorsCount.Inc()
log.Errorf("Error reading file %q: %s", p, err) log.Errorf("Error reading file %q: %s", p, err)
// Prevent deletion down below. // Prevent deletion down below.
ref[p] = fd.lastRefresh[p] ref[p] = fd.lastRefresh[p]

View File

@ -50,28 +50,21 @@ const (
) )
var ( var (
gceSDScrapesCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "gce_sd_scrapes_total",
Help: "The number of GCE-SD scrapes.",
})
gceSDScrapeFailuresCount = prometheus.NewCounter( gceSDScrapeFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: namespace, Namespace: namespace,
Name: "gce_sd_scrape_failures_total", Name: "sd_gce_scrape_failures_total",
Help: "The number of GCE-SD scrape failures.", Help: "The number of GCE-SD scrape failures.",
}) })
gceSDScrapeDuration = prometheus.NewSummary( gceSDScrapeDuration = prometheus.NewSummary(
prometheus.SummaryOpts{ prometheus.SummaryOpts{
Namespace: namespace, Namespace: namespace,
Name: "gce_sd_scrape_duration", Name: "sd_gce_scrape_duration",
Help: "The duration of a GCE-SD scrape in seconds.", Help: "The duration of a GCE-SD scrape in seconds.",
}) })
) )
func init() { func init() {
prometheus.MustRegister(gceSDScrapesCount)
prometheus.MustRegister(gceSDScrapeFailuresCount) prometheus.MustRegister(gceSDScrapeFailuresCount)
prometheus.MustRegister(gceSDScrapeDuration) prometheus.MustRegister(gceSDScrapeDuration)
} }
@ -147,7 +140,6 @@ func (gd *GCEDiscovery) refresh() (tg *config.TargetGroup, err error) {
t0 := time.Now() t0 := time.Now()
defer func() { defer func() {
gceSDScrapeDuration.Observe(time.Since(t0).Seconds()) gceSDScrapeDuration.Observe(time.Since(t0).Seconds())
gceSDScrapesCount.Inc()
if err != nil { if err != nil {
gceSDScrapeFailuresCount.Inc() gceSDScrapeFailuresCount.Inc()
} }

View File

@ -110,7 +110,7 @@ func (k *Kubernetes) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
rclient := k.client.Core().GetRESTClient() rclient := k.client.Core().GetRESTClient()
switch k.role { switch k.role {
case "endpoint": case "endpoints":
elw := cache.NewListWatchFromClient(rclient, "endpoints", api.NamespaceAll, nil) elw := cache.NewListWatchFromClient(rclient, "endpoints", api.NamespaceAll, nil)
slw := cache.NewListWatchFromClient(rclient, "services", api.NamespaceAll, nil) slw := cache.NewListWatchFromClient(rclient, "services", api.NamespaceAll, nil)
plw := cache.NewListWatchFromClient(rclient, "pods", api.NamespaceAll, nil) plw := cache.NewListWatchFromClient(rclient, "pods", api.NamespaceAll, nil)

View File

@ -24,6 +24,7 @@ import (
"golang.org/x/net/context" "golang.org/x/net/context"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log" "github.com/prometheus/common/log"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/config"
@ -42,8 +43,31 @@ const (
imageLabel model.LabelName = metaLabelPrefix + "image" imageLabel model.LabelName = metaLabelPrefix + "image"
// taskLabel contains the mesos task name of the app instance. // taskLabel contains the mesos task name of the app instance.
taskLabel model.LabelName = metaLabelPrefix + "task" taskLabel model.LabelName = metaLabelPrefix + "task"
// Constants for instrumentation.
namespace = "prometheus"
) )
// Instrumentation for Marathon service discovery. Both metrics are updated by
// (*Discovery).updateServices: the duration is observed on every call and the
// failure counter is incremented when updateServices returns a non-nil error.
var (
// scrapeFailuresCount counts updateServices calls that ended with an error.
scrapeFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_marathon_scrape_failures_total",
Help: "The number of Marathon-SD scrape failures.",
})
// scrapeDuration records how long each updateServices call took, in seconds.
scrapeDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_marathon_scrape_duration_seconds",
Help: "The duration of a Marathon-SD scrape in seconds.",
})
)
// init registers the Marathon SD metrics via prometheus.MustRegister at
// package load time.
func init() {
prometheus.MustRegister(scrapeFailuresCount)
prometheus.MustRegister(scrapeDuration)
}
const appListPath string = "/v2/apps/?embed=apps.tasks" const appListPath string = "/v2/apps/?embed=apps.tasks"
// Discovery provides service discovery based on a Marathon instance. // Discovery provides service discovery based on a Marathon instance.
@ -93,7 +117,15 @@ func (md *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
} }
} }
func (md *Discovery) updateServices(ctx context.Context, ch chan<- []*config.TargetGroup) error { func (md *Discovery) updateServices(ctx context.Context, ch chan<- []*config.TargetGroup) (err error) {
t0 := time.Now()
defer func() {
scrapeDuration.Observe(time.Since(t0).Seconds())
if err != nil {
scrapeFailuresCount.Inc()
}
}()
targetMap, err := md.fetchTargetGroups() targetMap, err := md.fetchTargetGroups()
if err != nil { if err != nil {
return err return err

View File

@ -518,7 +518,7 @@ func (c *varbitChunk) addSecondSample(s model.SamplePair) ([]Chunk, error) {
return []Chunk{c}, nil return []Chunk{c}, nil
} }
// addLastSample isa a helper method only used by c.add() and in other helper // addLastSample is a helper method only used by c.add() and in other helper
// methods called by c.add(). It simply sets the given sample as the last sample // methods called by c.add(). It simply sets the given sample as the last sample
// in the header and declares the chunk closed. In other words, addLastSample // in the header and declares the chunk closed. In other words, addLastSample
// adds the very last sample added to this chunk ever, while setLastSample sets // adds the very last sample added to this chunk ever, while setLastSample sets