Have scrape time as a pseudovariable, not a prometheus variable.

This ensures it has the right timestamp, and is easier to work with.

Switch sd variable away from 'outcome', using total/failed instead.
pull/429/head
Brian Brazil 2014-12-26 12:37:30 +00:00
parent cc27fb8aab
commit e56786b221
7 changed files with 71 additions and 58 deletions

View File

@ -36,30 +36,19 @@ const (
InstanceLabel clientmodel.LabelName = "instance"
// ScrapeHealthMetricName is the metric name for the synthetic health
// variable.
ScrapeHealthMetricName clientmodel.LabelValue = "up"
scrapeHealthMetricName clientmodel.LabelValue = "up"
// ScrapeTimeMetricName is the metric name for the synthetic scrape duration
// variable.
scrapeDurationMetricName clientmodel.LabelValue = "scrape_duration_seconds"
// Constants for instrumentation.
namespace = "prometheus"
job = "target_job"
instance = "target_instance"
failure = "failure"
outcome = "outcome"
success = "success"
interval = "interval"
)
var (
localhostRepresentations = []string{"http://127.0.0.1", "http://localhost"}
targetOperationLatencies = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "target_operation_latency_milliseconds",
Help: "The latencies for target operations.",
Objectives: []float64{0.01, 0.05, 0.5, 0.90, 0.99},
},
[]string{job, instance, outcome},
)
targetIntervalLength = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
@ -72,7 +61,6 @@ var (
)
func init() {
prometheus.MustRegister(targetOperationLatencies)
prometheus.MustRegister(targetIntervalLength)
}
@ -189,28 +177,37 @@ func NewTarget(url string, deadline time.Duration, baseLabels clientmodel.LabelS
return target
}
func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clientmodel.Timestamp, healthy bool) {
metric := clientmodel.Metric{}
func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clientmodel.Timestamp, healthy bool, scrapeDuration time.Duration) {
healthMetric := clientmodel.Metric{}
durationMetric := clientmodel.Metric{}
for label, value := range t.baseLabels {
metric[label] = value
healthMetric[label] = value
durationMetric[label] = value
}
metric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(ScrapeHealthMetricName)
metric[InstanceLabel] = clientmodel.LabelValue(t.URL())
healthMetric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(scrapeHealthMetricName)
durationMetric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(scrapeDurationMetricName)
healthMetric[InstanceLabel] = clientmodel.LabelValue(t.URL())
durationMetric[InstanceLabel] = clientmodel.LabelValue(t.URL())
healthValue := clientmodel.SampleValue(0)
if healthy {
healthValue = clientmodel.SampleValue(1)
}
sample := &clientmodel.Sample{
Metric: metric,
healthSample := &clientmodel.Sample{
Metric: healthMetric,
Timestamp: timestamp,
Value: healthValue,
}
durationSample := &clientmodel.Sample{
Metric: durationMetric,
Timestamp: timestamp,
Value: clientmodel.SampleValue(float64(scrapeDuration) / float64(time.Second)),
}
ingester.Ingest(&extraction.Result{
Err: nil,
Samples: clientmodel.Samples{sample},
Samples: clientmodel.Samples{healthSample, durationSample},
})
}
@ -292,23 +289,15 @@ const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client
func (t *target) scrape(ingester extraction.Ingester) (err error) {
timestamp := clientmodel.Now()
defer func(start time.Time) {
ms := float64(time.Since(start)) / float64(time.Millisecond)
labels := prometheus.Labels{
job: string(t.baseLabels[clientmodel.JobLabel]),
instance: t.URL(),
outcome: success,
}
t.Lock() // Writing t.state and t.lastError requires the lock.
if err == nil {
t.state = Alive
labels[outcome] = failure
} else {
t.state = Unreachable
}
t.lastError = err
t.Unlock()
targetOperationLatencies.With(labels).Observe(ms)
t.recordScrapeHealth(ingester, timestamp, err == nil)
t.recordScrapeHealth(ingester, timestamp, err == nil, time.Since(start))
}(time.Now())
req, err := http.NewRequest("GET", t.URL(), nil)

View File

@ -33,17 +33,22 @@ import (
const resolvConf = "/etc/resolv.conf"
var (
dnsSDLookupsCount = prometheus.NewCounterVec(
dnsSDLookupsCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "dns_sd_lookups_total",
Help: "The number of DNS-SD lookup successes/failures per pool.",
},
[]string{outcome},
)
Help: "The number of DNS-SD lookups.",
})
dnsSDLookupFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "dns_sd_lookup_failures_total",
Help: "The number of DNS-SD lookup failures.",
})
)
func init() {
prometheus.MustRegister(dnsSDLookupFailuresCount)
prometheus.MustRegister(dnsSDLookupsCount)
}
@ -77,11 +82,10 @@ func NewSdTargetProvider(job config.JobConfig) *sdTargetProvider {
func (p *sdTargetProvider) Targets() ([]Target, error) {
var err error
defer func() {
message := success
dnsSDLookupsCount.Inc()
if err != nil {
message = failure
dnsSDLookupFailuresCount.Inc()
}
dnsSDLookupsCount.WithLabelValues(message).Inc()
}()
if time.Since(p.lastRefresh) < p.refreshInterval {

View File

@ -39,7 +39,7 @@ func (i *collectResultIngester) Ingest(r *extraction.Result) error {
func TestTargetScrapeUpdatesState(t *testing.T) {
testTarget := target{
state: Unknown,
url: "bad schema",
url: "bad schema",
httpClient: utility.NewDeadlineClient(0),
}
testTarget.scrape(nopIngester{})
@ -50,25 +50,25 @@ func TestTargetScrapeUpdatesState(t *testing.T) {
func TestTargetRecordScrapeHealth(t *testing.T) {
testTarget := target{
url: "http://example.url",
url: "http://example.url",
baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"},
httpClient: utility.NewDeadlineClient(0),
}
now := clientmodel.Now()
ingester := &collectResultIngester{}
testTarget.recordScrapeHealth(ingester, now, true)
testTarget.recordScrapeHealth(ingester, now, true, 2 * time.Second)
result := ingester.result
if len(result.Samples) != 1 {
t.Fatalf("Expected one sample, got %d", len(result.Samples))
if len(result.Samples) != 2 {
t.Fatalf("Expected two samples, got %d", len(result.Samples))
}
actual := result.Samples[0]
expected := &clientmodel.Sample{
Metric: clientmodel.Metric{
clientmodel.MetricNameLabel: ScrapeHealthMetricName,
clientmodel.MetricNameLabel: scrapeHealthMetricName,
InstanceLabel: "http://example.url",
clientmodel.JobLabel: "testjob",
},
@ -83,6 +83,25 @@ func TestTargetRecordScrapeHealth(t *testing.T) {
if !actual.Equal(expected) {
t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual)
}
actual = result.Samples[1]
expected = &clientmodel.Sample{
Metric: clientmodel.Metric{
clientmodel.MetricNameLabel: scrapeDurationMetricName,
InstanceLabel: "http://example.url",
clientmodel.JobLabel: "testjob",
},
Timestamp: now,
Value: 2.0,
}
if result.Err != nil {
t.Fatalf("Got unexpected error: %v", result.Err)
}
if !actual.Equal(expected) {
t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual)
}
}
func TestTargetScrapeTimeout(t *testing.T) {
@ -147,7 +166,7 @@ func TestTargetScrape404(t *testing.T) {
func TestTargetRunScraperScrapes(t *testing.T) {
testTarget := target{
state: Unknown,
url: "bad schema",
url: "bad schema",
httpClient: utility.NewDeadlineClient(0),
scraperStopping: make(chan struct{}),
scraperStopped: make(chan struct{}),

View File

@ -15,6 +15,7 @@ package retrieval
import (
"sync"
"github.com/golang/glog"
"github.com/prometheus/client_golang/extraction"

View File

@ -27,7 +27,7 @@ func testTargetPool(t testing.TB) {
}
type input struct {
url string
url string
scheduledFor time.Time
}
@ -84,7 +84,7 @@ func testTargetPool(t testing.TB) {
for _, input := range scenario.inputs {
target := target{
url: input.url,
url: input.url,
newBaseLabels: make(chan clientmodel.LabelSet, 1),
httpClient: &http.Client{},
}
@ -114,7 +114,7 @@ func TestTargetPool(t *testing.T) {
func TestTargetPoolReplaceTargets(t *testing.T) {
pool := NewTargetPool(nil, nil, nopIngester{}, time.Duration(1))
oldTarget1 := &target{
url: "example1",
url: "example1",
state: Unreachable,
scraperStopping: make(chan struct{}),
scraperStopped: make(chan struct{}),
@ -122,7 +122,7 @@ func TestTargetPoolReplaceTargets(t *testing.T) {
httpClient: &http.Client{},
}
oldTarget2 := &target{
url: "example2",
url: "example2",
state: Unreachable,
scraperStopping: make(chan struct{}),
scraperStopped: make(chan struct{}),
@ -130,7 +130,7 @@ func TestTargetPoolReplaceTargets(t *testing.T) {
httpClient: &http.Client{},
}
newTarget1 := &target{
url: "example1",
url: "example1",
state: Alive,
scraperStopping: make(chan struct{}),
scraperStopped: make(chan struct{}),
@ -138,7 +138,7 @@ func TestTargetPoolReplaceTargets(t *testing.T) {
httpClient: &http.Client{},
}
newTarget2 := &target{
url: "example3",
url: "example3",
state: Alive,
scraperStopping: make(chan struct{}),
scraperStopped: make(chan struct{}),

View File

@ -869,7 +869,7 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap
iter := fingerprintToSeries.iter()
defer func() {
// Consume the iterator in any case to not leak goroutines.
for _ = range iter {
for range iter {
}
}()

View File

@ -567,7 +567,7 @@ func (s *memorySeriesStorage) cycleThroughMemoryFingerprints() chan clientmodel.
defer func() {
if fpIter != nil {
for _ = range fpIter {
for range fpIter {
// Consume the iterator.
}
}
@ -661,9 +661,9 @@ loop:
}
}
// Wait until both channels are closed.
for _ = range memoryFingerprints {
for range memoryFingerprints {
}
for _ = range archivedFingerprints {
for range archivedFingerprints {
}
}