From 895f2f092fe3c41419d8107835ebff82cbf91f54 Mon Sep 17 00:00:00 2001
From: Fabian Reinartz
Date: Wed, 9 Mar 2016 16:00:33 +0100
Subject: [PATCH 1/3] Fix flaky scrape test

---
 retrieval/scrape_test.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/retrieval/scrape_test.go b/retrieval/scrape_test.go
index 7ea1eba5a..4d2c13e91 100644
--- a/retrieval/scrape_test.go
+++ b/retrieval/scrape_test.go
@@ -19,6 +19,7 @@ import (
 	"net/http/httptest"
 	"net/url"
 	"reflect"
+	"sort"
 	"strings"
 	"sync"
 	"testing"
@@ -469,6 +470,8 @@ func TestTargetScraperScrapeOK(t *testing.T) {
 			Value: 2,
 		},
 	}
+	sort.Sort(expectedSamples)
+	sort.Sort(samples)
 
 	if !reflect.DeepEqual(samples, expectedSamples) {
 		t.Errorf("Scraped samples did not match served metrics")

From a1ee77601aa93cda8c4711a6b3a860be45c0a4e3 Mon Sep 17 00:00:00 2001
From: stuart nelson
Date: Wed, 9 Mar 2016 16:33:10 +0100
Subject: [PATCH 2/3] Instrument the duration of the `reload` function

---
 retrieval/scrape.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/retrieval/scrape.go b/retrieval/scrape.go
index 3697a2216..a3e7e6d9c 100644
--- a/retrieval/scrape.go
+++ b/retrieval/scrape.go
@@ -65,11 +65,21 @@ var (
 		},
 		[]string{interval},
 	)
+	targetReloadIntervalLength = prometheus.NewSummaryVec(
+		prometheus.SummaryOpts{
+			Namespace:  namespace,
+			Name:       "target_reload_length_seconds",
+			Help:       "Actual interval to reload the scrape pool with a given configuration.",
+			Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
+		},
+		[]string{interval},
+	)
 )
 
 func init() {
 	prometheus.MustRegister(targetIntervalLength)
 	prometheus.MustRegister(targetSkippedScrapes)
+	prometheus.MustRegister(targetReloadIntervalLength)
 }
 
 // scrapePool manages scrapes for sets of targets.
@@ -132,6 +142,7 @@ func (sp *scrapePool) stop() {
 // but all scrape loops are restarted with the new scrape configuration.
 // This method returns after all scrape loops that were stopped have fully terminated.
 func (sp *scrapePool) reload(cfg *config.ScrapeConfig) {
+	start := time.Now()
 	sp.mtx.Lock()
 	defer sp.mtx.Unlock()
 
@@ -168,6 +179,9 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) {
 	}
 
 	wg.Wait()
+	targetReloadIntervalLength.WithLabelValues(interval.String()).Observe(
+		float64(time.Since(start)) / float64(time.Second),
+	)
 }
 
 // sync takes a list of potentially duplicated targets, deduplicates them, starts

From dbe5d18b6e6d76804bafa462b795857d9cd44a0c Mon Sep 17 00:00:00 2001
From: stuart nelson
Date: Fri, 11 Mar 2016 12:22:23 +0100
Subject: [PATCH 3/3] Instrument scrape pool `sync()`

Instruments:
- duration
- count
---
 retrieval/scrape.go | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/retrieval/scrape.go b/retrieval/scrape.go
index a3e7e6d9c..c2ff4496e 100644
--- a/retrieval/scrape.go
+++ b/retrieval/scrape.go
@@ -43,6 +43,7 @@ const (
 	// Constants for instrumentation.
 	namespace = "prometheus"
 	interval  = "interval"
+	scrapeJob = "scrape_job"
 )
 
 var (
@@ -74,12 +75,31 @@ var (
 		},
 		[]string{interval},
 	)
+	targetSyncIntervalLength = prometheus.NewSummaryVec(
+		prometheus.SummaryOpts{
+			Namespace:  namespace,
+			Name:       "target_sync_length_seconds",
+			Help:       "Actual interval to sync the scrape pool.",
+			Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
+		},
+		[]string{scrapeJob},
+	)
+	targetScrapePoolSyncsCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: namespace,
+			Name:      "target_scrape_pool_sync_total",
+			Help:      "Total number of syncs that were executed on a scrape pool.",
+		},
+		[]string{scrapeJob},
+	)
 )
 
 func init() {
 	prometheus.MustRegister(targetIntervalLength)
 	prometheus.MustRegister(targetSkippedScrapes)
 	prometheus.MustRegister(targetReloadIntervalLength)
+	prometheus.MustRegister(targetSyncIntervalLength)
+	prometheus.MustRegister(targetScrapePoolSyncsCounter)
 }
 
 // scrapePool manages scrapes for sets of targets.
@@ -188,6 +208,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) {
 // scrape loops for new targets, and stops scrape loops for disappeared targets.
 // It returns after all stopped scrape loops terminated.
 func (sp *scrapePool) sync(targets []*Target) {
+	start := time.Now()
 	sp.mtx.Lock()
 	defer sp.mtx.Unlock()
 
@@ -233,6 +254,10 @@ func (sp *scrapePool) sync(targets []*Target) {
 	// may be active and tries to insert. The old scraper that didn't terminate yet could still
 	// be inserting a previous sample set.
 	wg.Wait()
+	targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe(
+		float64(time.Since(start)) / float64(time.Second),
+	)
+	targetScrapePoolSyncsCounter.WithLabelValues(sp.config.JobName).Inc()
 }
 
 // sampleAppender returns an appender for ingested samples from the target.
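
Note: patches 2 and 3 share one client_golang instrumentation pattern: capture
time.Now() when the operation begins, let the operation (including wg.Wait())
finish, then observe the elapsed time in seconds on a SummaryVec keyed by a
label (the configured interval for reload, the job name for sync), optionally
bumping a CounterVec alongside. The sketch below is a minimal, self-contained
illustration of that pattern; the "example" namespace, the op_duration_seconds
and op_total metric names, the "job" label, and the timedOp function are
invented for illustration and are not part of these patches.

// Minimal sketch (hypothetical names throughout) of the pattern the patches
// apply to reload() and sync(): time an operation with a SummaryVec and
// count its executions with a CounterVec.
package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	// Summary of how long each run of the operation took, per job.
	opDuration = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Namespace: "example",
			Name:      "op_duration_seconds",
			Help:      "Duration of the example operation.",
			// Track the median, 90th, and 99th percentile, each with an
			// allowed absolute error in the quantile dimension.
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		},
		[]string{"job"},
	)
	// Total number of runs of the operation, per job.
	opCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "example",
			Name:      "op_total",
			Help:      "Total number of example operations.",
		},
		[]string{"job"},
	)
)

func init() {
	// Same registration style as the patches' init() in scrape.go.
	prometheus.MustRegister(opDuration)
	prometheus.MustRegister(opCount)
}

// timedOp mirrors the shape of reload()/sync(): capture the start time first,
// do the work, then observe the elapsed seconds once the work has finished.
func timedOp(job string) {
	start := time.Now()

	time.Sleep(10 * time.Millisecond) // stand-in for the real work

	opDuration.WithLabelValues(job).Observe(
		float64(time.Since(start)) / float64(time.Second),
	)
	opCount.WithLabelValues(job).Inc()
}

func main() {
	timedOp("node")
	fmt.Println("recorded one timed operation")
}

The conversion float64(time.Since(start)) / float64(time.Second), as written
in the patches, is equivalent to time.Since(start).Seconds(); the Objectives
map selects which quantiles the summary tracks and the absolute error allowed
for each.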