Duration statistics for each target pool.

We have an open question of how long it takes each target pool to retrieve state from all of its participating elements. This commit starts to provide insight into that.
2013-01-28 16:36:28 +01:00 · 2013-01-28 16:36:28 +01:00 · e01b6cdb44
parent 567e2948d3
commit e01b6cdb44
2 changed files with 28 additions and 5 deletions
--- a/retrieval/instrumentation.go
+++ b/retrieval/instrumentation.go
@ -38,7 +38,10 @@ var (

 	targetOperationLatencies = metrics.NewHistogram(networkLatencyHistogram)

-	// TODO: Include durations partitioned by target pool intervals.
+	retrievalDurations = metrics.NewHistogram(&metrics.HistogramSpecification{
+		Starts:                metrics.LogarithmicSizedBucketsFor(0, 10000),
+		BucketBuilder:         metrics.AccumulatingBucketBuilder(metrics.EvictAndReplaceWith(10, maths.Average), 100),
+		ReportablePercentiles: []float64{0.01, 0.05, 0.5, 0.90, 0.99}})

 	targetOperations = metrics.NewCounter()
 )
@ -46,4 +49,5 @@ var (
 func init() {
 	registry.Register("prometheus_target_operations_total", "The total numbers of operations of the various targets that are being monitored.", registry.NilLabels, targetOperations)
 	registry.Register("prometheus_target_operation_latency_ms", "The latencies for various target operations.", registry.NilLabels, targetOperationLatencies)
+	registry.Register("prometheus_targetpool_duration_ms", "The durations for each TargetPool to retrieve state from all included entities.", registry.NilLabels, retrievalDurations)
 }
--- a/retrieval/targetpool.go
+++ b/retrieval/targetpool.go
@ -7,10 +7,14 @@ import (
 	"time"
 )

+const (
+	intervalKey = "interval"
+)
+
 type TargetPool struct {
 	done    chan bool
-	targets []Target
 	manager TargetManager
+	targets []Target
 }

 func NewTargetPool(m TargetManager) (p *TargetPool) {
@ -51,7 +55,7 @@ func (p *TargetPool) Run(results chan format.Result, interval time.Duration) {
 	for {
 		select {
 		case <-ticker:
-			p.runIteration(results)
+			p.runIteration(results, interval)
 		case <-p.done:
 			log.Printf("TargetPool exiting...")
 			break
@ -70,8 +74,13 @@ func (p *TargetPool) runSingle(earliest time.Time, results chan format.Result, t
 	t.Scrape(earliest, results)
 }

-func (p *TargetPool) runIteration(results chan format.Result) {
-	for i := 0; i < p.Len(); i++ {
+func (p *TargetPool) runIteration(results chan format.Result, interval time.Duration) {
+	begin := time.Now()
+
+	targetCount := p.Len()
+	finished := make(chan bool, targetCount)
+
+	for i := 0; i < targetCount; i++ {
 		target := heap.Pop(p).(Target)
 		if target == nil {
 			break
@ -88,6 +97,16 @@ func (p *TargetPool) runIteration(results chan format.Result) {
 		go func() {
 			p.runSingle(now, results, target)
 			heap.Push(p, target)
+			finished <- true
 		}()
 	}
+
+	for i := 0; i < targetCount; i++ {
+		<-finished
+	}
+
+	close(finished)
+
+	duration := float64(time.Now().Sub(begin) / time.Millisecond)
+	retrievalDurations.Add(map[string]string{intervalKey: interval.String()}, duration)
 }