Collecting etcd histogram metrics

pull/8/head
Krzysztof Siedlecki 2018-07-13 12:39:04 +02:00
parent 82c986ecbc
commit e5c9383b59
2 changed files with 45 additions and 80 deletions

View File

@ -141,7 +141,6 @@ go_library(
"//vendor/github.com/aws/aws-sdk-go/aws/session:go_default_library", "//vendor/github.com/aws/aws-sdk-go/aws/session:go_default_library",
"//vendor/github.com/aws/aws-sdk-go/service/autoscaling:go_default_library", "//vendor/github.com/aws/aws-sdk-go/service/autoscaling:go_default_library",
"//vendor/github.com/aws/aws-sdk-go/service/ec2:go_default_library", "//vendor/github.com/aws/aws-sdk-go/service/ec2:go_default_library",
"//vendor/github.com/beorn7/perks/quantile:go_default_library",
"//vendor/github.com/golang/glog:go_default_library", "//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library", "//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/onsi/ginkgo/config:go_default_library", "//vendor/github.com/onsi/ginkgo/config:go_default_library",

View File

@ -23,6 +23,7 @@ import (
"fmt" "fmt"
"io" "io"
"math" "math"
"reflect"
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
@ -36,7 +37,6 @@ import (
"k8s.io/kubernetes/pkg/util/system" "k8s.io/kubernetes/pkg/util/system"
"k8s.io/kubernetes/test/e2e/framework/metrics" "k8s.io/kubernetes/test/e2e/framework/metrics"
"github.com/beorn7/perks/quantile"
"github.com/prometheus/common/expfmt" "github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
) )
@ -233,60 +233,34 @@ func (l *SchedulingMetrics) PrintJSON() string {
return PrettyPrintJSON(l) return PrettyPrintJSON(l)
} }
// Histogram is a JSON-friendly snapshot of a single Prometheus histogram
// time series: the label set that identifies the series plus its cumulative
// bucket counts, keyed by the raw "le" (upper-bound) label value.
type Histogram struct {
	Labels  map[string]string `json:"labels"`
	Buckets map[string]int    `json:"buckets"`
}

// HistogramVec groups the histograms of one metric family, one entry per
// distinct label set.
type HistogramVec []Histogram

// newHistogram returns an empty Histogram for the given label set, with a
// non-nil bucket map ready to record counts into.
func newHistogram(labels map[string]string) *Histogram {
	return &Histogram{
		Labels:  labels,
		Buckets: make(map[string]int),
	}
}
type EtcdMetrics struct { type EtcdMetrics struct {
BackendCommitDuration LatencyMetric `json:"backendCommitDuration"` BackendCommitDuration HistogramVec `json:"backendCommitDuration"`
SnapshotSaveTotalDuration LatencyMetric `json:"snapshotSaveTotalDuration"` SnapshotSaveTotalDuration HistogramVec `json:"snapshotSaveTotalDuration"`
PeerRoundTripTime LatencyMetric `json:"peerRoundTripTime"` PeerRoundTripTime HistogramVec `json:"peerRoundTripTime"`
WalFsyncFuration LatencyMetric `json:"walFsyncFuration"` WalFsyncDuration HistogramVec `json:"walFsyncDuration"`
}
func newEtcdMetrics() *EtcdMetrics {
return &EtcdMetrics{
BackendCommitDuration: make(HistogramVec, 0),
SnapshotSaveTotalDuration: make(HistogramVec, 0),
PeerRoundTripTime: make(HistogramVec, 0),
WalFsyncDuration: make(HistogramVec, 0),
}
} }
func (l *EtcdMetrics) SummaryKind() string { func (l *EtcdMetrics) SummaryKind() string {
@ -621,15 +595,25 @@ func ResetSchedulerMetrics(c clientset.Interface) error {
return nil return nil
} }
func convertSampleToBucket(sample *model.Sample, h *histogram, ratio float64) { func convertSampleToBucket(sample *model.Sample, h *HistogramVec) {
if sample.Metric["le"] == "+Inf" { labels := make(map[string]string)
h.Buckets[math.MaxFloat64] = int(sample.Value) for k, v := range sample.Metric {
} else { if k != "le" {
f, err := strconv.ParseFloat(string(sample.Metric["le"]), 64) labels[string(k)] = string(v)
if err == nil {
h.Buckets[f*ratio] = int(sample.Value)
} }
} }
var hist *Histogram
for i := range *h {
if reflect.DeepEqual(labels, (*h)[i].Labels) {
hist = &((*h)[i])
break
}
}
if hist == nil {
hist = newHistogram(labels)
*h = append(*h, *hist)
}
hist.Buckets[string(sample.Metric["le"])] = int(sample.Value)
} }
// VerifyEtcdMetrics verifies etcd metrics by logging them // VerifyEtcdMetrics verifies etcd metrics by logging them
@ -652,38 +636,20 @@ func VerifyEtcdMetrics(c clientset.Interface) (*EtcdMetrics, error) {
return nil, err return nil, err
} }
backendCommitDurationHistogam := getNewHistogram() result := newEtcdMetrics()
snapshotSaveTotalDurationHistogram := getNewHistogram()
peerRoundTripTimeHistogram := getNewHistogram()
walFsyncDurationHistogram := getNewHistogram()
secondToMillisecondRatio := float64(time.Second / time.Millisecond)
for _, sample := range samples { for _, sample := range samples {
switch sample.Metric[model.MetricNameLabel] { switch sample.Metric[model.MetricNameLabel] {
case "etcd_disk_backend_commit_duration_seconds_bucket": case "etcd_disk_backend_commit_duration_seconds_bucket":
convertSampleToBucket(sample, backendCommitDurationHistogam, secondToMillisecondRatio) convertSampleToBucket(sample, &result.BackendCommitDuration)
case "etcd_disk_backend_commit_duration_seconds_count":
backendCommitDurationHistogam.Count = int(sample.Value)
case "etcd_debugging_snap_save_total_duration_seconds_bucket": case "etcd_debugging_snap_save_total_duration_seconds_bucket":
convertSampleToBucket(sample, snapshotSaveTotalDurationHistogram, secondToMillisecondRatio) convertSampleToBucket(sample, &result.SnapshotSaveTotalDuration)
case "etcd_debugging_snap_save_total_duration_seconds_count":
backendCommitDurationHistogam.Count = int(sample.Value)
case "etcd_disk_wal_fsync_duration_seconds_bucket": case "etcd_disk_wal_fsync_duration_seconds_bucket":
convertSampleToBucket(sample, walFsyncDurationHistogram, secondToMillisecondRatio) convertSampleToBucket(sample, &result.WalFsyncDuration)
case "etcd_disk_wal_fsync_duration_seconds_count":
walFsyncDurationHistogram.Count = int(sample.Value)
case "etcd_network_peer_round_trip_time_seconds_bucket": case "etcd_network_peer_round_trip_time_seconds_bucket":
convertSampleToBucket(sample, peerRoundTripTimeHistogram, secondToMillisecondRatio) convertSampleToBucket(sample, &result.PeerRoundTripTime)
case "etcd_network_peer_round_trip_time_seconds_count":
peerRoundTripTimeHistogram.Count = int(sample.Value)
} }
} }
return result, nil
result := EtcdMetrics{}
backendCommitDurationHistogam.ConvertToLatencyMetric(&result.BackendCommitDuration)
snapshotSaveTotalDurationHistogram.ConvertToLatencyMetric(&result.SnapshotSaveTotalDuration)
peerRoundTripTimeHistogram.ConvertToLatencyMetric(&result.PeerRoundTripTime)
walFsyncDurationHistogram.ConvertToLatencyMetric(&result.WalFsyncFuration)
return &result, nil
} }
func PrettyPrintJSON(metrics interface{}) string { func PrettyPrintJSON(metrics interface{}) string {