mirror of https://github.com/k3s-io/k3s
Collecting etcd histogram metrics
parent
82c986ecbc
commit
e5c9383b59
|
@ -141,7 +141,6 @@ go_library(
|
||||||
"//vendor/github.com/aws/aws-sdk-go/aws/session:go_default_library",
|
"//vendor/github.com/aws/aws-sdk-go/aws/session:go_default_library",
|
||||||
"//vendor/github.com/aws/aws-sdk-go/service/autoscaling:go_default_library",
|
"//vendor/github.com/aws/aws-sdk-go/service/autoscaling:go_default_library",
|
||||||
"//vendor/github.com/aws/aws-sdk-go/service/ec2:go_default_library",
|
"//vendor/github.com/aws/aws-sdk-go/service/ec2:go_default_library",
|
||||||
"//vendor/github.com/beorn7/perks/quantile:go_default_library",
|
|
||||||
"//vendor/github.com/golang/glog:go_default_library",
|
"//vendor/github.com/golang/glog:go_default_library",
|
||||||
"//vendor/github.com/onsi/ginkgo:go_default_library",
|
"//vendor/github.com/onsi/ginkgo:go_default_library",
|
||||||
"//vendor/github.com/onsi/ginkgo/config:go_default_library",
|
"//vendor/github.com/onsi/ginkgo/config:go_default_library",
|
||||||
|
|
|
@ -23,6 +23,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"math"
|
"math"
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -36,7 +37,6 @@ import (
|
||||||
"k8s.io/kubernetes/pkg/util/system"
|
"k8s.io/kubernetes/pkg/util/system"
|
||||||
"k8s.io/kubernetes/test/e2e/framework/metrics"
|
"k8s.io/kubernetes/test/e2e/framework/metrics"
|
||||||
|
|
||||||
"github.com/beorn7/perks/quantile"
|
|
||||||
"github.com/prometheus/common/expfmt"
|
"github.com/prometheus/common/expfmt"
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
)
|
)
|
||||||
|
@ -233,60 +233,34 @@ func (l *SchedulingMetrics) PrintJSON() string {
|
||||||
return PrettyPrintJSON(l)
|
return PrettyPrintJSON(l)
|
||||||
}
|
}
|
||||||
|
|
||||||
type histogram struct {
|
type Histogram struct {
|
||||||
Count int
|
Labels map[string]string `json:"labels"`
|
||||||
Buckets map[float64]int
|
Buckets map[string]int `json:"buckets"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *histogram) ConvertToRangeBuckets() {
|
type HistogramVec []Histogram
|
||||||
keys := []float64{}
|
|
||||||
for k := range h.Buckets {
|
|
||||||
keys = append(keys, k)
|
|
||||||
}
|
|
||||||
sort.Float64s(keys)
|
|
||||||
lessValuesSum := 0
|
|
||||||
for _, k := range keys {
|
|
||||||
h.Buckets[k] -= lessValuesSum
|
|
||||||
lessValuesSum += h.Buckets[k]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (h *histogram) CalculatePerc(perc float64) float64 {
|
func newHistogram(labels map[string]string) *Histogram {
|
||||||
targets := map[float64]float64{
|
return &Histogram{
|
||||||
perc: 0.001,
|
Labels: labels,
|
||||||
}
|
Buckets: make(map[string]int),
|
||||||
q := quantile.NewTargeted(targets)
|
|
||||||
var samples quantile.Samples
|
|
||||||
|
|
||||||
for k, v := range h.Buckets {
|
|
||||||
if v > 0 {
|
|
||||||
samples = append(samples, quantile.Sample{Value: k, Width: float64(v)})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
q.Merge(samples)
|
|
||||||
|
|
||||||
return q.Query(perc)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (h *histogram) ConvertToLatencyMetric(m *LatencyMetric) {
|
|
||||||
h.ConvertToRangeBuckets()
|
|
||||||
m.Perc50 = time.Duration(h.CalculatePerc(0.5))
|
|
||||||
m.Perc90 = time.Duration(h.CalculatePerc(0.9))
|
|
||||||
m.Perc99 = time.Duration(h.CalculatePerc(0.99))
|
|
||||||
}
|
|
||||||
|
|
||||||
func getNewHistogram() *histogram {
|
|
||||||
return &histogram{
|
|
||||||
Count: 0,
|
|
||||||
Buckets: make(map[float64]int),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type EtcdMetrics struct {
|
type EtcdMetrics struct {
|
||||||
BackendCommitDuration LatencyMetric `json:"backendCommitDuration"`
|
BackendCommitDuration HistogramVec `json:"backendCommitDuration"`
|
||||||
SnapshotSaveTotalDuration LatencyMetric `json:"snapshotSaveTotalDuration"`
|
SnapshotSaveTotalDuration HistogramVec `json:"snapshotSaveTotalDuration"`
|
||||||
PeerRoundTripTime LatencyMetric `json:"peerRoundTripTime"`
|
PeerRoundTripTime HistogramVec `json:"peerRoundTripTime"`
|
||||||
WalFsyncFuration LatencyMetric `json:"walFsyncFuration"`
|
WalFsyncDuration HistogramVec `json:"walFsyncDuration"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func newEtcdMetrics() *EtcdMetrics {
|
||||||
|
return &EtcdMetrics{
|
||||||
|
BackendCommitDuration: make(HistogramVec, 0),
|
||||||
|
SnapshotSaveTotalDuration: make(HistogramVec, 0),
|
||||||
|
PeerRoundTripTime: make(HistogramVec, 0),
|
||||||
|
WalFsyncDuration: make(HistogramVec, 0),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *EtcdMetrics) SummaryKind() string {
|
func (l *EtcdMetrics) SummaryKind() string {
|
||||||
|
@ -621,15 +595,25 @@ func ResetSchedulerMetrics(c clientset.Interface) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func convertSampleToBucket(sample *model.Sample, h *histogram, ratio float64) {
|
func convertSampleToBucket(sample *model.Sample, h *HistogramVec) {
|
||||||
if sample.Metric["le"] == "+Inf" {
|
labels := make(map[string]string)
|
||||||
h.Buckets[math.MaxFloat64] = int(sample.Value)
|
for k, v := range sample.Metric {
|
||||||
} else {
|
if k != "le" {
|
||||||
f, err := strconv.ParseFloat(string(sample.Metric["le"]), 64)
|
labels[string(k)] = string(v)
|
||||||
if err == nil {
|
|
||||||
h.Buckets[f*ratio] = int(sample.Value)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
var hist *Histogram
|
||||||
|
for i := range *h {
|
||||||
|
if reflect.DeepEqual(labels, (*h)[i].Labels) {
|
||||||
|
hist = &((*h)[i])
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if hist == nil {
|
||||||
|
hist = newHistogram(labels)
|
||||||
|
*h = append(*h, *hist)
|
||||||
|
}
|
||||||
|
hist.Buckets[string(sample.Metric["le"])] = int(sample.Value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// VerifyEtcdMetrics verifies etcd metrics by logging them
|
// VerifyEtcdMetrics verifies etcd metrics by logging them
|
||||||
|
@ -652,38 +636,20 @@ func VerifyEtcdMetrics(c clientset.Interface) (*EtcdMetrics, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
backendCommitDurationHistogam := getNewHistogram()
|
result := newEtcdMetrics()
|
||||||
snapshotSaveTotalDurationHistogram := getNewHistogram()
|
|
||||||
peerRoundTripTimeHistogram := getNewHistogram()
|
|
||||||
walFsyncDurationHistogram := getNewHistogram()
|
|
||||||
secondToMillisecondRatio := float64(time.Second / time.Millisecond)
|
|
||||||
for _, sample := range samples {
|
for _, sample := range samples {
|
||||||
switch sample.Metric[model.MetricNameLabel] {
|
switch sample.Metric[model.MetricNameLabel] {
|
||||||
case "etcd_disk_backend_commit_duration_seconds_bucket":
|
case "etcd_disk_backend_commit_duration_seconds_bucket":
|
||||||
convertSampleToBucket(sample, backendCommitDurationHistogam, secondToMillisecondRatio)
|
convertSampleToBucket(sample, &result.BackendCommitDuration)
|
||||||
case "etcd_disk_backend_commit_duration_seconds_count":
|
|
||||||
backendCommitDurationHistogam.Count = int(sample.Value)
|
|
||||||
case "etcd_debugging_snap_save_total_duration_seconds_bucket":
|
case "etcd_debugging_snap_save_total_duration_seconds_bucket":
|
||||||
convertSampleToBucket(sample, snapshotSaveTotalDurationHistogram, secondToMillisecondRatio)
|
convertSampleToBucket(sample, &result.SnapshotSaveTotalDuration)
|
||||||
case "etcd_debugging_snap_save_total_duration_seconds_count":
|
|
||||||
backendCommitDurationHistogam.Count = int(sample.Value)
|
|
||||||
case "etcd_disk_wal_fsync_duration_seconds_bucket":
|
case "etcd_disk_wal_fsync_duration_seconds_bucket":
|
||||||
convertSampleToBucket(sample, walFsyncDurationHistogram, secondToMillisecondRatio)
|
convertSampleToBucket(sample, &result.WalFsyncDuration)
|
||||||
case "etcd_disk_wal_fsync_duration_seconds_count":
|
|
||||||
walFsyncDurationHistogram.Count = int(sample.Value)
|
|
||||||
case "etcd_network_peer_round_trip_time_seconds_bucket":
|
case "etcd_network_peer_round_trip_time_seconds_bucket":
|
||||||
convertSampleToBucket(sample, peerRoundTripTimeHistogram, secondToMillisecondRatio)
|
convertSampleToBucket(sample, &result.PeerRoundTripTime)
|
||||||
case "etcd_network_peer_round_trip_time_seconds_count":
|
|
||||||
peerRoundTripTimeHistogram.Count = int(sample.Value)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return result, nil
|
||||||
result := EtcdMetrics{}
|
|
||||||
backendCommitDurationHistogam.ConvertToLatencyMetric(&result.BackendCommitDuration)
|
|
||||||
snapshotSaveTotalDurationHistogram.ConvertToLatencyMetric(&result.SnapshotSaveTotalDuration)
|
|
||||||
peerRoundTripTimeHistogram.ConvertToLatencyMetric(&result.PeerRoundTripTime)
|
|
||||||
walFsyncDurationHistogram.ConvertToLatencyMetric(&result.WalFsyncFuration)
|
|
||||||
return &result, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func PrettyPrintJSON(metrics interface{}) string {
|
func PrettyPrintJSON(metrics interface{}) string {
|
||||||
|
|
Loading…
Reference in New Issue