mirror of https://github.com/k3s-io/k3s
Move etcd latency metrics to histogram and update test case
parent
88c4b64400
commit
8b418631c0
|
@ -57,7 +57,7 @@ var (
|
|||
deprecatedRequestCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "apiserver_request_count",
|
||||
Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
|
||||
Help: "(Deprecated) Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
|
||||
},
|
||||
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
|
||||
)
|
||||
|
@ -77,20 +77,10 @@ var (
|
|||
},
|
||||
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
|
||||
)
|
||||
requestLatenciesSummary = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "apiserver_request_latency_seconds_summary",
|
||||
Help: "Response latency summary in seconds for each verb, group, version, resource, subresource, scope and component.",
|
||||
// Make the sliding window of 5h.
|
||||
// TODO: The value for this should be based on our SLI definition (medium term).
|
||||
MaxAge: 5 * time.Hour,
|
||||
},
|
||||
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
|
||||
)
|
||||
deprecatedRequestLatencies = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "apiserver_request_latencies",
|
||||
Help: "Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.",
|
||||
Help: "(Deprecated) Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.",
|
||||
// Use buckets ranging from 125 ms to 8 seconds.
|
||||
Buckets: prometheus.ExponentialBuckets(125000, 2.0, 7),
|
||||
},
|
||||
|
@ -99,7 +89,7 @@ var (
|
|||
deprecatedRequestLatenciesSummary = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "apiserver_request_latencies_summary",
|
||||
Help: "Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.",
|
||||
Help: "(Deprecated) Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.",
|
||||
// Make the sliding window of 5h.
|
||||
// TODO: The value for this should be based on our SLI definition (medium term).
|
||||
MaxAge: 5 * time.Hour,
|
||||
|
@ -126,7 +116,7 @@ var (
|
|||
DeprecatedDroppedRequests = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "apiserver_dropped_requests",
|
||||
Help: "Number of requests dropped with 'Try again later' response",
|
||||
Help: "(Deprecated) Number of requests dropped with 'Try again later' response",
|
||||
},
|
||||
[]string{"requestKind"},
|
||||
)
|
||||
|
@ -154,7 +144,6 @@ var (
|
|||
deprecatedRequestCounter,
|
||||
longRunningRequestGauge,
|
||||
requestLatencies,
|
||||
requestLatenciesSummary,
|
||||
deprecatedRequestLatencies,
|
||||
deprecatedRequestLatenciesSummary,
|
||||
responseSizes,
|
||||
|
@ -240,7 +229,6 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour
|
|||
deprecatedRequestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
|
||||
requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
|
||||
deprecatedRequestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
|
||||
requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
|
||||
deprecatedRequestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
|
||||
// We are only interested in response sizes of read requests.
|
||||
if verb == "GET" || verb == "LIST" {
|
||||
|
|
|
@ -40,22 +40,22 @@ var (
|
|||
"because two concurrent threads can miss the cache and generate the same entry twice.",
|
||||
}
|
||||
cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts)
|
||||
cacheGetLatency = prometheus.NewSummary(
|
||||
prometheus.SummaryOpts{
|
||||
cacheGetLatency = prometheus.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "etcd_request_cache_get_latency_seconds",
|
||||
Help: "Latency in seconds of getting an object from etcd cache",
|
||||
},
|
||||
)
|
||||
cacheAddLatency = prometheus.NewSummary(
|
||||
prometheus.SummaryOpts{
|
||||
cacheAddLatency = prometheus.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "etcd_request_cache_add_latency_seconds",
|
||||
Help: "Latency in seconds of adding an object to etcd cache",
|
||||
},
|
||||
)
|
||||
etcdRequestLatenciesSummary = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
etcdRequestLatency = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "etcd_request_latency_seconds",
|
||||
Help: "Etcd request latency summary in seconds for each operation and object type.",
|
||||
Help: "Etcd request latency in seconds for each operation and object type.",
|
||||
},
|
||||
[]string{"operation", "type"},
|
||||
)
|
||||
|
@ -69,36 +69,36 @@ var (
|
|||
|
||||
deprecatedCacheHitCounterOpts = prometheus.CounterOpts{
|
||||
Name: "etcd_helper_cache_hit_count",
|
||||
Help: "Counter of etcd helper cache hits.",
|
||||
Help: "(Deprecated) Counter of etcd helper cache hits.",
|
||||
}
|
||||
deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts)
|
||||
deprecatedCacheMissCounterOpts = prometheus.CounterOpts{
|
||||
Name: "etcd_helper_cache_miss_count",
|
||||
Help: "Counter of etcd helper cache miss.",
|
||||
Help: "(Deprecated) Counter of etcd helper cache miss.",
|
||||
}
|
||||
deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts)
|
||||
deprecatedCacheEntryCounterOpts = prometheus.CounterOpts{
|
||||
Name: "etcd_helper_cache_entry_count",
|
||||
Help: "Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " +
|
||||
Help: "(Deprecated) Counter of etcd helper cache entries. This can be different from etcd_helper_cache_miss_count " +
|
||||
"because two concurrent threads can miss the cache and generate the same entry twice.",
|
||||
}
|
||||
deprecatedCacheEntryCounter = prometheus.NewCounter(deprecatedCacheEntryCounterOpts)
|
||||
deprecatedCacheGetLatency = prometheus.NewSummary(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "etcd_request_cache_get_latencies_summary",
|
||||
Help: "Latency in microseconds of getting an object from etcd cache",
|
||||
Help: "(Deprecated) Latency in microseconds of getting an object from etcd cache",
|
||||
},
|
||||
)
|
||||
deprecatedCacheAddLatency = prometheus.NewSummary(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "etcd_request_cache_add_latencies_summary",
|
||||
Help: "Latency in microseconds of adding an object to etcd cache",
|
||||
Help: "(Deprecated) Latency in microseconds of adding an object to etcd cache",
|
||||
},
|
||||
)
|
||||
deprecatedEtcdRequestLatenciesSummary = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "etcd_request_latencies_summary",
|
||||
Help: "Etcd request latency summary in microseconds for each operation and object type.",
|
||||
Help: "(Deprecated) Etcd request latency summary in microseconds for each operation and object type.",
|
||||
},
|
||||
[]string{"operation", "type"},
|
||||
)
|
||||
|
@ -115,7 +115,7 @@ func Register() {
|
|||
prometheus.MustRegister(cacheEntryCounter)
|
||||
prometheus.MustRegister(cacheAddLatency)
|
||||
prometheus.MustRegister(cacheGetLatency)
|
||||
prometheus.MustRegister(etcdRequestLatenciesSummary)
|
||||
prometheus.MustRegister(etcdRequestLatency)
|
||||
prometheus.MustRegister(objectCounts)
|
||||
|
||||
// TODO(danielqsj): Remove the following metrics, they are deprecated
|
||||
|
@ -133,7 +133,7 @@ func UpdateObjectCount(resourcePrefix string, count int64) {
|
|||
}
|
||||
|
||||
// RecordEtcdRequestLatency records how long an etcd request took, measured
// from startTime. verb and resource are passed positionally as the metric
// label values (the vectors are declared with the labels "operation" and
// "type").
//
// The elapsed time is observed as sinceInSeconds(startTime) on the
// seconds-based metric and as sinceInMicroseconds(startTime) on the deprecated
// microsecond summary.
//
// NOTE(review): this span comes from a rendered diff, so both the
// etcdRequestLatenciesSummary line and the etcdRequestLatency line are shown;
// presumably the former is the pre-change form of the latter and only one of
// them exists in the real file — confirm against the repository.
func RecordEtcdRequestLatency(verb, resource string, startTime time.Time) {
|
||||
etcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime))
|
||||
etcdRequestLatency.WithLabelValues(verb, resource).Observe(sinceInSeconds(startTime))
|
||||
deprecatedEtcdRequestLatenciesSummary.WithLabelValues(verb, resource).Observe(sinceInMicroseconds(startTime))
|
||||
}
|
||||
|
||||
|
@ -168,7 +168,7 @@ func Reset() {
|
|||
cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts)
|
||||
// TODO: Reset cacheAddLatency.
|
||||
// TODO: Reset cacheGetLatency.
|
||||
etcdRequestLatenciesSummary.Reset()
|
||||
etcdRequestLatency.Reset()
|
||||
|
||||
deprecatedCacheHitCounter = prometheus.NewCounter(deprecatedCacheHitCounterOpts)
|
||||
deprecatedCacheMissCounter = prometheus.NewCounter(deprecatedCacheMissCounterOpts)
|
||||
|
|
|
@ -46,7 +46,7 @@ var (
|
|||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "transformation_latencies_microseconds",
|
||||
Help: "Latencies in microseconds of value transformation operations.",
|
||||
Help: "(Deprecated) Latencies in microseconds of value transformation operations.",
|
||||
// In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when
|
||||
// external KMS is involved latencies may climb into milliseconds.
|
||||
Buckets: prometheus.ExponentialBuckets(5, 2, 14),
|
||||
|
@ -86,7 +86,7 @@ var (
|
|||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "data_key_generation_latencies_microseconds",
|
||||
Help: "Latencies in microseconds of data encryption key(DEK) generation operations.",
|
||||
Help: "(Deprecated) Latencies in microseconds of data encryption key(DEK) generation operations.",
|
||||
Buckets: prometheus.ExponentialBuckets(5, 2, 14),
|
||||
},
|
||||
)
|
||||
|
|
|
@ -136,14 +136,14 @@ func (m *MetricsForE2E) SummaryKind() string {
|
|||
var SchedulingLatencyMetricName = model.LabelValue(schedulermetric.SchedulerSubsystem + "_" + schedulermetric.SchedulingLatencyName)
|
||||
|
||||
var InterestingApiServerMetrics = []string{
|
||||
"apiserver_request_count",
|
||||
"apiserver_request_latencies_summary",
|
||||
"etcd_helper_cache_entry_count",
|
||||
"etcd_helper_cache_hit_count",
|
||||
"etcd_helper_cache_miss_count",
|
||||
"etcd_request_cache_add_latencies_summary",
|
||||
"etcd_request_cache_get_latencies_summary",
|
||||
"etcd_request_latencies_summary",
|
||||
"apiserver_request_total",
|
||||
"apiserver_request_latency_seconds_summary",
|
||||
"etcd_helper_cache_entry_total",
|
||||
"etcd_helper_cache_hit_total",
|
||||
"etcd_helper_cache_miss_total",
|
||||
"etcd_request_cache_add_latency_seconds",
|
||||
"etcd_request_cache_get_latency_seconds",
|
||||
"etcd_request_latency_seconds",
|
||||
}
|
||||
|
||||
var InterestingControllerManagerMetrics = []string{
|
||||
|
@ -475,10 +475,10 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
|
|||
|
||||
for _, sample := range samples {
|
||||
// Example line:
|
||||
// apiserver_request_latencies_summary{resource="namespaces",verb="LIST",quantile="0.99"} 908
|
||||
// apiserver_request_count{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233
|
||||
if sample.Metric[model.MetricNameLabel] != "apiserver_request_latencies_summary" &&
|
||||
sample.Metric[model.MetricNameLabel] != "apiserver_request_count" {
|
||||
// apiserver_request_latency_seconds_summary{resource="namespaces",verb="LIST",quantile="0.99"} 0.000908
|
||||
// apiserver_request_total{resource="pods",verb="LIST",client="kubectl",code="200",contentType="json"} 233
|
||||
if sample.Metric[model.MetricNameLabel] != "apiserver_request_latency_seconds_summary" &&
|
||||
sample.Metric[model.MetricNameLabel] != "apiserver_request_total" {
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -491,14 +491,14 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
|
|||
}
|
||||
|
||||
switch sample.Metric[model.MetricNameLabel] {
|
||||
case "apiserver_request_latencies_summary":
|
||||
case "apiserver_request_latency_seconds_summary":
|
||||
latency := sample.Value
|
||||
quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Microsecond)
|
||||
case "apiserver_request_count":
|
||||
// The *_seconds_summary sample is a fractional number of seconds (see the
// example line above, e.g. 0.000908). Scale in floating point before
// converting: int64(latency) truncates toward zero, which would turn every
// sub-second latency into a zero Duration.
a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(float64(latency)*float64(time.Second)))
|
||||
case "apiserver_request_total":
|
||||
count := sample.Value
|
||||
a.addMetricRequestCount(resource, subresource, verb, scope, int(count))
|
||||
|
||||
|
|
|
@ -120,7 +120,7 @@ func TestApiserverMetrics(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
checkForExpectedMetrics(t, metrics, []string{
|
||||
"apiserver_request_count",
|
||||
"apiserver_request_latencies",
|
||||
"apiserver_request_total",
|
||||
"apiserver_request_latency_seconds",
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue