From f7b437cae02c93a0c83a3ad6c044f43751c20346 Mon Sep 17 00:00:00 2001
From: danielqsj
Date: Fri, 22 Feb 2019 21:40:13 +0800
Subject: [PATCH] convert latency in metric names to duration

---
 pkg/client/metrics/prometheus/prometheus.go | 19 +++++++++++++---
 pkg/kubelet/dockershim/metrics/metrics.go   |  2 +-
 .../dockershim/network/metrics/metrics.go   |  2 +-
 pkg/master/master.go                        |  6 ++++-
 pkg/proxy/iptables/proxier.go               |  1 +
 pkg/proxy/ipvs/proxier.go                   |  1 +
 pkg/proxy/metrics/metrics.go                | 17 ++++++++++++--
 pkg/proxy/winkernel/metrics.go              |  2 +-
 pkg/scheduler/core/generic_scheduler.go     |  2 ++
 pkg/scheduler/metrics/metrics.go            | 22 +++++++++++++++----
 pkg/scheduler/scheduler.go                  |  2 ++
 pkg/util/workqueue/prometheus/prometheus.go |  2 +-
 .../pkg/endpoints/metrics/metrics.go        |  2 +-
 .../pkg/storage/etcd/metrics/metrics.go     |  6 ++---
 test/e2e/framework/metrics_util.go          |  2 +-
 test/integration/metrics/metrics_test.go    |  2 +-
 16 files changed, 70 insertions(+), 20 deletions(-)

diff --git a/pkg/client/metrics/prometheus/prometheus.go b/pkg/client/metrics/prometheus/prometheus.go
index 45a08f6500..c7804dfc2c 100644
--- a/pkg/client/metrics/prometheus/prometheus.go
+++ b/pkg/client/metrics/prometheus/prometheus.go
@@ -32,13 +32,23 @@ var (
 	// "verb" and "url" labels. It is used for the rest client latency metrics.
 	requestLatency = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Name:    "rest_client_request_latency_seconds",
+			Name:    "rest_client_request_duration_seconds",
 			Help:    "Request latency in seconds. Broken down by verb and URL.",
 			Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
 		},
 		[]string{"verb", "url"},
 	)
 
+	// deprecatedRequestLatency is deprecated, please use requestLatency.
+	deprecatedRequestLatency = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:    "rest_client_request_latency_seconds",
+			Help:    "(Deprecated) Request latency in seconds. Broken down by verb and URL.",
+			Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
+		},
+		[]string{"verb", "url"},
+	)
+
 	requestResult = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Name: "rest_client_requests_total",
@@ -50,16 +60,19 @@ var (
 
 func init() {
 	prometheus.MustRegister(requestLatency)
+	prometheus.MustRegister(deprecatedRequestLatency)
 	prometheus.MustRegister(requestResult)
-	metrics.Register(&latencyAdapter{requestLatency}, &resultAdapter{requestResult})
+	metrics.Register(&latencyAdapter{m: requestLatency, dm: deprecatedRequestLatency}, &resultAdapter{requestResult})
 }
 
 type latencyAdapter struct {
-	m *prometheus.HistogramVec
+	m  *prometheus.HistogramVec
+	dm *prometheus.HistogramVec
 }
 
 func (l *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) {
 	l.m.WithLabelValues(verb, u.String()).Observe(latency.Seconds())
+	l.dm.WithLabelValues(verb, u.String()).Observe(latency.Seconds())
 }
 
 type resultAdapter struct {
diff --git a/pkg/kubelet/dockershim/metrics/metrics.go b/pkg/kubelet/dockershim/metrics/metrics.go
index d36eeba473..fb6433daa8 100644
--- a/pkg/kubelet/dockershim/metrics/metrics.go
+++ b/pkg/kubelet/dockershim/metrics/metrics.go
@@ -27,7 +27,7 @@ const (
 	// DockerOperationsKey is the key for docker operation metrics.
 	DockerOperationsKey = "docker_operations_total"
 	// DockerOperationsLatencyKey is the key for the operation latency metrics.
-	DockerOperationsLatencyKey = "docker_operations_latency_seconds"
+	DockerOperationsLatencyKey = "docker_operations_duration_seconds"
 	// DockerOperationsErrorsKey is the key for the operation error metrics.
DockerOperationsErrorsKey = "docker_operations_errors_total" // DockerOperationsTimeoutKey is the key for the operation timeout metrics. diff --git a/pkg/kubelet/dockershim/network/metrics/metrics.go b/pkg/kubelet/dockershim/network/metrics/metrics.go index b7cc13c88e..3e2247ba14 100644 --- a/pkg/kubelet/dockershim/network/metrics/metrics.go +++ b/pkg/kubelet/dockershim/network/metrics/metrics.go @@ -27,7 +27,7 @@ const ( // NetworkPluginOperationsKey is the key for operation count metrics. NetworkPluginOperationsKey = "network_plugin_operations" // NetworkPluginOperationsLatencyKey is the key for the operation latency metrics. - NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_seconds" + NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds" // DeprecatedNetworkPluginOperationsLatencyKey is the deprecated key for the operation latency metrics. DeprecatedNetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds" diff --git a/pkg/master/master.go b/pkg/master/master.go index c2c6a0a1df..705473df3d 100644 --- a/pkg/master/master.go +++ b/pkg/master/master.go @@ -387,9 +387,13 @@ func (m *Master) installTunneler(nodeTunneler tunneler.Tunneler, nodeClient core nodeTunneler.Run(nodeAddressProvider{nodeClient}.externalAddresses) m.GenericAPIServer.AddHealthzChecks(healthz.NamedCheck("SSH Tunnel Check", tunneler.TunnelSyncHealthChecker(nodeTunneler))) prometheus.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "apiserver_proxy_tunnel_sync_latency_secs", + Name: "apiserver_proxy_tunnel_sync_duration_seconds", Help: "The time since the last successful synchronization of the SSH tunnels for proxy requests.", }, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) }) + prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: "apiserver_proxy_tunnel_sync_latency_secs", + Help: "(Deprecated) The time since the last successful synchronization of the SSH tunnels for proxy requests.", + }, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) }) } // RESTStorageProvider is a factory type for REST storage. 
diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index a4f8dc8ef7..6f275d113d 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -1356,6 +1356,7 @@ func (proxier *Proxier) syncProxyRules() { for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes { latency := metrics.SinceInSeconds(lastChangeTriggerTime) metrics.NetworkProgrammingLatency.Observe(latency) + metrics.DeprecatedNetworkProgrammingLatency.Observe(latency) klog.V(4).Infof("Network programming took %f seconds", latency) } diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index 49c32a0ca8..bf07ff6b92 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -1203,6 +1203,7 @@ func (proxier *Proxier) syncProxyRules() { for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes { latency := metrics.SinceInSeconds(lastChangeTriggerTime) metrics.NetworkProgrammingLatency.Observe(latency) + metrics.DeprecatedNetworkProgrammingLatency.Observe(latency) klog.V(4).Infof("Network programming took %f seconds", latency) } diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go index d8c5ed1cd2..4d7e755341 100644 --- a/pkg/proxy/metrics/metrics.go +++ b/pkg/proxy/metrics/metrics.go @@ -30,7 +30,7 @@ var ( SyncProxyRulesLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ Subsystem: kubeProxySubsystem, - Name: "sync_proxy_rules_latency_seconds", + Name: "sync_proxy_rules_duration_seconds", Help: "SyncProxyRules latency in seconds", Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }, @@ -56,13 +56,25 @@ var ( NetworkProgrammingLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ Subsystem: kubeProxySubsystem, - Name: "network_programming_latency_seconds", + Name: "network_programming_duration_seconds", Help: "In Cluster Network Programming Latency in seconds", // TODO(mm4tt): Reevaluate buckets before 1.14 release. // The last bucket will be [0.001s*2^20 ~= 17min, +inf) Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), }, ) + + // DeprecatedNetworkProgrammingLatency is deprecated, please use NetworkProgrammingLatency. + DeprecatedNetworkProgrammingLatency = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Subsystem: kubeProxySubsystem, + Name: "network_programming_latency_seconds", + Help: "(Deprecated) In Cluster Network Programming Latency in seconds", + // TODO(mm4tt): Reevaluate buckets before 1.14 release. 
+ // The last bucket will be [0.001s*2^20 ~= 17min, +inf) + Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), + }, + ) ) var registerMetricsOnce sync.Once @@ -73,6 +85,7 @@ func RegisterMetrics() { prometheus.MustRegister(SyncProxyRulesLatency) prometheus.MustRegister(DeprecatedSyncProxyRulesLatency) prometheus.MustRegister(NetworkProgrammingLatency) + prometheus.MustRegister(DeprecatedNetworkProgrammingLatency) }) } diff --git a/pkg/proxy/winkernel/metrics.go b/pkg/proxy/winkernel/metrics.go index db91cf1e41..61cf962ee0 100644 --- a/pkg/proxy/winkernel/metrics.go +++ b/pkg/proxy/winkernel/metrics.go @@ -29,7 +29,7 @@ var ( SyncProxyRulesLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ Subsystem: kubeProxySubsystem, - Name: "sync_proxy_rules_latency_seconds", + Name: "sync_proxy_rules_duration_seconds", Help: "SyncProxyRules latency in seconds", Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }, diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index d11ee4f915..bf6f4b78fa 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -196,6 +196,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInSeconds(startPredicateEvalTime)) metrics.DeprecatedSchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime)) + metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime)) trace.Step("Prioritizing") startPriorityEvalTime := time.Now() @@ -218,6 +219,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInSeconds(startPriorityEvalTime)) metrics.DeprecatedSchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime)) + metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime)) trace.Step("Selecting host") diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index edc9b9becb..0a4c94c4bf 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -28,7 +28,8 @@ const ( // SchedulerSubsystem - subsystem name used by scheduler SchedulerSubsystem = "scheduler" // SchedulingLatencyName - scheduler latency metric name - SchedulingLatencyName = "scheduling_latency_seconds" + SchedulingLatencyName = "scheduling_duration_seconds" + DeprecatedSchedulingLatencyName = "scheduling_latency_seconds" // OperationLabel - operation label name OperationLabel = "operation" @@ -70,10 +71,21 @@ var ( }, []string{OperationLabel}, ) + DeprecatedSchedulingLatency = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Subsystem: SchedulerSubsystem, + Name: DeprecatedSchedulingLatencyName, + Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation", + // Make the sliding window of 5h. + // TODO: The value for this should be based on some SLI definition (long term). 
+ MaxAge: 5 * time.Hour, + }, + []string{OperationLabel}, + ) E2eSchedulingLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ Subsystem: SchedulerSubsystem, - Name: "e2e_scheduling_latency_seconds", + Name: "e2e_scheduling_duration_seconds", Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)", Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }, @@ -89,7 +101,7 @@ var ( SchedulingAlgorithmLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ Subsystem: SchedulerSubsystem, - Name: "scheduling_algorithm_latency_seconds", + Name: "scheduling_algorithm_duration_seconds", Help: "Scheduling algorithm latency in seconds", Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }, @@ -153,7 +165,7 @@ var ( BindingLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ Subsystem: SchedulerSubsystem, - Name: "binding_latency_seconds", + Name: "binding_duration_seconds", Help: "Binding latency in seconds", Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }, @@ -182,6 +194,7 @@ var ( metricsList = []prometheus.Collector{ scheduleAttempts, SchedulingLatency, + DeprecatedSchedulingLatency, E2eSchedulingLatency, DeprecatedE2eSchedulingLatency, SchedulingAlgorithmLatency, @@ -216,6 +229,7 @@ func Register() { // Reset resets metrics func Reset() { SchedulingLatency.Reset() + DeprecatedSchedulingLatency.Reset() } // SinceInMicroseconds gets the time since the specified start in microseconds. diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index b78bab7d7a..7f671e1e59 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -427,6 +427,7 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error { metrics.BindingLatency.Observe(metrics.SinceInSeconds(bindingStart)) metrics.DeprecatedBindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart)) metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart)) + metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart)) sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name) return nil } @@ -471,6 +472,7 @@ func (sched *Scheduler) scheduleOne() { metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime)) metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) + metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) } // Pod did not fit anywhere, so it is counted as a failure. 
If preemption // succeeds, the pod should get counted as a success the next time we try to diff --git a/pkg/util/workqueue/prometheus/prometheus.go b/pkg/util/workqueue/prometheus/prometheus.go index e9c856c1ae..fa9b22f779 100644 --- a/pkg/util/workqueue/prometheus/prometheus.go +++ b/pkg/util/workqueue/prometheus/prometheus.go @@ -31,7 +31,7 @@ const ( WorkQueueSubsystem = "workqueue" DepthKey = "depth" AddsKey = "adds_total" - QueueLatencyKey = "queue_latency_seconds" + QueueLatencyKey = "queue_duration_seconds" WorkDurationKey = "work_duration_seconds" UnfinishedWorkKey = "unfinished_work_seconds" LongestRunningProcessorKey = "longest_running_processor_seconds" diff --git a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go index 0c9a1857eb..1c658d4c82 100644 --- a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go @@ -70,7 +70,7 @@ var ( ) requestLatencies = prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: "apiserver_request_latency_seconds", + Name: "apiserver_request_duration_seconds", Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.", // This metric is used for verifying api call latencies SLO, // as well as tracking regressions in this aspects. diff --git a/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go index ad9def4456..911a343138 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/etcd/metrics/metrics.go @@ -42,19 +42,19 @@ var ( cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts) cacheGetLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ - Name: "etcd_request_cache_get_latency_seconds", + Name: "etcd_request_cache_get_duration_seconds", Help: "Latency in seconds of getting an object from etcd cache", }, ) cacheAddLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ - Name: "etcd_request_cache_add_latency_seconds", + Name: "etcd_request_cache_add_duration_seconds", Help: "Latency in seconds of adding an object to etcd cache", }, ) etcdRequestLatency = prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: "etcd_request_latency_seconds", + Name: "etcd_request_duration_seconds", Help: "Etcd request latency in seconds for each operation and object type.", }, []string{"operation", "type"}, diff --git a/test/e2e/framework/metrics_util.go b/test/e2e/framework/metrics_util.go index 3d83e2b7c0..3bcc29ab29 100644 --- a/test/e2e/framework/metrics_util.go +++ b/test/e2e/framework/metrics_util.go @@ -186,7 +186,7 @@ var InterestingControllerManagerMetrics = []string{ var InterestingKubeletMetrics = []string{ "kubelet_container_manager_latency_microseconds", "kubelet_docker_errors", - "kubelet_docker_operations_latency_seconds", + "kubelet_docker_operations_duration_seconds", "kubelet_generate_pod_status_latency_microseconds", "kubelet_pod_start_duration_seconds", "kubelet_pod_worker_duration_seconds", diff --git a/test/integration/metrics/metrics_test.go b/test/integration/metrics/metrics_test.go index 969dbe6088..22d581ae48 100644 --- a/test/integration/metrics/metrics_test.go +++ b/test/integration/metrics/metrics_test.go @@ -121,6 +121,6 @@ func TestApiserverMetrics(t *testing.T) { } checkForExpectedMetrics(t, metrics, []string{ "apiserver_request_total", - 
"apiserver_request_latency_seconds", + "apiserver_request_duration_seconds", }) }