convert latency in metrics name to duration

pull/564/head
danielqsj 2019-02-22 21:40:13 +08:00
parent 9e4f8d6fae
commit f7b437cae0
16 changed files with 70 additions and 20 deletions

View File

@ -32,13 +32,23 @@ var (
// "verb" and "url" labels. It is used for the rest client latency metrics. // "verb" and "url" labels. It is used for the rest client latency metrics.
requestLatency = prometheus.NewHistogramVec( requestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Name: "rest_client_request_latency_seconds", Name: "rest_client_request_duration_seconds",
Help: "Request latency in seconds. Broken down by verb and URL.", Help: "Request latency in seconds. Broken down by verb and URL.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10), Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
}, },
[]string{"verb", "url"}, []string{"verb", "url"},
) )
// deprecatedRequestLatency is deprecated, please use requestLatency.
deprecatedRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_latency_seconds",
Help: "(Deprecated) Request latency in seconds. Broken down by verb and URL.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
},
[]string{"verb", "url"},
)
requestResult = prometheus.NewCounterVec( requestResult = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Name: "rest_client_requests_total", Name: "rest_client_requests_total",
@ -50,16 +60,19 @@ var (
func init() { func init() {
prometheus.MustRegister(requestLatency) prometheus.MustRegister(requestLatency)
prometheus.MustRegister(deprecatedRequestLatency)
prometheus.MustRegister(requestResult) prometheus.MustRegister(requestResult)
metrics.Register(&latencyAdapter{requestLatency}, &resultAdapter{requestResult}) metrics.Register(&latencyAdapter{m: requestLatency, dm: deprecatedRequestLatency}, &resultAdapter{requestResult})
} }
type latencyAdapter struct { type latencyAdapter struct {
m *prometheus.HistogramVec m *prometheus.HistogramVec
dm *prometheus.HistogramVec
} }
func (l *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) { func (l *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) {
l.m.WithLabelValues(verb, u.String()).Observe(latency.Seconds()) l.m.WithLabelValues(verb, u.String()).Observe(latency.Seconds())
l.dm.WithLabelValues(verb, u.String()).Observe(latency.Seconds())
} }
type resultAdapter struct { type resultAdapter struct {

View File

@ -27,7 +27,7 @@ const (
// DockerOperationsKey is the key for docker operation metrics. // DockerOperationsKey is the key for docker operation metrics.
DockerOperationsKey = "docker_operations_total" DockerOperationsKey = "docker_operations_total"
// DockerOperationsLatencyKey is the key for the operation latency metrics. // DockerOperationsLatencyKey is the key for the operation latency metrics.
DockerOperationsLatencyKey = "docker_operations_latency_seconds" DockerOperationsLatencyKey = "docker_operations_duration_seconds"
// DockerOperationsErrorsKey is the key for the operation error metrics. // DockerOperationsErrorsKey is the key for the operation error metrics.
DockerOperationsErrorsKey = "docker_operations_errors_total" DockerOperationsErrorsKey = "docker_operations_errors_total"
// DockerOperationsTimeoutKey is the key for the operation timeout metrics. // DockerOperationsTimeoutKey is the key for the operation timeout metrics.

View File

@ -27,7 +27,7 @@ const (
// NetworkPluginOperationsKey is the key for operation count metrics. // NetworkPluginOperationsKey is the key for operation count metrics.
NetworkPluginOperationsKey = "network_plugin_operations" NetworkPluginOperationsKey = "network_plugin_operations"
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics. // NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_seconds" NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
// DeprecatedNetworkPluginOperationsLatencyKey is the deprecated key for the operation latency metrics. // DeprecatedNetworkPluginOperationsLatencyKey is the deprecated key for the operation latency metrics.
DeprecatedNetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds" DeprecatedNetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds"

View File

@ -387,9 +387,13 @@ func (m *Master) installTunneler(nodeTunneler tunneler.Tunneler, nodeClient core
nodeTunneler.Run(nodeAddressProvider{nodeClient}.externalAddresses) nodeTunneler.Run(nodeAddressProvider{nodeClient}.externalAddresses)
m.GenericAPIServer.AddHealthzChecks(healthz.NamedCheck("SSH Tunnel Check", tunneler.TunnelSyncHealthChecker(nodeTunneler))) m.GenericAPIServer.AddHealthzChecks(healthz.NamedCheck("SSH Tunnel Check", tunneler.TunnelSyncHealthChecker(nodeTunneler)))
prometheus.NewGaugeFunc(prometheus.GaugeOpts{ prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "apiserver_proxy_tunnel_sync_latency_secs", Name: "apiserver_proxy_tunnel_sync_duration_seconds",
Help: "The time since the last successful synchronization of the SSH tunnels for proxy requests.", Help: "The time since the last successful synchronization of the SSH tunnels for proxy requests.",
}, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) }) }, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) })
prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "apiserver_proxy_tunnel_sync_latency_secs",
Help: "(Deprecated) The time since the last successful synchronization of the SSH tunnels for proxy requests.",
}, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) })
} }
// RESTStorageProvider is a factory type for REST storage. // RESTStorageProvider is a factory type for REST storage.

View File

@ -1356,6 +1356,7 @@ func (proxier *Proxier) syncProxyRules() {
for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes { for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime) latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency) metrics.NetworkProgrammingLatency.Observe(latency)
metrics.DeprecatedNetworkProgrammingLatency.Observe(latency)
klog.V(4).Infof("Network programming took %f seconds", latency) klog.V(4).Infof("Network programming took %f seconds", latency)
} }

View File

@ -1203,6 +1203,7 @@ func (proxier *Proxier) syncProxyRules() {
for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes { for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime) latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency) metrics.NetworkProgrammingLatency.Observe(latency)
metrics.DeprecatedNetworkProgrammingLatency.Observe(latency)
klog.V(4).Infof("Network programming took %f seconds", latency) klog.V(4).Infof("Network programming took %f seconds", latency)
} }

View File

@ -30,7 +30,7 @@ var (
SyncProxyRulesLatency = prometheus.NewHistogram( SyncProxyRulesLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem, Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_latency_seconds", Name: "sync_proxy_rules_duration_seconds",
Help: "SyncProxyRules latency in seconds", Help: "SyncProxyRules latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
}, },
@ -56,13 +56,25 @@ var (
NetworkProgrammingLatency = prometheus.NewHistogram( NetworkProgrammingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem, Subsystem: kubeProxySubsystem,
Name: "network_programming_latency_seconds", Name: "network_programming_duration_seconds",
Help: "In Cluster Network Programming Latency in seconds", Help: "In Cluster Network Programming Latency in seconds",
// TODO(mm4tt): Reevaluate buckets before 1.14 release. // TODO(mm4tt): Reevaluate buckets before 1.14 release.
// The last bucket will be [0.001s*2^20 ~= 17min, +inf) // The last bucket will be [0.001s*2^20 ~= 17min, +inf)
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
}, },
) )
// DeprecatedNetworkProgrammingLatency is deprecated, please use NetworkProgrammingLatency.
DeprecatedNetworkProgrammingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "network_programming_latency_seconds",
Help: "(Deprecated) In Cluster Network Programming Latency in seconds",
// TODO(mm4tt): Reevaluate buckets before 1.14 release.
// The last bucket will be [0.001s*2^20 ~= 17min, +inf)
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
},
)
) )
var registerMetricsOnce sync.Once var registerMetricsOnce sync.Once
@ -73,6 +85,7 @@ func RegisterMetrics() {
prometheus.MustRegister(SyncProxyRulesLatency) prometheus.MustRegister(SyncProxyRulesLatency)
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency) prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
prometheus.MustRegister(NetworkProgrammingLatency) prometheus.MustRegister(NetworkProgrammingLatency)
prometheus.MustRegister(DeprecatedNetworkProgrammingLatency)
}) })
} }

View File

@ -29,7 +29,7 @@ var (
SyncProxyRulesLatency = prometheus.NewHistogram( SyncProxyRulesLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem, Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_latency_seconds", Name: "sync_proxy_rules_duration_seconds",
Help: "SyncProxyRules latency in seconds", Help: "SyncProxyRules latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
}, },

View File

@ -196,6 +196,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInSeconds(startPredicateEvalTime)) metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInSeconds(startPredicateEvalTime))
metrics.DeprecatedSchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime)) metrics.DeprecatedSchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
trace.Step("Prioritizing") trace.Step("Prioritizing")
startPriorityEvalTime := time.Now() startPriorityEvalTime := time.Now()
@ -218,6 +219,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInSeconds(startPriorityEvalTime)) metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInSeconds(startPriorityEvalTime))
metrics.DeprecatedSchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime)) metrics.DeprecatedSchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
trace.Step("Selecting host") trace.Step("Selecting host")

View File

@ -28,7 +28,8 @@ const (
// SchedulerSubsystem - subsystem name used by scheduler // SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler" SchedulerSubsystem = "scheduler"
// SchedulingLatencyName - scheduler latency metric name // SchedulingLatencyName - scheduler latency metric name
SchedulingLatencyName = "scheduling_latency_seconds" SchedulingLatencyName = "scheduling_duration_seconds"
DeprecatedSchedulingLatencyName = "scheduling_latency_seconds"
// OperationLabel - operation label name // OperationLabel - operation label name
OperationLabel = "operation" OperationLabel = "operation"
@ -70,10 +71,21 @@ var (
}, },
[]string{OperationLabel}, []string{OperationLabel},
) )
DeprecatedSchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: DeprecatedSchedulingLatencyName,
Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
E2eSchedulingLatency = prometheus.NewHistogram( E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_seconds", Name: "e2e_scheduling_duration_seconds",
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)", Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
}, },
@ -89,7 +101,7 @@ var (
SchedulingAlgorithmLatency = prometheus.NewHistogram( SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_seconds", Name: "scheduling_algorithm_duration_seconds",
Help: "Scheduling algorithm latency in seconds", Help: "Scheduling algorithm latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
}, },
@ -153,7 +165,7 @@ var (
BindingLatency = prometheus.NewHistogram( BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "binding_latency_seconds", Name: "binding_duration_seconds",
Help: "Binding latency in seconds", Help: "Binding latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
}, },
@ -182,6 +194,7 @@ var (
metricsList = []prometheus.Collector{ metricsList = []prometheus.Collector{
scheduleAttempts, scheduleAttempts,
SchedulingLatency, SchedulingLatency,
DeprecatedSchedulingLatency,
E2eSchedulingLatency, E2eSchedulingLatency,
DeprecatedE2eSchedulingLatency, DeprecatedE2eSchedulingLatency,
SchedulingAlgorithmLatency, SchedulingAlgorithmLatency,
@ -216,6 +229,7 @@ func Register() {
// Reset resets metrics // Reset resets metrics
func Reset() { func Reset() {
SchedulingLatency.Reset() SchedulingLatency.Reset()
DeprecatedSchedulingLatency.Reset()
} }
// SinceInMicroseconds gets the time since the specified start in microseconds. // SinceInMicroseconds gets the time since the specified start in microseconds.

View File

@ -427,6 +427,7 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
metrics.BindingLatency.Observe(metrics.SinceInSeconds(bindingStart)) metrics.BindingLatency.Observe(metrics.SinceInSeconds(bindingStart))
metrics.DeprecatedBindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart)) metrics.DeprecatedBindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart)) metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name) sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name)
return nil return nil
} }
@ -471,6 +472,7 @@ func (sched *Scheduler) scheduleOne() {
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime)) metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime)) metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
} }
// Pod did not fit anywhere, so it is counted as a failure. If preemption // Pod did not fit anywhere, so it is counted as a failure. If preemption
// succeeds, the pod should get counted as a success the next time we try to // succeeds, the pod should get counted as a success the next time we try to

View File

@ -31,7 +31,7 @@ const (
WorkQueueSubsystem = "workqueue" WorkQueueSubsystem = "workqueue"
DepthKey = "depth" DepthKey = "depth"
AddsKey = "adds_total" AddsKey = "adds_total"
QueueLatencyKey = "queue_latency_seconds" QueueLatencyKey = "queue_duration_seconds"
WorkDurationKey = "work_duration_seconds" WorkDurationKey = "work_duration_seconds"
UnfinishedWorkKey = "unfinished_work_seconds" UnfinishedWorkKey = "unfinished_work_seconds"
LongestRunningProcessorKey = "longest_running_processor_seconds" LongestRunningProcessorKey = "longest_running_processor_seconds"

View File

@ -70,7 +70,7 @@ var (
) )
requestLatencies = prometheus.NewHistogramVec( requestLatencies = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Name: "apiserver_request_latency_seconds", Name: "apiserver_request_duration_seconds",
Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.", Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.",
// This metric is used for verifying api call latencies SLO, // This metric is used for verifying api call latencies SLO,
// as well as tracking regressions in this aspects. // as well as tracking regressions in this aspects.

View File

@ -42,19 +42,19 @@ var (
cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts) cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts)
cacheGetLatency = prometheus.NewHistogram( cacheGetLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Name: "etcd_request_cache_get_latency_seconds", Name: "etcd_request_cache_get_duration_seconds",
Help: "Latency in seconds of getting an object from etcd cache", Help: "Latency in seconds of getting an object from etcd cache",
}, },
) )
cacheAddLatency = prometheus.NewHistogram( cacheAddLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Name: "etcd_request_cache_add_latency_seconds", Name: "etcd_request_cache_add_duration_seconds",
Help: "Latency in seconds of adding an object to etcd cache", Help: "Latency in seconds of adding an object to etcd cache",
}, },
) )
etcdRequestLatency = prometheus.NewHistogramVec( etcdRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Name: "etcd_request_latency_seconds", Name: "etcd_request_duration_seconds",
Help: "Etcd request latency in seconds for each operation and object type.", Help: "Etcd request latency in seconds for each operation and object type.",
}, },
[]string{"operation", "type"}, []string{"operation", "type"},

View File

@ -186,7 +186,7 @@ var InterestingControllerManagerMetrics = []string{
var InterestingKubeletMetrics = []string{ var InterestingKubeletMetrics = []string{
"kubelet_container_manager_latency_microseconds", "kubelet_container_manager_latency_microseconds",
"kubelet_docker_errors", "kubelet_docker_errors",
"kubelet_docker_operations_latency_seconds", "kubelet_docker_operations_duration_seconds",
"kubelet_generate_pod_status_latency_microseconds", "kubelet_generate_pod_status_latency_microseconds",
"kubelet_pod_start_duration_seconds", "kubelet_pod_start_duration_seconds",
"kubelet_pod_worker_duration_seconds", "kubelet_pod_worker_duration_seconds",

View File

@ -121,6 +121,6 @@ func TestApiserverMetrics(t *testing.T) {
} }
checkForExpectedMetrics(t, metrics, []string{ checkForExpectedMetrics(t, metrics, []string{
"apiserver_request_total", "apiserver_request_total",
"apiserver_request_latency_seconds", "apiserver_request_duration_seconds",
}) })
} }