convert latency in metric names to duration

pull/564/head
danielqsj 2019-02-22 21:40:13 +08:00
parent 9e4f8d6fae
commit f7b437cae0
16 changed files with 70 additions and 20 deletions

View File

@ -32,13 +32,23 @@ var (
// "verb" and "url" labels. It is used for the rest client latency metrics.
requestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_latency_seconds",
Name: "rest_client_request_duration_seconds",
Help: "Request latency in seconds. Broken down by verb and URL.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
},
[]string{"verb", "url"},
)
// deprecatedRequestLatency is deprecated, please use requestLatency.
deprecatedRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_latency_seconds",
Help: "(Deprecated) Request latency in seconds. Broken down by verb and URL.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
},
[]string{"verb", "url"},
)
requestResult = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "rest_client_requests_total",
@ -50,16 +60,19 @@ var (
func init() {
prometheus.MustRegister(requestLatency)
prometheus.MustRegister(deprecatedRequestLatency)
prometheus.MustRegister(requestResult)
metrics.Register(&latencyAdapter{requestLatency}, &resultAdapter{requestResult})
metrics.Register(&latencyAdapter{m: requestLatency, dm: deprecatedRequestLatency}, &resultAdapter{requestResult})
}
// latencyAdapter bundles the two histogram vectors that receive rest client
// request latencies. Its Observe method writes every sample to both.
type latencyAdapter struct {
// m is the primary histogram vector.
m *prometheus.HistogramVec
// dm is the deprecated histogram vector, kept so the old metric name keeps
// receiving the same samples as m.
dm *prometheus.HistogramVec
}
// Observe records one request latency, expressed in seconds, in both the
// primary histogram (l.m) and the deprecated one (l.dm). Each sample is
// labelled with verb and the string form of u.
func (l *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) {
	// Both histograms take the same label values and the same sample, so
	// serialize the URL and convert the duration once instead of per histogram.
	target := u.String()
	seconds := latency.Seconds()

	l.m.WithLabelValues(verb, target).Observe(seconds)
	l.dm.WithLabelValues(verb, target).Observe(seconds)
}
type resultAdapter struct {

View File

@ -27,7 +27,7 @@ const (
// DockerOperationsKey is the key for docker operation metrics.
DockerOperationsKey = "docker_operations_total"
// DockerOperationsLatencyKey is the key for the operation latency metrics.
DockerOperationsLatencyKey = "docker_operations_latency_seconds"
DockerOperationsLatencyKey = "docker_operations_duration_seconds"
// DockerOperationsErrorsKey is the key for the operation error metrics.
DockerOperationsErrorsKey = "docker_operations_errors_total"
// DockerOperationsTimeoutKey is the key for the operation timeout metrics.

View File

@ -27,7 +27,7 @@ const (
// NetworkPluginOperationsKey is the key for operation count metrics.
NetworkPluginOperationsKey = "network_plugin_operations"
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_seconds"
NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
// DeprecatedNetworkPluginOperationsLatencyKey is the deprecated key for the operation latency metrics.
DeprecatedNetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds"

View File

@ -387,9 +387,13 @@ func (m *Master) installTunneler(nodeTunneler tunneler.Tunneler, nodeClient core
nodeTunneler.Run(nodeAddressProvider{nodeClient}.externalAddresses)
m.GenericAPIServer.AddHealthzChecks(healthz.NamedCheck("SSH Tunnel Check", tunneler.TunnelSyncHealthChecker(nodeTunneler)))
prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "apiserver_proxy_tunnel_sync_latency_secs",
Name: "apiserver_proxy_tunnel_sync_duration_seconds",
Help: "The time since the last successful synchronization of the SSH tunnels for proxy requests.",
}, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) })
prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "apiserver_proxy_tunnel_sync_latency_secs",
Help: "(Deprecated) The time since the last successful synchronization of the SSH tunnels for proxy requests.",
}, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) })
}
// RESTStorageProvider is a factory type for REST storage.

View File

@ -1356,6 +1356,7 @@ func (proxier *Proxier) syncProxyRules() {
for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency)
metrics.DeprecatedNetworkProgrammingLatency.Observe(latency)
klog.V(4).Infof("Network programming took %f seconds", latency)
}

View File

@ -1203,6 +1203,7 @@ func (proxier *Proxier) syncProxyRules() {
for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency)
metrics.DeprecatedNetworkProgrammingLatency.Observe(latency)
klog.V(4).Infof("Network programming took %f seconds", latency)
}

View File

@ -30,7 +30,7 @@ var (
SyncProxyRulesLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_latency_seconds",
Name: "sync_proxy_rules_duration_seconds",
Help: "SyncProxyRules latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
@ -56,13 +56,25 @@ var (
NetworkProgrammingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "network_programming_latency_seconds",
Name: "network_programming_duration_seconds",
Help: "In Cluster Network Programming Latency in seconds",
// TODO(mm4tt): Reevaluate buckets before 1.14 release.
// The last bucket will be [0.001s*2^20 ~= 17min, +inf)
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
},
)
// DeprecatedNetworkProgrammingLatency is deprecated, please use NetworkProgrammingLatency.
DeprecatedNetworkProgrammingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "network_programming_latency_seconds",
Help: "(Deprecated) In Cluster Network Programming Latency in seconds",
// TODO(mm4tt): Reevaluate buckets before 1.14 release.
// The last bucket will be [0.001s*2^20 ~= 17min, +inf)
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
},
)
)
var registerMetricsOnce sync.Once
@ -73,6 +85,7 @@ func RegisterMetrics() {
prometheus.MustRegister(SyncProxyRulesLatency)
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
prometheus.MustRegister(NetworkProgrammingLatency)
prometheus.MustRegister(DeprecatedNetworkProgrammingLatency)
})
}

View File

@ -29,7 +29,7 @@ var (
SyncProxyRulesLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_latency_seconds",
Name: "sync_proxy_rules_duration_seconds",
Help: "SyncProxyRules latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},

View File

@ -196,6 +196,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInSeconds(startPredicateEvalTime))
metrics.DeprecatedSchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
trace.Step("Prioritizing")
startPriorityEvalTime := time.Now()
@ -218,6 +219,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInSeconds(startPriorityEvalTime))
metrics.DeprecatedSchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
trace.Step("Selecting host")

View File

@ -28,7 +28,8 @@ const (
// SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler"
// SchedulingLatencyName - scheduler latency metric name
SchedulingLatencyName = "scheduling_latency_seconds"
SchedulingLatencyName = "scheduling_duration_seconds"
DeprecatedSchedulingLatencyName = "scheduling_latency_seconds"
// OperationLabel - operation label name
OperationLabel = "operation"
@ -70,10 +71,21 @@ var (
},
[]string{OperationLabel},
)
DeprecatedSchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: DeprecatedSchedulingLatencyName,
Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_seconds",
Name: "e2e_scheduling_duration_seconds",
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
@ -89,7 +101,7 @@ var (
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_seconds",
Name: "scheduling_algorithm_duration_seconds",
Help: "Scheduling algorithm latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
@ -153,7 +165,7 @@ var (
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_latency_seconds",
Name: "binding_duration_seconds",
Help: "Binding latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
@ -182,6 +194,7 @@ var (
metricsList = []prometheus.Collector{
scheduleAttempts,
SchedulingLatency,
DeprecatedSchedulingLatency,
E2eSchedulingLatency,
DeprecatedE2eSchedulingLatency,
SchedulingAlgorithmLatency,
@ -216,6 +229,7 @@ func Register() {
// Reset calls Reset on the SchedulingLatency summary vector and on its
// deprecated counterpart, DeprecatedSchedulingLatency. No other metric is
// touched by this function.
func Reset() {
SchedulingLatency.Reset()
DeprecatedSchedulingLatency.Reset()
}
// SinceInMicroseconds gets the time since the specified start in microseconds.

View File

@ -427,6 +427,7 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
metrics.BindingLatency.Observe(metrics.SinceInSeconds(bindingStart))
metrics.DeprecatedBindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name)
return nil
}
@ -471,6 +472,7 @@ func (sched *Scheduler) scheduleOne() {
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
}
// Pod did not fit anywhere, so it is counted as a failure. If preemption
// succeeds, the pod should get counted as a success the next time we try to

View File

@ -31,7 +31,7 @@ const (
WorkQueueSubsystem = "workqueue"
DepthKey = "depth"
AddsKey = "adds_total"
QueueLatencyKey = "queue_latency_seconds"
QueueLatencyKey = "queue_duration_seconds"
WorkDurationKey = "work_duration_seconds"
UnfinishedWorkKey = "unfinished_work_seconds"
LongestRunningProcessorKey = "longest_running_processor_seconds"

View File

@ -70,7 +70,7 @@ var (
)
requestLatencies = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "apiserver_request_latency_seconds",
Name: "apiserver_request_duration_seconds",
Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.",
// This metric is used for verifying api call latencies SLO,
// as well as tracking regressions in this aspects.

View File

@ -42,19 +42,19 @@ var (
cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts)
cacheGetLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "etcd_request_cache_get_latency_seconds",
Name: "etcd_request_cache_get_duration_seconds",
Help: "Latency in seconds of getting an object from etcd cache",
},
)
cacheAddLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "etcd_request_cache_add_latency_seconds",
Name: "etcd_request_cache_add_duration_seconds",
Help: "Latency in seconds of adding an object to etcd cache",
},
)
etcdRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "etcd_request_latency_seconds",
Name: "etcd_request_duration_seconds",
Help: "Etcd request latency in seconds for each operation and object type.",
},
[]string{"operation", "type"},

View File

@ -186,7 +186,7 @@ var InterestingControllerManagerMetrics = []string{
var InterestingKubeletMetrics = []string{
"kubelet_container_manager_latency_microseconds",
"kubelet_docker_errors",
"kubelet_docker_operations_latency_seconds",
"kubelet_docker_operations_duration_seconds",
"kubelet_generate_pod_status_latency_microseconds",
"kubelet_pod_start_duration_seconds",
"kubelet_pod_worker_duration_seconds",

View File

@ -121,6 +121,6 @@ func TestApiserverMetrics(t *testing.T) {
}
checkForExpectedMetrics(t, metrics, []string{
"apiserver_request_total",
"apiserver_request_latency_seconds",
"apiserver_request_duration_seconds",
})
}