Merge pull request #74418 from danielqsj/duration

convert latency/latencies in metrics name to duration
pull/564/head
Kubernetes Prow Robot 2019-03-01 17:58:12 -08:00 committed by GitHub
commit 9b8c58644a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 86 additions and 35 deletions

View File

@ -32,13 +32,23 @@ var (
// "verb" and "url" labels. It is used for the rest client latency metrics.
requestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_latency_seconds",
Name: "rest_client_request_duration_seconds",
Help: "Request latency in seconds. Broken down by verb and URL.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
},
[]string{"verb", "url"},
)
// deprecatedRequestLatency is deprecated, please use requestLatency.
deprecatedRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_latency_seconds",
Help: "(Deprecated) Request latency in seconds. Broken down by verb and URL.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
},
[]string{"verb", "url"},
)
requestResult = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "rest_client_requests_total",
@ -50,16 +60,19 @@ var (
func init() {
prometheus.MustRegister(requestLatency)
prometheus.MustRegister(deprecatedRequestLatency)
prometheus.MustRegister(requestResult)
metrics.Register(&latencyAdapter{requestLatency}, &resultAdapter{requestResult})
metrics.Register(&latencyAdapter{m: requestLatency, dm: deprecatedRequestLatency}, &resultAdapter{requestResult})
}
type latencyAdapter struct {
m *prometheus.HistogramVec
m *prometheus.HistogramVec
dm *prometheus.HistogramVec
}
func (l *latencyAdapter) Observe(verb string, u url.URL, latency time.Duration) {
l.m.WithLabelValues(verb, u.String()).Observe(latency.Seconds())
l.dm.WithLabelValues(verb, u.String()).Observe(latency.Seconds())
}
type resultAdapter struct {

View File

@ -27,7 +27,7 @@ const (
// DockerOperationsKey is the key for docker operation metrics.
DockerOperationsKey = "docker_operations_total"
// DockerOperationsLatencyKey is the key for the operation latency metrics.
DockerOperationsLatencyKey = "docker_operations_latency_seconds"
DockerOperationsLatencyKey = "docker_operations_duration_seconds"
// DockerOperationsErrorsKey is the key for the operation error metrics.
DockerOperationsErrorsKey = "docker_operations_errors_total"
// DockerOperationsTimeoutKey is the key for the operation timeout metrics.

View File

@ -27,7 +27,7 @@ const (
// NetworkPluginOperationsKey is the key for operation count metrics.
NetworkPluginOperationsKey = "network_plugin_operations"
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_seconds"
NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
// DeprecatedNetworkPluginOperationsLatencyKey is the deprecated key for the operation latency metrics.
DeprecatedNetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds"

View File

@ -388,9 +388,13 @@ func (m *Master) installTunneler(nodeTunneler tunneler.Tunneler, nodeClient core
nodeTunneler.Run(nodeAddressProvider{nodeClient}.externalAddresses)
m.GenericAPIServer.AddHealthzChecks(healthz.NamedCheck("SSH Tunnel Check", tunneler.TunnelSyncHealthChecker(nodeTunneler)))
prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "apiserver_proxy_tunnel_sync_latency_secs",
Name: "apiserver_proxy_tunnel_sync_duration_seconds",
Help: "The time since the last successful synchronization of the SSH tunnels for proxy requests.",
}, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) })
prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "apiserver_proxy_tunnel_sync_latency_secs",
Help: "(Deprecated) The time since the last successful synchronization of the SSH tunnels for proxy requests.",
}, func() float64 { return float64(nodeTunneler.SecondsSinceSync()) })
}
// RESTStorageProvider is a factory type for REST storage.

View File

@ -1356,6 +1356,7 @@ func (proxier *Proxier) syncProxyRules() {
for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency)
metrics.DeprecatedNetworkProgrammingLatency.Observe(latency)
klog.V(4).Infof("Network programming took %f seconds", latency)
}

View File

@ -1203,6 +1203,7 @@ func (proxier *Proxier) syncProxyRules() {
for _, lastChangeTriggerTime := range endpointUpdateResult.LastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency)
metrics.DeprecatedNetworkProgrammingLatency.Observe(latency)
klog.V(4).Infof("Network programming took %f seconds", latency)
}

View File

@ -30,7 +30,7 @@ var (
SyncProxyRulesLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_latency_seconds",
Name: "sync_proxy_rules_duration_seconds",
Help: "SyncProxyRules latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
@ -56,13 +56,25 @@ var (
NetworkProgrammingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "network_programming_latency_seconds",
Name: "network_programming_duration_seconds",
Help: "In Cluster Network Programming Latency in seconds",
// TODO(mm4tt): Reevaluate buckets before 1.14 release.
// The last bucket will be [0.001s*2^20 ~= 17min, +inf)
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
},
)
// DeprecatedNetworkProgrammingLatency is deprecated, please use NetworkProgrammingLatency.
DeprecatedNetworkProgrammingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "network_programming_latency_seconds",
Help: "(Deprecated) In Cluster Network Programming Latency in seconds",
// TODO(mm4tt): Reevaluate buckets before 1.14 release.
// The last bucket will be [0.001s*2^20 ~= 17min, +inf)
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
},
)
)
var registerMetricsOnce sync.Once
@ -73,6 +85,7 @@ func RegisterMetrics() {
prometheus.MustRegister(SyncProxyRulesLatency)
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
prometheus.MustRegister(NetworkProgrammingLatency)
prometheus.MustRegister(DeprecatedNetworkProgrammingLatency)
})
}

View File

@ -29,7 +29,7 @@ var (
SyncProxyRulesLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_latency_seconds",
Name: "sync_proxy_rules_duration_seconds",
Help: "SyncProxyRules latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},

View File

@ -196,6 +196,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInSeconds(startPredicateEvalTime))
metrics.DeprecatedSchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
trace.Step("Prioritizing")
startPriorityEvalTime := time.Now()
@ -218,6 +219,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInSeconds(startPriorityEvalTime))
metrics.DeprecatedSchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
trace.Step("Selecting host")

View File

@ -28,7 +28,9 @@ const (
// SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler"
// SchedulingLatencyName - scheduler latency metric name
SchedulingLatencyName = "scheduling_latency_seconds"
SchedulingLatencyName = "scheduling_duration_seconds"
// DeprecatedSchedulingLatencyName - scheduler latency metric name which is deprecated
DeprecatedSchedulingLatencyName = "scheduling_latency_seconds"
// OperationLabel - operation label name
OperationLabel = "operation"
@ -70,10 +72,21 @@ var (
},
[]string{OperationLabel},
)
DeprecatedSchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: DeprecatedSchedulingLatencyName,
Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_seconds",
Name: "e2e_scheduling_duration_seconds",
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
@ -89,7 +102,7 @@ var (
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_seconds",
Name: "scheduling_algorithm_duration_seconds",
Help: "Scheduling algorithm latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
@ -153,7 +166,7 @@ var (
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_latency_seconds",
Name: "binding_duration_seconds",
Help: "Binding latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
@ -182,6 +195,7 @@ var (
metricsList = []prometheus.Collector{
scheduleAttempts,
SchedulingLatency,
DeprecatedSchedulingLatency,
E2eSchedulingLatency,
DeprecatedE2eSchedulingLatency,
SchedulingAlgorithmLatency,
@ -216,6 +230,7 @@ func Register() {
// Reset resets metrics
func Reset() {
SchedulingLatency.Reset()
DeprecatedSchedulingLatency.Reset()
}
// SinceInMicroseconds gets the time since the specified start in microseconds.

View File

@ -426,6 +426,7 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
metrics.BindingLatency.Observe(metrics.SinceInSeconds(bindingStart))
metrics.DeprecatedBindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name)
return nil
}
@ -470,6 +471,7 @@ func (sched *Scheduler) scheduleOne() {
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
}
// Pod did not fit anywhere, so it is counted as a failure. If preemption
// succeeds, the pod should get counted as a success the next time we try to

View File

@ -31,7 +31,7 @@ const (
WorkQueueSubsystem = "workqueue"
DepthKey = "depth"
AddsKey = "adds_total"
QueueLatencyKey = "queue_latency_seconds"
QueueLatencyKey = "queue_duration_seconds"
WorkDurationKey = "work_duration_seconds"
UnfinishedWorkKey = "unfinished_work_seconds"
LongestRunningProcessorKey = "longest_running_processor_seconds"

View File

@ -166,7 +166,7 @@ func newMetricSet(name string, labels []string, helpTemplate string, hasSummary
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: fmt.Sprintf("%s_admission_latencies_seconds_summary", name),
Name: fmt.Sprintf("%s_admission_duration_seconds_summary", name),
Help: fmt.Sprintf(helpTemplate, "latency summary in seconds"),
MaxAge: latencySummaryMaxAge,
},
@ -189,7 +189,7 @@ func newMetricSet(name string, labels []string, helpTemplate string, hasSummary
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: fmt.Sprintf("%s_admission_latencies_seconds", name),
Name: fmt.Sprintf("%s_admission_duration_seconds", name),
Help: fmt.Sprintf(helpTemplate, "latency histogram in seconds"),
Buckets: latencyBuckets,
},

View File

@ -41,12 +41,12 @@ func TestObserveAdmissionStep(t *testing.T) {
"type": "admit",
"rejected": "false",
}
expectHistogramCountTotal(t, "apiserver_admission_step_admission_latencies_seconds", wantLabels, 1)
expectFindMetric(t, "apiserver_admission_step_admission_latencies_seconds_summary", wantLabels)
expectHistogramCountTotal(t, "apiserver_admission_step_admission_duration_seconds", wantLabels, 1)
expectFindMetric(t, "apiserver_admission_step_admission_duration_seconds_summary", wantLabels)
wantLabels["type"] = "validate"
expectHistogramCountTotal(t, "apiserver_admission_step_admission_latencies_seconds", wantLabels, 1)
expectFindMetric(t, "apiserver_admission_step_admission_latencies_seconds_summary", wantLabels)
expectHistogramCountTotal(t, "apiserver_admission_step_admission_duration_seconds", wantLabels, 1)
expectFindMetric(t, "apiserver_admission_step_admission_duration_seconds_summary", wantLabels)
}
func TestObserveAdmissionController(t *testing.T) {
@ -60,10 +60,10 @@ func TestObserveAdmissionController(t *testing.T) {
"type": "admit",
"rejected": "false",
}
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_latencies_seconds", wantLabels, 1)
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_duration_seconds", wantLabels, 1)
wantLabels["type"] = "validate"
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_latencies_seconds", wantLabels, 1)
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_duration_seconds", wantLabels, 1)
}
func TestObserveWebhook(t *testing.T) {
@ -75,7 +75,7 @@ func TestObserveWebhook(t *testing.T) {
"type": "admit",
"rejected": "false",
}
expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_latencies_seconds", wantLabels, 1)
expectHistogramCountTotal(t, "apiserver_admission_webhook_admission_duration_seconds", wantLabels, 1)
}
func TestWithMetrics(t *testing.T) {
@ -158,9 +158,9 @@ func TestWithMetrics(t *testing.T) {
filter["rejected"] = "true"
}
if _, mutating := test.handler.(admission.MutationInterface); mutating {
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_latencies_seconds", filter, 1)
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_duration_seconds", filter, 1)
} else {
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_latencies_seconds", filter, 0)
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_duration_seconds", filter, 0)
}
if err != nil {
@ -183,9 +183,9 @@ func TestWithMetrics(t *testing.T) {
filter["rejected"] = "true"
}
if _, validating := test.handler.(admission.ValidationInterface); validating {
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_latencies_seconds", filter, 1)
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_duration_seconds", filter, 1)
} else {
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_latencies_seconds", filter, 0)
expectHistogramCountTotal(t, "apiserver_admission_controller_admission_duration_seconds", filter, 0)
}
}
}

View File

@ -70,7 +70,7 @@ var (
)
requestLatencies = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "apiserver_request_latency_seconds",
Name: "apiserver_request_duration_seconds",
Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.",
// This metric is used for verifying api call latencies SLO,
// as well as tracking regressions in this aspects.

View File

@ -42,19 +42,19 @@ var (
cacheEntryCounter = prometheus.NewCounter(cacheEntryCounterOpts)
cacheGetLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "etcd_request_cache_get_latency_seconds",
Name: "etcd_request_cache_get_duration_seconds",
Help: "Latency in seconds of getting an object from etcd cache",
},
)
cacheAddLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "etcd_request_cache_add_latency_seconds",
Name: "etcd_request_cache_add_duration_seconds",
Help: "Latency in seconds of adding an object to etcd cache",
},
)
etcdRequestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "etcd_request_latency_seconds",
Name: "etcd_request_duration_seconds",
Help: "Etcd request latency in seconds for each operation and object type.",
},
[]string{"operation", "type"},

View File

@ -33,7 +33,7 @@ var (
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "transformation_latencies_seconds",
Name: "transformation_duration_seconds",
Help: "Latencies in seconds of value transformation operations.",
// In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when
// external KMS is involved latencies may climb into milliseconds.
@ -76,7 +76,7 @@ var (
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "data_key_generation_latencies_seconds",
Name: "data_key_generation_duration_seconds",
Help: "Latencies in seconds of data encryption key(DEK) generation operations.",
Buckets: prometheus.ExponentialBuckets(5e-6, 2, 14),
},

View File

@ -186,7 +186,7 @@ var InterestingControllerManagerMetrics = []string{
var InterestingKubeletMetrics = []string{
"kubelet_container_manager_latency_microseconds",
"kubelet_docker_errors",
"kubelet_docker_operations_latency_seconds",
"kubelet_docker_operations_duration_seconds",
"kubelet_generate_pod_status_latency_microseconds",
"kubelet_pod_start_duration_seconds",
"kubelet_pod_worker_duration_seconds",

View File

@ -121,6 +121,6 @@ func TestApiserverMetrics(t *testing.T) {
}
checkForExpectedMetrics(t, metrics, []string{
"apiserver_request_total",
"apiserver_request_latency_seconds",
"apiserver_request_duration_seconds",
})
}