From 33bf81f22abeb39ee18b159b8248ba207c4f9047 Mon Sep 17 00:00:00 2001 From: Michelle Au Date: Tue, 26 Mar 2019 16:40:56 -0700 Subject: [PATCH] Improve volume operation metrics --- pkg/volume/util/metrics.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pkg/volume/util/metrics.go b/pkg/volume/util/metrics.go index 338e812663..ee20c0e10b 100644 --- a/pkg/volume/util/metrics.go +++ b/pkg/volume/util/metrics.go @@ -24,11 +24,16 @@ import ( "k8s.io/kubernetes/pkg/volume" ) +const ( + statusSuccess = "success" + statusFailUnknown = "fail-unknown" +) + var storageOperationMetric = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "storage_operation_duration_seconds", Help: "Storage operation duration", - Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50}, + Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600}, }, []string{"volume_plugin", "operation_name"}, ) @@ -41,6 +46,14 @@ var storageOperationErrorMetric = prometheus.NewCounterVec( []string{"volume_plugin", "operation_name"}, ) +var storageOperationStatusMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "storage_operation_status_count", + Help: "Storage operation return statuses count", + }, + []string{"volume_plugin", "operation_name", "status"}, +) + func init() { registerMetrics() } @@ -48,6 +61,7 @@ func init() { func registerMetrics() { prometheus.MustRegister(storageOperationMetric) prometheus.MustRegister(storageOperationErrorMetric) + prometheus.MustRegister(storageOperationStatusMetric) } // OperationCompleteHook returns a hook to call when an operation is completed @@ -56,11 +70,16 @@ func OperationCompleteHook(plugin, operationName string) func(*error) { opComplete := func(err *error) { timeTaken := time.Since(requestTime).Seconds() // Create metric with operation name and plugin name + status := statusSuccess if *err != nil { + // TODO: Establish well-known error codes to be able to distinguish + // user configuration errors from system errors. + status = statusFailUnknown storageOperationErrorMetric.WithLabelValues(plugin, operationName).Inc() } else { storageOperationMetric.WithLabelValues(plugin, operationName).Observe(timeTaken) } + storageOperationStatusMetric.WithLabelValues(plugin, operationName, status).Inc() } return opComplete }