diff --git a/pkg/controller/volume/persistentvolume/BUILD b/pkg/controller/volume/persistentvolume/BUILD index bcb58f56d2..7ac8a71884 100644 --- a/pkg/controller/volume/persistentvolume/BUILD +++ b/pkg/controller/volume/persistentvolume/BUILD @@ -13,6 +13,7 @@ go_library( "pv_controller.go", "pv_controller_base.go", "scheduler_assume_cache.go", + "scheduler_bind_cache_metrics.go", "scheduler_binder.go", "scheduler_binder_cache.go", "scheduler_binder_fake.go", @@ -58,6 +59,7 @@ go_library( "//staging/src/k8s.io/cloud-provider:go_default_library", "//staging/src/k8s.io/csi-api/pkg/client/clientset/versioned:go_default_library", "//vendor/github.com/golang/glog:go_default_library", + "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", ], ) diff --git a/pkg/controller/volume/persistentvolume/metrics/BUILD b/pkg/controller/volume/persistentvolume/metrics/BUILD index e231fc8d5c..bcf22a490a 100644 --- a/pkg/controller/volume/persistentvolume/metrics/BUILD +++ b/pkg/controller/volume/persistentvolume/metrics/BUILD @@ -1,14 +1,10 @@ -package(default_visibility = ["//visibility:public"]) - -load( - "@io_bazel_rules_go//go:def.bzl", - "go_library", -) +load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "go_default_library", srcs = ["metrics.go"], importpath = "k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics", + visibility = ["//visibility:public"], deps = [ "//staging/src/k8s.io/api/core/v1:go_default_library", "//vendor/github.com/golang/glog:go_default_library", @@ -27,4 +23,5 @@ filegroup( name = "all-srcs", srcs = [":package-srcs"], tags = ["automanaged"], + visibility = ["//visibility:public"], ) diff --git a/pkg/controller/volume/persistentvolume/scheduler_bind_cache_metrics.go b/pkg/controller/volume/persistentvolume/scheduler_bind_cache_metrics.go new file mode 100644 index 0000000000..9a56c2422b --- /dev/null +++ b/pkg/controller/volume/persistentvolume/scheduler_bind_cache_metrics.go @@ -0,0 +1,60 @@ 
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package persistentvolume
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// VolumeSchedulerSubsystem is the Prometheus subsystem shared by every volume scheduling metric declared below.
+const VolumeSchedulerSubsystem = "scheduler_volume"
+
+var (
+	VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec( // counts pod binding cache operations, per "operation" label
+		prometheus.CounterOpts{
+			Subsystem: VolumeSchedulerSubsystem,
+			Name:      "binder_cache_requests_total",
+			Help:      "Total number for request volume binding cache",
+		},
+		[]string{"operation"},
+	)
+	VolumeSchedulingStageLatency = prometheus.NewHistogramVec( // per-stage latency, observed in seconds by the volume binder
+		prometheus.HistogramOpts{
+			Subsystem: VolumeSchedulerSubsystem,
+			Name:      "scheduling_duration_seconds",
+			Help:      "Volume scheduling stage latency",
+			Buckets:   prometheus.ExponentialBuckets(0.001, 2, 15), // bounds are in seconds: 1ms doubling up to ~16.4s
+		},
+		[]string{"operation"},
+	)
+	VolumeSchedulingStageFailed = prometheus.NewCounterVec( // counts stage invocations that returned a non-nil error
+		prometheus.CounterOpts{
+			Subsystem: VolumeSchedulerSubsystem,
+			Name:      "scheduling_stage_error_total",
+			Help:      "Volume scheduling stage error count",
+		},
+		[]string{"operation"},
+	)
+)
+
+// RegisterVolumeSchedulingMetrics registers the metrics declared above with the default Prometheus registry.
+// It exists for the scheduler to call, because the volume binding cache is a library used by the scheduler process.
+func RegisterVolumeSchedulingMetrics() { + prometheus.MustRegister(VolumeBindingRequestSchedulerBinderCache) + prometheus.MustRegister(VolumeSchedulingStageLatency) + prometheus.MustRegister(VolumeSchedulingStageFailed) +} diff --git a/pkg/controller/volume/persistentvolume/scheduler_binder.go b/pkg/controller/volume/persistentvolume/scheduler_binder.go index 5375c78a67..8d981a076e 100644 --- a/pkg/controller/volume/persistentvolume/scheduler_binder.go +++ b/pkg/controller/volume/persistentvolume/scheduler_binder.go @@ -149,6 +149,13 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume // Initialize to true for pods that don't have volumes unboundVolumesSatisfied = true boundVolumesSatisfied = true + start := time.Now() + defer func() { + VolumeSchedulingStageLatency.WithLabelValues("predicate").Observe(time.Since(start).Seconds()) + if err != nil { + VolumeSchedulingStageFailed.WithLabelValues("predicate").Inc() + } + }() // The pod's volumes need to be processed in one call to avoid the race condition where // volumes can get bound/provisioned in between calls. 
@@ -198,6 +205,13 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al podName := getPodName(assumedPod) glog.V(4).Infof("AssumePodVolumes for pod %q, node %q", podName, nodeName) + start := time.Now() + defer func() { + VolumeSchedulingStageLatency.WithLabelValues("assume").Observe(time.Since(start).Seconds()) + if err != nil { + VolumeSchedulingStageFailed.WithLabelValues("assume").Inc() + } + }() if allBound := b.arePodVolumesBound(assumedPod); allBound { glog.V(4).Infof("AssumePodVolumes for pod %q, node %q: all PVCs bound and nothing to do", podName, nodeName) @@ -264,15 +278,23 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al // BindPodVolumes gets the cached bindings and PVCs to provision in podBindingCache, // makes the API update for those PVs/PVCs, and waits for the PVCs to be completely bound // by the PV controller. -func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) error { +func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) (err error) { podName := getPodName(assumedPod) glog.V(4).Infof("BindPodVolumes for pod %q, node %q", podName, assumedPod.Spec.NodeName) + start := time.Now() + defer func() { + VolumeSchedulingStageLatency.WithLabelValues("bind").Observe(time.Since(start).Seconds()) + if err != nil { + VolumeSchedulingStageFailed.WithLabelValues("bind").Inc() + } + }() + bindings := b.podBindingCache.GetBindings(assumedPod, assumedPod.Spec.NodeName) claimsToProvision := b.podBindingCache.GetProvisionedPVCs(assumedPod, assumedPod.Spec.NodeName) // Start API operations - err := b.bindAPIUpdate(podName, bindings, claimsToProvision) + err = b.bindAPIUpdate(podName, bindings, claimsToProvision) if err != nil { return err } diff --git a/pkg/controller/volume/persistentvolume/scheduler_binder_cache.go b/pkg/controller/volume/persistentvolume/scheduler_binder_cache.go index e3acc35163..b95538304a 100644 --- a/pkg/controller/volume/persistentvolume/scheduler_binder_cache.go +++ 
b/pkg/controller/volume/persistentvolume/scheduler_binder_cache.go @@ -77,7 +77,11 @@ func (c *podBindingCache) DeleteBindings(pod *v1.Pod) { defer c.rwMutex.Unlock() podName := getPodName(pod) - delete(c.bindingDecisions, podName) + + if _, ok := c.bindingDecisions[podName]; ok { + delete(c.bindingDecisions, podName) + VolumeBindingRequestSchedulerBinderCache.WithLabelValues("delete").Inc() + } } func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*bindingInfo) { @@ -95,6 +99,7 @@ func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*b decision = nodeDecision{ bindings: bindings, } + VolumeBindingRequestSchedulerBinderCache.WithLabelValues("add").Inc() } else { decision.bindings = bindings } diff --git a/pkg/scheduler/metrics/BUILD b/pkg/scheduler/metrics/BUILD index 81d6d19d58..9b2bce0ff9 100644 --- a/pkg/scheduler/metrics/BUILD +++ b/pkg/scheduler/metrics/BUILD @@ -9,7 +9,10 @@ go_library( name = "go_default_library", srcs = ["metrics.go"], importpath = "k8s.io/kubernetes/pkg/scheduler/metrics", - deps = ["//vendor/github.com/prometheus/client_golang/prometheus:go_default_library"], + deps = [ + "//pkg/controller/volume/persistentvolume:go_default_library", + "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", + ], ) filegroup( diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index 81e047e0c8..0ebbd4f8ef 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -21,6 +21,7 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" + "k8s.io/kubernetes/pkg/controller/volume/persistentvolume" ) const ( @@ -171,6 +172,8 @@ func Register() { for _, metric := range metricsList { prometheus.MustRegister(metric) } + + persistentvolume.RegisterVolumeSchedulingMetrics() }) }