Add metrics to volume scheduling operations

pull/58/head
wackxu 2018-06-15 10:59:36 +08:00
parent e1a1aa2112
commit d5edcd3dc3
7 changed files with 102 additions and 10 deletions

View File

@ -13,6 +13,7 @@ go_library(
"pv_controller.go", "pv_controller.go",
"pv_controller_base.go", "pv_controller_base.go",
"scheduler_assume_cache.go", "scheduler_assume_cache.go",
"scheduler_bind_cache_metrics.go",
"scheduler_binder.go", "scheduler_binder.go",
"scheduler_binder_cache.go", "scheduler_binder_cache.go",
"scheduler_binder_fake.go", "scheduler_binder_fake.go",
@ -58,6 +59,7 @@ go_library(
"//staging/src/k8s.io/cloud-provider:go_default_library", "//staging/src/k8s.io/cloud-provider:go_default_library",
"//staging/src/k8s.io/csi-api/pkg/client/clientset/versioned:go_default_library", "//staging/src/k8s.io/csi-api/pkg/client/clientset/versioned:go_default_library",
"//vendor/github.com/golang/glog:go_default_library", "//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
], ],
) )

View File

@ -1,14 +1,10 @@
package(default_visibility = ["//visibility:public"]) load("@io_bazel_rules_go//go:def.bzl", "go_library")
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library( go_library(
name = "go_default_library", name = "go_default_library",
srcs = ["metrics.go"], srcs = ["metrics.go"],
importpath = "k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics", importpath = "k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics",
visibility = ["//visibility:public"],
deps = [ deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library",
"//vendor/github.com/golang/glog:go_default_library", "//vendor/github.com/golang/glog:go_default_library",
@ -27,4 +23,5 @@ filegroup(
name = "all-srcs", name = "all-srcs",
srcs = [":package-srcs"], srcs = [":package-srcs"],
tags = ["automanaged"], tags = ["automanaged"],
visibility = ["//visibility:public"],
) )

View File

@ -0,0 +1,60 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package persistentvolume
import (
"github.com/prometheus/client_golang/prometheus"
)
const (
	// VolumeSchedulerSubsystem is the Prometheus subsystem name under which
	// the scheduler's volume metrics are published.
	VolumeSchedulerSubsystem = "scheduler_volume"
)
// Metrics describing the scheduler's volume binding work. Each vector carries
// a single "operation" label that identifies what was being done when the
// sample was recorded.
var (
	// VolumeBindingRequestSchedulerBinderCache counts requests made against
	// the volume binding cache, partitioned by operation (for example "add"
	// and "delete").
	VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "binder_cache_requests_total",
			Help:      "Total number for request volume binding cache",
		},
		[]string{"operation"},
	)
	// VolumeSchedulingStageLatency records how long each volume scheduling
	// stage took, in seconds, partitioned by operation (for example
	// "predicate", "assume" and "bind").
	VolumeSchedulingStageLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "scheduling_duration_seconds",
			Help:      "Volume scheduling stage latency",
			// Observations are reported in seconds, so the buckets must be
			// expressed in seconds as well: 15 buckets doubling from 1ms up
			// to ~16.4s. A start value of 1000 would place the smallest
			// bucket at 1000 seconds and make the histogram useless.
			Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
		},
		[]string{"operation"},
	)
	// VolumeSchedulingStageFailed counts volume scheduling stages that
	// finished with an error, partitioned by operation.
	VolumeSchedulingStageFailed = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: VolumeSchedulerSubsystem,
			Name:      "scheduling_stage_error_total",
			Help:      "Volume scheduling stage error count",
		},
		[]string{"operation"},
	)
)
// RegisterVolumeSchedulingMetrics registers the volume scheduling collectors
// with the default Prometheus registerer. The volume binding cache is a
// library that runs inside the scheduler process, so the scheduler is expected
// to call this when it registers its own metrics.
//
// MustRegister panics if any collector cannot be registered.
func RegisterVolumeSchedulingMetrics() {
	prometheus.MustRegister(
		VolumeBindingRequestSchedulerBinderCache,
		VolumeSchedulingStageLatency,
		VolumeSchedulingStageFailed,
	)
}

View File

@ -149,6 +149,13 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
// Initialize to true for pods that don't have volumes // Initialize to true for pods that don't have volumes
unboundVolumesSatisfied = true unboundVolumesSatisfied = true
boundVolumesSatisfied = true boundVolumesSatisfied = true
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("predicate").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("predicate").Inc()
}
}()
// The pod's volumes need to be processed in one call to avoid the race condition where // The pod's volumes need to be processed in one call to avoid the race condition where
// volumes can get bound/provisioned in between calls. // volumes can get bound/provisioned in between calls.
@ -198,6 +205,13 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
podName := getPodName(assumedPod) podName := getPodName(assumedPod)
glog.V(4).Infof("AssumePodVolumes for pod %q, node %q", podName, nodeName) glog.V(4).Infof("AssumePodVolumes for pod %q, node %q", podName, nodeName)
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("assume").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("assume").Inc()
}
}()
if allBound := b.arePodVolumesBound(assumedPod); allBound { if allBound := b.arePodVolumesBound(assumedPod); allBound {
glog.V(4).Infof("AssumePodVolumes for pod %q, node %q: all PVCs bound and nothing to do", podName, nodeName) glog.V(4).Infof("AssumePodVolumes for pod %q, node %q: all PVCs bound and nothing to do", podName, nodeName)
@ -264,15 +278,23 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
// BindPodVolumes gets the cached bindings and PVCs to provision in podBindingCache, // BindPodVolumes gets the cached bindings and PVCs to provision in podBindingCache,
// makes the API update for those PVs/PVCs, and waits for the PVCs to be completely bound // makes the API update for those PVs/PVCs, and waits for the PVCs to be completely bound
// by the PV controller. // by the PV controller.
func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) error { func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) (err error) {
podName := getPodName(assumedPod) podName := getPodName(assumedPod)
glog.V(4).Infof("BindPodVolumes for pod %q, node %q", podName, assumedPod.Spec.NodeName) glog.V(4).Infof("BindPodVolumes for pod %q, node %q", podName, assumedPod.Spec.NodeName)
start := time.Now()
defer func() {
VolumeSchedulingStageLatency.WithLabelValues("bind").Observe(time.Since(start).Seconds())
if err != nil {
VolumeSchedulingStageFailed.WithLabelValues("bind").Inc()
}
}()
bindings := b.podBindingCache.GetBindings(assumedPod, assumedPod.Spec.NodeName) bindings := b.podBindingCache.GetBindings(assumedPod, assumedPod.Spec.NodeName)
claimsToProvision := b.podBindingCache.GetProvisionedPVCs(assumedPod, assumedPod.Spec.NodeName) claimsToProvision := b.podBindingCache.GetProvisionedPVCs(assumedPod, assumedPod.Spec.NodeName)
// Start API operations // Start API operations
err := b.bindAPIUpdate(podName, bindings, claimsToProvision) err = b.bindAPIUpdate(podName, bindings, claimsToProvision)
if err != nil { if err != nil {
return err return err
} }

View File

@ -77,7 +77,11 @@ func (c *podBindingCache) DeleteBindings(pod *v1.Pod) {
defer c.rwMutex.Unlock() defer c.rwMutex.Unlock()
podName := getPodName(pod) podName := getPodName(pod)
delete(c.bindingDecisions, podName)
if _, ok := c.bindingDecisions[podName]; ok {
delete(c.bindingDecisions, podName)
VolumeBindingRequestSchedulerBinderCache.WithLabelValues("delete").Inc()
}
} }
func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*bindingInfo) { func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*bindingInfo) {
@ -95,6 +99,7 @@ func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*b
decision = nodeDecision{ decision = nodeDecision{
bindings: bindings, bindings: bindings,
} }
VolumeBindingRequestSchedulerBinderCache.WithLabelValues("add").Inc()
} else { } else {
decision.bindings = bindings decision.bindings = bindings
} }

View File

@ -9,7 +9,10 @@ go_library(
name = "go_default_library", name = "go_default_library",
srcs = ["metrics.go"], srcs = ["metrics.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics", importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
deps = ["//vendor/github.com/prometheus/client_golang/prometheus:go_default_library"], deps = [
"//pkg/controller/volume/persistentvolume:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
],
) )
filegroup( filegroup(

View File

@ -21,6 +21,7 @@ import (
"time" "time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
) )
const ( const (
@ -171,6 +172,8 @@ func Register() {
for _, metric := range metricsList { for _, metric := range metricsList {
prometheus.MustRegister(metric) prometheus.MustRegister(metric)
} }
persistentvolume.RegisterVolumeSchedulingMetrics()
}) })
} }