mirror of https://github.com/k3s-io/k3s
Add metrics to volume scheduling operations
parent
e1a1aa2112
commit
d5edcd3dc3
|
@ -13,6 +13,7 @@ go_library(
|
|||
"pv_controller.go",
|
||||
"pv_controller_base.go",
|
||||
"scheduler_assume_cache.go",
|
||||
"scheduler_bind_cache_metrics.go",
|
||||
"scheduler_binder.go",
|
||||
"scheduler_binder_cache.go",
|
||||
"scheduler_binder_fake.go",
|
||||
|
@ -58,6 +59,7 @@ go_library(
|
|||
"//staging/src/k8s.io/cloud-provider:go_default_library",
|
||||
"//staging/src/k8s.io/csi-api/pkg/client/clientset/versioned:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
@ -1,14 +1,10 @@
|
|||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_go//go:def.bzl",
|
||||
"go_library",
|
||||
)
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["metrics.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
|
@ -27,4 +23,5 @@ filegroup(
|
|||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package persistentvolume
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// VolumeSchedulerSubsystem - subsystem name used by scheduler
|
||||
const VolumeSchedulerSubsystem = "scheduler_volume"
|
||||
|
||||
var (
|
||||
VolumeBindingRequestSchedulerBinderCache = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: VolumeSchedulerSubsystem,
|
||||
Name: "binder_cache_requests_total",
|
||||
Help: "Total number for request volume binding cache",
|
||||
},
|
||||
[]string{"operation"},
|
||||
)
|
||||
VolumeSchedulingStageLatency = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Subsystem: VolumeSchedulerSubsystem,
|
||||
Name: "scheduling_duration_seconds",
|
||||
Help: "Volume scheduling stage latency",
|
||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||
},
|
||||
[]string{"operation"},
|
||||
)
|
||||
VolumeSchedulingStageFailed = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: VolumeSchedulerSubsystem,
|
||||
Name: "scheduling_stage_error_total",
|
||||
Help: "Volume scheduling stage error count",
|
||||
},
|
||||
[]string{"operation"},
|
||||
)
|
||||
)
|
||||
|
||||
// RegisterVolumeSchedulingMetrics is used for scheduler, because the volume binding cache is a library
|
||||
// used by scheduler process.
|
||||
func RegisterVolumeSchedulingMetrics() {
|
||||
prometheus.MustRegister(VolumeBindingRequestSchedulerBinderCache)
|
||||
prometheus.MustRegister(VolumeSchedulingStageLatency)
|
||||
prometheus.MustRegister(VolumeSchedulingStageFailed)
|
||||
}
|
|
@ -149,6 +149,13 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
|
|||
// Initialize to true for pods that don't have volumes
|
||||
unboundVolumesSatisfied = true
|
||||
boundVolumesSatisfied = true
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
VolumeSchedulingStageLatency.WithLabelValues("predicate").Observe(time.Since(start).Seconds())
|
||||
if err != nil {
|
||||
VolumeSchedulingStageFailed.WithLabelValues("predicate").Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
// The pod's volumes need to be processed in one call to avoid the race condition where
|
||||
// volumes can get bound/provisioned in between calls.
|
||||
|
@ -198,6 +205,13 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
|
|||
podName := getPodName(assumedPod)
|
||||
|
||||
glog.V(4).Infof("AssumePodVolumes for pod %q, node %q", podName, nodeName)
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
VolumeSchedulingStageLatency.WithLabelValues("assume").Observe(time.Since(start).Seconds())
|
||||
if err != nil {
|
||||
VolumeSchedulingStageFailed.WithLabelValues("assume").Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
if allBound := b.arePodVolumesBound(assumedPod); allBound {
|
||||
glog.V(4).Infof("AssumePodVolumes for pod %q, node %q: all PVCs bound and nothing to do", podName, nodeName)
|
||||
|
@ -264,15 +278,23 @@ func (b *volumeBinder) AssumePodVolumes(assumedPod *v1.Pod, nodeName string) (al
|
|||
// BindPodVolumes gets the cached bindings and PVCs to provision in podBindingCache,
|
||||
// makes the API update for those PVs/PVCs, and waits for the PVCs to be completely bound
|
||||
// by the PV controller.
|
||||
func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) error {
|
||||
func (b *volumeBinder) BindPodVolumes(assumedPod *v1.Pod) (err error) {
|
||||
podName := getPodName(assumedPod)
|
||||
glog.V(4).Infof("BindPodVolumes for pod %q, node %q", podName, assumedPod.Spec.NodeName)
|
||||
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
VolumeSchedulingStageLatency.WithLabelValues("bind").Observe(time.Since(start).Seconds())
|
||||
if err != nil {
|
||||
VolumeSchedulingStageFailed.WithLabelValues("bind").Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
bindings := b.podBindingCache.GetBindings(assumedPod, assumedPod.Spec.NodeName)
|
||||
claimsToProvision := b.podBindingCache.GetProvisionedPVCs(assumedPod, assumedPod.Spec.NodeName)
|
||||
|
||||
// Start API operations
|
||||
err := b.bindAPIUpdate(podName, bindings, claimsToProvision)
|
||||
err = b.bindAPIUpdate(podName, bindings, claimsToProvision)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -77,7 +77,11 @@ func (c *podBindingCache) DeleteBindings(pod *v1.Pod) {
|
|||
defer c.rwMutex.Unlock()
|
||||
|
||||
podName := getPodName(pod)
|
||||
delete(c.bindingDecisions, podName)
|
||||
|
||||
if _, ok := c.bindingDecisions[podName]; ok {
|
||||
delete(c.bindingDecisions, podName)
|
||||
VolumeBindingRequestSchedulerBinderCache.WithLabelValues("delete").Inc()
|
||||
}
|
||||
}
|
||||
|
||||
func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*bindingInfo) {
|
||||
|
@ -95,6 +99,7 @@ func (c *podBindingCache) UpdateBindings(pod *v1.Pod, node string, bindings []*b
|
|||
decision = nodeDecision{
|
||||
bindings: bindings,
|
||||
}
|
||||
VolumeBindingRequestSchedulerBinderCache.WithLabelValues("add").Inc()
|
||||
} else {
|
||||
decision.bindings = bindings
|
||||
}
|
||||
|
|
|
@ -9,7 +9,10 @@ go_library(
|
|||
name = "go_default_library",
|
||||
srcs = ["metrics.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
|
||||
deps = ["//vendor/github.com/prometheus/client_golang/prometheus:go_default_library"],
|
||||
deps = [
|
||||
"//pkg/controller/volume/persistentvolume:go_default_library",
|
||||
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -171,6 +172,8 @@ func Register() {
|
|||
for _, metric := range metricsList {
|
||||
prometheus.MustRegister(metric)
|
||||
}
|
||||
|
||||
persistentvolume.RegisterVolumeSchedulingMetrics()
|
||||
})
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue