Merge pull request #43031 from dashpole/eviction_metrics

Automatic merge from submit-queue

Add Prometheus metrics for the age of stats used for evictions.

Completes #42923 

This PR adds a Prometheus metric for evictions that records how stale the stats used to make each eviction decision are.

cc @vishh @derekwaynecarr @kubernetes/sig-node-pr-reviews
pull/6/head
Kubernetes Submit Queue 2017-04-12 12:38:58 -07:00 committed by GitHub
commit 42c0994c34
3 changed files with 20 additions and 0 deletions

View File

@ -66,6 +66,7 @@ go_library(
"//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library", "//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library", "//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/pod:go_default_library", "//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/qos:go_default_library", "//pkg/kubelet/qos:go_default_library",
"//pkg/kubelet/server/stats:go_default_library", "//pkg/kubelet/server/stats:go_default_library",

View File

@ -35,6 +35,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/cm"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod" kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/server/stats" "k8s.io/kubernetes/pkg/kubelet/server/stats"
@ -331,6 +332,14 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
glog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods)) glog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods))
//record age of metrics for met thresholds that we are using for evictions.
for _, t := range thresholds {
timeObserved := observations[t.Signal].time
if !timeObserved.IsZero() {
metrics.EvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInMicroseconds(timeObserved.Time))
}
}
// we kill at most a single pod during each eviction interval // we kill at most a single pod during each eviction interval
for i := range activePods { for i := range activePods {
pod := activePods[i] pod := activePods[i]

View File

@ -40,6 +40,7 @@ const (
PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds" PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
PLEGRelistLatencyKey = "pleg_relist_latency_microseconds" PLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
PLEGRelistIntervalKey = "pleg_relist_interval_microseconds" PLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
EvictionStatsAgeKey = "eviction_stats_age_microseconds"
// Metrics keys of remote runtime operations // Metrics keys of remote runtime operations
RuntimeOperationsKey = "runtime_operations" RuntimeOperationsKey = "runtime_operations"
RuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds" RuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
@ -178,6 +179,14 @@ var (
}, },
[]string{"operation_type"}, []string{"operation_type"},
) )
EvictionStatsAge = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: EvictionStatsAgeKey,
Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
},
[]string{"eviction_signal"},
)
) )
var registerMetrics sync.Once var registerMetrics sync.Once
@ -204,6 +213,7 @@ func Register(containerCache kubecontainer.RuntimeCache) {
prometheus.MustRegister(RuntimeOperations) prometheus.MustRegister(RuntimeOperations)
prometheus.MustRegister(RuntimeOperationsLatency) prometheus.MustRegister(RuntimeOperationsLatency)
prometheus.MustRegister(RuntimeOperationsErrors) prometheus.MustRegister(RuntimeOperationsErrors)
prometheus.MustRegister(EvictionStatsAge)
}) })
} }