mirror of https://github.com/k3s-io/k3s
Merge pull request #43031 from dashpole/eviction_metrics
Automatic merge from submit-queue. Add prometheus metrics for age of stats used for evictions. Completes #42923. This PR adds metrics for evictions, and records how stale the data used for evictions is. cc @vishh @derekwaynecarr @kubernetes/sig-node-pr-reviews pull/6/head
commit
42c0994c34
|
@ -66,6 +66,7 @@ go_library(
|
|||
"//pkg/kubelet/cm:go_default_library",
|
||||
"//pkg/kubelet/eviction/api:go_default_library",
|
||||
"//pkg/kubelet/lifecycle:go_default_library",
|
||||
"//pkg/kubelet/metrics:go_default_library",
|
||||
"//pkg/kubelet/pod:go_default_library",
|
||||
"//pkg/kubelet/qos:go_default_library",
|
||||
"//pkg/kubelet/server/stats:go_default_library",
|
||||
|
|
|
@ -35,6 +35,7 @@ import (
|
|||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
"k8s.io/kubernetes/pkg/kubelet/server/stats"
|
||||
|
@ -331,6 +332,14 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
|
|||
|
||||
glog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods))
|
||||
|
||||
// Record the age of the stats behind each met threshold that we are using for evictions.
|
||||
for _, t := range thresholds {
|
||||
timeObserved := observations[t.Signal].time
|
||||
if !timeObserved.IsZero() {
|
||||
metrics.EvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInMicroseconds(timeObserved.Time))
|
||||
}
|
||||
}
|
||||
|
||||
// we kill at most a single pod during each eviction interval
|
||||
for i := range activePods {
|
||||
pod := activePods[i]
|
||||
|
|
|
@ -40,6 +40,7 @@ const (
|
|||
PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
|
||||
PLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
|
||||
PLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
|
||||
EvictionStatsAgeKey = "eviction_stats_age_microseconds"
|
||||
// Metrics keys of remote runtime operations
|
||||
RuntimeOperationsKey = "runtime_operations"
|
||||
RuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
|
||||
|
@ -178,6 +179,14 @@ var (
|
|||
},
|
||||
[]string{"operation_type"},
|
||||
)
|
||||
EvictionStatsAge = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: EvictionStatsAgeKey,
|
||||
Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
|
||||
},
|
||||
[]string{"eviction_signal"},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
|
@ -204,6 +213,7 @@ func Register(containerCache kubecontainer.RuntimeCache) {
|
|||
prometheus.MustRegister(RuntimeOperations)
|
||||
prometheus.MustRegister(RuntimeOperationsLatency)
|
||||
prometheus.MustRegister(RuntimeOperationsErrors)
|
||||
prometheus.MustRegister(EvictionStatsAge)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue