Merge pull request #63260 from misterikkit/ecache-metrics

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

scheduler: add metrics to equivalence cache

This adds counters for equivalence cache reads and writes. Reads are labeled
by hit/miss, while writes are labeled to indicate whether the write was
discarded.

This will give us visibility into:
- hit rate of cache reads
- ratio of reads to writes
- rate of discarded writes



**What this PR does / why we need it**:

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes https://github.com/kubernetes/kubernetes/issues/63259

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```
pull/8/head
Kubernetes Submit Queue 2018-08-17 01:10:51 -07:00 committed by GitHub
commit eeb3389f3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 0 deletions

View File

@ -9,6 +9,7 @@ go_library(
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/util/hash:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",

View File

@ -23,6 +23,8 @@ import (
"hash/fnv"
"sync"
"k8s.io/kubernetes/pkg/scheduler/metrics"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
@ -244,10 +246,12 @@ func (n *NodeCache) updateResult(
) {
if nodeInfo == nil || nodeInfo.Node() == nil {
// This may happen during tests.
metrics.EquivalenceCacheWrites.WithLabelValues("discarded_bad_node").Inc()
return
}
// Skip update if NodeInfo is stale.
if !cache.IsUpToDate(nodeInfo) {
metrics.EquivalenceCacheWrites.WithLabelValues("discarded_stale").Inc()
return
}
@ -282,6 +286,11 @@ func (n *NodeCache) lookupResult(
n.mu.RLock()
defer n.mu.RUnlock()
value, ok = n.cache[predicateKey][equivalenceHash]
if ok {
metrics.EquivalenceCacheHits.Inc()
} else {
metrics.EquivalenceCacheMisses.Inc()
}
return value, ok
}

View File

@ -117,6 +117,23 @@ var (
Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now",
})
equivalenceCacheLookups = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "equiv_cache_lookups_total",
Help: "Total number of equivalence cache lookups, by whether or not a cache entry was found",
}, []string{"result"})
EquivalenceCacheHits = equivalenceCacheLookups.With(prometheus.Labels{"result": "hit"})
EquivalenceCacheMisses = equivalenceCacheLookups.With(prometheus.Labels{"result": "miss"})
EquivalenceCacheWrites = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "equiv_cache_writes",
Help: "Total number of equivalence cache writes, by result",
}, []string{"result"})
metricsList = []prometheus.Collector{
SchedulingLatency,
E2eSchedulingLatency,
@ -127,6 +144,8 @@ var (
SchedulingAlgorithmPremptionEvaluationDuration,
PreemptionVictims,
PreemptionAttempts,
equivalenceCacheLookups,
EquivalenceCacheWrites,
}
)