Merge pull request #65306 from shyamjvs/fine-grained-scheduler-metric

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Split scheduler latency metric to fine-grained steps

This splits the summary metric we recently added into finer steps. It should be very useful for performance experiments.

/cc @wojtek-t 
fyi - @bsalamat @misterikkit 

Strictly speaking, this is a breaking change, but since this metric was added only about a week ago, I think it should be fine (we should port this change to 1.11).

```release-note
Split 'scheduling_latency_seconds' metric into finer steps (predicate, priority, preemption)
```
pull/8/head
Kubernetes Submit Queue 2018-06-21 09:11:58 -07:00 committed by GitHub
commit 23b4690d00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 25 additions and 11 deletions

View File

@ -142,6 +142,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
} }
} }
metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime)) metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
trace.Step("Prioritizing") trace.Step("Prioritizing")
startPriorityEvalTime := time.Now() startPriorityEvalTime := time.Now()
@ -157,6 +158,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
return "", err return "", err
} }
metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime)) metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
trace.Step("Selecting host") trace.Step("Selecting host")
return g.selectHost(priorityList) return g.selectHost(priorityList)

View File

@ -31,10 +31,16 @@ const (
// OperationLabel - operation label name // OperationLabel - operation label name
OperationLabel = "operation" OperationLabel = "operation"
// Below are possible values for the operation label. Each represents a substep of e2e scheduling:
// PredicateEvaluation - predicate evaluation operation label value
PredicateEvaluation = "predicate_evaluation"
// PriorityEvaluation - priority evaluation operation label value
PriorityEvaluation = "priority_evaluation"
// PreemptionEvaluation - preemption evaluation operation label value (occurs in case of scheduling fitError).
PreemptionEvaluation = "preemption_evaluation"
// Binding - binding operation label value // Binding - binding operation label value
Binding = "binding" Binding = "binding"
// SelectingNode - selecting node operation label value
SelectingNode = "selecting_node"
// E2eScheduling - e2e scheduling operation label value // E2eScheduling - e2e scheduling operation label value
) )

View File

@ -460,11 +460,11 @@ func (sched *Scheduler) scheduleOne() {
sched.preempt(pod, fitError) sched.preempt(pod, fitError)
metrics.PreemptionAttempts.Inc() metrics.PreemptionAttempts.Inc()
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime)) metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
} }
return return
} }
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start)) metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
metrics.SchedulingLatency.WithLabelValues(metrics.SelectingNode).Observe(metrics.SinceInSeconds(start))
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet. // Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
// This allows us to keep scheduling without waiting on binding to occur. // This allows us to keep scheduling without waiting on binding to occur.
assumedPod := pod.DeepCopy() assumedPod := pod.DeepCopy()

View File

@ -210,12 +210,14 @@ func (l *PodStartupLatency) PrintJSON() string {
} }
type SchedulingMetrics struct { type SchedulingMetrics struct {
SelectingNodeLatency LatencyMetric `json:"selectingNodeLatency"` PredicateEvaluationLatency LatencyMetric `json:"predicateEvaluationLatency"`
BindingLatency LatencyMetric `json:"bindingLatency"` PriorityEvaluationLatency LatencyMetric `json:"priorityEvaluationLatency"`
ThroughputAverage float64 `json:"throughputAverage"` PreemptionEvaluationLatency LatencyMetric `json:"preemptionEvaluationLatency"`
ThroughputPerc50 float64 `json:"throughputPerc50"` BindingLatency LatencyMetric `json:"bindingLatency"`
ThroughputPerc90 float64 `json:"throughputPerc90"` ThroughputAverage float64 `json:"throughputAverage"`
ThroughputPerc99 float64 `json:"throughputPerc99"` ThroughputPerc50 float64 `json:"throughputPerc50"`
ThroughputPerc90 float64 `json:"throughputPerc90"`
ThroughputPerc99 float64 `json:"throughputPerc99"`
} }
func (l *SchedulingMetrics) SummaryKind() string { func (l *SchedulingMetrics) SummaryKind() string {
@ -511,8 +513,12 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) {
var metric *LatencyMetric = nil var metric *LatencyMetric = nil
switch sample.Metric[schedulermetric.OperationLabel] { switch sample.Metric[schedulermetric.OperationLabel] {
case schedulermetric.SelectingNode: case schedulermetric.PredicateEvaluation:
metric = &result.SelectingNodeLatency metric = &result.PredicateEvaluationLatency
case schedulermetric.PriorityEvaluation:
metric = &result.PriorityEvaluationLatency
case schedulermetric.PreemptionEvaluation:
metric = &result.PreemptionEvaluationLatency
case schedulermetric.Binding: case schedulermetric.Binding:
metric = &result.BindingLatency metric = &result.BindingLatency
} }