diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index bdde7aaf50..73d6aa802a 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -142,6 +142,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister } } metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime)) + metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime)) trace.Step("Prioritizing") startPriorityEvalTime := time.Now() @@ -157,6 +158,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister return "", err } metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime)) + metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime)) trace.Step("Selecting host") return g.selectHost(priorityList) diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index 2fbba9980c..1b5d77b476 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -31,10 +31,16 @@ const ( // OperationLabel - operation label name OperationLabel = "operation" + // Below are possible values for the operation label. Each represents a substep of e2e scheduling: + + // PredicateEvaluation - predicate evaluation operation label value + PredicateEvaluation = "predicate_evaluation" + // PriorityEvaluation - priority evaluation operation label value + PriorityEvaluation = "priority_evaluation" + // PreemptionEvaluation - preemption evaluation operation label value (occurs in case of scheduling fitError). + PreemptionEvaluation = "preemption_evaluation" // Binding - binding operation label value Binding = "binding" - // SelectingNode - selecting node operation label value - SelectingNode = "selecting_node" // E2eScheduling - e2e scheduling operation label value ) diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index d06a6cb9e3..c028441f52 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -460,11 +460,11 @@ func (sched *Scheduler) scheduleOne() { sched.preempt(pod, fitError) metrics.PreemptionAttempts.Inc() metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime)) + metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) } return } metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start)) - metrics.SchedulingLatency.WithLabelValues(metrics.SelectingNode).Observe(metrics.SinceInSeconds(start)) // Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet. // This allows us to keep scheduling without waiting on binding to occur. assumedPod := pod.DeepCopy() diff --git a/test/e2e/framework/metrics_util.go b/test/e2e/framework/metrics_util.go index 2562dc4720..5d61bbf51e 100644 --- a/test/e2e/framework/metrics_util.go +++ b/test/e2e/framework/metrics_util.go @@ -210,12 +210,14 @@ func (l *PodStartupLatency) PrintJSON() string { } type SchedulingMetrics struct { - SelectingNodeLatency LatencyMetric `json:"selectingNodeLatency"` - BindingLatency LatencyMetric `json:"bindingLatency"` - ThroughputAverage float64 `json:"throughputAverage"` - ThroughputPerc50 float64 `json:"throughputPerc50"` - ThroughputPerc90 float64 `json:"throughputPerc90"` - ThroughputPerc99 float64 `json:"throughputPerc99"` + PredicateEvaluationLatency LatencyMetric `json:"predicateEvaluationLatency"` + PriorityEvaluationLatency LatencyMetric `json:"priorityEvaluationLatency"` + PreemptionEvaluationLatency LatencyMetric `json:"preemptionEvaluationLatency"` + BindingLatency LatencyMetric `json:"bindingLatency"` + ThroughputAverage float64 `json:"throughputAverage"` + ThroughputPerc50 float64 `json:"throughputPerc50"` + ThroughputPerc90 float64 `json:"throughputPerc90"` + ThroughputPerc99 float64 `json:"throughputPerc99"` } func (l *SchedulingMetrics) SummaryKind() string { @@ -511,8 +513,12 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) { var metric *LatencyMetric = nil switch sample.Metric[schedulermetric.OperationLabel] { - case schedulermetric.SelectingNode: - metric = &result.SelectingNodeLatency + case schedulermetric.PredicateEvaluation: + metric = &result.PredicateEvaluationLatency + case schedulermetric.PriorityEvaluation: + metric = &result.PriorityEvaluationLatency + case schedulermetric.PreemptionEvaluation: + metric = &result.PreemptionEvaluationLatency case schedulermetric.Binding: metric = &result.BindingLatency }