Merge pull request #72332 from danielqsj/ks

Change scheduler metrics to conform to metrics guidelines
pull/564/head
Kubernetes Prow Robot 2019-01-14 22:05:52 -08:00 committed by GitHub
commit b91cbf7b4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 74 additions and 13 deletions

View File

@ -192,14 +192,16 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
FailedPredicates: failedPredicateMap, FailedPredicates: failedPredicateMap,
} }
} }
metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime)) metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInSeconds(startPredicateEvalTime))
metrics.DeprecatedSchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
trace.Step("Prioritizing") trace.Step("Prioritizing")
startPriorityEvalTime := time.Now() startPriorityEvalTime := time.Now()
// When only one node after predicate, just use it. // When only one node after predicate, just use it.
if len(filteredNodes) == 1 { if len(filteredNodes) == 1 {
metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime)) metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInSeconds(startPriorityEvalTime))
metrics.DeprecatedSchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
return ScheduleResult{ return ScheduleResult{
SuggestedHost: filteredNodes[0].Name, SuggestedHost: filteredNodes[0].Name,
EvaluatedNodes: 1 + len(failedPredicateMap), EvaluatedNodes: 1 + len(failedPredicateMap),
@ -212,7 +214,8 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
if err != nil { if err != nil {
return result, err return result, err
} }
metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime)) metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInSeconds(startPriorityEvalTime))
metrics.DeprecatedSchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
trace.Step("Selecting host") trace.Step("Selecting host")

View File

@ -71,50 +71,98 @@ var (
[]string{OperationLabel}, []string{OperationLabel},
) )
E2eSchedulingLatency = prometheus.NewHistogram( E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_seconds",
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedE2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds", Name: "e2e_scheduling_latency_microseconds",
Help: "E2e scheduling latency (scheduling algorithm + binding)", Help: "(Deprecated) E2e scheduling latency in microseconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
}, },
) )
SchedulingAlgorithmLatency = prometheus.NewHistogram( SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_seconds",
Help: "Scheduling algorithm latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds", Name: "scheduling_algorithm_latency_microseconds",
Help: "Scheduling algorithm latency", Help: "(Deprecated) Scheduling algorithm latency in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
}, },
) )
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram( SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation_seconds",
Help: "Scheduling algorithm predicate evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation", Name: "scheduling_algorithm_predicate_evaluation",
Help: "Scheduling algorithm predicate evaluation duration", Help: "(Deprecated) Scheduling algorithm predicate evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
}, },
) )
SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram( SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation_seconds",
Help: "Scheduling algorithm priority evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation", Name: "scheduling_algorithm_priority_evaluation",
Help: "Scheduling algorithm priority evaluation duration", Help: "(Deprecated) Scheduling algorithm priority evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
}, },
) )
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram( SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation_seconds",
Help: "Scheduling algorithm preemption evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation", Name: "scheduling_algorithm_preemption_evaluation",
Help: "Scheduling algorithm preemption evaluation duration", Help: "(Deprecated) Scheduling algorithm preemption evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
}, },
) )
BindingLatency = prometheus.NewHistogram( BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_latency_seconds",
Help: "Binding latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedBindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "binding_latency_microseconds", Name: "binding_latency_microseconds",
Help: "Binding latency", Help: "(Deprecated) Binding latency in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15), Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
}, },
) )
@ -135,11 +183,17 @@ var (
scheduleAttempts, scheduleAttempts,
SchedulingLatency, SchedulingLatency,
E2eSchedulingLatency, E2eSchedulingLatency,
DeprecatedE2eSchedulingLatency,
SchedulingAlgorithmLatency, SchedulingAlgorithmLatency,
DeprecatedSchedulingAlgorithmLatency,
BindingLatency, BindingLatency,
DeprecatedBindingLatency,
SchedulingAlgorithmPredicateEvaluationDuration, SchedulingAlgorithmPredicateEvaluationDuration,
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration,
SchedulingAlgorithmPriorityEvaluationDuration, SchedulingAlgorithmPriorityEvaluationDuration,
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration,
SchedulingAlgorithmPremptionEvaluationDuration, SchedulingAlgorithmPremptionEvaluationDuration,
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration,
PreemptionVictims, PreemptionVictims,
PreemptionAttempts, PreemptionAttempts,
} }

View File

@ -422,7 +422,8 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
return err return err
} }
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart)) metrics.BindingLatency.Observe(metrics.SinceInSeconds(bindingStart))
metrics.DeprecatedBindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart)) metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name) sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name)
return nil return nil
@ -465,7 +466,8 @@ func (sched *Scheduler) scheduleOne() {
preemptionStartTime := time.Now() preemptionStartTime := time.Now()
sched.preempt(pod, fitError) sched.preempt(pod, fitError)
metrics.PreemptionAttempts.Inc() metrics.PreemptionAttempts.Inc()
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime)) metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime))
metrics.DeprecatedSchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
} }
// Pod did not fit anywhere, so it is counted as a failure. If preemption // Pod did not fit anywhere, so it is counted as a failure. If preemption
@ -478,7 +480,8 @@ func (sched *Scheduler) scheduleOne() {
} }
return return
} }
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start)) metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInSeconds(start))
metrics.DeprecatedSchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet. // Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
// This allows us to keep scheduling without waiting on binding to occur. // This allows us to keep scheduling without waiting on binding to occur.
assumedPod := pod.DeepCopy() assumedPod := pod.DeepCopy()
@ -557,7 +560,8 @@ func (sched *Scheduler) scheduleOne() {
Name: scheduleResult.SuggestedHost, Name: scheduleResult.SuggestedHost,
}, },
}) })
metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start)) metrics.E2eSchedulingLatency.Observe(metrics.SinceInSeconds(start))
metrics.DeprecatedE2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
if err != nil { if err != nil {
klog.Errorf("error binding pod: %v", err) klog.Errorf("error binding pod: %v", err)
metrics.PodScheduleErrors.Inc() metrics.PodScheduleErrors.Inc()