mirror of https://github.com/k3s-io/k3s, commit e1989af060
@@ -46,6 +46,18 @@ const (
 // All the histogram based metrics have 1ms as size for the smallest bucket.
 var (
+	scheduleAttempts = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Subsystem: SchedulerSubsystem,
+			Name:      "schedule_attempts_total",
+			Help:      "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
+		}, []string{"result"})
+	// PodScheduleSuccesses counts how many pods were scheduled.
+	PodScheduleSuccesses = scheduleAttempts.With(prometheus.Labels{"result": "scheduled"})
+	// PodScheduleFailures counts how many pods could not be scheduled.
+	PodScheduleFailures = scheduleAttempts.With(prometheus.Labels{"result": "unschedulable"})
+	// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
+	PodScheduleErrors = scheduleAttempts.With(prometheus.Labels{"result": "error"})
 	SchedulingLatency = prometheus.NewSummaryVec(
 		prometheus.SummaryOpts{
 			Subsystem: SchedulerSubsystem,
@@ -135,6 +147,7 @@ var (
 		}, []string{"result"})

 	metricsList = []prometheus.Collector{
+		scheduleAttempts,
 		SchedulingLatency,
 		E2eSchedulingLatency,
 		SchedulingAlgorithmLatency,
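The two hunks above follow the standard client_golang pattern: one CounterVec partitioned by a "result" label, with per-result counters bound once via With so the hot path is a plain Inc, and the vector registered once for all of them. A minimal, self-contained sketch of the same pattern (the subsystem string, port, and package layout here are placeholders, not taken from this commit):

package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

var (
	// One vector partitioned by result, mirroring schedule_attempts_total.
	scheduleAttempts = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: "scheduler", // placeholder; the diff uses SchedulerSubsystem
			Name:      "schedule_attempts_total",
			Help:      "Number of attempts to schedule pods, by the result.",
		}, []string{"result"})

	// Binding the label values up front means callers never touch labels.
	podScheduleSuccesses = scheduleAttempts.With(prometheus.Labels{"result": "scheduled"})
	podScheduleFailures  = scheduleAttempts.With(prometheus.Labels{"result": "unschedulable"})
	podScheduleErrors    = scheduleAttempts.With(prometheus.Labels{"result": "error"})
)

func main() {
	// Registering the vector once covers every bound counter derived from it.
	prometheus.MustRegister(scheduleAttempts)

	podScheduleSuccesses.Inc()
	podScheduleFailures.Inc()
	podScheduleErrors.Inc()

	// Expose the three schedule_attempts_total series for scraping.
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":9090", nil)) // port is arbitrary
}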
@@ -416,6 +416,13 @@ func (sched *Scheduler) scheduleOne() {
 			metrics.PreemptionAttempts.Inc()
 			metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
 			metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
+			// Pod did not fit anywhere, so it is counted as a failure. If preemption
+			// succeeds, the pod should get counted as a success the next time we try to
+			// schedule it. (hopefully)
+			metrics.PodScheduleFailures.Inc()
+		} else {
+			glog.Errorf("error selecting node for pod: %v", err)
+			metrics.PodScheduleErrors.Inc()
+		}
 		return
 	}
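Both latency observations above go through small helpers on the metrics package that convert an elapsed duration into the unit each metric expects. A sketch of plausible equivalents over time.Since (signatures assumed from usage, not taken from this diff):

package metrics

import "time"

// SinceInMicroseconds returns the time elapsed since start in microseconds,
// matching the unit convention of the scheduler's duration metrics.
func SinceInMicroseconds(start time.Time) float64 {
	return float64(time.Since(start).Nanoseconds() / int64(time.Microsecond))
}

// SinceInSeconds returns the time elapsed since start in seconds.
func SinceInSeconds(start time.Time) float64 {
	return time.Since(start).Seconds()
}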
@@ -433,20 +440,26 @@ func (sched *Scheduler) scheduleOne() {
 	// This function modifies 'assumedPod' if volume binding is required.
 	allBound, err := sched.assumeVolumes(assumedPod, suggestedHost)
 	if err != nil {
+		glog.Errorf("error assuming volumes: %v", err)
+		metrics.PodScheduleErrors.Inc()
 		return
 	}

 	// assume modifies `assumedPod` by setting NodeName=suggestedHost
 	err = sched.assume(assumedPod, suggestedHost)
 	if err != nil {
+		glog.Errorf("error assuming pod: %v", err)
+		metrics.PodScheduleErrors.Inc()
 		return
 	}
 	// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
 	go func() {
 		// Bind volumes first before Pod
 		if !allBound {
-			err = sched.bindVolumes(assumedPod)
+			err := sched.bindVolumes(assumedPod)
 			if err != nil {
+				glog.Errorf("error binding volumes: %v", err)
+				metrics.PodScheduleErrors.Inc()
 				return
 			}
 		}
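Note the one-character fix in this hunk: err = sched.bindVolumes(...) becomes err := sched.bindVolumes(...). Inside the goroutine, := declares a variable local to the closure instead of assigning to the err captured from scheduleOne, which the enclosing function may still be using concurrently. A toy illustration of the difference (names hypothetical):

package main

import (
	"errors"
	"fmt"
)

func main() {
	err := errors.New("outer")
	done := make(chan struct{})

	go func() {
		// With '=', this line would overwrite the captured 'err' and could
		// race with concurrent reads in the spawning goroutine. With ':=',
		// it declares a fresh variable visible only inside this closure.
		err := errors.New("inner")
		fmt.Println("closure sees:", err)
		close(done)
	}()

	<-done
	fmt.Println("outer still sees:", err)
}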
@@ -460,7 +473,10 @@ func (sched *Scheduler) scheduleOne() {
 		})
 		metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
 		if err != nil {
-			glog.Errorf("Internal error binding pod: (%v)", err)
+			glog.Errorf("error binding pod: %v", err)
+			metrics.PodScheduleErrors.Inc()
+		} else {
+			metrics.PodScheduleSuccesses.Inc()
 		}
 	}()
 }
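With the success/error bookkeeping in place, counters like these are straightforward to assert on in unit tests. A hedged sketch using client_golang's testutil package (metric names shortened; none of this is part of the commit):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	attempts := prometheus.NewCounterVec(
		prometheus.CounterOpts{Name: "schedule_attempts_total", Help: "by result"},
		[]string{"result"})

	successes := attempts.With(prometheus.Labels{"result": "scheduled"})
	errs := attempts.With(prometheus.Labels{"result": "error"})

	successes.Inc()
	successes.Inc()
	errs.Inc()

	// testutil.ToFloat64 reads the current value of a single metric, which is
	// enough to verify the if/else accounting from the hunk above.
	fmt.Println(testutil.ToFloat64(successes)) // 2
	fmt.Println(testutil.ToFloat64(errs))      // 1
}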