/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
2018-06-15 02:59:36 +00:00
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
2015-04-02 17:24:21 +00:00
)
2018-06-07 11:20:26 +00:00
const (
	// SchedulerSubsystem is the Prometheus subsystem name used by the
	// scheduler metrics.
	SchedulerSubsystem = "scheduler"
	// SchedulingLatencyName is the name of the scheduling latency metric.
	SchedulingLatencyName = "scheduling_duration_seconds"
	// DeprecatedSchedulingLatencyName is the deprecated name of the scheduling
	// latency metric.
	DeprecatedSchedulingLatencyName = "scheduling_latency_seconds"

	// OperationLabel is the name of the operation label.
	OperationLabel = "operation"

	// The constants below are the possible values of the operation label.
	// Each one represents a substep of e2e scheduling.

	// PredicateEvaluation is the operation label value for predicate
	// evaluation.
	PredicateEvaluation = "predicate_evaluation"
	// PriorityEvaluation is the operation label value for priority evaluation.
	PriorityEvaluation = "priority_evaluation"
	// PreemptionEvaluation is the operation label value for preemption
	// evaluation (occurs in case of scheduling fitError).
	PreemptionEvaluation = "preemption_evaluation"
	// Binding is the operation label value for binding.
	Binding = "binding"
)
2015-04-02 17:24:21 +00:00
2018-01-09 05:12:07 +00:00
// All the histogram based metrics have 1ms as size for the smallest bucket.
2015-04-02 17:24:21 +00:00
var (
2018-05-30 18:59:55 +00:00
scheduleAttempts = prometheus . NewCounterVec (
prometheus . CounterOpts {
Subsystem : SchedulerSubsystem ,
Name : "schedule_attempts_total" ,
Help : "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem." ,
} , [ ] string { "result" } )
// PodScheduleSuccesses counts how many pods were scheduled.
PodScheduleSuccesses = scheduleAttempts . With ( prometheus . Labels { "result" : "scheduled" } )
// PodScheduleFailures counts how many pods could not be scheduled.
PodScheduleFailures = scheduleAttempts . With ( prometheus . Labels { "result" : "unschedulable" } )
// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
PodScheduleErrors = scheduleAttempts . With ( prometheus . Labels { "result" : "error" } )
2018-06-07 11:20:26 +00:00
SchedulingLatency = prometheus . NewSummaryVec (
prometheus . SummaryOpts {
Subsystem : SchedulerSubsystem ,
Name : SchedulingLatencyName ,
Help : "Scheduling latency in seconds split by sub-parts of the scheduling operation" ,
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge : 5 * time . Hour ,
} ,
[ ] string { OperationLabel } ,
)
2019-02-22 13:40:13 +00:00
DeprecatedSchedulingLatency = prometheus . NewSummaryVec (
prometheus . SummaryOpts {
Subsystem : SchedulerSubsystem ,
Name : DeprecatedSchedulingLatencyName ,
Help : "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation" ,
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge : 5 * time . Hour ,
} ,
[ ] string { OperationLabel } ,
)
2018-06-07 10:52:40 +00:00
E2eSchedulingLatency = prometheus . NewHistogram (
2018-12-26 08:31:29 +00:00
prometheus . HistogramOpts {
Subsystem : SchedulerSubsystem ,
2019-02-22 13:40:13 +00:00
Name : "e2e_scheduling_duration_seconds" ,
2018-12-26 08:31:29 +00:00
Help : "E2e scheduling latency in seconds (scheduling algorithm + binding)" ,
Buckets : prometheus . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
} ,
)
DeprecatedE2eSchedulingLatency = prometheus . NewHistogram (
2018-06-07 10:52:40 +00:00
prometheus . HistogramOpts {
2018-06-07 11:20:26 +00:00
Subsystem : SchedulerSubsystem ,
2018-06-07 10:52:40 +00:00
Name : "e2e_scheduling_latency_microseconds" ,
2019-01-08 05:07:16 +00:00
Help : "(Deprecated) E2e scheduling latency in microseconds (scheduling algorithm + binding)" ,
2018-06-07 10:52:40 +00:00
Buckets : prometheus . ExponentialBuckets ( 1000 , 2 , 15 ) ,
} ,
)
SchedulingAlgorithmLatency = prometheus . NewHistogram (
2018-12-26 08:31:29 +00:00
prometheus . HistogramOpts {
Subsystem : SchedulerSubsystem ,
2019-02-22 13:40:13 +00:00
Name : "scheduling_algorithm_duration_seconds" ,
2018-12-26 08:31:29 +00:00
Help : "Scheduling algorithm latency in seconds" ,
Buckets : prometheus . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
} ,
)
DeprecatedSchedulingAlgorithmLatency = prometheus . NewHistogram (
2018-06-07 10:52:40 +00:00
prometheus . HistogramOpts {
2018-06-07 11:20:26 +00:00
Subsystem : SchedulerSubsystem ,
2018-06-07 10:52:40 +00:00
Name : "scheduling_algorithm_latency_microseconds" ,
2019-01-08 05:07:16 +00:00
Help : "(Deprecated) Scheduling algorithm latency in microseconds" ,
2018-06-07 10:52:40 +00:00
Buckets : prometheus . ExponentialBuckets ( 1000 , 2 , 15 ) ,
2015-04-02 17:24:21 +00:00
} ,
)
2018-01-09 05:12:07 +00:00
SchedulingAlgorithmPredicateEvaluationDuration = prometheus . NewHistogram (
2018-12-26 08:31:29 +00:00
prometheus . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "scheduling_algorithm_predicate_evaluation_seconds" ,
Help : "Scheduling algorithm predicate evaluation duration in seconds" ,
Buckets : prometheus . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
} ,
)
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration = prometheus . NewHistogram (
2018-01-09 05:12:07 +00:00
prometheus . HistogramOpts {
2018-06-07 11:20:26 +00:00
Subsystem : SchedulerSubsystem ,
2018-01-09 05:12:07 +00:00
Name : "scheduling_algorithm_predicate_evaluation" ,
2019-01-08 05:07:16 +00:00
Help : "(Deprecated) Scheduling algorithm predicate evaluation duration in microseconds" ,
2018-01-09 05:12:07 +00:00
Buckets : prometheus . ExponentialBuckets ( 1000 , 2 , 15 ) ,
} ,
)
SchedulingAlgorithmPriorityEvaluationDuration = prometheus . NewHistogram (
2018-12-26 08:31:29 +00:00
prometheus . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "scheduling_algorithm_priority_evaluation_seconds" ,
Help : "Scheduling algorithm priority evaluation duration in seconds" ,
Buckets : prometheus . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
} ,
)
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration = prometheus . NewHistogram (
2018-01-09 05:12:07 +00:00
prometheus . HistogramOpts {
2018-06-07 11:20:26 +00:00
Subsystem : SchedulerSubsystem ,
2018-01-09 05:12:07 +00:00
Name : "scheduling_algorithm_priority_evaluation" ,
2019-01-08 05:07:16 +00:00
Help : "(Deprecated) Scheduling algorithm priority evaluation duration in microseconds" ,
2018-01-09 05:12:07 +00:00
Buckets : prometheus . ExponentialBuckets ( 1000 , 2 , 15 ) ,
} ,
)
2018-01-13 04:51:06 +00:00
SchedulingAlgorithmPremptionEvaluationDuration = prometheus . NewHistogram (
2018-12-26 08:31:29 +00:00
prometheus . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "scheduling_algorithm_preemption_evaluation_seconds" ,
Help : "Scheduling algorithm preemption evaluation duration in seconds" ,
Buckets : prometheus . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
} ,
)
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = prometheus . NewHistogram (
2018-01-13 04:51:06 +00:00
prometheus . HistogramOpts {
2018-06-07 11:20:26 +00:00
Subsystem : SchedulerSubsystem ,
2018-01-13 04:51:06 +00:00
Name : "scheduling_algorithm_preemption_evaluation" ,
2019-01-08 05:07:16 +00:00
Help : "(Deprecated) Scheduling algorithm preemption evaluation duration in microseconds" ,
2018-01-13 04:51:06 +00:00
Buckets : prometheus . ExponentialBuckets ( 1000 , 2 , 15 ) ,
} ,
)
2018-06-07 10:52:40 +00:00
BindingLatency = prometheus . NewHistogram (
2018-12-26 08:31:29 +00:00
prometheus . HistogramOpts {
Subsystem : SchedulerSubsystem ,
2019-02-22 13:40:13 +00:00
Name : "binding_duration_seconds" ,
2018-12-26 08:31:29 +00:00
Help : "Binding latency in seconds" ,
Buckets : prometheus . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
} ,
)
DeprecatedBindingLatency = prometheus . NewHistogram (
2018-06-07 10:52:40 +00:00
prometheus . HistogramOpts {
2018-06-07 11:20:26 +00:00
Subsystem : SchedulerSubsystem ,
2018-06-07 10:52:40 +00:00
Name : "binding_latency_microseconds" ,
2019-01-08 05:07:16 +00:00
Help : "(Deprecated) Binding latency in microseconds" ,
2018-06-07 10:52:40 +00:00
Buckets : prometheus . ExponentialBuckets ( 1000 , 2 , 15 ) ,
} ,
)
2018-01-13 04:51:06 +00:00
PreemptionVictims = prometheus . NewGauge (
prometheus . GaugeOpts {
2018-06-07 11:20:26 +00:00
Subsystem : SchedulerSubsystem ,
2018-01-13 04:51:06 +00:00
Name : "pod_preemption_victims" ,
Help : "Number of selected preemption victims" ,
} )
PreemptionAttempts = prometheus . NewCounter (
prometheus . CounterOpts {
2018-06-07 11:20:26 +00:00
Subsystem : SchedulerSubsystem ,
2018-01-13 04:51:06 +00:00
Name : "total_preemption_attempts" ,
Help : "Total preemption attempts in the cluster till now" ,
} )
2018-04-27 18:20:39 +00:00
2018-06-07 11:20:26 +00:00
metricsList = [ ] prometheus . Collector {
2018-05-30 18:59:55 +00:00
scheduleAttempts ,
2018-06-07 11:20:26 +00:00
SchedulingLatency ,
2019-02-22 13:40:13 +00:00
DeprecatedSchedulingLatency ,
2018-06-07 11:20:26 +00:00
E2eSchedulingLatency ,
2018-12-26 08:31:29 +00:00
DeprecatedE2eSchedulingLatency ,
2018-06-07 11:20:26 +00:00
SchedulingAlgorithmLatency ,
2018-12-26 08:31:29 +00:00
DeprecatedSchedulingAlgorithmLatency ,
2018-06-07 11:20:26 +00:00
BindingLatency ,
2018-12-26 08:31:29 +00:00
DeprecatedBindingLatency ,
2018-06-07 11:20:26 +00:00
SchedulingAlgorithmPredicateEvaluationDuration ,
2018-12-26 08:31:29 +00:00
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration ,
2018-06-07 11:20:26 +00:00
SchedulingAlgorithmPriorityEvaluationDuration ,
2018-12-26 08:31:29 +00:00
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration ,
2018-06-07 11:20:26 +00:00
SchedulingAlgorithmPremptionEvaluationDuration ,
2018-12-26 08:31:29 +00:00
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration ,
2018-06-07 11:20:26 +00:00
PreemptionVictims ,
PreemptionAttempts ,
}
2015-04-02 17:24:21 +00:00
)
var registerMetrics sync . Once
// Register all metrics.
func Register ( ) {
2018-06-07 10:52:40 +00:00
// Register the metrics.
2015-04-02 17:24:21 +00:00
registerMetrics . Do ( func ( ) {
2018-06-07 11:20:26 +00:00
for _ , metric := range metricsList {
prometheus . MustRegister ( metric )
}
2018-06-15 02:59:36 +00:00
persistentvolume . RegisterVolumeSchedulingMetrics ( )
2018-06-07 10:52:40 +00:00
} )
2018-05-25 13:58:43 +00:00
}
2018-06-07 11:20:26 +00:00
// Reset resets metrics
func Reset ( ) {
SchedulingLatency . Reset ( )
2019-02-22 13:40:13 +00:00
DeprecatedSchedulingLatency . Reset ( )
2018-06-07 11:20:26 +00:00
}
2018-02-08 06:42:19 +00:00
// SinceInMicroseconds gets the time since the specified start in microseconds.
// The elapsed time is truncated to whole microseconds before it is converted
// to float64, so the result never has a fractional part.
func SinceInMicroseconds(start time.Time) float64 {
	// Dividing one Duration by another is integer division, which yields the
	// number of whole microseconds elapsed.
	return float64(time.Since(start) / time.Microsecond)
}
2018-06-07 11:20:26 +00:00
// SinceInSeconds reports how many seconds have elapsed since start. The result
// is a float64, so fractions of a second are preserved.
func SinceInSeconds(start time.Time) float64 {
	elapsed := time.Since(start)
	return elapsed.Seconds()
}