2019-01-12 04:58:27 +00:00
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
2019-09-27 21:51:53 +00:00
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
2019-12-12 01:27:03 +00:00
volumeschedulingmetrics "k8s.io/kubernetes/pkg/controller/volume/scheduling/metrics"
2019-01-12 04:58:27 +00:00
)
const (
	// SchedulerSubsystem is the subsystem name used by scheduler metrics.
	SchedulerSubsystem = "scheduler"

	// The constants below are possible values for the operation label.
	// Each one names a substep of e2e scheduling.

	// PrioritizingExtender is the operation label value for the
	// prioritizing extender substep.
	PrioritizingExtender = "prioritizing_extender"
	// Binding is the operation label value for the binding substep.
	Binding = "binding"
)
// Unless a metric's own comment says otherwise, latency histograms use 1ms as the smallest bucket.
var (
2019-09-27 21:51:53 +00:00
scheduleAttempts = metrics . NewCounterVec (
& metrics . CounterOpts {
Subsystem : SchedulerSubsystem ,
Name : "schedule_attempts_total" ,
Help : "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem." ,
StabilityLevel : metrics . ALPHA ,
2020-08-10 17:43:49 +00:00
} , [ ] string { "result" , "profile" } )
2020-12-01 01:06:26 +00:00
2020-08-10 17:43:49 +00:00
e2eSchedulingLatency = metrics . NewHistogramVec (
2019-09-27 21:51:53 +00:00
& metrics . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "e2e_scheduling_duration_seconds" ,
Help : "E2e scheduling latency in seconds (scheduling algorithm + binding)" ,
2019-12-12 01:27:03 +00:00
Buckets : metrics . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
2019-09-27 21:51:53 +00:00
StabilityLevel : metrics . ALPHA ,
2020-08-10 17:43:49 +00:00
} , [ ] string { "result" , "profile" } )
2019-09-27 21:51:53 +00:00
SchedulingAlgorithmLatency = metrics . NewHistogram (
& metrics . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "scheduling_algorithm_duration_seconds" ,
Help : "Scheduling algorithm latency in seconds" ,
2019-12-12 01:27:03 +00:00
Buckets : metrics . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
2019-09-27 21:51:53 +00:00
StabilityLevel : metrics . ALPHA ,
2019-04-07 17:07:55 +00:00
} ,
)
2019-12-12 01:27:03 +00:00
PreemptionVictims = metrics . NewHistogram (
& metrics . HistogramOpts {
Subsystem : SchedulerSubsystem ,
2020-08-10 17:43:49 +00:00
Name : "preemption_victims" ,
2019-12-12 01:27:03 +00:00
Help : "Number of selected preemption victims" ,
// we think #victims>50 is pretty rare, therefore [50, +Inf) is considered a single bucket.
Buckets : metrics . LinearBuckets ( 5 , 5 , 10 ) ,
2019-09-27 21:51:53 +00:00
StabilityLevel : metrics . ALPHA ,
2019-01-12 04:58:27 +00:00
} )
2019-09-27 21:51:53 +00:00
PreemptionAttempts = metrics . NewCounter (
& metrics . CounterOpts {
Subsystem : SchedulerSubsystem ,
2020-08-10 17:43:49 +00:00
Name : "preemption_attempts_total" ,
2019-09-27 21:51:53 +00:00
Help : "Total preemption attempts in the cluster till now" ,
StabilityLevel : metrics . ALPHA ,
2019-01-12 04:58:27 +00:00
} )
2019-09-27 21:51:53 +00:00
pendingPods = metrics . NewGaugeVec (
& metrics . GaugeOpts {
Subsystem : SchedulerSubsystem ,
Name : "pending_pods" ,
Help : "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ." ,
StabilityLevel : metrics . ALPHA ,
2019-08-30 18:33:25 +00:00
} , [ ] string { "queue" } )
2019-12-12 01:27:03 +00:00
SchedulerGoroutines = metrics . NewGaugeVec (
& metrics . GaugeOpts {
Subsystem : SchedulerSubsystem ,
Name : "scheduler_goroutines" ,
Help : "Number of running goroutines split by the work they do such as binding." ,
StabilityLevel : metrics . ALPHA ,
} , [ ] string { "work" } )
2020-08-10 17:43:49 +00:00
PodSchedulingDuration = metrics . NewHistogramVec (
2019-12-12 01:27:03 +00:00
& metrics . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "pod_scheduling_duration_seconds" ,
Help : "E2e latency for a pod being scheduled which may include multiple scheduling attempts." ,
2020-08-10 17:43:49 +00:00
// Start with 10ms with the last bucket being [~88m, Inf).
Buckets : metrics . ExponentialBuckets ( 0.01 , 2 , 20 ) ,
2019-12-12 01:27:03 +00:00
StabilityLevel : metrics . ALPHA ,
2020-08-10 17:43:49 +00:00
} ,
[ ] string { "attempts" } )
2019-12-12 01:27:03 +00:00
PodSchedulingAttempts = metrics . NewHistogram (
& metrics . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "pod_scheduling_attempts" ,
Help : "Number of attempts to successfully schedule a pod." ,
Buckets : metrics . ExponentialBuckets ( 1 , 2 , 5 ) ,
StabilityLevel : metrics . ALPHA ,
} )
FrameworkExtensionPointDuration = metrics . NewHistogramVec (
& metrics . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "framework_extension_point_duration_seconds" ,
Help : "Latency for running all plugins of a specific extension point." ,
// Start with 0.1ms with the last bucket being [~200ms, Inf)
Buckets : metrics . ExponentialBuckets ( 0.0001 , 2 , 12 ) ,
StabilityLevel : metrics . ALPHA ,
} ,
2020-08-10 17:43:49 +00:00
[ ] string { "extension_point" , "status" , "profile" } )
2019-12-12 01:27:03 +00:00
PluginExecutionDuration = metrics . NewHistogramVec (
& metrics . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "plugin_execution_duration_seconds" ,
Help : "Duration for running a plugin at a specific extension point." ,
// Start with 0.01ms with the last bucket being [~22ms, Inf). We use a small factor (1.5)
// so that we have better granularity since plugin latency is very sensitive.
Buckets : metrics . ExponentialBuckets ( 0.00001 , 1.5 , 20 ) ,
StabilityLevel : metrics . ALPHA ,
} ,
[ ] string { "plugin" , "extension_point" , "status" } )
SchedulerQueueIncomingPods = metrics . NewCounterVec (
& metrics . CounterOpts {
Subsystem : SchedulerSubsystem ,
Name : "queue_incoming_pods_total" ,
Help : "Number of pods added to scheduling queues by event and queue type." ,
StabilityLevel : metrics . ALPHA ,
} , [ ] string { "queue" , "event" } )
PermitWaitDuration = metrics . NewHistogramVec (
& metrics . HistogramOpts {
Subsystem : SchedulerSubsystem ,
Name : "permit_wait_duration_seconds" ,
2020-03-26 21:07:15 +00:00
Help : "Duration of waiting on permit." ,
2019-12-12 01:27:03 +00:00
Buckets : metrics . ExponentialBuckets ( 0.001 , 2 , 15 ) ,
StabilityLevel : metrics . ALPHA ,
} ,
[ ] string { "result" } )
CacheSize = metrics . NewGaugeVec (
& metrics . GaugeOpts {
Subsystem : SchedulerSubsystem ,
Name : "scheduler_cache_size" ,
Help : "Number of nodes, pods, and assumed (bound) pods in the scheduler cache." ,
StabilityLevel : metrics . ALPHA ,
} , [ ] string { "type" } )
2019-08-30 18:33:25 +00:00
2019-09-27 21:51:53 +00:00
metricsList = [ ] metrics . Registerable {
2019-01-12 04:58:27 +00:00
scheduleAttempts ,
2020-08-10 17:43:49 +00:00
e2eSchedulingLatency ,
2019-01-12 04:58:27 +00:00
SchedulingAlgorithmLatency ,
PreemptionVictims ,
PreemptionAttempts ,
2019-08-30 18:33:25 +00:00
pendingPods ,
2019-12-12 01:27:03 +00:00
PodSchedulingDuration ,
PodSchedulingAttempts ,
FrameworkExtensionPointDuration ,
PluginExecutionDuration ,
SchedulerQueueIncomingPods ,
SchedulerGoroutines ,
PermitWaitDuration ,
CacheSize ,
2019-01-12 04:58:27 +00:00
}
)
var registerMetrics sync . Once
// Register all metrics.
func Register ( ) {
// Register the metrics.
registerMetrics . Do ( func ( ) {
2020-08-10 17:43:49 +00:00
RegisterMetrics ( metricsList ... )
2019-12-12 01:27:03 +00:00
volumeschedulingmetrics . RegisterVolumeSchedulingMetrics ( )
2019-01-12 04:58:27 +00:00
} )
}
2020-08-10 17:43:49 +00:00
// RegisterMetrics passes each of the given metrics, in order, to
// legacyregistry.MustRegister.
// It is exported so that out-of-tree plugins can register their own custom
// metrics.
func RegisterMetrics(extraMetrics ...metrics.Registerable) {
	for i := range extraMetrics {
		legacyregistry.MustRegister(extraMetrics[i])
	}
}
2019-12-12 01:27:03 +00:00
// GetGather returns legacyregistry.DefaultGatherer. It is used by test cases
// outside the current package.
func GetGather() metrics.Gatherer {
	var gatherer metrics.Gatherer = legacyregistry.DefaultGatherer
	return gatherer
}
2019-09-27 21:51:53 +00:00
// ActivePods returns the pending pods metrics with the label active
func ActivePods ( ) metrics . GaugeMetric {
2019-12-12 01:27:03 +00:00
return pendingPods . With ( metrics . Labels { "queue" : "active" } )
2019-09-27 21:51:53 +00:00
}
// BackoffPods returns the pending pods metrics with the label backoff
func BackoffPods ( ) metrics . GaugeMetric {
2019-12-12 01:27:03 +00:00
return pendingPods . With ( metrics . Labels { "queue" : "backoff" } )
2019-09-27 21:51:53 +00:00
}
// UnschedulablePods returns the pending pods metrics with the label unschedulable
func UnschedulablePods ( ) metrics . GaugeMetric {
2019-12-12 01:27:03 +00:00
return pendingPods . With ( metrics . Labels { "queue" : "unschedulable" } )
2019-09-27 21:51:53 +00:00
}
2019-01-12 04:58:27 +00:00
// SinceInSeconds reports how much time has passed since start, expressed as
// a floating-point number of seconds.
func SinceInSeconds(start time.Time) float64 {
	elapsed := time.Since(start)
	return elapsed.Seconds()
}