diff --git a/plugin/cmd/kube-scheduler/app/server.go b/plugin/cmd/kube-scheduler/app/server.go index 98f4f774fd..42646aa182 100644 --- a/plugin/cmd/kube-scheduler/app/server.go +++ b/plugin/cmd/kube-scheduler/app/server.go @@ -38,6 +38,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/factory" "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" "github.com/spf13/pflag" ) @@ -80,11 +81,11 @@ func (s *SchedulerServer) Run(_ []string) error { go func() { if s.EnableProfiling { - mux := http.NewServeMux() - mux.HandleFunc("/debug/pprof/", pprof.Index) - mux.HandleFunc("/debug/pprof/profile", pprof.Profile) - mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + http.HandleFunc("/debug/pprof/", pprof.Index) + http.HandleFunc("/debug/pprof/profile", pprof.Profile) + http.HandleFunc("/debug/pprof/symbol", pprof.Symbol) } + http.Handle("/metrics", prometheus.Handler()) http.ListenAndServe(net.JoinHostPort(s.Address.String(), strconv.Itoa(s.Port)), nil) }() diff --git a/plugin/pkg/scheduler/metrics/metrics.go b/plugin/pkg/scheduler/metrics/metrics.go new file mode 100644 index 0000000000..75c96d2936 --- /dev/null +++ b/plugin/pkg/scheduler/metrics/metrics.go @@ -0,0 +1,67 @@ +/* +Copyright 2015 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package metrics
+
+import (
+	"sync"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+const schedulerSubsystem = "scheduler"
+
+var (
+	E2eSchedulingLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "e2e_scheduling_latency_microseconds",
+			Help:      "E2e scheduling latency (scheduling algorithm + binding)",
+		},
+	)
+	SchedulingAlgorithmLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "scheduling_algorithm_latency_microseconds",
+			Help:      "Scheduling algorithm latency",
+		},
+	)
+	BindingLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "binding_latency_microseconds",
+			Help:      "Binding latency",
+		},
+	)
+)
+
+var registerMetrics sync.Once
+
+// Register registers all scheduler metrics with the default Prometheus registry.
+func Register() {
+	// MustRegister panics on duplicate registration, so run it at most once.
+	registerMetrics.Do(func() {
+		prometheus.MustRegister(E2eSchedulingLatency)
+		prometheus.MustRegister(SchedulingAlgorithmLatency)
+		prometheus.MustRegister(BindingLatency)
+	})
+}
+
+// SinceInMicroseconds returns the time elapsed since start in whole microseconds.
+func SinceInMicroseconds(start time.Time) float64 {
+	return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
+}
diff --git a/plugin/pkg/scheduler/scheduler.go b/plugin/pkg/scheduler/scheduler.go
index 3dcedb8e1d..3444b3e146 100644
--- a/plugin/pkg/scheduler/scheduler.go
+++ b/plugin/pkg/scheduler/scheduler.go
@@ -17,11 +17,14 @@ limitations under the License.
 package scheduler
 
 import (
+	"time"
+
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
 	// TODO: move everything from pkg/scheduler into this package. Remove references from registry.
"github.com/GoogleCloudPlatform/kubernetes/pkg/scheduler" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" + "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/metrics" "github.com/golang/glog" ) @@ -89,6 +92,7 @@ func New(c *Config) *Scheduler { s := &Scheduler{ config: c, } + metrics.Register() return s } @@ -100,7 +104,12 @@ func (s *Scheduler) Run() { func (s *Scheduler) scheduleOne() { pod := s.config.NextPod() glog.V(3).Infof("Attempting to schedule: %v", pod) + start := time.Now() + defer func() { + metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start)) + }() dest, err := s.config.Algorithm.Schedule(*pod, s.config.MinionLister) + metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start)) if err != nil { glog.V(1).Infof("Failed to schedule: %v", pod) s.config.Recorder.Eventf(pod, "failedScheduling", "Error scheduling: %v", err) @@ -118,7 +127,10 @@ func (s *Scheduler) scheduleOne() { // We want to add the pod to the model iff the bind succeeds, but we don't want to race // with any deletions, which happen asyncronously. s.config.Modeler.LockedAction(func() { - if err := s.config.Binder.Bind(b); err != nil { + bindingStart := time.Now() + err := s.config.Binder.Bind(b) + metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart)) + if err != nil { glog.V(1).Infof("Failed to bind pod: %v", err) s.config.Recorder.Eventf(pod, "failedScheduling", "Binding rejected: %v", err) s.config.Error(pod, err)