Add basic latency metrics to scheduler.

2015-04-02 19:24:21 +02:00 · 2015-04-02 19:24:21 +02:00 · 25f95b0672
parent c25a1be03a
commit 25f95b0672
3 changed files with 85 additions and 5 deletions
--- a/plugin/cmd/kube-scheduler/app/server.go
+++ b/plugin/cmd/kube-scheduler/app/server.go
@ -38,6 +38,7 @@ import (
 	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/factory"

 	"github.com/golang/glog"
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/spf13/pflag"
 )

@ -80,11 +81,11 @@ func (s *SchedulerServer) Run(_ []string) error {

 	go func() {
 		if s.EnableProfiling {
-			mux := http.NewServeMux()
-			mux.HandleFunc("/debug/pprof/", pprof.Index)
-			mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
-			mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
+			http.HandleFunc("/debug/pprof/", pprof.Index)
+			http.HandleFunc("/debug/pprof/profile", pprof.Profile)
+			http.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
 		}
+		http.Handle("/metrics", prometheus.Handler())
 		http.ListenAndServe(net.JoinHostPort(s.Address.String(), strconv.Itoa(s.Port)), nil)
 	}()

--- a/plugin/pkg/scheduler/metrics/metrics.go
+++ b/plugin/pkg/scheduler/metrics/metrics.go
@ -0,0 +1,67 @@
+/*
+Copyright 2015 Google Inc. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"sync"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+const schedulerSubsystem = "scheduler"
+
+var ( // Latency summaries recorded by the scheduler; observed values are in microseconds.
+	E2eSchedulingLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "e2e_scheduling_latency_microseconds",
+			Help:      "E2e scheduling latency (scheduling algorithm + binding)",
+		},
+	)
+	SchedulingAlgorithmLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "scheduling_algorithm_latency_microseconds",
+			Help:      "Scheduling algorithm latency",
+		},
+	)
+	BindingLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "binding_latency_microseconds",
+			Help:      "Binding latency",
+		},
+	)
+)
+
+var registerMetrics sync.Once // ensures the registration in Register runs at most once
+
+// Register registers all scheduler latency metrics with Prometheus; calls after the first are no-ops.
+func Register() {
+	// sync.Once guards the MustRegister calls so repeated Register calls do not re-register the same metrics.
+	registerMetrics.Do(func() {
+		prometheus.MustRegister(E2eSchedulingLatency)
+		prometheus.MustRegister(SchedulingAlgorithmLatency)
+		prometheus.MustRegister(BindingLatency)
+	})
+}
+
+// SinceInMicroseconds returns the time elapsed since start as a whole number of microseconds.
+func SinceInMicroseconds(start time.Time) float64 {
+	return float64(time.Since(start) / time.Microsecond)
+}
--- a/plugin/pkg/scheduler/scheduler.go
+++ b/plugin/pkg/scheduler/scheduler.go
@ -17,11 +17,14 @@ limitations under the License.
 package scheduler

 import (
+	"time"
+
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
 	// TODO: move everything from pkg/scheduler into this package. Remove references from registry.
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/scheduler"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+	"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/metrics"

 	"github.com/golang/glog"
 )
@ -89,6 +92,7 @@ func New(c *Config) *Scheduler {
 	s := &Scheduler{
 		config: c,
 	}
+	metrics.Register()
 	return s
 }

@ -100,7 +104,12 @@ func (s *Scheduler) Run() {
 func (s *Scheduler) scheduleOne() {
 	pod := s.config.NextPod()
 	glog.V(3).Infof("Attempting to schedule: %v", pod)
+	start := time.Now()
+	defer func() {
+		metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
+	}()
 	dest, err := s.config.Algorithm.Schedule(*pod, s.config.MinionLister)
+	metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
 	if err != nil {
 		glog.V(1).Infof("Failed to schedule: %v", pod)
 		s.config.Recorder.Eventf(pod, "failedScheduling", "Error scheduling: %v", err)
@ -118,7 +127,10 @@ func (s *Scheduler) scheduleOne() {
 	// We want to add the pod to the model iff the bind succeeds, but we don't want to race
 	// with any deletions, which happen asyncronously.
 	s.config.Modeler.LockedAction(func() {
-		if err := s.config.Binder.Bind(b); err != nil {
+		bindingStart := time.Now()
+		err := s.config.Binder.Bind(b)
+		metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
+		if err != nil {
 			glog.V(1).Infof("Failed to bind pod: %v", err)
 			s.config.Recorder.Eventf(pod, "failedScheduling", "Binding rejected: %v", err)
 			s.config.Error(pod, err)