From e32910a5441a30b25dbe04aaafb48e154b2e3953 Mon Sep 17 00:00:00 2001 From: Krzysztof Siedlecki Date: Thu, 7 Jun 2018 13:20:26 +0200 Subject: [PATCH] Readding summary metrics --- cmd/kube-scheduler/app/BUILD | 1 + cmd/kube-scheduler/app/server.go | 21 +++++++-- pkg/scheduler/metrics/metrics.go | 75 ++++++++++++++++++++++------- pkg/scheduler/scheduler.go | 2 + test/e2e/framework/BUILD | 1 + test/e2e/framework/metrics_util.go | 76 +++++++++++++++++++----------- test/e2e/scalability/density.go | 3 +- 7 files changed, 128 insertions(+), 51 deletions(-) diff --git a/cmd/kube-scheduler/app/BUILD b/cmd/kube-scheduler/app/BUILD index d6de2aaef8..a98dc30914 100644 --- a/cmd/kube-scheduler/app/BUILD +++ b/cmd/kube-scheduler/app/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/scheduler/api:go_default_library", "//pkg/scheduler/api/latest:go_default_library", "//pkg/scheduler/factory:go_default_library", + "//pkg/scheduler/metrics:go_default_library", "//pkg/util/configz:go_default_library", "//pkg/util/flag:go_default_library", "//pkg/version:go_default_library", diff --git a/cmd/kube-scheduler/app/server.go b/cmd/kube-scheduler/app/server.go index 11ace1a983..ee776c89b6 100644 --- a/cmd/kube-scheduler/app/server.go +++ b/cmd/kube-scheduler/app/server.go @@ -19,6 +19,7 @@ package app import ( "fmt" + "io" "io/ioutil" "net/http" "os" @@ -51,6 +52,7 @@ import ( schedulerapi "k8s.io/kubernetes/pkg/scheduler/api" latestschedulerapi "k8s.io/kubernetes/pkg/scheduler/api/latest" "k8s.io/kubernetes/pkg/scheduler/factory" + "k8s.io/kubernetes/pkg/scheduler/metrics" "k8s.io/kubernetes/pkg/util/configz" utilflag "k8s.io/kubernetes/pkg/util/flag" "k8s.io/kubernetes/pkg/version" @@ -221,11 +223,23 @@ func buildHandlerChain(handler http.Handler, authn authenticator.Request, authz return handler } +func installMetricHandler(pathRecorderMux *mux.PathRecorderMux) { + configz.InstallHandler(pathRecorderMux) + defaultMetricsHandler := prometheus.Handler().ServeHTTP + pathRecorderMux.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) { + if req.Method == "DELETE" { + metrics.Reset() + io.WriteString(w, "metrics reset\n") + return + } + defaultMetricsHandler(w, req) + }) +} + // newMetricsHandler builds a metrics server from the config. func newMetricsHandler(config *componentconfig.KubeSchedulerConfiguration) http.Handler { pathRecorderMux := mux.NewPathRecorderMux("kube-scheduler") - configz.InstallHandler(pathRecorderMux) - pathRecorderMux.Handle("/metrics", prometheus.Handler()) + installMetricHandler(pathRecorderMux) if config.EnableProfiling { routes.Profiling{}.Install(pathRecorderMux) if config.EnableContentionProfiling { @@ -242,8 +256,7 @@ func newHealthzHandler(config *componentconfig.KubeSchedulerConfiguration, separ pathRecorderMux := mux.NewPathRecorderMux("kube-scheduler") healthz.InstallHandler(pathRecorderMux) if !separateMetrics { - configz.InstallHandler(pathRecorderMux) - pathRecorderMux.Handle("/metrics", prometheus.Handler()) + installMetricHandler(pathRecorderMux) } if config.EnableProfiling { routes.Profiling{}.Install(pathRecorderMux) diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index 4836ac4b93..2fbba9980c 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -23,13 +23,37 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const schedulerSubsystem = "scheduler" +const ( + // SchedulerSubsystem - subsystem name used by scheduler + SchedulerSubsystem = "scheduler" + // SchedulingLatencyName - scheduler latency metric name + SchedulingLatencyName = "scheduling_latency_seconds" + + // OperationLabel - operation label name + OperationLabel = "operation" + // Binding - binding operation label value + Binding = "binding" + // SelectingNode - selecting node operation label value + SelectingNode = "selecting_node" + // E2eScheduling - e2e scheduling operation label value +) // All the histogram based metrics have 1ms as size for the smallest bucket. var ( + SchedulingLatency = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Subsystem: SchedulerSubsystem, + Name: SchedulingLatencyName, + Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation", + // Make the sliding window of 5h. + // TODO: The value for this should be based on some SLI definition (long term). + MaxAge: 5 * time.Hour, + }, + []string{OperationLabel}, + ) E2eSchedulingLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ - Subsystem: schedulerSubsystem, + Subsystem: SchedulerSubsystem, Name: "e2e_scheduling_latency_microseconds", Help: "E2e scheduling latency (scheduling algorithm + binding)", Buckets: prometheus.ExponentialBuckets(1000, 2, 15), @@ -37,7 +61,7 @@ var ( ) SchedulingAlgorithmLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ - Subsystem: schedulerSubsystem, + Subsystem: SchedulerSubsystem, Name: "scheduling_algorithm_latency_microseconds", Help: "Scheduling algorithm latency", Buckets: prometheus.ExponentialBuckets(1000, 2, 15), @@ -45,7 +69,7 @@ var ( ) SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram( prometheus.HistogramOpts{ - Subsystem: schedulerSubsystem, + Subsystem: SchedulerSubsystem, Name: "scheduling_algorithm_predicate_evaluation", Help: "Scheduling algorithm predicate evaluation duration", Buckets: prometheus.ExponentialBuckets(1000, 2, 15), @@ -53,7 +77,7 @@ var ( ) SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram( prometheus.HistogramOpts{ - Subsystem: schedulerSubsystem, + Subsystem: SchedulerSubsystem, Name: "scheduling_algorithm_priority_evaluation", Help: "Scheduling algorithm priority evaluation duration", Buckets: prometheus.ExponentialBuckets(1000, 2, 15), @@ -61,7 +85,7 @@ var ( ) SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram( prometheus.HistogramOpts{ - Subsystem: schedulerSubsystem, + Subsystem: SchedulerSubsystem, Name: "scheduling_algorithm_preemption_evaluation", Help: "Scheduling algorithm preemption evaluation duration", Buckets: prometheus.ExponentialBuckets(1000, 2, 15), @@ -69,7 +93,7 @@ var ( ) BindingLatency = prometheus.NewHistogram( prometheus.HistogramOpts{ - Subsystem: schedulerSubsystem, + Subsystem: SchedulerSubsystem, Name: "binding_latency_microseconds", Help: "Binding latency", Buckets: prometheus.ExponentialBuckets(1000, 2, 15), @@ -77,16 +101,27 @@ var ( ) PreemptionVictims = prometheus.NewGauge( prometheus.GaugeOpts{ - Subsystem: schedulerSubsystem, + Subsystem: SchedulerSubsystem, Name: "pod_preemption_victims", Help: "Number of selected preemption victims", }) PreemptionAttempts = prometheus.NewCounter( prometheus.CounterOpts{ - Subsystem: schedulerSubsystem, + Subsystem: SchedulerSubsystem, Name: "total_preemption_attempts", Help: "Total preemption attempts in the cluster till now", }) + metricsList = []prometheus.Collector{ + SchedulingLatency, + E2eSchedulingLatency, + SchedulingAlgorithmLatency, + BindingLatency, + SchedulingAlgorithmPredicateEvaluationDuration, + SchedulingAlgorithmPriorityEvaluationDuration, + SchedulingAlgorithmPremptionEvaluationDuration, + PreemptionVictims, + PreemptionAttempts, + } ) var registerMetrics sync.Once @@ -95,19 +130,23 @@ var registerMetrics sync.Once func Register() { // Register the metrics. registerMetrics.Do(func() { - prometheus.MustRegister(E2eSchedulingLatency) - prometheus.MustRegister(SchedulingAlgorithmLatency) - prometheus.MustRegister(BindingLatency) - - prometheus.MustRegister(SchedulingAlgorithmPredicateEvaluationDuration) - prometheus.MustRegister(SchedulingAlgorithmPriorityEvaluationDuration) - prometheus.MustRegister(SchedulingAlgorithmPremptionEvaluationDuration) - prometheus.MustRegister(PreemptionVictims) - prometheus.MustRegister(PreemptionAttempts) + for _, metric := range metricsList { + prometheus.MustRegister(metric) + } }) } +// Reset resets metrics +func Reset() { + SchedulingLatency.Reset() +} + // SinceInMicroseconds gets the time since the specified start in microseconds. func SinceInMicroseconds(start time.Time) float64 { return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) } + +// SinceInSeconds gets the time since the specified start in seconds. +func SinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() +} diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index eb8d948af1..bc646548b4 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -430,6 +430,7 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error { } metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart)) + metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart)) sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name) return nil } @@ -462,6 +463,7 @@ func (sched *Scheduler) scheduleOne() { return } metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start)) + metrics.SchedulingLatency.WithLabelValues(metrics.SelectingNode).Observe(metrics.SinceInSeconds(start)) // Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet. // This allows us to keep scheduling without waiting on binding to occur. assumedPod := pod.DeepCopy() diff --git a/test/e2e/framework/BUILD b/test/e2e/framework/BUILD index 022339ae39..fc650ed9fc 100644 --- a/test/e2e/framework/BUILD +++ b/test/e2e/framework/BUILD @@ -72,6 +72,7 @@ go_library( "//pkg/master/ports:go_default_library", "//pkg/scheduler/algorithm/predicates:go_default_library", "//pkg/scheduler/cache:go_default_library", + "//pkg/scheduler/metrics:go_default_library", "//pkg/security/podsecuritypolicy/seccomp:go_default_library", "//pkg/ssh:go_default_library", "//pkg/util/file:go_default_library", diff --git a/test/e2e/framework/metrics_util.go b/test/e2e/framework/metrics_util.go index e8ae03d375..2562dc4720 100644 --- a/test/e2e/framework/metrics_util.go +++ b/test/e2e/framework/metrics_util.go @@ -32,6 +32,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/pkg/master/ports" + schedulermetric "k8s.io/kubernetes/pkg/scheduler/metrics" "k8s.io/kubernetes/pkg/util/system" "k8s.io/kubernetes/test/e2e/framework/metrics" @@ -130,6 +131,8 @@ func (m *MetricsForE2E) SummaryKind() string { return "MetricsForE2E" } +var SchedulingLatencyMetricName = model.LabelValue(schedulermetric.SchedulerSubsystem + "_" + schedulermetric.SchedulingLatencyName) + var InterestingApiServerMetrics = []string{ "apiserver_request_count", "apiserver_request_latencies_summary", @@ -207,13 +210,12 @@ func (l *PodStartupLatency) PrintJSON() string { } type SchedulingMetrics struct { - SchedulingLatency LatencyMetric `json:"schedulingLatency"` - BindingLatency LatencyMetric `json:"bindingLatency"` - E2ELatency LatencyMetric `json:"e2eLatency"` - ThroughputAverage float64 `json:"throughputAverage"` - ThroughputPerc50 float64 `json:"throughputPerc50"` - ThroughputPerc90 float64 `json:"throughputPerc90"` - ThroughputPerc99 float64 `json:"throughputPerc99"` + SelectingNodeLatency LatencyMetric `json:"selectingNodeLatency"` + BindingLatency LatencyMetric `json:"bindingLatency"` + ThroughputAverage float64 `json:"throughputAverage"` + ThroughputPerc50 float64 `json:"throughputPerc50"` + ThroughputPerc90 float64 `json:"throughputPerc90"` + ThroughputPerc99 float64 `json:"throughputPerc99"` } func (l *SchedulingMetrics) SummaryKind() string { @@ -442,27 +444,29 @@ func getMetrics(c clientset.Interface) (string, error) { return string(body), nil } -// Retrieves scheduler latency metrics. -func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) { - result := SchedulingMetrics{} +// Sends REST request to kube scheduler metrics +func sendRestRequestToScheduler(c clientset.Interface, op string) (string, error) { + opUpper := strings.ToUpper(op) + if opUpper != "GET" && opUpper != "DELETE" { + return "", fmt.Errorf("Unknown REST request") + } - // Check if master Node is registered nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{}) ExpectNoError(err) - var data string var masterRegistered = false for _, node := range nodes.Items { if system.IsMasterNode(node.Name) { masterRegistered = true } } + + var responseText string if masterRegistered { ctx, cancel := context.WithTimeout(context.Background(), SingleCallTimeout) defer cancel() - var rawData []byte - rawData, err = c.CoreV1().RESTClient().Get(). + body, err := c.CoreV1().RESTClient().Verb(opUpper). Context(ctx). Namespace(metav1.NamespaceSystem). Resource("pods"). @@ -472,45 +476,55 @@ func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) { Do().Raw() ExpectNoError(err) - data = string(rawData) + responseText = string(body) } else { // If master is not registered fall back to old method of using SSH. if TestContext.Provider == "gke" { Logf("Not grabbing scheduler metrics through master SSH: unsupported for gke") - return nil, nil + return "", nil } - cmd := "curl http://localhost:10251/metrics" + + cmd := "curl -X " + opUpper + " http://localhost:10251/metrics" sshResult, err := SSH(cmd, GetMasterHost()+":22", TestContext.Provider) if err != nil || sshResult.Code != 0 { - return &result, fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err) + return "", fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err) } - data = sshResult.Stdout + responseText = sshResult.Stdout } + return responseText, nil +} + +// Retrieves scheduler latency metrics. +func getSchedulingLatency(c clientset.Interface) (*SchedulingMetrics, error) { + result := SchedulingMetrics{} + data, err := sendRestRequestToScheduler(c, "GET") + samples, err := extractMetricSamples(data) if err != nil { return nil, err } for _, sample := range samples { + if sample.Metric[model.MetricNameLabel] != SchedulingLatencyMetricName { + continue + } + var metric *LatencyMetric = nil - switch sample.Metric[model.MetricNameLabel] { - case "scheduler_scheduling_algorithm_latency_microseconds": - metric = &result.SchedulingLatency - case "scheduler_binding_latency_microseconds": + switch sample.Metric[schedulermetric.OperationLabel] { + case schedulermetric.SelectingNode: + metric = &result.SelectingNodeLatency + case schedulermetric.Binding: metric = &result.BindingLatency - case "scheduler_e2e_scheduling_latency_microseconds": - metric = &result.E2ELatency } if metric == nil { continue } - latency := sample.Value quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64) if err != nil { return nil, err } - setQuantile(metric, quantile, time.Duration(int64(latency))*time.Microsecond) + setQuantile(metric, quantile, time.Duration(int64(float64(sample.Value)*float64(time.Second)))) } return &result, nil } @@ -524,6 +538,14 @@ func VerifySchedulerLatency(c clientset.Interface) (*SchedulingMetrics, error) { return latency, nil } +func ResetSchedulerMetrics(c clientset.Interface) error { + responseText, err := sendRestRequestToScheduler(c, "DELETE") + if err != nil || responseText != "metrics reset\n" { + return fmt.Errorf("Unexpected response: %q", responseText) + } + return nil +} + func PrettyPrintJSON(metrics interface{}) string { output := &bytes.Buffer{} if err := json.NewEncoder(output).Encode(metrics); err != nil { diff --git a/test/e2e/scalability/density.go b/test/e2e/scalability/density.go index 92d4f8b991..30df63e7af 100644 --- a/test/e2e/scalability/density.go +++ b/test/e2e/scalability/density.go @@ -413,8 +413,6 @@ var _ = SIGDescribe("Density", func() { } // Verify scheduler metrics. - // TODO: Reset metrics at the beginning of the test. - // We should do something similar to how we do it for APIserver. latency, err := framework.VerifySchedulerLatency(c) framework.ExpectNoError(err) if err == nil { @@ -469,6 +467,7 @@ var _ = SIGDescribe("Density", func() { uuid = string(utiluuid.NewUUID()) + framework.ExpectNoError(framework.ResetSchedulerMetrics(c)) framework.ExpectNoError(framework.ResetMetrics(c)) framework.ExpectNoError(os.Mkdir(fmt.Sprintf(framework.TestContext.OutputDir+"/%s", uuid), 0777))