Expose information about scheduling latency in scalability tests.

2015-11-18 17:07:26 +01:00 · 2015-11-18 17:07:26 +01:00 · a968f98dc2
parent 4a9b0fc715
commit a968f98dc2
3 changed files with 77 additions and 8 deletions
--- a/plugin/pkg/scheduler/metrics/metrics.go
+++ b/plugin/pkg/scheduler/metrics/metrics.go
@ -31,6 +31,7 @@ var (
 			Subsystem: schedulerSubsystem,
 			Name:      "e2e_scheduling_latency_microseconds",
 			Help:      "E2e scheduling latency (scheduling algorithm + binding)",
 			MaxAge:    time.Hour,
 		},
 	)
 	SchedulingAlgorithmLatency = prometheus.NewSummary(
@ -38,6 +39,7 @@ var (
 			Subsystem: schedulerSubsystem,
 			Name:      "scheduling_algorithm_latency_microseconds",
 			Help:      "Scheduling algorithm latency",
 			MaxAge:    time.Hour,
 		},
 	)
 	BindingLatency = prometheus.NewSummary(
@ -45,6 +47,7 @@ var (
 			Subsystem: schedulerSubsystem,
 			Name:      "binding_latency_microseconds",
 			Help:      "Binding latency",
 			MaxAge:    time.Hour,
 		},
 	)
 )
--- a/test/e2e/density.go
+++ b/test/e2e/density.go
@ -104,10 +104,15 @@ var _ = Describe("Density [Skipped]", func() {
 		expectNoError(writePerfData(c, fmt.Sprintf(testContext.OutputDir+"/%s", uuid), "after"))
-		// Verify latency metrics
+		// Verify latency metrics.
 		highLatencyRequests, err := HighLatencyRequests(c)
 		expectNoError(err)
 		Expect(highLatencyRequests).NotTo(BeNumerically(">", 0), "There should be no high-latency requests")
 		// Verify scheduler metrics.
 		// TODO: Reset metrics at the beginning of the test.
 		// We should do something similar to how we do it for APIserver.
 		expectNoError(VerifySchedulerLatency())
 	})
 	framework := NewFramework("density")
--- a/test/e2e/metrics_util.go
+++ b/test/e2e/metrics_util.go
@ -57,6 +57,12 @@ type PodStartupLatency struct {
 	Latency LatencyMetric `json:"latency"`
 }
 // SchedulingLatency groups the scheduler latency summaries reported in the
 // scalability tests. Scheduling is filled from the scheduling algorithm
 // latency metric, Binding from the binding latency metric and Total from the
 // e2e scheduling latency metric. Every field is serialized to JSON under the
 // lower-case key given in its struct tag.
 type SchedulingLatency struct {
 	Scheduling LatencyMetric `json:"scheduling"`
 	Binding    LatencyMetric `json:"binding"`
 	Total      LatencyMetric `json:"total"`
 }
 type APICall struct {
 	Resource string        `json:"resource"`
 	Verb     string        `json:"verb"`
@ -78,26 +84,31 @@ func (a APIResponsiveness) Less(i, j int) bool {
 func (a *APIResponsiveness) addMetric(resource, verb string, quantile float64, latency time.Duration) {
 	for i, apicall := range a.APICalls {
 		if apicall.Resource == resource && apicall.Verb == verb {
-			a.APICalls[i] = setQuantile(apicall, quantile, latency)
+			a.APICalls[i] = setQuantileAPICall(apicall, quantile, latency)
 			return
 		}
 	}
-	apicall := setQuantile(APICall{Resource: resource, Verb: verb}, quantile, latency)
+	apicall := setQuantileAPICall(APICall{Resource: resource, Verb: verb}, quantile, latency)
 	a.APICalls = append(a.APICalls, apicall)
 }
 // 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
 // Only 0.5, 0.9 and 0.99 quantiles are supported.
-func setQuantile(apicall APICall, quantile float64, latency time.Duration) APICall {
+func setQuantileAPICall(apicall APICall, quantile float64, latency time.Duration) APICall {
 	setQuantile(&apicall.Latency, quantile, latency)
 	return apicall
 }
 // Only 0.5, 0.9 and 0.99 quantiles are supported.
 func setQuantile(metric *LatencyMetric, quantile float64, latency time.Duration) {
 	switch quantile {
 	case 0.5:
-		apicall.Latency.Perc50 = latency
+		metric.Perc50 = latency
 	case 0.9:
-		apicall.Latency.Perc90 = latency
+		metric.Perc90 = latency
 	case 0.99:
-		apicall.Latency.Perc99 = latency
+		metric.Perc99 = latency
 	}
-	return apicall
 }
 func readLatencyMetrics(c *client.Client) (APIResponsiveness, error) {
@ -233,6 +244,56 @@ func getMetrics(c *client.Client) (string, error) {
 	return string(body), nil
 }
 // getSchedulingLatency retrieves scheduler latency metrics from the master.
 // It runs curl against http://localhost:10251/metrics over SSH on the master
 // host, parses the output into metric samples and records the quantiles of
 // the scheduling algorithm, binding and e2e scheduling latency summaries.
 // Quantiles are stored through setQuantile, so only the quantiles it supports
 // end up in the result. Sample values are interpreted as microseconds.
 // On any error the zero SchedulingLatency is returned together with the error.
 func getSchedulingLatency() (SchedulingLatency, error) {
 	cmd := "curl http://localhost:10251/metrics"
 	sshResult, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
 	if err != nil || sshResult.Code != 0 {
 		return SchedulingLatency{}, fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
 	}
 	samples, err := extractMetricSamples(sshResult.Stdout)
 	if err != nil {
 		return SchedulingLatency{}, err
 	}
 	result := SchedulingLatency{}
 	for _, sample := range samples {
 		var metric *LatencyMetric
 		switch sample.Metric[model.MetricNameLabel] {
 		case "scheduler_scheduling_algorithm_latency_microseconds":
 			metric = &result.Scheduling
 		case "scheduler_binding_latency_microseconds":
 			metric = &result.Binding
 		case "scheduler_e2e_scheduling_latency_microseconds":
 			metric = &result.Total
 		default:
 			// Not one of the scheduler latency summaries we report.
 			continue
 		}
 		latency := sample.Value
 		if latency != latency {
 			// Only NaN differs from itself. A quantile may be exported as NaN
 			// (presumably when the summary has no recent observations), and
 			// converting NaN to int64 yields an implementation-dependent
 			// value, so leave the corresponding field at zero instead.
 			continue
 		}
 		quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
 		if err != nil {
 			return SchedulingLatency{}, fmt.Errorf("parsing quantile label %q: %v", sample.Metric[model.QuantileLabel], err)
 		}
 		setQuantile(metric, quantile, time.Duration(int64(latency))*time.Microsecond)
 	}
 	return result, nil
 }
 // VerifySchedulerLatency fetches the scheduling latencies and, for now, only
 // logs them as JSON. It returns the error from fetching the latencies, if any.
 func VerifySchedulerLatency() error {
 	schedLatency, err := getSchedulingLatency()
 	if err == nil {
 		Logf("Scheduling latency: %s", prettyPrintJSON(schedLatency))
 	}
 	// TODO: Add some reasonable checks once we know more about the values.
 	return err
 }
 func prettyPrintJSON(metrics interface{}) string {
 	output := &bytes.Buffer{}
 	if err := json.NewEncoder(output).Encode(metrics); err != nil {