Expose information about scheduling latency in scalability tests.

pull/6/head
Wojciech Tyczynski 2015-11-18 17:07:26 +01:00
parent 4a9b0fc715
commit a968f98dc2
3 changed files with 77 additions and 8 deletions


@@ -31,6 +31,7 @@ var (
Subsystem: schedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds",
Help: "E2e scheduling latency (scheduling algorithm + binding)",
MaxAge: time.Hour,
},
)
SchedulingAlgorithmLatency = prometheus.NewSummary(
@@ -38,6 +39,7 @@ var (
Subsystem: schedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds",
Help: "Scheduling algorithm latency",
MaxAge: time.Hour,
},
)
BindingLatency = prometheus.NewSummary(
@@ -45,6 +47,7 @@ var (
Subsystem: schedulerSubsystem,
Name: "binding_latency_microseconds",
Help: "Binding latency",
MaxAge: time.Hour,
},
)
)
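
For context: MaxAge in prometheus.SummaryOpts bounds the sliding time window over which a Summary computes its quantiles, and the client library's much shorter default window (ten minutes) would age most of a long scalability run out of the reported percentiles. A minimal, illustrative sketch of the same pattern (names invented, not part of this commit):

package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// Illustrative summary mirroring the change above: quantiles are computed over
// the last hour of observations instead of the library default window.
var exampleLatency = prometheus.NewSummary(
	prometheus.SummaryOpts{
		Subsystem: "scheduler",
		Name:      "example_latency_microseconds",
		Help:      "Example latency",
		MaxAge:    time.Hour,
	},
)

func main() {
	prometheus.MustRegister(exampleLatency)
	exampleLatency.Observe(250) // one observation, in microseconds
}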


@@ -104,10 +104,15 @@ var _ = Describe("Density [Skipped]", func() {
expectNoError(writePerfData(c, fmt.Sprintf(testContext.OutputDir+"/%s", uuid), "after"))
-// Verify latency metrics
+// Verify latency metrics.
highLatencyRequests, err := HighLatencyRequests(c)
expectNoError(err)
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0), "There should be no high-latency requests")
// Verify scheduler metrics.
// TODO: Reset metrics at the beginning of the test.
// We should do something similar to how we do it for APIserver.
expectNoError(VerifySchedulerLatency())
})
framework := NewFramework("density")


@@ -57,6 +57,12 @@ type PodStartupLatency struct {
Latency LatencyMetric `json:"latency"`
}
type SchedulingLatency struct {
Scheduling LatencyMetric `json:"scheduling"`
Binding LatencyMetric `json:"binding"`
Total LatencyMetric `json:"total"`
}
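
LatencyMetric itself is not part of this diff; judging from the Perc50/Perc90/Perc99 fields that setQuantile writes further down, its shape is roughly the following (a sketch only; the real definition and its JSON tags live elsewhere in metrics_util.go):

// Assumed shape of LatencyMetric, inferred from the fields used below.
type LatencyMetric struct {
	Perc50 time.Duration `json:"Perc50"`
	Perc90 time.Duration `json:"Perc90"`
	Perc99 time.Duration `json:"Perc99"`
}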
type APICall struct {
Resource string `json:"resource"`
Verb string `json:"verb"`
@@ -78,26 +84,31 @@ func (a APIResponsiveness) Less(i, j int) bool {
func (a *APIResponsiveness) addMetric(resource, verb string, quantile float64, latency time.Duration) {
for i, apicall := range a.APICalls {
if apicall.Resource == resource && apicall.Verb == verb {
-a.APICalls[i] = setQuantile(apicall, quantile, latency)
+a.APICalls[i] = setQuantileAPICall(apicall, quantile, latency)
return
}
}
-apicall := setQuantile(APICall{Resource: resource, Verb: verb}, quantile, latency)
+apicall := setQuantileAPICall(APICall{Resource: resource, Verb: verb}, quantile, latency)
a.APICalls = append(a.APICalls, apicall)
}
// 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
// Only 0.5, 0.9 and 0.99 quantiles are supported.
-func setQuantile(apicall APICall, quantile float64, latency time.Duration) APICall {
+func setQuantileAPICall(apicall APICall, quantile float64, latency time.Duration) APICall {
setQuantile(&apicall.Latency, quantile, latency)
return apicall
}
// Only 0.5, 0.9 and 0.99 quantiles are supported.
func setQuantile(metric *LatencyMetric, quantile float64, latency time.Duration) {
switch quantile {
case 0.5:
-apicall.Latency.Perc50 = latency
+metric.Perc50 = latency
case 0.9:
-apicall.Latency.Perc90 = latency
+metric.Perc90 = latency
case 0.99:
-apicall.Latency.Perc99 = latency
+metric.Perc99 = latency
}
-return apicall
}
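
To illustrate the refactoring with made-up values: setQuantile now fills any LatencyMetric in place, while setQuantileAPICall remains a thin wrapper that keeps the existing APICall call sites working. For example, inside the same package one could write:

// Hypothetical usage; the durations here are invented for illustration.
var sched LatencyMetric
setQuantile(&sched, 0.5, 800*time.Microsecond)
setQuantile(&sched, 0.99, 15*time.Millisecond)

call := APICall{Resource: "pods", Verb: "POST"}
call = setQuantileAPICall(call, 0.9, 120*time.Millisecond)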
func readLatencyMetrics(c *client.Client) (APIResponsiveness, error) {
@@ -233,6 +244,56 @@ func getMetrics(c *client.Client) (string, error) {
return string(body), nil
}
// Retrieves scheduler metrics information.
func getSchedulingLatency() (SchedulingLatency, error) {
result := SchedulingLatency{}
cmd := "curl http://localhost:10251/metrics"
sshResult, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
if err != nil || sshResult.Code != 0 {
return result, fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
}
samples, err := extractMetricSamples(sshResult.Stdout)
if err != nil {
return result, err
}
for _, sample := range samples {
var metric *LatencyMetric
switch sample.Metric[model.MetricNameLabel] {
case "scheduler_scheduling_algorithm_latency_microseconds":
metric = &result.Scheduling
case "scheduler_binding_latency_microseconds":
metric = &result.Binding
case "scheduler_e2e_scheduling_latency_microseconds":
metric = &result.Total
}
if metric == nil {
continue
}
latency := sample.Value
quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
if err != nil {
return result, err
}
setQuantile(metric, quantile, time.Duration(int64(latency))*time.Microsecond)
}
return result, nil
}
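
The curl output parsed above is Prometheus' plain-text exposition format, in which each Summary appears as one sample per quantile plus _sum and _count lines. A representative, made-up excerpt for one of the matched metric names:

scheduler_binding_latency_microseconds{quantile="0.5"} 1043
scheduler_binding_latency_microseconds{quantile="0.9"} 2168
scheduler_binding_latency_microseconds{quantile="0.99"} 5731
scheduler_binding_latency_microseconds_sum 2.39174e+06
scheduler_binding_latency_microseconds_count 812

The loop above uses the metric name to pick the target LatencyMetric, the quantile label to pick the percentile field, and converts the sample value (in microseconds) into a time.Duration.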
// Verifies (currently just by logging them) the scheduling latencies.
func VerifySchedulerLatency() error {
latency, err := getSchedulingLatency()
if err != nil {
return err
}
Logf("Scheduling latency: %s", prettyPrintJSON(latency))
// TODO: Add some reasonable checks once we know more about the values.
return nil
}
func prettyPrintJSON(metrics interface{}) string {
output := &bytes.Buffer{}
if err := json.NewEncoder(output).Encode(metrics); err != nil {