mirror of https://github.com/k3s-io/k3s
Expose information about scheduling latency in scalability tests.
parent
4a9b0fc715
commit
a968f98dc2
|
@ -31,6 +31,7 @@ var (
|
||||||
Subsystem: schedulerSubsystem,
|
Subsystem: schedulerSubsystem,
|
||||||
Name: "e2e_scheduling_latency_microseconds",
|
Name: "e2e_scheduling_latency_microseconds",
|
||||||
Help: "E2e scheduling latency (scheduling algorithm + binding)",
|
Help: "E2e scheduling latency (scheduling algorithm + binding)",
|
||||||
|
MaxAge: time.Hour,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
SchedulingAlgorithmLatency = prometheus.NewSummary(
|
SchedulingAlgorithmLatency = prometheus.NewSummary(
|
||||||
|
@ -38,6 +39,7 @@ var (
|
||||||
Subsystem: schedulerSubsystem,
|
Subsystem: schedulerSubsystem,
|
||||||
Name: "scheduling_algorithm_latency_microseconds",
|
Name: "scheduling_algorithm_latency_microseconds",
|
||||||
Help: "Scheduling algorithm latency",
|
Help: "Scheduling algorithm latency",
|
||||||
|
MaxAge: time.Hour,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
BindingLatency = prometheus.NewSummary(
|
BindingLatency = prometheus.NewSummary(
|
||||||
|
@ -45,6 +47,7 @@ var (
|
||||||
Subsystem: schedulerSubsystem,
|
Subsystem: schedulerSubsystem,
|
||||||
Name: "binding_latency_microseconds",
|
Name: "binding_latency_microseconds",
|
||||||
Help: "Binding latency",
|
Help: "Binding latency",
|
||||||
|
MaxAge: time.Hour,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -104,10 +104,15 @@ var _ = Describe("Density [Skipped]", func() {
|
||||||
|
|
||||||
expectNoError(writePerfData(c, fmt.Sprintf(testContext.OutputDir+"/%s", uuid), "after"))
|
expectNoError(writePerfData(c, fmt.Sprintf(testContext.OutputDir+"/%s", uuid), "after"))
|
||||||
|
|
||||||
// Verify latency metrics
|
// Verify latency metrics.
|
||||||
highLatencyRequests, err := HighLatencyRequests(c)
|
highLatencyRequests, err := HighLatencyRequests(c)
|
||||||
expectNoError(err)
|
expectNoError(err)
|
||||||
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0), "There should be no high-latency requests")
|
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0), "There should be no high-latency requests")
|
||||||
|
|
||||||
|
// Verify scheduler metrics.
|
||||||
|
// TODO: Reset metrics at the beginning of the test.
|
||||||
|
// We should do something similar to how we do it for APIserver.
|
||||||
|
expectNoError(VerifySchedulerLatency())
|
||||||
})
|
})
|
||||||
|
|
||||||
framework := NewFramework("density")
|
framework := NewFramework("density")
|
||||||
|
|
|
@ -57,6 +57,12 @@ type PodStartupLatency struct {
|
||||||
Latency LatencyMetric `json:"latency"`
|
Latency LatencyMetric `json:"latency"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type SchedulingLatency struct {
|
||||||
|
Scheduling LatencyMetric `json:"scheduling:`
|
||||||
|
Binding LatencyMetric `json:"binding"`
|
||||||
|
Total LatencyMetric `json:"total"`
|
||||||
|
}
|
||||||
|
|
||||||
type APICall struct {
|
type APICall struct {
|
||||||
Resource string `json:"resource"`
|
Resource string `json:"resource"`
|
||||||
Verb string `json:"verb"`
|
Verb string `json:"verb"`
|
||||||
|
@ -78,26 +84,31 @@ func (a APIResponsiveness) Less(i, j int) bool {
|
||||||
func (a *APIResponsiveness) addMetric(resource, verb string, quantile float64, latency time.Duration) {
|
func (a *APIResponsiveness) addMetric(resource, verb string, quantile float64, latency time.Duration) {
|
||||||
for i, apicall := range a.APICalls {
|
for i, apicall := range a.APICalls {
|
||||||
if apicall.Resource == resource && apicall.Verb == verb {
|
if apicall.Resource == resource && apicall.Verb == verb {
|
||||||
a.APICalls[i] = setQuantile(apicall, quantile, latency)
|
a.APICalls[i] = setQuantileAPICall(apicall, quantile, latency)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
apicall := setQuantile(APICall{Resource: resource, Verb: verb}, quantile, latency)
|
apicall := setQuantileAPICall(APICall{Resource: resource, Verb: verb}, quantile, latency)
|
||||||
a.APICalls = append(a.APICalls, apicall)
|
a.APICalls = append(a.APICalls, apicall)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
|
// 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
|
||||||
// Only 0.5, 0.9 and 0.99 quantiles are supported.
|
// Only 0.5, 0.9 and 0.99 quantiles are supported.
|
||||||
func setQuantile(apicall APICall, quantile float64, latency time.Duration) APICall {
|
func setQuantileAPICall(apicall APICall, quantile float64, latency time.Duration) APICall {
|
||||||
|
setQuantile(&apicall.Latency, quantile, latency)
|
||||||
|
return apicall
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only 0.5, 0.9 and 0.99 quantiles are supported.
|
||||||
|
func setQuantile(metric *LatencyMetric, quantile float64, latency time.Duration) {
|
||||||
switch quantile {
|
switch quantile {
|
||||||
case 0.5:
|
case 0.5:
|
||||||
apicall.Latency.Perc50 = latency
|
metric.Perc50 = latency
|
||||||
case 0.9:
|
case 0.9:
|
||||||
apicall.Latency.Perc90 = latency
|
metric.Perc90 = latency
|
||||||
case 0.99:
|
case 0.99:
|
||||||
apicall.Latency.Perc99 = latency
|
metric.Perc99 = latency
|
||||||
}
|
}
|
||||||
return apicall
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func readLatencyMetrics(c *client.Client) (APIResponsiveness, error) {
|
func readLatencyMetrics(c *client.Client) (APIResponsiveness, error) {
|
||||||
|
@ -233,6 +244,56 @@ func getMetrics(c *client.Client) (string, error) {
|
||||||
return string(body), nil
|
return string(body), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Retrieves scheduler metrics information.
|
||||||
|
func getSchedulingLatency() (SchedulingLatency, error) {
|
||||||
|
result := SchedulingLatency{}
|
||||||
|
|
||||||
|
cmd := "curl http://localhost:10251/metrics"
|
||||||
|
sshResult, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
|
||||||
|
if err != nil || sshResult.Code != 0 {
|
||||||
|
return result, fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
|
||||||
|
}
|
||||||
|
samples, err := extractMetricSamples(sshResult.Stdout)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sample := range samples {
|
||||||
|
var metric *LatencyMetric = nil
|
||||||
|
switch sample.Metric[model.MetricNameLabel] {
|
||||||
|
case "scheduler_scheduling_algorithm_latency_microseconds":
|
||||||
|
metric = &result.Scheduling
|
||||||
|
case "scheduler_binding_latency_microseconds":
|
||||||
|
metric = &result.Binding
|
||||||
|
case "scheduler_e2e_scheduling_latency_microseconds":
|
||||||
|
metric = &result.Total
|
||||||
|
}
|
||||||
|
if metric == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
latency := sample.Value
|
||||||
|
quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
setQuantile(metric, quantile, time.Duration(int64(latency))*time.Microsecond)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verifies (currently just by logging them) the scheduling latencies.
|
||||||
|
func VerifySchedulerLatency() error {
|
||||||
|
latency, err := getSchedulingLatency()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
Logf("Scheduling latency: %s", prettyPrintJSON(latency))
|
||||||
|
|
||||||
|
// TODO: Add some reasonable checks once we know more about the values.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func prettyPrintJSON(metrics interface{}) string {
|
func prettyPrintJSON(metrics interface{}) string {
|
||||||
output := &bytes.Buffer{}
|
output := &bytes.Buffer{}
|
||||||
if err := json.NewEncoder(output).Encode(metrics); err != nil {
|
if err := json.NewEncoder(output).Encode(metrics); err != nil {
|
||||||
|
|
Loading…
Reference in New Issue