mirror of https://github.com/k3s-io/k3s
Increase latency threshold for list api calls
parent
402e48b072
commit
70123e71bb
|
@ -51,9 +51,10 @@ const (
|
||||||
// Increasing threshold to 1s is within our SLO and should solve this problem.
|
// Increasing threshold to 1s is within our SLO and should solve this problem.
|
||||||
apiCallLatencyThreshold time.Duration = 1 * time.Second
|
apiCallLatencyThreshold time.Duration = 1 * time.Second
|
||||||
|
|
||||||
// We set a higher threshold for list apicalls as they can take more time when
|
// We use a higher threshold for list API calls if the cluster is big (i.e. having > 500 nodes)
|
||||||
// the list is really big. For eg. list nodes in a 5000-node cluster.
|
// as list response sizes are bigger in general for big clusters.
|
||||||
apiListCallLatencyThreshold time.Duration = 2 * time.Second
|
apiListCallLatencyThreshold time.Duration = 5 * time.Second
|
||||||
|
bigClusterNodeCountThreshold = 500
|
||||||
|
|
||||||
// Cluster Autoscaler metrics names
|
// Cluster Autoscaler metrics names
|
||||||
caFunctionMetric = "cluster_autoscaler_function_duration_seconds_bucket"
|
caFunctionMetric = "cluster_autoscaler_function_duration_seconds_bucket"
|
||||||
|
@ -354,8 +355,10 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prints top five summary metrics for request types with latency and returns
|
// Prints top five summary metrics for request types with latency and returns
|
||||||
// number of such request types above threshold.
|
// number of such request types above threshold. We use a higher threshold for
|
||||||
func HighLatencyRequests(c clientset.Interface) (int, *APIResponsiveness, error) {
|
// list calls if nodeCount is above a given threshold (i.e. cluster is big).
|
||||||
|
func HighLatencyRequests(c clientset.Interface, nodeCount int) (int, *APIResponsiveness, error) {
|
||||||
|
isBigCluster := (nodeCount > bigClusterNodeCountThreshold)
|
||||||
metrics, err := readLatencyMetrics(c)
|
metrics, err := readLatencyMetrics(c)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, metrics, err
|
return 0, metrics, err
|
||||||
|
@ -364,12 +367,14 @@ func HighLatencyRequests(c clientset.Interface) (int, *APIResponsiveness, error)
|
||||||
badMetrics := 0
|
badMetrics := 0
|
||||||
top := 5
|
top := 5
|
||||||
for i := range metrics.APICalls {
|
for i := range metrics.APICalls {
|
||||||
|
latency := metrics.APICalls[i].Latency.Perc99
|
||||||
|
isListCall := (metrics.APICalls[i].Verb == "LIST")
|
||||||
isBad := false
|
isBad := false
|
||||||
verb := metrics.APICalls[i].Verb
|
if latency > apiCallLatencyThreshold {
|
||||||
if verb != "LIST" && metrics.APICalls[i].Latency.Perc99 > apiCallLatencyThreshold ||
|
if !isListCall || !isBigCluster || (latency > apiListCallLatencyThreshold) {
|
||||||
verb == "LIST" && metrics.APICalls[i].Latency.Perc99 > apiListCallLatencyThreshold {
|
isBad = true
|
||||||
badMetrics++
|
badMetrics++
|
||||||
isBad = true
|
}
|
||||||
}
|
}
|
||||||
if top > 0 || isBad {
|
if top > 0 || isBad {
|
||||||
top--
|
top--
|
||||||
|
|
|
@ -328,7 +328,7 @@ var _ = SIGDescribe("Density", func() {
|
||||||
|
|
||||||
summaries := make([]framework.TestDataSummary, 0, 2)
|
summaries := make([]framework.TestDataSummary, 0, 2)
|
||||||
// Verify latency metrics.
|
// Verify latency metrics.
|
||||||
highLatencyRequests, metrics, err := framework.HighLatencyRequests(c)
|
highLatencyRequests, metrics, err := framework.HighLatencyRequests(c, nodeCount)
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
summaries = append(summaries, metrics)
|
summaries = append(summaries, metrics)
|
||||||
|
|
|
@ -92,7 +92,7 @@ var _ = SIGDescribe("Load capacity", func() {
|
||||||
// TODO add flag that allows to skip cleanup on failure
|
// TODO add flag that allows to skip cleanup on failure
|
||||||
AfterEach(func() {
|
AfterEach(func() {
|
||||||
// Verify latency metrics
|
// Verify latency metrics
|
||||||
highLatencyRequests, metrics, err := framework.HighLatencyRequests(clientset)
|
highLatencyRequests, metrics, err := framework.HighLatencyRequests(clientset, nodeCount)
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
summaries := make([]framework.TestDataSummary, 0, 1)
|
summaries := make([]framework.TestDataSummary, 0, 1)
|
||||||
|
|
Loading…
Reference in New Issue