Remove UpscaleForbiddenWindow

Instead, discard metric values for pods that are unready and have never
been ready (they may report misleading values, which was the original
reason for introducing the upscale forbidden window).

Use a pod's metric when the pod is:
- Ready, or
- Not ready, but its creation timestamp and last readiness change are
  more than 10s apart.

In the latter case we assume the pod was ready but later became unready.
We want to use metrics for such pods because pods sometimes become
unready precisely because they are receiving too much load.
Ref: pull/8/head
Author: Joachim Bartosik, 2018-07-18 14:21:00 +02:00
Commit: 7681c284f5 (parent 086ed3c659)
6 changed files with 177 additions and 24 deletions
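The readiness gate described in the commit message boils down to a timestamp comparison. The following is a minimal standalone sketch of that policy; the names usableForMetrics and maxInitialReadinessDelay are illustrative only, while the diff below implements the real check as hasPodBeenReadyBefore with the constant maxDelayOfInitialReadinessStatus in the replica calculator.

package main

import (
	"fmt"
	"time"
)

// maxInitialReadinessDelay mirrors the 10s window from the commit message; the
// constant added by this commit is called maxDelayOfInitialReadinessStatus.
const maxInitialReadinessDelay = 10 * time.Second

// usableForMetrics is an illustrative name, not the controller's API. It returns
// true when a pod's metric should be used: the pod is ready now, or it is unready
// but its readiness last changed more than 10s after it started, so it was
// presumably ready at some point and only became unready later.
func usableForMetrics(ready bool, started, lastReadinessChange time.Time) bool {
	if ready {
		return true
	}
	return lastReadinessChange.After(started.Add(maxInitialReadinessDelay))
}

func main() {
	start := time.Now().Add(-5 * time.Minute)
	fmt.Println(usableForMetrics(true, start, start))                     // ready: true
	fmt.Println(usableForMetrics(false, start, start))                    // never ready: false
	fmt.Println(usableForMetrics(false, start, start.Add(2*time.Minute))) // was ready, now unready: true
}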


@@ -93,7 +93,6 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
replicaCalc,
ctx.InformerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration,
).Run(ctx.Stop)
return nil, true, nil


@@ -64,7 +64,6 @@ type HorizontalController struct {
replicaCalc *ReplicaCalculator
eventRecorder record.EventRecorder
upscaleForbiddenWindow time.Duration
downscaleForbiddenWindow time.Duration
// hpaLister is able to list/get HPAs from the shared cache from the informer passed in to
@@ -85,7 +84,6 @@ func NewHorizontalController(
replicaCalc *ReplicaCalculator,
hpaInformer autoscalinginformers.HorizontalPodAutoscalerInformer,
resyncPeriod time.Duration,
upscaleForbiddenWindow time.Duration,
downscaleForbiddenWindow time.Duration,
) *HorizontalController {
@@ -99,7 +97,6 @@ func NewHorizontalController(
eventRecorder: recorder,
scaleNamespacer: scaleNamespacer,
hpaNamespacer: hpaNamespacer,
upscaleForbiddenWindow: upscaleForbiddenWindow,
downscaleForbiddenWindow: downscaleForbiddenWindow,
queue: workqueue.NewNamedRateLimitingQueue(NewDefaultHPARateLimiter(resyncPeriod), "horizontalpodautoscaler"),
mapper: mapper,
@@ -246,7 +243,6 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori
setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "InvalidMetricSourceType", "the HPA was unable to compute the replica count: %s", errMsg)
return 0, "", nil, time.Time{}, fmt.Errorf(errMsg)
}
if replicas == 0 || replicaCountProposal > replicas {
timestamp = timestampProposal
replicas = replicaCountProposal
@@ -472,6 +468,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
rescaleReason = "Current number of replicas must be greater than 0"
desiredReplicas = 1
} else {
metricDesiredReplicas, metricName, metricStatuses, metricTimestamp, err = a.computeReplicasForMetrics(hpa, scale, hpa.Spec.Metrics)
if err != nil {
a.setCurrentReplicasInStatus(hpa, currentReplicas)
@@ -507,15 +504,6 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffDownscale", "the time since the previous scale is still within the downscale forbidden window")
backoffDown = true
}
if !hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
backoffUp = true
if backoffDown {
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffBoth", "the time since the previous scale is still within both the downscale and upscale forbidden windows")
} else {
setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionFalse, "BackoffUpscale", "the time since the previous scale is still within the upscale forbidden window")
}
}
}
if !backoffDown && !backoffUp {
@@ -634,9 +622,8 @@ func (a *HorizontalController) shouldScale(hpa *autoscalingv2.HorizontalPodAutos
return true
}
// Going up only if the usage ratio increased significantly above the target
// and there was no rescaling in the last upscaleForbiddenWindow.
if desiredReplicas > currentReplicas && hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp) {
// Going up only if the usage ratio increased significantly above the target.
if desiredReplicas > currentReplicas {
return true
}

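With the upscale window gone, the scale-up branch of shouldScale no longer consults LastScaleTime. Below is a condensed, hedged sketch of the resulting decision; the parameter names are illustrative rather than the controller's exact signature, and the real method also handles a nil LastScaleTime and the tolerance check performed by the replica calculator.

package sketch

import "time"

// shouldScaleSketch condenses the post-change behavior: scale-downs are still
// gated by the downscale forbidden window, scale-ups are not rate limited at all.
func shouldScaleSketch(current, desired int32, lastScale, now time.Time, downscaleWindow time.Duration) bool {
	if desired == current {
		return false
	}
	if desired < current {
		// Scaling down still waits for the downscale forbidden window to pass.
		return lastScale.Add(downscaleWindow).Before(now)
	}
	// Scaling up only requires the proposed replica count to exceed the current
	// one; the usage ratio has already been judged significantly above target.
	return true
}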

@@ -643,7 +643,6 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
}
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultUpscaleForbiddenWindow := 3 * time.Minute
defaultDownscaleForbiddenWindow := 5 * time.Minute
hpaController := NewHorizontalController(
@@ -654,7 +653,6 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
replicaCalc,
informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
controller.NoResyncPeriodFunc(),
defaultUpscaleForbiddenWindow,
defaultDownscaleForbiddenWindow,
)
hpaController.hpaListerSynced = alwaysReady
@@ -1728,7 +1726,7 @@ func TestConditionFailedUpdateScale(t *testing.T) {
tc.runTest(t)
}
func TestBackoffUpscale(t *testing.T) {
func NoTestBackoffUpscale(t *testing.T) {
time := metav1.Time{Time: time.Now()}
tc := testCase{
minReplicas: 1,
@@ -1746,8 +1744,84 @@ func TestBackoffUpscale(t *testing.T) {
Reason: "ReadyForNewScale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "SucceededRescale",
}),
}
tc.runTest(t)
}
func TestNoBackoffUpscaleCM(t *testing.T) {
time := metav1.Time{Time: time.Now()}
tc := testCase{
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
expectedDesiredReplicas: 4,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
Pods: &autoscalingv2.PodsMetricSource{
MetricName: "qps",
TargetAverageValue: resource.MustParse("15.0"),
},
},
},
reportedLevels: []uint64{20000, 10000, 30000},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
//useMetricsAPI: true,
lastScaleTime: &time,
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "ReadyForNewScale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "SucceededRescale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.ScalingLimited,
Status: v1.ConditionFalse,
Reason: "BackoffBoth",
Reason: "DesiredWithinRange",
}),
}
tc.runTest(t)
}
func TestNoBackoffUpscaleCMNoBackoffCpu(t *testing.T) {
time := metav1.Time{Time: time.Now()}
tc := testCase{
minReplicas: 1,
maxReplicas: 5,
initialReplicas: 3,
expectedDesiredReplicas: 5,
CPUTarget: 10,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
Pods: &autoscalingv2.PodsMetricSource{
MetricName: "qps",
TargetAverageValue: resource.MustParse("15.0"),
},
},
},
reportedLevels: []uint64{20000, 10000, 30000},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
useMetricsAPI: true,
lastScaleTime: &time,
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "ReadyForNewScale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "SucceededRescale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.ScalingLimited,
Status: v1.ConditionTrue,
Reason: "TooManyReplicas",
}),
}
tc.runTest(t)
}


@@ -491,7 +491,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
}
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultUpscaleForbiddenWindow := 3 * time.Minute
defaultDownscaleForbiddenWindow := 5 * time.Minute
hpaController := NewHorizontalController(
@@ -502,7 +501,6 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
replicaCalc,
informerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
controller.NoResyncPeriodFunc(),
defaultUpscaleForbiddenWindow,
defaultDownscaleForbiddenWindow,
)
hpaController.hpaListerSynced = alwaysReady


@@ -35,6 +35,10 @@ const (
// defaultTestingTolerance is default value for calculating when to
// scale up/scale down.
defaultTestingTolerance = 0.1
// A pod begins its existence unready. If a pod is unready and its last readiness change
// happened less than maxDelayOfInitialReadinessStatus after the pod's start, we assume it has never been ready.
maxDelayOfInitialReadinessStatus = 10 * time.Second
)
type ReplicaCalculator struct {
@@ -205,7 +209,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
missingPods := sets.NewString()
for _, pod := range podList.Items {
if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) {
// save this pod name for later, but pretend it doesn't exist for now
unreadyPods.Insert(pod.Name)
delete(metrics, pod.Name)
@@ -381,3 +385,22 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
utilization = int64(math.Ceil(float64(utilization) / float64(currentReplicas)))
return replicaCount, utilization, timestamp, nil
}
// hasPodBeenReadyBefore returns true if the pod is ready, or if it is not ready but its
// readiness last changed more than maxDelayOfInitialReadinessStatus after it started
// (i.e. it has most likely been ready before and only became unready later).
func hasPodBeenReadyBefore(pod *v1.Pod) bool {
_, readyCondition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
if readyCondition == nil {
return false
}
if readyCondition.Status == v1.ConditionTrue {
return true
}
lastReady := readyCondition.LastTransitionTime.Time
if pod.Status.StartTime == nil {
return false
}
started := pod.Status.StartTime.Time
// If the last readiness change happened more than maxDelayOfInitialReadinessStatus after
// the pod started, assume the pod has been ready in the past.
return lastReady.After(started.Add(maxDelayOfInitialReadinessStatus))
}


@@ -1069,4 +1069,76 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) {
tc.runTest(t)
}
func TestHasPodBeenReadyBefore(t *testing.T) {
tests := []struct {
name string
conditions []v1.PodCondition
started time.Time
expected bool
}{
{
"initially unready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionFalse,
},
},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
false,
},
{
"currently unready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionFalse,
},
},
metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time,
true,
},
{
"currently ready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionTrue,
},
},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
true,
},
{
"no ready status",
[]v1.PodCondition{},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
false,
},
}
for _, tc := range tests {
pod := &v1.Pod{
Status: v1.PodStatus{
Conditions: tc.conditions,
StartTime: &metav1.Time{
Time: tc.started,
},
},
}
got := hasPodBeenReadyBefore(pod)
if got != tc.expected {
t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected)
}
}
}
// TODO: add more tests
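In the spirit of the TODO above, here is one additional, hedged example (assuming it sits in the same test file and reuses its existing imports). It pins down the boundary case: a readiness change exactly maxDelayOfInitialReadinessStatus after start still counts as never ready, because the helper compares with After rather than !Before.

func TestHasPodBeenReadyBeforeBoundary(t *testing.T) {
	start := metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC)
	pod := &v1.Pod{
		Status: v1.PodStatus{
			StartTime: &start,
			Conditions: []v1.PodCondition{
				{
					Type:   v1.PodReady,
					Status: v1.ConditionFalse,
					// Exactly maxDelayOfInitialReadinessStatus after start.
					LastTransitionTime: metav1.Time{Time: start.Add(maxDelayOfInitialReadinessStatus)},
				},
			},
		},
	}
	if hasPodBeenReadyBefore(pod) {
		t.Errorf("expected a readiness change exactly %v after start to count as never ready", maxDelayOfInitialReadinessStatus)
	}
}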