From d881d48b2c7fed80350768585c03e552a67c1324 Mon Sep 17 00:00:00 2001 From: Joseph Burnett Date: Tue, 2 Jul 2019 14:21:32 +0200 Subject: [PATCH] There are various reasons that the HPA will decide not the change the current scale. Two important ones are when missing metrics might change the direction of scaling, and when the recommended scale is within tolerance of the current scale. The way that ReplicaCalculator signals it's desire to not change the current scale is by returning the current scale. However the current scale is from scale.Status.Replicas and can be larger than scale.Spec.Replicas (e.g. during Deployment rollout with configured surge). This causes a positive feedback loop because scale.Status.Replicas is written back into scale.Spec.Replicas, further increasing the current scale. This PR fixes the feedback loop by plumbing the replica count from spec through horizontal.go and replica_calculator.go so the calculator can punt with the right value. --- pkg/controller/podautoscaler/horizontal.go | 27 ++++++++++--------- .../podautoscaler/replica_calculator.go | 8 +++--- .../podautoscaler/replica_calculator_test.go | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go index c6a31532f1..0394df5fcd 100644 --- a/pkg/controller/podautoscaler/horizontal.go +++ b/pkg/controller/podautoscaler/horizontal.go @@ -23,7 +23,7 @@ import ( autoscalingv1 "k8s.io/api/autoscaling/v1" autoscalingv2 "k8s.io/api/autoscaling/v2beta2" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" apiequality "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" @@ -235,7 +235,8 @@ func (a *HorizontalController) processNextWorkItem() bool { func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.HorizontalPodAutoscaler, scale *autoscalingv1.Scale, metricSpecs []autoscalingv2.MetricSpec) (replicas int32, metric string, statuses []autoscalingv2.MetricStatus, timestamp time.Time, err error) { - currentReplicas := scale.Status.Replicas + specReplicas := scale.Spec.Replicas + statusReplicas := scale.Status.Replicas statuses = make([]autoscalingv2.MetricStatus, len(metricSpecs)) @@ -267,7 +268,7 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetObjectMetric", "the HPA was unable to compute the replica count: %v", err) return 0, "", nil, time.Time{}, fmt.Errorf("failed to get object metric value: %v", err) } - replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForObjectMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i], metricSelector) + replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForObjectMetric(specReplicas, statusReplicas, metricSpec, hpa, selector, &statuses[i], metricSelector) if err != nil { return 0, "", nil, time.Time{}, fmt.Errorf("failed to get object metric value: %v", err) } @@ -278,17 +279,17 @@ func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.Hori setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetPodsMetric", "the HPA was unable to compute the replica count: %v", err) return 0, "", nil, time.Time{}, fmt.Errorf("failed to get pods metric value: %v", err) } - replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForPodsMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i], metricSelector) + replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForPodsMetric(specReplicas, metricSpec, hpa, selector, &statuses[i], metricSelector) if err != nil { return 0, "", nil, time.Time{}, fmt.Errorf("failed to get object metric value: %v", err) } case autoscalingv2.ResourceMetricSourceType: - replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForResourceMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i]) + replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForResourceMetric(specReplicas, metricSpec, hpa, selector, &statuses[i]) if err != nil { return 0, "", nil, time.Time{}, err } case autoscalingv2.ExternalMetricSourceType: - replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForExternalMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i]) + replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForExternalMetric(specReplicas, statusReplicas, metricSpec, hpa, selector, &statuses[i]) if err != nil { return 0, "", nil, time.Time{}, err } @@ -326,9 +327,9 @@ func (a *HorizontalController) reconcileKey(key string) (deleted bool, err error } // computeStatusForObjectMetric computes the desired number of replicas for the specified metric of type ObjectMetricSourceType. -func (a *HorizontalController) computeStatusForObjectMetric(currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus, metricSelector labels.Selector) (int32, time.Time, string, error) { +func (a *HorizontalController) computeStatusForObjectMetric(specReplicas, statusReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus, metricSelector labels.Selector) (int32, time.Time, string, error) { if metricSpec.Object.Target.Type == autoscalingv2.ValueMetricType { - replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetObjectMetricReplicas(currentReplicas, metricSpec.Object.Target.Value.MilliValue(), metricSpec.Object.Metric.Name, hpa.Namespace, &metricSpec.Object.DescribedObject, selector, metricSelector) + replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetObjectMetricReplicas(specReplicas, metricSpec.Object.Target.Value.MilliValue(), metricSpec.Object.Metric.Name, hpa.Namespace, &metricSpec.Object.DescribedObject, selector, metricSelector) if err != nil { a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetObjectMetric", err.Error()) setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetObjectMetric", "the HPA was unable to compute the replica count: %v", err) @@ -349,7 +350,7 @@ func (a *HorizontalController) computeStatusForObjectMetric(currentReplicas int3 } return replicaCountProposal, timestampProposal, fmt.Sprintf("%s metric %s", metricSpec.Object.DescribedObject.Kind, metricSpec.Object.Metric.Name), nil } else if metricSpec.Object.Target.Type == autoscalingv2.AverageValueMetricType { - replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetObjectPerPodMetricReplicas(currentReplicas, metricSpec.Object.Target.AverageValue.MilliValue(), metricSpec.Object.Metric.Name, hpa.Namespace, &metricSpec.Object.DescribedObject, metricSelector) + replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetObjectPerPodMetricReplicas(statusReplicas, metricSpec.Object.Target.AverageValue.MilliValue(), metricSpec.Object.Metric.Name, hpa.Namespace, &metricSpec.Object.DescribedObject, metricSelector) if err != nil { a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetObjectMetric", err.Error()) setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetObjectMetric", "the HPA was unable to compute the replica count: %v", err) @@ -452,9 +453,9 @@ func (a *HorizontalController) computeStatusForResourceMetric(currentReplicas in } // computeStatusForExternalMetric computes the desired number of replicas for the specified metric of type ExternalMetricSourceType. -func (a *HorizontalController) computeStatusForExternalMetric(currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus) (int32, time.Time, string, error) { +func (a *HorizontalController) computeStatusForExternalMetric(specReplicas, statusReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus) (int32, time.Time, string, error) { if metricSpec.External.Target.AverageValue != nil { - replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetExternalPerPodMetricReplicas(currentReplicas, metricSpec.External.Target.AverageValue.MilliValue(), metricSpec.External.Metric.Name, hpa.Namespace, metricSpec.External.Metric.Selector) + replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetExternalPerPodMetricReplicas(statusReplicas, metricSpec.External.Target.AverageValue.MilliValue(), metricSpec.External.Metric.Name, hpa.Namespace, metricSpec.External.Metric.Selector) if err != nil { a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetExternalMetric", err.Error()) setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetExternalMetric", "the HPA was unable to compute the replica count: %v", err) @@ -475,7 +476,7 @@ func (a *HorizontalController) computeStatusForExternalMetric(currentReplicas in return replicaCountProposal, timestampProposal, fmt.Sprintf("external metric %s(%+v)", metricSpec.External.Metric.Name, metricSpec.External.Metric.Selector), nil } if metricSpec.External.Target.Value != nil { - replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetExternalMetricReplicas(currentReplicas, metricSpec.External.Target.Value.MilliValue(), metricSpec.External.Metric.Name, hpa.Namespace, metricSpec.External.Metric.Selector, selector) + replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetExternalMetricReplicas(specReplicas, metricSpec.External.Target.Value.MilliValue(), metricSpec.External.Metric.Name, hpa.Namespace, metricSpec.External.Metric.Selector, selector) if err != nil { a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetExternalMetric", err.Error()) setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetExternalMetric", "the HPA was unable to compute the replica count: %v", err) @@ -550,7 +551,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.Ho return fmt.Errorf("failed to query scale subresource for %s: %v", reference, err) } setCondition(hpa, autoscalingv2.AbleToScale, v1.ConditionTrue, "SucceededGetScale", "the HPA controller was able to get the target's current scale") - currentReplicas := scale.Status.Replicas + currentReplicas := scale.Spec.Replicas a.recordInitialRecommendation(currentReplicas, key) var metricStatuses []autoscalingv2.MetricStatus diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go index ace9803bdf..f498a0ddeb 100644 --- a/pkg/controller/podautoscaler/replica_calculator.go +++ b/pkg/controller/podautoscaler/replica_calculator.go @@ -22,7 +22,7 @@ import ( "time" autoscaling "k8s.io/api/autoscaling/v2beta2" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" @@ -333,7 +333,7 @@ func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, tar // GetExternalPerPodMetricReplicas calculates the desired replica count based on a // target metric value per pod (as a milli-value) for the external metric in the // given namespace, and the current replica count. -func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int32, targetUtilizationPerPod int64, metricName, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, utilization int64, timestamp time.Time, err error) { +func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(statusReplicas int32, targetUtilizationPerPod int64, metricName, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, utilization int64, timestamp time.Time, err error) { metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector) if err != nil { return 0, 0, time.Time{}, err @@ -347,13 +347,13 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3 utilization = utilization + val } - replicaCount = currentReplicas + replicaCount = statusReplicas usageRatio := float64(utilization) / (float64(targetUtilizationPerPod) * float64(replicaCount)) if math.Abs(1.0-usageRatio) > c.tolerance { // update number of replicas if the change is large enough replicaCount = int32(math.Ceil(float64(utilization) / float64(targetUtilizationPerPod))) } - utilization = int64(math.Ceil(float64(utilization) / float64(currentReplicas))) + utilization = int64(math.Ceil(float64(utilization) / float64(statusReplicas))) return replicaCount, utilization, timestamp, nil } diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go index 0638f89341..3968103b92 100644 --- a/pkg/controller/podautoscaler/replica_calculator_test.go +++ b/pkg/controller/podautoscaler/replica_calculator_test.go @@ -23,7 +23,7 @@ import ( "time" autoscalingv2 "k8s.io/api/autoscaling/v2beta2" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta/testrestmapper" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"