mirror of https://github.com/k3s-io/k3s
Merge pull request #60648 from bskiba/hpa-unready
Automatic merge from submit-queue (batch tested with PRs 60732, 60689, 60648, 60704). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Do not count failed pods as unready in HPA controller **What this PR does / why we need it**: Currently, when performing a scale up, any failed pods (which can be present for example in case of evictions performed by kubelet) will be treated as unready. Unready pods are treated as if they had 0% utilization which will slow down or even block scale up. After this change, failed pods are ignored in all calculations. This way they do not influence neither scale up nor scale down replica calculations. @MaciekPytel @DirectXMan12 **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes #55630 **Special notes for your reviewer**: **Release note**: ``` Stop counting failed pods as unready in HPA controller to avoid failed pods incorrectly affecting scale up replica count calculation. ```pull/6/head
commit
30eb1aa7c5
|
@ -103,6 +103,7 @@ type testCase struct {
|
|||
reportedLevels []uint64
|
||||
reportedCPURequests []resource.Quantity
|
||||
reportedPodReadiness []v1.ConditionStatus
|
||||
reportedPodPhase []v1.PodPhase
|
||||
scaleUpdated bool
|
||||
statusUpdated bool
|
||||
eventCreated bool
|
||||
|
@ -250,10 +251,14 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
|
|||
if tc.reportedPodReadiness != nil {
|
||||
podReadiness = tc.reportedPodReadiness[i]
|
||||
}
|
||||
podPhase := v1.PodRunning
|
||||
if tc.reportedPodPhase != nil {
|
||||
podPhase = tc.reportedPodPhase[i]
|
||||
}
|
||||
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
|
||||
pod := v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
Phase: podPhase,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
|
@ -713,6 +718,24 @@ func TestScaleUpUnreadyNoScale(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpIgnoresFailedPods(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 2,
|
||||
desiredReplicas: 4,
|
||||
CPUTarget: 30,
|
||||
CPUCurrent: 60,
|
||||
verifyCPUCurrent: true,
|
||||
reportedLevels: []uint64{500, 700},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
|
||||
reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
|
||||
useMetricsAPI: true,
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpDeployment(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
|
@ -1017,6 +1040,24 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleDownIgnoresFailedPods(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 5,
|
||||
desiredReplicas: 3,
|
||||
CPUTarget: 50,
|
||||
CPUCurrent: 28,
|
||||
verifyCPUCurrent: true,
|
||||
reportedLevels: []uint64{100, 300, 500, 250, 250},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
useMetricsAPI: true,
|
||||
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
|
||||
reportedPodPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestTolerance(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 1,
|
||||
|
|
|
@ -88,7 +88,11 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
|
|||
|
||||
if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
|
||||
// save this pod name for later, but pretend it doesn't exist for now
|
||||
unreadyPods.Insert(pod.Name)
|
||||
if pod.Status.Phase != v1.PodFailed {
|
||||
// Failed pods should not be counted as unready pods as they will
|
||||
// not become running anymore.
|
||||
unreadyPods.Insert(pod.Name)
|
||||
}
|
||||
delete(metrics, pod.Name)
|
||||
continue
|
||||
}
|
||||
|
|
|
@ -77,6 +77,7 @@ type replicaCalcTestCase struct {
|
|||
metric *metricInfo
|
||||
|
||||
podReadiness []v1.ConditionStatus
|
||||
podPhase []v1.PodPhase
|
||||
}
|
||||
|
||||
const (
|
||||
|
@ -90,15 +91,24 @@ func (tc *replicaCalcTestCase) prepareTestClient(t *testing.T) (*fake.Clientset,
|
|||
fakeClient := &fake.Clientset{}
|
||||
fakeClient.AddReactor("list", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) {
|
||||
obj := &v1.PodList{}
|
||||
for i := 0; i < int(tc.currentReplicas); i++ {
|
||||
podsCount := int(tc.currentReplicas)
|
||||
// Failed pods are not included in tc.currentReplicas
|
||||
if tc.podPhase != nil && len(tc.podPhase) > podsCount {
|
||||
podsCount = len(tc.podPhase)
|
||||
}
|
||||
for i := 0; i < podsCount; i++ {
|
||||
podReadiness := v1.ConditionTrue
|
||||
if tc.podReadiness != nil {
|
||||
if tc.podReadiness != nil && i < len(tc.podReadiness) {
|
||||
podReadiness = tc.podReadiness[i]
|
||||
}
|
||||
podPhase := v1.PodRunning
|
||||
if tc.podPhase != nil {
|
||||
podPhase = tc.podPhase[i]
|
||||
}
|
||||
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
|
||||
pod := v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
Phase: podPhase,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
|
@ -405,6 +415,25 @@ func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 2,
|
||||
expectedReplicas: 4,
|
||||
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
|
||||
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
|
||||
resource: &resourceInfo{
|
||||
name: v1.ResourceCPU,
|
||||
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
levels: []int64{500, 700},
|
||||
|
||||
targetUtilization: 30,
|
||||
expectedUtilization: 60,
|
||||
expectedValue: numContainersPerPod * 600,
|
||||
},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleUpCM(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
|
@ -610,6 +639,25 @@ func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 5,
|
||||
expectedReplicas: 3,
|
||||
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
|
||||
podPhase: []v1.PodPhase{v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodRunning, v1.PodFailed, v1.PodFailed},
|
||||
resource: &resourceInfo{
|
||||
name: v1.ResourceCPU,
|
||||
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
levels: []int64{100, 300, 500, 250, 250},
|
||||
|
||||
targetUtilization: 50,
|
||||
expectedUtilization: 28,
|
||||
expectedValue: numContainersPerPod * 280,
|
||||
},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcTolerance(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
|
|
Loading…
Reference in New Issue