mirror of https://github.com/k3s-io/k3s
Merge pull request #50910 from wasylkowski/autoscaler-test-6

Automatic merge from submit-queue

Added an end-to-end test ensuring that Cluster Autoscaler does not scale up when all pending pods are unschedulable

**What this PR does / why we need it**:

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #

**Special notes for your reviewer**:

**Release note**:
```release-note
NONE
```

commit 28f6b3fcc0
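The central refactor in this diff replaces the fractional `epsilon` parameter of `simpleScaleUpTestWithEpsilon` with explicit absolute counts in `simpleScaleUpTestWithTolerance`; call sites keep the old percentages by computing counts such as `additionalNodes1 / 20` (the old 0.05) and `totalNodes / 10` (the old 0.1). It also retires a dead branch: the old pod-side guard `epsilon > 0 && epsilon < 0` could never be true. A minimal standalone sketch of the before/after node check, using simplified local variables rather than the test's actual `scaleUpTestConfig` type:

```go
package main

import "fmt"

func main() {
	expectedNodes := 700

	// Before: tolerate a fraction (epsilon) of the expected node count.
	epsilon := 0.05
	oldMin := int(float64(expectedNodes) - epsilon*float64(expectedNodes))

	// After: tolerate an absolute count of missing nodes; dividing by 20 preserves the old 5%.
	tolerateMissingNodeCount := expectedNodes / 20
	newMin := expectedNodes - tolerateMissingNodeCount

	fmt.Println(oldMin, newMin) // 665 665: same threshold, now stated as a count
}
```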
@@ -70,7 +70,7 @@ var _ = SIGDescribe("[Feature:ClusterSizeAutoscalingScaleUp] [Slow] Autoscaling"
     AfterEach(func() {
         // Scale down back to only 'nodesNum' nodes, as expected at the start of the test.
         framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum))
-        framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, nodesNum, 15*time.Minute))
+        framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, nodesNum, 15*time.Minute))
     })

     Measure("takes less than 15 minutes", func(b Benchmarker) {
@@ -19,6 +19,7 @@ package autoscaling
 import (
     "encoding/json"
     "fmt"
+    "math"
     "strings"
     "time"
@@ -88,7 +89,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
         }
     }

-    framework.ExpectNoError(framework.WaitForClusterSize(c, sum, scaleUpTimeout))
+    framework.ExpectNoError(framework.WaitForReadyNodes(c, sum, scaleUpTimeout))

     nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
     nodeCount = len(nodes.Items)
@@ -113,7 +114,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
     AfterEach(func() {
         By(fmt.Sprintf("Restoring initial size of the cluster"))
         setMigSizes(originalSizes)
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount, scaleDownTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount, scaleDownTimeout))
        nodes, err := c.Core().Nodes().List(metav1.ListOptions{})
        framework.ExpectNoError(err)
        s := time.Now()
@@ -159,8 +160,11 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
     It("should scale up twice [Feature:ClusterAutoscalerScalability2]", func() {
         perNodeReservation := int(float64(memCapacityMb) * 0.95)
         replicasPerNode := 10
-        additionalNodes1 := int(0.7 * maxNodes)
-        additionalNodes2 := int(0.25 * maxNodes)
+        additionalNodes1 := int(math.Ceil(0.7 * maxNodes))
+        additionalNodes2 := int(math.Ceil(0.25 * maxNodes))
+        if additionalNodes1+additionalNodes2 > maxNodes {
+            additionalNodes2 = maxNodes - additionalNodes1
+        }

         replicas1 := additionalNodes1 * replicasPerNode
         replicas2 := additionalNodes2 * replicasPerNode
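Why the new clamp matters: with `math.Ceil`, the two rounded-up shares can together exceed `maxNodes` for small clusters, so `additionalNodes2` is trimmed. A quick standalone check (the real `maxNodes` constant is defined elsewhere in the test file; 1000 and 3 here are illustrative values):

```go
package main

import (
	"fmt"
	"math"
)

// split mirrors the ceil-then-clamp logic added in the hunk above.
func split(maxNodes float64) (int, int) {
	additionalNodes1 := int(math.Ceil(0.7 * maxNodes))
	additionalNodes2 := int(math.Ceil(0.25 * maxNodes))
	if additionalNodes1+additionalNodes2 > int(maxNodes) {
		additionalNodes2 = int(maxNodes) - additionalNodes1
	}
	return additionalNodes1, additionalNodes2
}

func main() {
	fmt.Println(split(1000)) // 700 250: no clamping needed
	fmt.Println(split(3))    // 3 0: ceil(2.1)+ceil(0.75) = 4 > 3, so the second share is clamped
}
```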
@@ -168,7 +172,8 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
         glog.Infof("cores per node: %v", coresPerNode)

         // saturate cluster
-        reservationCleanup := ReserveMemory(f, "some-pod", nodeCount, nodeCount*perNodeReservation, true, memoryReservationTimeout)
+        initialReplicas := nodeCount
+        reservationCleanup := ReserveMemory(f, "some-pod", initialReplicas, nodeCount*perNodeReservation, true, memoryReservationTimeout)
         defer reservationCleanup()
         framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))

@@ -179,10 +184,10 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
         expectedResult := createClusterPredicates(nodeCount + additionalNodes1)
         config := createScaleUpTestConfig(nodeCount, nodeCount, rcConfig, expectedResult)

-        epsilon := 0.05
-
         // run test #1
-        testCleanup1 := simpleScaleUpTestWithEpsilon(f, config, epsilon)
+        tolerateUnreadyNodes := additionalNodes1 / 20
+        tolerateUnreadyPods := (initialReplicas + replicas1) / 20
+        testCleanup1 := simpleScaleUpTestWithTolerance(f, config, tolerateUnreadyNodes, tolerateUnreadyPods)
         defer testCleanup1()

         glog.Infof("Scaled up once")
@@ -193,7 +198,9 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
         config2 := createScaleUpTestConfig(nodeCount+additionalNodes1, nodeCount+additionalNodes2, rcConfig2, expectedResult2)

         // run test #2
-        testCleanup2 := simpleScaleUpTestWithEpsilon(f, config2, epsilon)
+        tolerateUnreadyNodes = maxNodes / 20
+        tolerateUnreadyPods = (initialReplicas + replicas1 + replicas2) / 20
+        testCleanup2 := simpleScaleUpTestWithTolerance(f, config2, tolerateUnreadyNodes, tolerateUnreadyPods)
         defer testCleanup2()

         glog.Infof("Scaled up twice")
@@ -201,7 +208,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun

     It("should scale down empty nodes [Feature:ClusterAutoscalerScalability3]", func() {
         perNodeReservation := int(float64(memCapacityMb) * 0.7)
-        replicas := int(float64(maxNodes) * 0.7)
+        replicas := int(math.Ceil(maxNodes * 0.7))
         totalNodes := maxNodes

         // resize cluster to totalNodes
@@ -209,13 +216,15 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
             anyKey(originalSizes): totalNodes,
         }
         setMigSizes(newSizes)
-        framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, totalNodes, largeResizeTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, totalNodes, largeResizeTimeout))

         // run replicas
         rcConfig := reserveMemoryRCConfig(f, "some-pod", replicas, replicas*perNodeReservation, largeScaleUpTimeout)
         expectedResult := createClusterPredicates(totalNodes)
         config := createScaleUpTestConfig(totalNodes, totalNodes, rcConfig, expectedResult)
-        testCleanup := simpleScaleUpTestWithEpsilon(f, config, 0.1)
+        tolerateUnreadyNodes := totalNodes / 10
+        tolerateUnreadyPods := replicas / 10
+        testCleanup := simpleScaleUpTestWithTolerance(f, config, tolerateUnreadyNodes, tolerateUnreadyPods)
         defer testCleanup()

         // check if empty nodes are scaled down
@@ -241,7 +250,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
         }
         setMigSizes(newSizes)

-        framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, totalNodes, largeResizeTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, totalNodes, largeResizeTimeout))

         // annotate all nodes with no-scale-down
         ScaleDownDisabledKey := "cluster-autoscaler.kubernetes.io/scale-down-disabled"
@@ -295,7 +304,7 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
             anyKey(originalSizes): totalNodes,
         }
         setMigSizes(newSizes)
-        framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, totalNodes, largeResizeTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, totalNodes, largeResizeTimeout))
         divider := int(float64(totalNodes) * 0.7)
         fullNodesCount := divider
         underutilizedNodesCount := totalNodes - fullNodesCount
@@ -321,6 +330,41 @@ var _ = framework.KubeDescribe("Cluster size autoscaler scalability [Slow]", fun
         Expect(len(nodes.Items)).Should(Equal(totalNodes))
     })

+    Specify("CA ignores unschedulable pods while scheduling schedulable pods [Feature:ClusterAutoscalerScalability6]", func() {
+        // Start a number of pods saturating existing nodes.
+        perNodeReservation := int(float64(memCapacityMb) * 0.80)
+        replicasPerNode := 10
+        initialPodReplicas := nodeCount * replicasPerNode
+        initialPodsTotalMemory := nodeCount * perNodeReservation
+        reservationCleanup := ReserveMemory(f, "initial-pod", initialPodReplicas, initialPodsTotalMemory, true /* wait for pods to run */, memoryReservationTimeout)
+        defer reservationCleanup()
+        framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
+
+        // Configure a number of unschedulable pods.
+        unschedulableMemReservation := memCapacityMb * 2
+        unschedulablePodReplicas := 1000
+        totalMemReservation := unschedulableMemReservation * unschedulablePodReplicas
+        timeToWait := 5 * time.Minute
+        podsConfig := reserveMemoryRCConfig(f, "unschedulable-pod", unschedulablePodReplicas, totalMemReservation, timeToWait)
+        framework.RunRC(*podsConfig) // Ignore error (it will occur because pods are unschedulable)
+        defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, podsConfig.Name)
+
+        // Ensure that no new nodes have been added so far.
+        Expect(framework.NumberOfReadyNodes(f.ClientSet)).To(Equal(nodeCount))
+
+        // Start a number of schedulable pods to ensure CA reacts.
+        additionalNodes := maxNodes - nodeCount
+        replicas := additionalNodes * replicasPerNode
+        totalMemory := additionalNodes * perNodeReservation
+        rcConfig := reserveMemoryRCConfig(f, "extra-pod", replicas, totalMemory, largeScaleUpTimeout)
+        expectedResult := createClusterPredicates(nodeCount + additionalNodes)
+        config := createScaleUpTestConfig(nodeCount, initialPodReplicas, rcConfig, expectedResult)
+
+        // Test that scale up happens, allowing 1000 unschedulable pods not to be scheduled.
+        testCleanup := simpleScaleUpTestWithTolerance(f, config, 0, unschedulablePodReplicas)
+        defer testCleanup()
+    })
+
 })

 func makeUnschedulable(f *framework.Framework, nodes []v1.Node) error {
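Note how the new Scalability6 test uses the tolerance parameters as an escape hatch rather than a fudge factor: node tolerance 0 demands the full scale-up, while the pod tolerance equals exactly the count of decoy pods, so only the deliberately unschedulable pods may remain pending. Schematically, with illustrative numbers rather than values from a real test environment:

```go
package main

import "fmt"

func main() {
	// Illustrative numbers; in the test these derive from nodeCount, maxNodes, etc.
	expectedNodes, readyNodes := 100, 100
	expectedPods, readyPods := 2000, 1000
	tolerateMissingNodeCount, tolerateMissingPodCount := 0, 1000

	// The tolerance check accepts this state: all nodes up, only decoy pods pending.
	fmt.Println(readyNodes >= expectedNodes-tolerateMissingNodeCount) // true
	fmt.Println(readyPods >= expectedPods-tolerateMissingPodCount)    // true
}
```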
@@ -350,24 +394,24 @@ func anyKey(input map[string]int) string {
     return ""
 }

-func simpleScaleUpTestWithEpsilon(f *framework.Framework, config *scaleUpTestConfig, epsilon float64) func() error {
+func simpleScaleUpTestWithTolerance(f *framework.Framework, config *scaleUpTestConfig, tolerateMissingNodeCount int, tolerateMissingPodCount int) func() error {
     // resize cluster to start size
     // run rc based on config
     By(fmt.Sprintf("Running RC %v from config", config.extraPods.Name))
     start := time.Now()
     framework.ExpectNoError(framework.RunRC(*config.extraPods))
     // check results
-    if epsilon > 0 && epsilon < 1 {
+    if tolerateMissingNodeCount > 0 {
         // Tolerate some number of nodes not to be created.
-        minExpectedNodeCount := int(float64(config.expectedResult.nodes) - epsilon*float64(config.expectedResult.nodes))
+        minExpectedNodeCount := config.expectedResult.nodes - tolerateMissingNodeCount
         framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
             func(size int) bool { return size >= minExpectedNodeCount }, scaleUpTimeout))
     } else {
-        framework.ExpectNoError(framework.WaitForClusterSize(f.ClientSet, config.expectedResult.nodes, scaleUpTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(f.ClientSet, config.expectedResult.nodes, scaleUpTimeout))
     }
     glog.Infof("cluster is increased")
-    if epsilon > 0 && epsilon < 0 {
-        framework.ExpectNoError(waitForCaPodsReadyInNamespace(f, f.ClientSet, int(epsilon*float64(config.extraPods.Replicas)+1)))
+    if tolerateMissingPodCount > 0 {
+        framework.ExpectNoError(waitForCaPodsReadyInNamespace(f, f.ClientSet, tolerateMissingPodCount))
     } else {
         framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, f.ClientSet))
     }

@@ -378,7 +422,7 @@ func simpleScaleUpTestWithEpsilon(f *framework.Framework, config *scaleUpTestCon
 }

 func simpleScaleUpTest(f *framework.Framework, config *scaleUpTestConfig) func() error {
-    return simpleScaleUpTestWithEpsilon(f, config, 0)
+    return simpleScaleUpTestWithTolerance(f, config, 0, 0)
 }

 func reserveMemoryRCConfig(f *framework.Framework, id string, replicas, megabytes int, timeout time.Duration) *testutils.RCConfig {
@@ -97,7 +97,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
             sum += size
         }
         // Give instances time to spin up
-        framework.ExpectNoError(framework.WaitForClusterSize(c, sum, scaleUpTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, sum, scaleUpTimeout))

         nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
         nodeCount = len(nodes.Items)
@@ -127,7 +127,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         for _, size := range originalSizes {
             expectedNodes += size
         }
-        framework.ExpectNoError(framework.WaitForClusterSize(c, expectedNodes, scaleDownTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, expectedNodes, scaleDownTimeout))
         nodes, err := c.Core().Nodes().List(metav1.ListOptions{})
         framework.ExpectNoError(err)

@@ -226,7 +226,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         const extraPoolName = "extra-pool"
         addNodePool(extraPoolName, "n1-standard-4", 1)
         defer deleteNodePool(extraPoolName)
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+1, resizeTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
         glog.Infof("Not enabling cluster autoscaler for the node pool (on purpose).")

         By("Get memory available on new node, so we can account for it when creating RC")
@@ -253,7 +253,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         const extraPoolName = "extra-pool"
         addNodePool(extraPoolName, "n1-standard-4", 1)
         defer deleteNodePool(extraPoolName)
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+1, resizeTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
         framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2))
         framework.ExpectNoError(disableAutoscaler(extraPoolName, 1, 2))
     })
@@ -283,7 +283,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "extra-pod")

         framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+newPods, scaleUpTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+newPods, scaleUpTimeout))
     })

     It("should increase cluster size if pod requesting EmptyDir volume is pending [Feature:ClusterSizeAutoscalingScaleUp]", func() {
@@ -304,7 +304,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         defer framework.DeleteRCAndPods(f.ClientSet, f.InternalClientset, f.Namespace.Name, "extra-pod")

         framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+newPods, scaleUpTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+newPods, scaleUpTimeout))
     })

     It("should increase cluster size if pod requesting volume is pending [Feature:ClusterSizeAutoscalingScaleUp]", func() {
@@ -377,7 +377,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         }()

         framework.ExpectNoError(waitForAllCaPodsReadyInNamespace(f, c))
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+newPods, scaleUpTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+newPods, scaleUpTimeout))
     })

     It("should add node to the particular mig [Feature:ClusterSizeAutoscalingScaleUp]", func() {
@@ -478,7 +478,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         const extraPoolName = "extra-pool"
         addNodePool(extraPoolName, "n1-standard-4", 1)
         defer deleteNodePool(extraPoolName)
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+1, resizeTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+1, resizeTimeout))
         framework.ExpectNoError(enableAutoscaler(extraPoolName, 1, 2))

         By("Creating rc with 2 pods too big to fit default-pool but fitting extra-pool")
@@ -489,7 +489,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         // reseting all the timers in scale down code. Adding 5 extra minutes to workaround
         // this issue.
         // TODO: Remove the extra time when GKE restart is fixed.
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount+2, scaleUpTimeout+5*time.Minute))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount+2, scaleUpTimeout+5*time.Minute))
     })

     simpleScaleDownTest := func(unready int) {
@@ -588,7 +588,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         }
         err := framework.ResizeGroup(minMig, int32(0))
         framework.ExpectNoError(err)
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount-minSize, resizeTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount-minSize, resizeTimeout))

         By("Make remaining nodes unschedulable")
         nodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
@@ -628,7 +628,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         }
         err := framework.ResizeGroup(minMig, int32(1))
         framework.ExpectNoError(err)
-        framework.ExpectNoError(framework.WaitForClusterSize(c, nodeCount-minSize+1, resizeTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount-minSize+1, resizeTimeout))

         By("Make the single node unschedulable")
         allNodes, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
@@ -699,7 +699,7 @@ var _ = SIGDescribe("Cluster size autoscaling [Slow]", func() {
         }
         testFunction()
         // Give nodes time to recover from network failure
-        framework.ExpectNoError(framework.WaitForClusterSize(c, len(nodes.Items), nodesRecoverTimeout))
+        framework.ExpectNoError(framework.WaitForReadyNodes(c, len(nodes.Items), nodesRecoverTimeout))
     })

 })
@@ -937,7 +937,7 @@ func ReserveMemory(f *framework.Framework, id string, replicas, megabytes int, e
     return nil
 }

-// WaitForClusterSize waits until the cluster size matches the given function.
+// WaitForClusterSizeFunc waits until the cluster size matches the given function.
 func WaitForClusterSizeFunc(c clientset.Interface, sizeFunc func(int) bool, timeout time.Duration) error {
     return WaitForClusterSizeFuncWithUnready(c, sizeFunc, timeout, 0)
 }
@@ -151,7 +151,7 @@ var _ = SIGDescribe("DNS horizontal autoscaling", func() {

         By("Restoring cluster size")
         setMigSizes(originalSizes)
-        Expect(framework.WaitForClusterSize(c, sum, scaleDownTimeout)).NotTo(HaveOccurred())
+        Expect(framework.WaitForReadyNodes(c, sum, scaleDownTimeout)).NotTo(HaveOccurred())

         By("Wait for kube-dns scaled to expected number")
         Expect(waitForDNSReplicasSatisfied(c, getExpectReplicasLinear, DNSdefaultTimeout)).NotTo(HaveOccurred())
@@ -3892,9 +3892,26 @@ func WaitForControllerManagerUp() error {
     return fmt.Errorf("waiting for controller-manager timed out")
 }

-// WaitForClusterSize waits until the cluster has desired size and there is no not-ready nodes in it.
+// Returns number of ready Nodes excluding Master Node.
+func NumberOfReadyNodes(c clientset.Interface) (int, error) {
+    nodes, err := c.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
+        "spec.unschedulable": "false",
+    }.AsSelector().String()})
+    if err != nil {
+        Logf("Failed to list nodes: %v", err)
+        return 0, err
+    }
+
+    // Filter out not-ready nodes.
+    FilterNodes(nodes, func(node v1.Node) bool {
+        return IsNodeConditionSetAsExpected(&node, v1.NodeReady, true)
+    })
+    return len(nodes.Items), nil
+}
+
+// WaitForReadyNodes waits until the cluster has desired size and there is no not-ready nodes in it.
 // By cluster size we mean number of Nodes excluding Master Node.
-func WaitForClusterSize(c clientset.Interface, size int, timeout time.Duration) error {
+func WaitForReadyNodes(c clientset.Interface, size int, timeout time.Duration) error {
     for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) {
         nodes, err := c.Core().Nodes().List(metav1.ListOptions{FieldSelector: fields.Set{
             "spec.unschedulable": "false",
@@ -3912,12 +3929,12 @@ func WaitForClusterSize(c clientset.Interface, size int, timeout time.Duration)
         numReady := len(nodes.Items)

         if numNodes == size && numReady == size {
-            Logf("Cluster has reached the desired size %d", size)
+            Logf("Cluster has reached the desired number of ready nodes %d", size)
             return nil
         }
-        Logf("Waiting for cluster size %d, current size %d, not ready nodes %d", size, numNodes, numNodes-numReady)
+        Logf("Waiting for ready nodes %d, current ready %d, not ready nodes %d", size, numNodes, numNodes-numReady)
     }
-    return fmt.Errorf("timeout waiting %v for cluster size to be %d", timeout, size)
+    return fmt.Errorf("timeout waiting %v for number of ready nodes to be %d", timeout, size)
 }

 func GenerateMasterRegexp(prefix string) string {
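For reference, the polling contract that `WaitForReadyNodes` keeps after the rename: poll every 20 seconds until both the schedulable-node count and the ready-node count equal the target. A self-contained restatement of that loop, with a toy counter standing in for the `Nodes().List` call:

```go
package main

import (
	"fmt"
	"time"
)

// waitForReady mirrors the WaitForReadyNodes loop above: succeed only when the
// schedulable-node count and the ready-node count both equal size.
func waitForReady(count func() (numNodes, numReady int), size int, timeout time.Duration) error {
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) {
		if numNodes, numReady := count(); numNodes == size && numReady == size {
			return nil
		}
	}
	return fmt.Errorf("timeout waiting %v for number of ready nodes to be %d", timeout, size)
}

func main() {
	// Toy counter that reports three ready nodes immediately.
	err := waitForReady(func() (int, int) { return 3, 3 }, 3, time.Minute)
	fmt.Println(err) // <nil>
}
```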
@@ -98,7 +98,7 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
         if err := framework.WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
             framework.Failf("Couldn't restore the original node instance group size: %v", err)
         }
-        if err := framework.WaitForClusterSize(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
+        if err := framework.WaitForReadyNodes(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
             framework.Failf("Couldn't restore the original cluster size: %v", err)
         }
         // Many e2e tests assume that the cluster is fully healthy before they start. Wait until
@@ -124,7 +124,7 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
         Expect(err).NotTo(HaveOccurred())
         err = framework.WaitForGroupSize(group, replicas-1)
         Expect(err).NotTo(HaveOccurred())
-        err = framework.WaitForClusterSize(c, int(replicas-1), 10*time.Minute)
+        err = framework.WaitForReadyNodes(c, int(replicas-1), 10*time.Minute)
         Expect(err).NotTo(HaveOccurred())

         By("waiting 1 minute for the watch in the podGC to catch up, remove any pods scheduled on " +
@@ -152,7 +152,7 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
         Expect(err).NotTo(HaveOccurred())
         err = framework.WaitForGroupSize(group, replicas+1)
         Expect(err).NotTo(HaveOccurred())
-        err = framework.WaitForClusterSize(c, int(replicas+1), 10*time.Minute)
+        err = framework.WaitForReadyNodes(c, int(replicas+1), 10*time.Minute)
         Expect(err).NotTo(HaveOccurred())

         By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1))