Merge pull request #44284 from MaciekPytel/ca_e2e_unflake

Automatic merge from submit-queue (batch tested with PRs 43844, 44284)

Add a retry to cluster-autoscaler e2e

This should fix https://github.com/kubernetes/kubernetes/issues/44268.

The flake was caused by the following sequence of events:
1. The cluster was at its minimum size (3), and some node had been unneeded for a while.
2. Setup for some test (scale-down, failure) would increase the node group size (to 5) and wait for the new nodes to come up.
3. As soon as a new node came up (cluster size 4), CA would scale down the old unneeded node (setting the node group size to 4).
4. The node group would not reach size 5 (as the target was now 4), and the test would time out and fail.

This PR makes the test setup keep checking the node group size while it waits, and set it back to the target size if the above scenario happens.
pull/6/head
Kubernetes Submit Queue 2017-04-11 06:42:15 -07:00 committed by GitHub
commit 4ae6bd93c2
1 changed files with 17 additions and 3 deletions

View File

@ -735,7 +735,8 @@ func waitForAllCaPodsReadyInNamespace(f *framework.Framework, c clientset.Interf
return fmt.Errorf("Some pods are still not running: %v", notready)
}
func setMigSizes(sizes map[string]int) {
func setMigSizes(sizes map[string]int) bool {
madeChanges := false
for mig, desiredSize := range sizes {
currentSize, err := framework.GroupSize(mig)
framework.ExpectNoError(err)
@ -743,8 +744,10 @@ func setMigSizes(sizes map[string]int) {
By(fmt.Sprintf("Setting size of %s to %d", mig, desiredSize))
err = framework.ResizeGroup(mig, int32(desiredSize))
framework.ExpectNoError(err)
madeChanges = true
}
}
return madeChanges
}
func makeNodeUnschedulable(c clientset.Interface, node *v1.Node) error {
@ -899,8 +902,19 @@ func manuallyIncreaseClusterSize(f *framework.Framework, originalSizes map[strin
increasedSize += val + newNodesForScaledownTests
}
setMigSizes(newSizes)
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet,
func(size int) bool { return size >= increasedSize }, scaleUpTimeout))
checkClusterSize := func(size int) bool {
if size >= increasedSize {
return true
}
resized := setMigSizes(newSizes)
if resized {
glog.Warning("Unexpected node group size while waiting for cluster resize. Setting size to target again.")
}
return false
}
framework.ExpectNoError(WaitForClusterSizeFunc(f.ClientSet, checkClusterSize, scaleUpTimeout))
return increasedSize
}