Merge pull request #53234 from jiayingz/e2e-flaky

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fixes flakiness in the GPUDevicePlugin e2e test.

Waits until Nvidia GPUs disappear from all nodes after deleting the
device plugin DaemonSet, to make sure its pods are deleted from all nodes.
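
The wait itself is a plain Gomega `Eventually` poll. Below is a minimal, self-contained sketch of that pattern under stated assumptions: the package, test name, and `probeGPUsGone` helper are illustrative stand-ins for the e2e framework pieces, not the actual test code.

```go
package gpuwait_test

import (
	"testing"
	"time"

	"github.com/onsi/gomega"
)

// probeGPUsGone is a hypothetical stand-in for
// !areGPUsAvailableOnAnySchedulableNodes(f); the real e2e helper lists the
// nodes and checks Status.Capacity for the Nvidia GPU resource.
func probeGPUsGone() bool {
	return true
}

func TestWaitForGPUsToDisappear(t *testing.T) {
	g := gomega.NewGomegaWithT(t)

	// Eventually re-runs the probe every second until it returns true or the
	// 10-minute timeout elapses; this is the wait added after deleting the
	// device plugin DaemonSet.
	g.Eventually(probeGPUsGone, 10*time.Minute, time.Second).Should(gomega.BeTrue())
}
```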



**What this PR does / why we need it**:

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #53281 (https://github.com/kubernetes/kubernetes/issues/53281)

**Special notes for your reviewer**:

**Release note**:

```release-note
```
Kubernetes Submit Queue 2017-09-29 19:50:42 -07:00 committed by GitHub
commit 1331df79fc
1 changed file with 21 additions and 3 deletions

@@ -139,6 +139,24 @@ func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool {
	return true
}
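
// areGPUsAvailableOnAnySchedulableNodes reports whether at least one
// schedulable node advertises non-zero capacity for gpuResourceName.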
func areGPUsAvailableOnAnySchedulableNodes(f *framework.Framework) bool {
	framework.Logf("Getting list of Nodes from API server")
	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
	framework.ExpectNoError(err, "getting node list")
	for _, node := range nodeList.Items {
		if node.Spec.Unschedulable {
			continue
		}
		framework.Logf("gpuResourceName %s", gpuResourceName)
		if val, ok := node.Status.Capacity[gpuResourceName]; ok && val.Value() > 0 {
			framework.Logf("Nvidia GPUs available on Node: %q", node.Name)
			return true
		}
	}
	framework.Logf("Nvidia GPUs don't exist on any schedulable node")
	return false
}

func getGPUsAvailable(f *framework.Framework) int64 {
	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
	framework.ExpectNoError(err, "getting node list")
@@ -220,10 +238,10 @@ var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
			err = f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Delete(ds.Name, &metav1.DeleteOptions{OrphanDependents: &falseVar})
			framework.ExpectNoError(err, "failed to delete daemonset")
			framework.Logf("Successfully deleted device plugin daemonset. Wait for resource to be removed.")
			// Wait for Nvidia GPUs to be unavailable on all nodes.
			Eventually(func() bool {
				return !areGPUsAvailableOnAnySchedulableNodes(f)
			}, 10*time.Minute, time.Second).Should(BeTrue())
			// 3. Restarts the device plugin DaemonSet. Verifies GPU resource is successfully advertised
			// on the nodes and we can run pods using GPUs.