Merge pull request #53234 from jiayingz/e2e-flaky

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fixes flakiness in the GPUDevicePlugin e2e test.

Waits until the nvidia GPU resource disappears from all nodes after deleting the
device plugin DaemonSet, to make sure its pods are deleted from all nodes.
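In other words, the test now polls the Node objects until no schedulable node still advertises GPU capacity, rather than assuming the capacity vanishes as soon as the DaemonSet is deleted. Below is a minimal sketch of that polling pattern; the helper names `gpusGoneFromAllSchedulableNodes` and `waitForGPURemoval` and the `nvidia.com/gpu` resource string are illustrative assumptions, while the client calls and the Gomega `Eventually` usage mirror the actual change in the diff.

```go
// Sketch only, not the verbatim test code: poll the API server until no
// schedulable node still reports GPU capacity. Assumes the kubernetes e2e
// test framework and Gomega; gpuResourceName is a stand-in for whatever
// resource name the test uses (e.g. "nvidia.com/gpu").
package example

import (
	"time"

	. "github.com/onsi/gomega"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/test/e2e/framework"
)

const gpuResourceName = "nvidia.com/gpu"

// gpusGoneFromAllSchedulableNodes reports whether every schedulable node has
// stopped advertising GPU capacity, i.e. the device plugin pods are gone.
func gpusGoneFromAllSchedulableNodes(f *framework.Framework) bool {
	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
	framework.ExpectNoError(err, "getting node list")
	for _, node := range nodeList.Items {
		if node.Spec.Unschedulable {
			continue
		}
		if val, ok := node.Status.Capacity[gpuResourceName]; ok && val.Value() > 0 {
			return false // this node still reports GPUs; keep waiting
		}
	}
	return true
}

// waitForGPURemoval blocks until the GPU resource disappears from all
// schedulable nodes, polling once a second for up to ten minutes.
func waitForGPURemoval(f *framework.Framework) {
	Eventually(func() bool {
		return gpusGoneFromAllSchedulableNodes(f)
	}, 10*time.Minute, time.Second).Should(BeTrue())
}
```

The final diff also bumps the wait from five to ten minutes, presumably to give slower nodes time to drop the advertised capacity after the DaemonSet pods terminate.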



**What this PR does / why we need it**:

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #53281
https://github.com/kubernetes/kubernetes/issues/53281

**Special notes for your reviewer**:

**Release note**:

```release-note
```
Kubernetes Submit Queue 2017-09-29 19:50:42 -07:00 committed by GitHub
commit 1331df79fc
1 changed file with 21 additions and 3 deletions


@@ -139,6 +139,24 @@ func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool {
 	return true
 }
 
+func areGPUsAvailableOnAnySchedulableNodes(f *framework.Framework) bool {
+	framework.Logf("Getting list of Nodes from API server")
+	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
+	framework.ExpectNoError(err, "getting node list")
+	for _, node := range nodeList.Items {
+		if node.Spec.Unschedulable {
+			continue
+		}
+		framework.Logf("gpuResourceName %s", gpuResourceName)
+		if val, ok := node.Status.Capacity[gpuResourceName]; ok && val.Value() > 0 {
+			framework.Logf("Nvidia GPUs available on Node: %q", node.Name)
+			return true
+		}
+	}
+	framework.Logf("Nvidia GPUs don't exist on all schedulable nodes")
+	return false
+}
+
 func getGPUsAvailable(f *framework.Framework) int64 {
 	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
 	framework.ExpectNoError(err, "getting node list")
@@ -220,10 +238,10 @@ var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
 		err = f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Delete(ds.Name, &metav1.DeleteOptions{OrphanDependents: &falseVar})
 		framework.ExpectNoError(err, "failed to delete daemonset")
 		framework.Logf("Successfully deleted device plugin daemonset. Wait for resource to be removed.")
-		// Wait for Nvidia GPUs to be not available on nodes
+		// Wait for Nvidia GPUs to be unavailable on all nodes.
 		Eventually(func() bool {
-			return !areGPUsAvailableOnAllSchedulableNodes(f)
-		}, 5*time.Minute, time.Second).Should(BeTrue())
+			return !areGPUsAvailableOnAnySchedulableNodes(f)
+		}, 10*time.Minute, time.Second).Should(BeTrue())
 		// 3. Restarts the device plugin DaemonSet. Verifies GPU resource is successfully advertised
 		// on the nodes and we can run pods using GPUs.