Merge pull request #51660 from jiayingz/deviceplugin-e2e

Automatic merge from submit-queue. Extend the nvidia-gpus e2e test to include a device-plugin-based test. **What this PR does / why we need it**: This is needed to verify the device plugin feature. **Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes https://github.com/kubernetes/features/issues/368 **Special notes for your reviewer**: Related test-infra PR: https://github.com/kubernetes/test-infra/pull/4265 **Release note**: Add an e2e test for the Nvidia GPU device plugin.
2017-09-08 22:50:08 -07:00 · 2017-09-08 22:50:08 -07:00 · 24ad0d211b
parent a5f766063d 01b49b4165
commit 24ad0d211b
1 changed files with 58 additions and 7 deletions
--- a/test/e2e/scheduling/nvidia-gpus.go
+++ b/test/e2e/scheduling/nvidia-gpus.go
@@ -42,8 +42,14 @@ const (
 	cosOSImage        = "Container-Optimized OS from Google"
 	// Nvidia driver installation can take upwards of 5 minutes.
 	driverInstallTimeout = 10 * time.Minute
-	// Nvidia COS driver installer daemonset.
-	cosNvidiaDriverInstallerUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/stable/cos-nvidia-gpu-installer/daemonset.yaml"
+)
+
+type podCreationFuncType func() *v1.Pod
+
+var (
+	gpuResourceName v1.ResourceName
+	dsYamlUrl       string
+	podCreationFunc podCreationFuncType
 )

 func makeCudaAdditionTestPod() *v1.Pod {
@@ -60,7 +66,7 @@ func makeCudaAdditionTestPod() *v1.Pod {
 					Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
 					Resources: v1.ResourceRequirements{
 						Limits: v1.ResourceList{
-							v1.ResourceNvidiaGPU: *resource.NewQuantity(1, resource.DecimalSI),
+							gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
 						},
 					},
 					VolumeMounts: []v1.VolumeMount{
@@ -86,6 +92,30 @@ func makeCudaAdditionTestPod() *v1.Pod {
 	return testPod
 }

+func makeCudaAdditionDevicePluginTestPod() *v1.Pod {
+	podName := testPodNamePrefix + string(uuid.NewUUID())
+	testPod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: podName,
+		},
+		Spec: v1.PodSpec{
+			RestartPolicy: v1.RestartPolicyNever,
+			Containers: []v1.Container{
+				{
+					Name:  "vector-addition",
+					Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
+					Resources: v1.ResourceRequirements{
+						Limits: v1.ResourceList{
+							gpuResourceName: *resource.NewQuantity(1, resource.DecimalSI),
+						},
+					},
+				},
+			},
+		},
+	}
+	return testPod
+}
+
 func isClusterRunningCOS(f *framework.Framework) bool {
 	nodeList, err := f.ClientSet.CoreV1().Nodes().List(metav1.ListOptions{})
 	framework.ExpectNoError(err, "getting node list")
@@ -105,7 +135,8 @@ func areGPUsAvailableOnAllSchedulableNodes(f *framework.Framework) bool {
 		if node.Spec.Unschedulable {
 			continue
 		}
-		if node.Status.Capacity.NvidiaGPU().Value() == 0 {
+		framework.Logf("gpuResourceName %s", gpuResourceName)
+		if val, ok := node.Status.Capacity[gpuResourceName]; !ok || val.Value() == 0 {
 			framework.Logf("Nvidia GPUs not available on Node: %q", node.Name)
 			return false
 		}
@@ -119,7 +150,9 @@ func getGPUsAvailable(f *framework.Framework) int64 {
 	framework.ExpectNoError(err, "getting node list")
 	var gpusAvailable int64
 	for _, node := range nodeList.Items {
-		gpusAvailable += node.Status.Capacity.NvidiaGPU().Value()
+		if val, ok := node.Status.Capacity[gpuResourceName]; ok {
+			gpusAvailable += (&val).Value()
+		}
 	}
 	return gpusAvailable
 }
@@ -133,10 +166,21 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
 		Skip("Nvidia GPU tests are supproted only on Container Optimized OS image currently")
 	}
 	framework.Logf("Cluster is running on COS. Proceeding with test")
+
+	if f.BaseName == "device-plugin-gpus" {
+		dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/device-plugin-daemonset.yaml"
+		gpuResourceName = "nvidia.com/gpu"
+		podCreationFunc = makeCudaAdditionDevicePluginTestPod
+	} else {
+		dsYamlUrl = "https://raw.githubusercontent.com/ContainerEngine/accelerators/master/cos-nvidia-gpu-installer/daemonset.yaml"
+		gpuResourceName = v1.ResourceNvidiaGPU
+		podCreationFunc = makeCudaAdditionTestPod
+	}
+
 	// GPU drivers might have already been installed.
 	if !areGPUsAvailableOnAllSchedulableNodes(f) {
 		// Install Nvidia Drivers.
-		ds := dsFromManifest(cosNvidiaDriverInstallerUrl)
+		ds := dsFromManifest(dsYamlUrl)
 		ds.Namespace = f.Namespace.Name
 		_, err := f.ClientSet.Extensions().DaemonSets(f.Namespace.Name).Create(ds)
 		framework.ExpectNoError(err, "failed to create daemonset")
@@ -149,7 +193,7 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
 	framework.Logf("Creating as many pods as there are Nvidia GPUs and have the pods run a CUDA app")
 	podList := []*v1.Pod{}
 	for i := int64(0); i < getGPUsAvailable(f); i++ {
-		podList = append(podList, f.PodClient().Create(makeCudaAdditionTestPod()))
+		podList = append(podList, f.PodClient().Create(podCreationFunc()))
 	}
 	framework.Logf("Wait for all test pods to succeed")
 	// Wait for all pods to succeed
@@ -192,3 +236,10 @@ var _ = SIGDescribe("[Feature:GPU]", func() {
 		testNvidiaGPUsOnCOS(f)
 	})
 })
+
+var _ = SIGDescribe("[Feature:GPUDevicePlugin]", func() {
+	f := framework.NewDefaultFramework("device-plugin-gpus")
+	It("run Nvidia GPU Device Plugin tests on Container Optimized OS only", func() {
+		testNvidiaGPUsOnCOS(f)
+	})
+})