From 4a1a2051098133ea067d876d986d241e509accda Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Mon, 20 Nov 2017 10:55:10 -0800 Subject: [PATCH] Changes nvidia-gpu device plugin addon config settings: - Runs as system critical pod - Makes resource limits match its resource requests - Modifies test/e2e/scheduling/nvidia-gpus.go to cope with the recent change of running the device plugin as a system addon. - The resource settings of the addon are based on the test results from 8 nvidia-tesla-k80 gpus. --- cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml | 8 +++++++- test/e2e/scheduling/nvidia-gpus.go | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml b/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml index a5eaf1da76..6b5edbf733 100644 --- a/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml +++ b/cluster/addons/device-plugins/nvidia-gpu/daemonset.yaml @@ -11,7 +11,10 @@ spec: metadata: labels: k8s-app: nvidia-gpu-device-plugin + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' spec: + priorityClassName: system-node-critical affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -34,7 +37,10 @@ spec: name: nvidia-gpu-device-plugin resources: requests: - cpu: 10m + cpu: 50m + memory: 10Mi + limits: + cpu: 50m memory: 10Mi securityContext: privileged: true diff --git a/test/e2e/scheduling/nvidia-gpus.go b/test/e2e/scheduling/nvidia-gpus.go index 785791e9e9..442f758d28 100644 --- a/test/e2e/scheduling/nvidia-gpus.go +++ b/test/e2e/scheduling/nvidia-gpus.go @@ -183,6 +183,11 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) { pods, err := framework.WaitForControlledPods(f.ClientSet, ds.Namespace, ds.Name, extensionsinternal.Kind("DaemonSet")) framework.ExpectNoError(err, "getting pods controlled by the daemonset") + devicepluginPods, err := framework.WaitForControlledPods(f.ClientSet, "kube-system", "nvidia-gpu-device-plugin", 
extensionsinternal.Kind("DaemonSet")) + if err == nil { + framework.Logf("Adding deviceplugin addon pod.") + pods.Items = append(pods.Items, devicepluginPods.Items...) + } framework.Logf("Starting ResourceUsageGather for the created DaemonSet pods.") rsgather, err := framework.NewResourceUsageGatherer(f.ClientSet, framework.ResourceGathererOptions{false, false, 2 * time.Second, 2 * time.Second, true}, pods) framework.ExpectNoError(err, "creating ResourceUsageGather for the daemonset pods")