Run nvidia-gpu device-plugin daemonset as an addon on GCE nodes that have nvidia GPUs attached.

pull/6/head
Rohit Agarwal 2017-10-30 15:50:08 -07:00
parent 9c7baf94dd
commit cf292754ba
3 changed files with 53 additions and 0 deletions

View File

@ -0,0 +1,45 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: nvidia-gpu-device-plugin
namespace: kube-system
labels:
k8s-app: nvidia-gpu-device-plugin
addonmanager.kubernetes.io/mode: Reconcile
spec:
template:
metadata:
labels:
k8s-app: nvidia-gpu-device-plugin
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-accelerator
operator: Exists
hostNetwork: true
hostPID: true
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
- name: dev
hostPath:
path: /dev
containers:
- image: "gcr.io/google-containers/nvidia-gpu-device-plugin@sha256:943a62949cd80c26e7371d4e123dac61b4cc7281390721aaa95f265171094842"
command: ["/usr/bin/nvidia-gpu-device-plugin", "-logtostderr"]
name: nvidia-gpu-device-plugin
resources:
requests:
cpu: 10m
memory: 10Mi
securityContext:
privileged: true
volumeMounts:
- name: device-plugin
mountPath: /device-plugin
- name: dev
mountPath: /dev

View File

@ -873,6 +873,11 @@ EOF
if [ -n "${CLUSTER_SIGNING_DURATION:-}" ]; then
cat >>$file <<EOF
CLUSTER_SIGNING_DURATION: $(yaml-quote ${CLUSTER_SIGNING_DURATION})
EOF
fi
if [[ "${NODE_ACCELERATORS:-}" == *"type=nvidia"* ]]; then
cat >>$file <<EOF
ENABLE_NVIDIA_GPU_DEVICE_PLUGIN: $(yaml-quote "true")
EOF
fi

View File

@ -1796,6 +1796,9 @@ function start-kube-addons {
if [[ "${ENABLE_METRICS_SERVER:-}" == "true" ]]; then
setup-addon-manifests "addons" "metrics-server"
fi
if [[ "${ENABLE_NVIDIA_GPU_DEVICE_PLUGIN:-}" == "true" ]]; then
setup-addon-manifests "addons" "device-plugins/nvidia-gpu"
fi
if [[ "${ENABLE_CLUSTER_DNS:-}" == "true" ]]; then
setup-addon-manifests "addons" "dns"
local -r kubedns_file="${dst_dir}/dns/kube-dns.yaml"