WIP v0 NVIDIA GPU support

Implements part of #24071

I am not familiar enough with the scheduler to know what to do with the scores. Punting for now.

Missing items from the implementation plan: LimitRanger, rkt support, kubectl support, and user docs.
Rudi Chiarito 2016-04-26 20:54:19 -04:00
parent 57359e4f2f
commit 362c763fca
21 changed files with 858 additions and 666 deletions
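
For context before the diffs, a minimal sketch (not part of this commit) of what the change enables on the pod-spec side: a container asks for a GPU through `limits` under the new `alpha.kubernetes.io/nvidia-gpu` resource name, and the validation change below rejects a GPU entry under `requests`. Import paths assume the 2016-era tree (`k8s.io/kubernetes/pkg/api`); the container name and image are made up.

```go
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
)

func main() {
	// Hypothetical container spec: CPU/memory as usual, plus one NVIDIA GPU.
	// The GPU may only appear under Limits; per the validation change in this
	// commit, a GPU entry under Requests is rejected.
	c := api.Container{
		Name:  "cuda-worker",       // made-up name
		Image: "example/cuda:v1",   // made-up image
		Resources: api.ResourceRequirements{
			Limits: api.ResourceList{
				api.ResourceCPU:       resource.MustParse("1"),
				api.ResourceMemory:    resource.MustParse("2Gi"),
				api.ResourceNvidiaGPU: resource.MustParse("1"), // only 0 or 1 per node for now
			},
		},
	}
	fmt.Println(c.Resources.Limits.NvidiaGPU().Value()) // 1
}
```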


@ -110,6 +110,7 @@ func NewKubeletServer() *KubeletServer {
MaxPerPodContainerCount: 2,
MaxOpenFiles: 1000000,
MaxPods: 110,
NvidiaGPUs: 0,
MinimumGCAge: unversioned.Duration{Duration: 1 * time.Minute},
NetworkPluginDir: "/usr/libexec/kubernetes/kubelet-plugins/net/exec/",
NetworkPluginName: "",
@ -231,6 +232,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.BoolVar(&s.BabysitDaemons, "babysit-daemons", s.BabysitDaemons, "If true, the node has babysitter process monitoring docker and kubelet.")
fs.MarkDeprecated("babysit-daemons", "Will be removed in a future version.")
fs.Int32Var(&s.MaxPods, "max-pods", s.MaxPods, "Number of Pods that can run on this Kubelet.")
fs.Int32Var(&s.NvidiaGPUs, "experimental-nvidia-gpus", s.NvidiaGPUs, "Number of NVIDIA GPU devices on this node. Only 0 (default) and 1 are currently supported.")
fs.StringVar(&s.DockerExecHandlerName, "docker-exec-handler", s.DockerExecHandlerName, "Handler to use when executing a command in a container. Valid values are 'native' and 'nsenter'. Defaults to 'native'.")
fs.StringVar(&s.NonMasqueradeCIDR, "non-masquerade-cidr", s.NonMasqueradeCIDR, "Traffic to IPs outside this range will use IP masquerade.")
fs.StringVar(&s.PodCIDR, "pod-cidr", "", "The CIDR to use for pod IP addresses, only used in standalone mode. In cluster mode, this is obtained from the master.")


@ -222,6 +222,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
MaxOpenFiles: s.MaxOpenFiles,
MaxPerPodContainerCount: int(s.MaxPerPodContainerCount),
MaxPods: int(s.MaxPods),
NvidiaGPUs: int(s.NvidiaGPUs),
MinimumGCAge: s.MinimumGCAge.Duration,
Mounter: mounter,
NetworkPluginName: s.NetworkPluginName,
@ -551,6 +552,7 @@ func SimpleKubelet(client *clientset.Clientset,
MaxOpenFiles: 1024,
MaxPerPodContainerCount: 2,
MaxPods: maxPods,
NvidiaGPUs: 0,
MinimumGCAge: minimumGCAge,
Mounter: mount.New(),
NodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
@ -743,6 +745,7 @@ type KubeletConfig struct {
NodeLabels map[string]string
NodeStatusUpdateFrequency time.Duration
NonMasqueradeCIDR string
NvidiaGPUs int
OOMAdjuster *oom.OOMAdjuster
OSInterface kubecontainer.OSInterface
PodCIDR string
@ -852,6 +855,7 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.PodCIDR,
kc.ReconcileCIDR,
kc.MaxPods,
kc.NvidiaGPUs,
kc.DockerExecHandler,
kc.ResolverConfig,
kc.CPUCFSQuota,


@ -91,6 +91,7 @@ kubelet
--eviction-soft="": A set of eviction thresholds (e.g. memory.available<1.5Gi) that if met over a corresponding grace period would trigger a pod eviction.
--eviction-soft-grace-period="": A set of eviction grace periods (e.g. memory.available=1m30s) that correspond to how long a soft eviction threshold must hold before triggering a pod eviction.
--experimental-flannel-overlay[=false]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false]
--experimental-nvidia-gpus=0: Number of NVIDIA GPU devices on this node. Only 0 (default) and 1 are currently supported.
--file-check-frequency=20s: Duration between checking config files for new data
--google-json-key="": The Google Cloud Platform Service Account JSON Key to use for authentication.
--hairpin-mode="promiscuous-bridge": How should the kubelet setup hairpin NAT. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. Valid values are "promiscuous-bridge", "hairpin-veth" and "none".
@ -156,7 +157,7 @@ kubelet
--volume-stats-agg-period=1m0s: Specifies interval for kubelet to calculate and cache the volume disk usage for all pods and volumes. To disable volume calculations, set to 0. Default: '1m'
```
###### Auto generated by spf13/cobra on 21-Apr-2016
###### Auto generated by spf13/cobra on 3-May-2016
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->


@ -125,6 +125,7 @@ executor-path
executor-suicide-timeout
experimental-flannel-overlay
experimental-keystone-url
experimental-nvidia-gpus
experimental-prefix
external-hostname
external-ip


@ -48,6 +48,13 @@ func (self *ResourceList) Pods() *resource.Quantity {
return &resource.Quantity{}
}
func (self *ResourceList) NvidiaGPU() *resource.Quantity {
if val, ok := (*self)[ResourceNvidiaGPU]; ok {
return &val
}
return &resource.Quantity{}
}
func GetContainerStatus(statuses []ContainerStatus, name string) (ContainerStatus, bool) {
for i := range statuses {
if statuses[i].Name == name {
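
The `NvidiaGPU()` accessor added above parallels the existing `Cpu()`, `Memory()`, and `Pods()` helpers, returning an empty `Quantity` when the key is absent. A hedged sketch of how a caller might use it (the dockertools and scheduler hunks below do essentially this); the helper name is made up:

```go
package example

import "k8s.io/kubernetes/pkg/api"

// gpuLimit is a hypothetical helper showing the new accessor in use.
func gpuLimit(c *api.Container) int64 {
	// Returns 0 when no alpha.kubernetes.io/nvidia-gpu entry is present,
	// because the accessor falls back to an empty Quantity.
	return c.Resources.Limits.NvidiaGPU().Value()
}
```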


@ -1917,6 +1917,11 @@ type NodeResources struct {
// ResourceName is the name identifying various resources in a ResourceList.
type ResourceName string
// Resource names must be not more than 63 characters, consisting of upper- or lower-case alphanumeric characters,
// with the -, _, and . characters allowed anywhere, except the first or last character.
// The default convention, matching that for annotations, is to use lower-case names, with dashes, rather than
// camel case, separating compound words.
// Fully-qualified resource typenames are constructed from a DNS-style subdomain, followed by a slash `/` and a name.
const (
// CPU, in cores. (500m = .5 cores)
ResourceCPU ResourceName = "cpu"
@ -1924,6 +1929,8 @@ const (
ResourceMemory ResourceName = "memory"
// Volume size, in bytes (e,g. 5Gi = 5GiB = 5 * 1024 * 1024 * 1024)
ResourceStorage ResourceName = "storage"
// NVIDIA GPU, in devices. Alpha, might change: although fractional and allowing values >1, only one whole device per node is assigned.
ResourceNvidiaGPU ResourceName = "alpha.kubernetes.io/nvidia-gpu"
// Number of Pods that may be running on this Node: see ResourcePods
)


@ -2302,6 +2302,11 @@ type NodeAddress struct {
// ResourceName is the name identifying various resources in a ResourceList.
type ResourceName string
// Resource names must be not more than 63 characters, consisting of upper- or lower-case alphanumeric characters,
// with the -, _, and . characters allowed anywhere, except the first or last character.
// The default convention, matching that for annotations, is to use lower-case names, with dashes, rather than
// camel case, separating compound words.
// Fully-qualified resource typenames are constructed from a DNS-style subdomain, followed by a slash `/` and a name.
const (
// CPU, in cores. (500m = .5 cores)
ResourceCPU ResourceName = "cpu"
@ -2309,6 +2314,9 @@ const (
ResourceMemory ResourceName = "memory"
// Volume size, in bytes (e,g. 5Gi = 5GiB = 5 * 1024 * 1024 * 1024)
ResourceStorage ResourceName = "storage"
// NVIDIA GPU, in devices. Alpha, might change: although fractional and allowing values >1, only one whole device per node is assigned.
ResourceNvidiaGPU ResourceName = "alpha.kubernetes.io/nvidia-gpu"
// Number of Pods that may be running on this Node: see ResourcePods
)
// ResourceList is a set of (resource name, quantity) pairs.


@ -2464,6 +2464,7 @@ func validateBasicResource(quantity resource.Quantity, fldPath *field.Path) fiel
func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
limPath := fldPath.Child("limits")
reqPath := fldPath.Child("requests")
for resourceName, quantity := range requirements.Limits {
fldPath := limPath.Key(string(resourceName))
// Validate resource name.
@ -2474,12 +2475,14 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
// Check that request <= limit.
requestQuantity, exists := requirements.Requests[resourceName]
if exists {
if quantity.Cmp(requestQuantity) < 0 {
// For GPUs, require that no request be set.
if resourceName == api.ResourceNvidiaGPU {
allErrs = append(allErrs, field.Invalid(reqPath, requestQuantity.String(), "cannot be set"))
} else if quantity.Cmp(requestQuantity) < 0 {
allErrs = append(allErrs, field.Invalid(fldPath, quantity.String(), "must be greater than or equal to request"))
}
}
}
reqPath := fldPath.Child("requests")
for resourceName, quantity := range requirements.Requests {
fldPath := reqPath.Key(string(resourceName))
// Validate resource name.


@ -1375,6 +1375,22 @@ func TestValidateContainers(t *testing.T) {
},
ImagePullPolicy: "IfNotPresent",
},
{
Name: "resources-test-with-gpu",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
Limits: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName(api.ResourceNvidiaGPU): resource.MustParse("1"),
},
},
ImagePullPolicy: "IfNotPresent",
},
{
Name: "resources-request-limit-simple",
Image: "image",
@ -1598,6 +1614,25 @@ func TestValidateContainers(t *testing.T) {
ImagePullPolicy: "IfNotPresent",
},
},
"Resource can only have GPU limit": {
{
Name: "resources-request-limit-edge",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName(api.ResourceNvidiaGPU): resource.MustParse("1"),
},
Limits: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName(api.ResourceNvidiaGPU): resource.MustParse("1"),
},
},
ImagePullPolicy: "IfNotPresent",
},
},
"Request limit simple invalid": {
{
Name: "abc-123",


@ -276,6 +276,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in KubeletConfiguration, out
out.HairpinMode = in.HairpinMode
out.BabysitDaemons = in.BabysitDaemons
out.MaxPods = in.MaxPods
out.NvidiaGPUs = in.NvidiaGPUs
out.DockerExecHandlerName = in.DockerExecHandlerName
out.PodCIDR = in.PodCIDR
out.ResolverConfig = in.ResolverConfig

File diff suppressed because it is too large


@ -290,6 +290,8 @@ type KubeletConfiguration struct {
BabysitDaemons bool `json:"babysitDaemons"`
// maxPods is the number of pods that can run on this Kubelet.
MaxPods int32 `json:"maxPods"`
// nvidiaGPUs is the number of NVIDIA GPU devices on this node.
NvidiaGPUs int32 `json:"nvidiaGPUs"`
// dockerExecHandlerName is the handler to use when executing a command
// in a container. Valid values are 'native' and 'nsenter'. Defaults to
// 'native'.


@ -701,8 +701,8 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *api.Node, ds *exte
}
pods = append(pods, pod)
}
_, notFittingCPU, notFittingMemory := predicates.CheckPodsExceedingFreeResources(pods, node.Status.Allocatable)
if len(notFittingCPU)+len(notFittingMemory) != 0 {
_, notFittingCPU, notFittingMemory, notFittingNvidiaGPU := predicates.CheckPodsExceedingFreeResources(pods, node.Status.Allocatable)
if len(notFittingCPU)+len(notFittingMemory)+len(notFittingNvidiaGPU) != 0 {
dsc.eventRecorder.Eventf(ds, api.EventTypeNormal, "FailedPlacement", "failed to place pod on %q: insufficent free resources", node.ObjectMeta.Name)
return false
}


@ -357,6 +357,8 @@ type RunContainerOptions struct {
Envs []EnvVar
// The mounts for the containers.
Mounts []Mount
// The host devices mapped into the containers.
Devices []string
// The port mappings for the containers.
PortMappings []PortMapping
// If the container has specified the TerminationMessagePath, then


@ -569,6 +569,7 @@ func (dm *DockerManager) runContainer(
memoryLimit := container.Resources.Limits.Memory().Value()
cpuRequest := container.Resources.Requests.Cpu()
cpuLimit := container.Resources.Limits.Cpu()
nvidiaGPULimit := container.Resources.Limits.NvidiaGPU()
var cpuShares int64
// If request is not specified, but limit is, we want request to default to limit.
// API server does this for new containers, but we repeat this logic in Kubelet
@ -580,6 +581,16 @@ func (dm *DockerManager) runContainer(
// of CPU shares.
cpuShares = milliCPUToShares(cpuRequest.MilliValue())
}
var devices []dockercontainer.DeviceMapping
if nvidiaGPULimit.Value() != 0 {
// Experimental. For now, we hardcode /dev/nvidia0 no matter what the user asks for
// (we only support one device per node).
devices = []dockercontainer.DeviceMapping{
{"/dev/nvidia0", "/dev/nvidia0", "mrw"},
{"/dev/nvidiactl", "/dev/nvidiactl", "mrw"},
{"/dev/nvidia-uvm", "/dev/nvidia-uvm", "mrw"},
}
}
podHasSELinuxLabel := pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.SELinuxOptions != nil
binds := makeMountBindings(opts.Mounts, podHasSELinuxLabel)
// The reason we create and mount the log file in here (not in kubelet) is because
@ -615,6 +626,7 @@ func (dm *DockerManager) runContainer(
Memory: memoryLimit,
MemorySwap: -1,
CPUShares: cpuShares,
Devices: devices,
},
SecurityOpt: securityOpts,
}
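
For illustration, the same device mapping written with keyed fields, assuming the engine-api container package that this file aliases as `dockercontainer`; it is roughly what `docker run --device=/dev/nvidia0:/dev/nvidia0:mrw` (and so on for the control and UVM devices) would do on the CLI. Not part of the diff, just a restatement of the positional literals above:

```go
package example

import dockercontainer "github.com/docker/engine-api/types/container"

// nvidiaDevices restates the hardcoded mapping with keyed struct fields.
// Only one physical device is supported per node in this change, so the
// paths are fixed regardless of the requested quantity.
func nvidiaDevices() []dockercontainer.DeviceMapping {
	return []dockercontainer.DeviceMapping{
		{PathOnHost: "/dev/nvidia0", PathInContainer: "/dev/nvidia0", CgroupPermissions: "mrw"},
		{PathOnHost: "/dev/nvidiactl", PathInContainer: "/dev/nvidiactl", CgroupPermissions: "mrw"},
		{PathOnHost: "/dev/nvidia-uvm", PathInContainer: "/dev/nvidia-uvm", CgroupPermissions: "mrw"},
	}
}
```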


@ -204,6 +204,7 @@ func NewMainKubelet(
podCIDR string,
reconcileCIDR bool,
maxPods int,
nvidiaGPUs int,
dockerExecHandler dockertools.ExecHandler,
resolverConfig string,
cpuCFSQuota bool,
@ -329,6 +330,7 @@ func NewMainKubelet(
nonMasqueradeCIDR: nonMasqueradeCIDR,
reconcileCIDR: reconcileCIDR,
maxPods: maxPods,
nvidiaGPUs: nvidiaGPUs,
syncLoopMonitor: atomic.Value{},
resolverConfig: resolverConfig,
cpuCFSQuota: cpuCFSQuota,
@ -711,6 +713,9 @@ type Kubelet struct {
// Maximum Number of Pods which can be run by this Kubelet
maxPods int
// Number of NVIDIA GPUs on this node
nvidiaGPUs int
// Monitor Kubelet's sync loop
syncLoopMonitor atomic.Value
@ -2933,9 +2938,10 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *api.Node) {
// TODO(roberthbailey): This is required for test-cmd.sh to pass.
// See if the test should be updated instead.
node.Status.Capacity = api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(0, resource.DecimalSI),
api.ResourceMemory: resource.MustParse("0Gi"),
api.ResourcePods: *resource.NewQuantity(int64(kl.maxPods), resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(0, resource.DecimalSI),
api.ResourceMemory: resource.MustParse("0Gi"),
api.ResourcePods: *resource.NewQuantity(int64(kl.maxPods), resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(int64(kl.nvidiaGPUs), resource.DecimalSI),
}
glog.Errorf("Error getting machine info: %v", err)
} else {
@ -2944,6 +2950,8 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *api.Node) {
node.Status.Capacity = cadvisor.CapacityFromMachineInfo(info)
node.Status.Capacity[api.ResourcePods] = *resource.NewQuantity(
int64(kl.maxPods), resource.DecimalSI)
node.Status.Capacity[api.ResourceNvidiaGPU] = *resource.NewQuantity(
int64(kl.nvidiaGPUs), resource.DecimalSI)
if node.Status.NodeInfo.BootID != "" &&
node.Status.NodeInfo.BootID != info.BootID {
// TODO: This requires a transaction, either both node status is updated


@ -2725,14 +2725,16 @@ func TestUpdateNewNodeStatus(t *testing.T) {
KubeProxyVersion: version.Get().String(),
},
Capacity: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
},
Allocatable: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(9900E6, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(9900E6, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
},
Addresses: []api.NodeAddress{
{Type: api.NodeLegacyHostIP, Address: "127.0.0.1"},
@ -2956,14 +2958,16 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
KubeProxyVersion: version.Get().String(),
},
Capacity: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(20E9, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(20E9, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
},
Allocatable: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
},
Addresses: []api.NodeAddress{
{Type: api.NodeLegacyHostIP, Address: "127.0.0.1"},
@ -3227,14 +3231,16 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
KubeProxyVersion: version.Get().String(),
},
Capacity: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
},
Allocatable: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(9900E6, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(9900E6, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
},
Addresses: []api.NodeAddress{
{Type: api.NodeLegacyHostIP, Address: "127.0.0.1"},


@ -19,9 +19,10 @@ package predicates
import "fmt"
const (
podCountResourceName string = "PodCount"
cpuResourceName string = "CPU"
memoryResoureceName string = "Memory"
podCountResourceName string = "PodCount"
cpuResourceName string = "CPU"
memoryResoureceName string = "Memory"
nvidiaGpuResourceName string = "NvidiaGpu"
)
var (


@ -346,8 +346,9 @@ func (c *VolumeZoneChecker) predicate(pod *api.Pod, nodeInfo *schedulercache.Nod
}
type resourceRequest struct {
milliCPU int64
memory int64
milliCPU int64
memory int64
nvidiaGPU int64
}
func getResourceRequest(pod *api.Pod) resourceRequest {
@ -356,19 +357,23 @@ func getResourceRequest(pod *api.Pod) resourceRequest {
requests := container.Resources.Requests
result.memory += requests.Memory().Value()
result.milliCPU += requests.Cpu().MilliValue()
result.nvidiaGPU += requests.NvidiaGPU().Value()
}
return result
}
func CheckPodsExceedingFreeResources(pods []*api.Pod, allocatable api.ResourceList) (fitting []*api.Pod, notFittingCPU, notFittingMemory []*api.Pod) {
func CheckPodsExceedingFreeResources(pods []*api.Pod, allocatable api.ResourceList) (fitting []*api.Pod, notFittingCPU, notFittingMemory, notFittingNvidiaGPU []*api.Pod) {
totalMilliCPU := allocatable.Cpu().MilliValue()
totalMemory := allocatable.Memory().Value()
totalNvidiaGPU := allocatable.NvidiaGPU().Value()
milliCPURequested := int64(0)
memoryRequested := int64(0)
nvidiaGPURequested := int64(0)
for _, pod := range pods {
podRequest := getResourceRequest(pod)
fitsCPU := (totalMilliCPU - milliCPURequested) >= podRequest.milliCPU
fitsMemory := (totalMemory - memoryRequested) >= podRequest.memory
fitsNVidiaGPU := (totalNvidiaGPU - nvidiaGPURequested) >= podRequest.nvidiaGPU
if !fitsCPU {
// the pod doesn't fit due to CPU request
notFittingCPU = append(notFittingCPU, pod)
@ -379,9 +384,15 @@ func CheckPodsExceedingFreeResources(pods []*api.Pod, allocatable api.ResourceLi
notFittingMemory = append(notFittingMemory, pod)
continue
}
if !fitsNVidiaGPU {
// the pod doesn't fit due to NvidiaGPU request
notFittingNvidiaGPU = append(notFittingNvidiaGPU, pod)
continue
}
// the pod fits
milliCPURequested += podRequest.milliCPU
memoryRequested += podRequest.memory
nvidiaGPURequested += podRequest.nvidiaGPU
fitting = append(fitting, pod)
}
return
@ -403,12 +414,13 @@ func PodFitsResources(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, er
newInsufficientResourceError(podCountResourceName, 1, int64(len(nodeInfo.Pods())), allowedPodNumber)
}
podRequest := getResourceRequest(pod)
if podRequest.milliCPU == 0 && podRequest.memory == 0 {
if podRequest.milliCPU == 0 && podRequest.memory == 0 && podRequest.nvidiaGPU == 0 {
return true, nil
}
totalMilliCPU := allocatable.Cpu().MilliValue()
totalMemory := allocatable.Memory().Value()
totalNvidiaGPU := allocatable.NvidiaGPU().Value()
if totalMilliCPU < podRequest.milliCPU+nodeInfo.RequestedResource().MilliCPU {
return false,
@ -418,6 +430,10 @@ func PodFitsResources(pod *api.Pod, nodeInfo *schedulercache.NodeInfo) (bool, er
return false,
newInsufficientResourceError(memoryResoureceName, podRequest.memory, nodeInfo.RequestedResource().Memory, totalMemory)
}
if totalNvidiaGPU < podRequest.nvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
return false,
newInsufficientResourceError(nvidiaGpuResourceName, podRequest.nvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, totalNvidiaGPU)
}
glog.V(10).Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
return true, nil
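
The scheduler side reduces to a plain capacity check. A standalone sketch of the new GPU branch in PodFitsResources, matching the "not enough GPU resource" test case added below; the function name is made up:

```go
package example

// fitsGPU mirrors the new check: the incoming pod fits only if its GPU
// request plus what the node's existing pods already request stays within
// the node's allocatable GPUs.
func fitsGPU(allocatable, alreadyRequested, podRequest int64) bool {
	return allocatable >= podRequest+alreadyRequested
}

// Example matching the tests below: fitsGPU(1, 1, 1) == false (node has one
// GPU, one already claimed), while fitsGPU(1, 0, 1) == true.
```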


@ -71,21 +71,23 @@ func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*api.P
return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID)
}
func makeResources(milliCPU int64, memory int64, pods int64) api.NodeResources {
func makeResources(milliCPU int64, memory int64, nvidiaGPUs int64, pods int64) api.NodeResources {
return api.NodeResources{
Capacity: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
},
}
}
func makeAllocatableResources(milliCPU int64, memory int64, pods int64) api.ResourceList {
func makeAllocatableResources(milliCPU int64, memory int64, nvidiaGPUs int64, pods int64) api.ResourceList {
return api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
api.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
api.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
}
}
@ -95,8 +97,9 @@ func newResourcePod(usage ...resourceRequest) *api.Pod {
containers = append(containers, api.Container{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceCPU: *resource.NewMilliQuantity(req.milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(req.memory, resource.BinarySI),
api.ResourceCPU: *resource.NewMilliQuantity(req.milliCPU, resource.DecimalSI),
api.ResourceMemory: *resource.NewQuantity(req.memory, resource.BinarySI),
api.ResourceNvidiaGPU: *resource.NewQuantity(req.nvidiaGPU, resource.DecimalSI),
},
},
})
@ -159,7 +162,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range enoughPodsTests {
node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)}}
node := api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)}}
test.nodeInfo.SetNode(&node)
fits, err := PodFitsResources(test.pod, test.nodeInfo)
@ -204,7 +207,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 1)}}
node := api.Node{Status: api.NodeStatus{Capacity: api.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1)}}
test.nodeInfo.SetNode(&node)
fits, err := PodFitsResources(test.pod, test.nodeInfo)
@ -1529,7 +1532,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(resourceRequest{milliCPU: 9, memory: 19})),
node: &api.Node{
ObjectMeta: api.ObjectMeta{Name: "machine1"},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
},
fits: true,
wErr: nil,
@ -1541,12 +1544,39 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(resourceRequest{milliCPU: 5, memory: 19})),
node: &api.Node{
ObjectMeta: api.ObjectMeta{Name: "machine1"},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
},
fits: false,
wErr: newInsufficientResourceError("CPU", 8, 5, 10),
test: "not enough cpu resource",
},
{
pod: &api.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(resourceRequest{milliCPU: 9, memory: 19})),
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
},
{
pod: newResourcePod(resourceRequest{milliCPU: 3, memory: 1, nvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(resourceRequest{milliCPU: 5, memory: 10, nvidiaGPU: 1})),
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
fits: false,
wErr: newInsufficientResourceError("NvidiaGpu", 1, 1, 1),
test: "not enough GPU resource",
},
{
pod: newResourcePod(resourceRequest{milliCPU: 3, memory: 1, nvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(resourceRequest{milliCPU: 5, memory: 10, nvidiaGPU: 0})),
node: &api.Node{Status: api.NodeStatus{Capacity: makeResources(10, 20, 1, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
},
{
pod: &api.Pod{
Spec: api.PodSpec{
@ -1556,7 +1586,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &api.Node{
ObjectMeta: api.ObjectMeta{Name: "machine1"},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
},
fits: false,
wErr: ErrPodNotMatchHostName,
@ -1567,7 +1597,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &api.Node{
ObjectMeta: api.ObjectMeta{Name: "machine1"},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 32)},
Status: api.NodeStatus{Capacity: makeResources(10, 20, 0, 32).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32)},
},
fits: false,
wErr: ErrPodNotFitsHostPorts,


@ -43,8 +43,9 @@ type NodeInfo struct {
// Resource is a collection of compute resource.
type Resource struct {
MilliCPU int64
Memory int64
MilliCPU int64
Memory int64
NvidiaGPU int64
}
// NewNodeInfo returns a ready to use empty NodeInfo object.
@ -115,9 +116,10 @@ func (n *NodeInfo) String() string {
// addPod adds pod information to this NodeInfo.
func (n *NodeInfo) addPod(pod *api.Pod) {
cpu, mem, non0_cpu, non0_mem := calculateResource(pod)
cpu, mem, nvidia_gpu, non0_cpu, non0_mem := calculateResource(pod)
n.requestedResource.MilliCPU += cpu
n.requestedResource.Memory += mem
n.requestedResource.NvidiaGPU += nvidia_gpu
n.nonzeroRequest.MilliCPU += non0_cpu
n.nonzeroRequest.Memory += non0_mem
n.pods = append(n.pods, pod)
@ -130,9 +132,10 @@ func (n *NodeInfo) removePod(pod *api.Pod) error {
return err
}
cpu, mem, non0_cpu, non0_mem := calculateResource(pod)
cpu, mem, nvidia_gpu, non0_cpu, non0_mem := calculateResource(pod)
n.requestedResource.MilliCPU -= cpu
n.requestedResource.Memory -= mem
n.requestedResource.NvidiaGPU -= nvidia_gpu
n.nonzeroRequest.MilliCPU -= non0_cpu
n.nonzeroRequest.Memory -= non0_mem
@ -152,15 +155,17 @@ func (n *NodeInfo) removePod(pod *api.Pod) error {
return fmt.Errorf("no corresponding pod in pods")
}
func calculateResource(pod *api.Pod) (cpu int64, mem int64, non0_cpu int64, non0_mem int64) {
func calculateResource(pod *api.Pod) (cpu int64, mem int64, nvidia_gpu int64, non0_cpu int64, non0_mem int64) {
for _, c := range pod.Spec.Containers {
req := c.Resources.Requests
cpu += req.Cpu().MilliValue()
mem += req.Memory().Value()
nvidia_gpu += req.NvidiaGPU().Value()
non0_cpu_req, non0_mem_req := priorityutil.GetNonzeroRequests(&req)
non0_cpu += non0_cpu_req
non0_mem += non0_mem_req
// No non-zero resources for GPUs
}
return
}