improve gpu integration

Signed-off-by: Vishnu kannan <vishnuk@google.com>
pull/6/head
Vishnu kannan 2017-02-25 21:16:13 -08:00
parent 57c77ffbdd
commit 3b0a408e3b
16 changed files with 392 additions and 17452 deletions

View File

@ -206,7 +206,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.BoolVar(&s.BabysitDaemons, "babysit-daemons", s.BabysitDaemons, "If true, the node has babysitter process monitoring docker and kubelet.")
fs.MarkDeprecated("babysit-daemons", "Will be removed in a future version.")
fs.Int32Var(&s.MaxPods, "max-pods", s.MaxPods, "Number of Pods that can run on this Kubelet.")
fs.BoolVar(&s.EnableExperimentalNvidiaGPU, "experimental-enable-nvidia-gpu", s.EnableExperimentalNvidiaGPU, "Enable experimental Nvidia GPU support.")
fs.BoolVar(&s.ExperimentalEnableNvidiaGPU, "experimental-enable-nvidia-gpu", s.ExperimentalEnableNvidiaGPU, "Enable experimental Nvidia GPU support.")
// TODO(#40229): Remove the docker-exec-handler flag.
fs.StringVar(&s.DockerExecHandlerName, "docker-exec-handler", s.DockerExecHandlerName, "Handler to use when executing a command in a container. Valid values are 'native' and 'nsenter'. Defaults to 'native'.")
fs.MarkDeprecated("docker-exec-handler", "this flag will be removed and only the 'native' handler will be supported in the future.")

View File

@ -363,7 +363,7 @@ type KubeletConfiguration struct {
// maxPods is the number of pods that can run on this Kubelet.
MaxPods int32
// Enable experimental Nvidia GPU
EnableExperimentalNvidiaGPU bool
ExperimentalEnableNvidiaGPU bool
// dockerExecHandlerName is the handler to use when executing a command
// in a container. Valid values are 'native' and 'nsenter'. Defaults to
// 'native'.

View File

@ -408,7 +408,7 @@ type KubeletConfiguration struct {
// maxPods is the number of pods that can run on this Kubelet.
MaxPods int32 `json:"maxPods"`
// Enable Nvidia GPU support on this node.
EnableExperimentalNvidiaGPU bool `json:"enableExperimentalNvidiaGPU"`
ExperimentalEnableNvidiaGPU bool `json:"experimentalEnableNvidiaGPU"`
// dockerExecHandlerName is the handler to use when executing a command
// in a container. Valid values are 'native' and 'nsenter'. Defaults to
// 'native'.

View File

@ -353,7 +353,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.HairpinMode = in.HairpinMode
out.BabysitDaemons = in.BabysitDaemons
out.MaxPods = in.MaxPods
out.NvidiaGPUs = in.NvidiaGPUs
out.ExperimentalEnableNvidiaGPU = in.ExperimentalEnableNvidiaGPU
out.DockerExecHandlerName = in.DockerExecHandlerName
out.PodCIDR = in.PodCIDR
out.ResolverConfig = in.ResolverConfig
@ -531,7 +531,7 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.HairpinMode = in.HairpinMode
out.BabysitDaemons = in.BabysitDaemons
out.MaxPods = in.MaxPods
out.NvidiaGPUs = in.NvidiaGPUs
out.ExperimentalEnableNvidiaGPU = in.ExperimentalEnableNvidiaGPU
out.DockerExecHandlerName = in.DockerExecHandlerName
out.PodCIDR = in.PodCIDR
out.ResolverConfig = in.ResolverConfig

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,41 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gpu
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
)
type gpuManagerStub struct{}
func (gms *gpuManagerStub) Start() error {
return nil
}
func (gms *gpuManagerStub) Capacity() v1.ResourceList {
return nil
}
func (gms *gpuManagerStub) AllocateGPU(_ *v1.Pod, _ *v1.Container) ([]string, error) {
return nil, fmt.Errorf("GPUs are not supported")
}
func NewGPUManagerStub() GPUManager {
return &gpuManagerStub{}
}

View File

@ -0,0 +1,59 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nvidia
import "k8s.io/apimachinery/pkg/util/sets"
// podGPUs represents a list of pod to GPU mappings.
type podGPUs struct {
podGPUMapping map[string]sets.String
}
func newPodGpus() *podGPUs {
return &podGPUs{
podGPUMapping: map[string]sets.String{},
}
}
func (pgpu *podGPUs) pods() sets.String {
ret := sets.NewString()
for k := range pgpu.podGPUMapping {
ret.Insert(k)
}
return ret
}
func (pgpu *podGPUs) insert(podUID string, device string) {
if _, exists := pgpu.podGPUMapping[podUID]; !exists {
pgpu.podGPUMapping[podUID] = sets.NewString(device)
} else {
pgpu.podGPUMapping[podUID].Insert(device)
}
}
func (pgpu *podGPUs) delete(pods []string) {
for _, uid := range pods {
delete(pgpu.podGPUMapping, uid)
}
}
func (pgpu *podGPUs) devices() sets.String {
ret := sets.NewString()
for _, devices := range pgpu.podGPUMapping {
ret.Union(devices)
}
return ret
}

View File

@ -1,5 +1,5 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -18,12 +18,19 @@ package nvidia
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"path"
"regexp"
"sync"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/kubelet/gpu"
)
// TODO: If use NVML in the future, the implementation could be more complex,
@ -32,55 +39,42 @@ import (
const (
// All NVIDIA GPUs cards should be mounted with nvidiactl and nvidia-uvm
// If the driver installed correctly, the 2 devices must be there.
NvidiaCtlDevice string = "/dev/nvidiactl"
NvidiaUVMDevice string = "/dev/nvidia-uvm"
NvidiaCtlDevice string = "/dev/nvidiactl"
NvidiaUVMDevice string = "/dev/nvidia-uvm"
devDirectory = "/dev"
nvidiaDeviceRE = `^nvidia[0-9]*$`
nvidiaFullpathRE = `^/dev/nvidia[0-9]*$`
)
// Manage GPU devices.
type NvidiaGPUManager struct {
gpuPaths []string
gpuMutex sync.Mutex
type activePodsLister interface {
// Returns a list of active pods on the node.
GetRunningPods() ([]*v1.Pod, error)
}
// nvidiaGPUManager manages nvidia gpu devices.
type nvidiaGPUManager struct {
sync.Mutex
// All gpus available on the Node
allGPUs sets.String
allocated *podGPUs
// The interface which could get GPU mapping from all the containers.
// TODO: Should make this independent of Docker in the future.
dockerClient dockertools.DockerInterface
dockerClient dockertools.DockerInterface
activePodsLister activePodsLister
}
// Get all the paths of NVIDIA GPU card from /dev/
// TODO: Without NVML support we only can check whether there has GPU devices, but
// could not give a health check or get more information like GPU cores, memory, or
// family name. Need to support NVML in the future. But we do not need NVML until
// we want more features, features like schedule containers according to GPU family
// name.
func (ngm *NvidiaGPUManager) discovery() (err error) {
if ngm.gpuPaths == nil {
err = filepath.Walk("/dev", func(path string, f os.FileInfo, err error) error {
reg := regexp.MustCompile(`^nvidia[0-9]*$`)
gpupath := reg.FindAllString(f.Name(), -1)
if gpupath != nil && gpupath[0] != "" {
ngm.gpuPaths = append(ngm.gpuPaths, "/dev/"+gpupath[0])
}
return nil
})
if err != nil {
return err
}
// NewNvidiaGPUManager returns a GPUManager that manages local Nvidia GPUs.
// TODO: Migrate to use pod level cgroups and make it generic to all runtimes.
func NewNvidiaGPUManager(activePodsLister activePodsLister, dockerClient dockertools.DockerInterface) gpu.GPUManager {
return &nvidiaGPUManager{
allGPUs: sets.NewString(),
dockerClient: dockerClient,
activePodsLister: activePodsLister,
}
return nil
}
func Valid(path string) bool {
reg := regexp.MustCompile(`^/dev/nvidia[0-9]*$`)
check := reg.FindAllString(path, -1)
return check != nil && check[0] != ""
}
// Initialize the GPU devices, so far only needed to discover the GPU paths.
func (ngm *NvidiaGPUManager) Init(dc dockertools.DockerInterface) error {
func (ngm *nvidiaGPUManager) Start() error {
if _, err := os.Stat(NvidiaCtlDevice); err != nil {
return err
}
@ -88,94 +82,181 @@ func (ngm *NvidiaGPUManager) Init(dc dockertools.DockerInterface) error {
if _, err := os.Stat(NvidiaUVMDevice); err != nil {
return err
}
ngm.Lock()
defer ngm.Unlock()
ngm.gpuMutex.Lock()
defer ngm.gpuMutex.Unlock()
err := ngm.discovery()
ngm.dockerClient = dc
return err
}
func (ngm *NvidiaGPUManager) Shutdown() {
ngm.gpuMutex.Lock()
defer ngm.gpuMutex.Unlock()
ngm.gpuPaths = nil
if err := ngm.discoverGPUs(); err != nil {
return err
}
// Its possible that the runtime isn't available now.
allocatedGPUs, err := ngm.gpusInUse()
if err == nil {
ngm.allocated = allocatedGPUs
}
// We ignore errors with identifying allocated GPUs because it is possible that the runtime interfaces may be not be logically up.
return nil
}
// Get how many GPU cards we have.
func (ngm *NvidiaGPUManager) Capacity() int {
ngm.gpuMutex.Lock()
defer ngm.gpuMutex.Unlock()
return len(ngm.gpuPaths)
func (ngm *nvidiaGPUManager) Capacity() v1.ResourceList {
gpus := resource.NewQuantity(int64(len(ngm.allGPUs)), resource.DecimalSI)
return v1.ResourceList{
v1.ResourceNvidiaGPU: *gpus,
}
}
// Check whether the GPU device could be assigned to a container.
func (ngm *NvidiaGPUManager) isAvailable(path string) bool {
containers, err := dockertools.GetKubeletDockerContainers(ngm.dockerClient, false)
if err != nil {
return true
// AllocateGPUs returns `num` GPUs if available, error otherwise.
// Allocation is made thread safe using the following logic.
// A list of all GPUs allocated is maintained along with their respective Pod UIDs.
// It is expected that the list of active pods will not return any false positives.
// As part of initialization or allocation, the list of GPUs in use will be computed once.
// Whenever an allocation happens, the list of GPUs allocated is updated based on the list of currently active pods.
// GPUs allocated to terminated pods are freed up lazily as part of allocation.
// GPUs are allocated based on the internal list of allocatedGPUs.
// It is not safe to generate a list of GPUs in use by inspecting active containers because of the delay between GPU allocation and container creation.
// A GPU allocated to a container might be re-allocated to a subsequent container because the original container wasn't started quick enough.
// The current algorithm scans containers only once and then uses a list of active pods to track GPU usage.
// This is a sub-optimal solution and a better alternative would be that of using pod level cgroups instead.
// GPUs allocated to containers should be reflected in pod level device cgroups before completing allocations.
// The pod level cgroups will then serve as a checkpoint of GPUs in use.
func (ngm *nvidiaGPUManager) AllocateGPU(pod *v1.Pod, container *v1.Container) ([]string, error) {
gpusNeeded := container.Resources.Limits.NvidiaGPU().Value()
if gpusNeeded == 0 {
return []string{}, nil
}
for i := range containers {
containerJSON, err := ngm.dockerClient.InspectContainer(containers[i].ID)
ngm.Lock()
defer ngm.Unlock()
if ngm.allocated == nil {
// Initialization is not complete. Try now. Failures can no longer be tolerated.
allocated, err := ngm.gpusInUse()
if err != nil {
return nil, fmt.Errorf("failed to allocate GPUs because of issues identifying GPUs in use: %v", err)
}
ngm.allocated = allocated
} else {
// update internal list of GPUs in use prior to allocating new GPUs.
if err := ngm.updateAllocatedGPUs(); err != nil {
return nil, fmt.Errorf("failed to allocate GPUs because of issues with updating GPUs in use: %v", err)
}
}
// Get GPU devices in use.
devicesInUse := ngm.allocated.devices()
// Get a list of available GPUs.
available := ngm.allGPUs.Difference(devicesInUse)
if int64(available.Len()) < gpusNeeded {
return nil, fmt.Errorf("requested number of GPUs unavailable. Requested: %d, Available: %d", gpusNeeded, available.Len())
}
var ret []string
for _, device := range available.List() {
if gpusNeeded > 0 {
ret = append(ret, device)
// Update internal allocated GPU cache.
ngm.allocated.insert(string(pod.UID), device)
}
gpusNeeded--
}
return ret, nil
}
func (ngm *nvidiaGPUManager) updateAllocatedGPUs() error {
activePods, err := ngm.activePodsLister.GetRunningPods()
if err != nil {
return fmt.Errorf("failed to list active pods: %v", err)
}
activePodUids := sets.NewString()
for _, pod := range activePods {
activePodUids.Insert(string(pod.UID))
}
allocatedPodUids := ngm.allocated.pods()
podsToBeRemoved := allocatedPodUids.Difference(activePodUids)
ngm.allocated.delete(podsToBeRemoved.List())
return nil
}
// discoverGPUs identifies allGPUs NVIDIA GPU devices available on the local node by walking `/dev` directory.
// TODO: Without NVML support we only can check whether there has GPU devices, but
// could not give a health check or get more information like GPU cores, memory, or
// family name. Need to support NVML in the future. But we do not need NVML until
// we want more features, features like schedule containers according to GPU family
// name.
func (ngm *nvidiaGPUManager) discoverGPUs() error {
reg := regexp.MustCompile(nvidiaDeviceRE)
files, err := ioutil.ReadDir(devDirectory)
if err != nil {
return err
}
for _, f := range files {
if f.IsDir() {
continue
}
if reg.MatchString(f.Name()) {
glog.V(2).Infof("Found Nvidia GPU %q", f.Name())
ngm.allGPUs.Insert(path.Join(devDirectory, f.Name()))
}
}
devices := containerJSON.HostConfig.Devices
if devices == nil {
return nil
}
// gpusInUse returns a list of GPUs in use along with the respective pods that are using it.
func (ngm *nvidiaGPUManager) gpusInUse() (*podGPUs, error) {
pods, err := ngm.activePodsLister.GetRunningPods()
if err != nil {
return nil, err
}
type podContainers struct {
uid string
containerIDs sets.String
}
// List of containers to inspect.
podContainersToInspect := []podContainers{}
for _, pod := range pods {
containers := sets.NewString()
for _, container := range pod.Spec.Containers {
// GPUs are expected to be specified only in limits.
if !container.Resources.Limits.NvidiaGPU().IsZero() {
containers.Insert(container.Name)
}
}
// If no GPUs were requested skip this pod.
if containers.Len() == 0 {
continue
}
containerIDs := sets.NewString()
for _, container := range pod.Status.ContainerStatuses {
if containers.Has(container.Name) {
containerIDs.Insert(container.ContainerID)
}
}
// add the pod and its containers that need to be inspected.
podContainersToInspect = append(podContainersToInspect, podContainers{string(pod.UID), containerIDs})
}
ret := newPodGpus()
for _, podContainer := range podContainersToInspect {
for _, containerId := range podContainer.containerIDs.List() {
containerJSON, err := ngm.dockerClient.InspectContainer(containerId)
if err != nil {
glog.V(3).Infof("failed to inspect container %q in pod %q while attempting to reconcile nvidia gpus in use", containerId, podContainer.uid)
continue
}
for _, device := range devices {
if Valid(device.PathOnHost) && path == device.PathOnHost {
return false
devices := containerJSON.HostConfig.Devices
if devices == nil {
continue
}
for _, device := range devices {
if isValidPath(device.PathOnHost) {
glog.V(4).Infof("Nvidia GPU %q is in use by Docker Container: %q", device.PathOnHost, containerJSON.ID)
ret.insert(podContainer.uid, device.PathOnHost)
}
}
}
}
return true
return ret, nil
}
// Return the GPU paths as needed, otherwise, return error.
func (ngm *NvidiaGPUManager) AllocateGPUs(num int) (paths []string, err error) {
if num <= 0 {
return
}
ngm.gpuMutex.Lock()
defer ngm.gpuMutex.Unlock()
for _, path := range ngm.gpuPaths {
if ngm.isAvailable(path) {
paths = append(paths, path)
if len(paths) == num {
return
}
}
}
err = fmt.Errorf("Not enough GPUs!")
return
}
// Return the count of GPUs which are free.
func (ngm *NvidiaGPUManager) AvailableGPUs() (num int) {
ngm.gpuMutex.Lock()
defer ngm.gpuMutex.Unlock()
for _, path := range ngm.gpuPaths {
if ngm.isAvailable(path) {
num++
}
}
return
func isValidPath(path string) bool {
return regexp.MustCompile(nvidiaFullpathRE).MatchString(path)
}

32
pkg/kubelet/gpu/types.go Normal file
View File

@ -0,0 +1,32 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gpu
import "k8s.io/kubernetes/pkg/api/v1"
// GPUManager manages GPUs on a local node.
// Implementations are expected to be thread safe.
type GPUManager interface {
// Start logically initializes GPUManager
Start() error
// Capacity returns the total number of GPUs on the node.
Capacity() v1.ResourceList
// AllocateGPU attempts to allocate GPUs for input container.
// Returns paths to allocated GPUs and nil on success.
// Returns an error on failure.
AllocateGPU(*v1.Pod, *v1.Container) ([]string, error)
}

View File

@ -67,6 +67,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/kubelet/events"
"k8s.io/kubernetes/pkg/kubelet/eviction"
"k8s.io/kubernetes/pkg/kubelet/gpu"
"k8s.io/kubernetes/pkg/kubelet/gpu/nvidia"
"k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/kuberuntime"
@ -450,7 +451,6 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
writer: kubeDeps.Writer,
nonMasqueradeCIDR: kubeCfg.NonMasqueradeCIDR,
maxPods: int(kubeCfg.MaxPods),
enableNvidiaGPU: kubeCfg.EnableNvidiaGPU,
podsPerCore: int(kubeCfg.PodsPerCore),
syncLoopMonitor: atomic.Value{},
resolverConfig: kubeCfg.ResolverConfig,
@ -787,7 +787,11 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime)
klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
if kubeCfg.ExperimentalEnableNvidiaGPU {
klet.gpuManager = nvidia.NewNvidiaGPUManager(klet, klet.dockerClient)
} else {
klet.gpuManager = gpu.NewGPUManagerStub()
}
// Finally, put the most recent version of the config on the Kubelet, so
// people can see how it was configured.
klet.kubeletConfiguration = *kubeCfg
@ -982,9 +986,6 @@ type Kubelet struct {
// Maximum Number of Pods which can be run by this Kubelet
maxPods int
// Enable experimental Nvidia GPU
enableExperimentalNvidiaGPU bool
// Monitor Kubelet's sync loop
syncLoopMonitor atomic.Value
@ -1091,8 +1092,8 @@ type Kubelet struct {
// experimental behavior is desired.
experimentalHostUserNamespaceDefaulting bool
// NVIDIA GPU Manager
nvidiaGPUManager nvidia.NvidiaGPUManager
// GPU Manager
gpuManager gpu.GPUManager
}
// setupDataDirs creates:
@ -1186,11 +1187,8 @@ func (kl *Kubelet) initializeModules() error {
return fmt.Errorf("Failed to start OOM watcher %v", err)
}
// Step 7: Init Nvidia Manager. Do not need to return err until we use NVML instead.
// Only works when user give true to EnableExperimentalNvidiaGPU
if kl.enableExperimentalNvidiaGPU {
kl.nvidiaGPUManager.Init(kl.dockerClient)
}
// Step 7: Initialize GPUs
kl.gpuManager.Start()
// Step 8: Start resource analyzer
kl.resourceAnalyzer.Start()

View File

@ -482,9 +482,12 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
node.Status.Capacity = v1.ResourceList{}
}
nvidiaGPUCapacity := 0
if kl.enableExperimentalNvidiaGPU {
nvidiaGPUCapacity = kl.nvidiaGPUManager.Capacity()
// populate GPU capacity.
gpuCapacity := kl.gpuManager.Capacity()
if gpuCapacity != nil {
for k, v := range gpuCapacity {
node.Status.Capacity[k] = v
}
}
// TODO: Post NotReady if we cannot get MachineInfo from cAdvisor. This needs to start
@ -496,8 +499,6 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
node.Status.Capacity[v1.ResourceCPU] = *resource.NewMilliQuantity(0, resource.DecimalSI)
node.Status.Capacity[v1.ResourceMemory] = resource.MustParse("0Gi")
node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(int64(kl.maxPods), resource.DecimalSI)
node.Status.Capacity[v1.ResourceNvidiaGPU] = *resource.NewQuantity(int64(nvidiaGPUCapacity), resource.DecimalSI)
glog.Errorf("Error getting machine info: %v", err)
} else {
node.Status.NodeInfo.MachineID = info.MachineID
@ -514,8 +515,6 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(
int64(kl.maxPods), resource.DecimalSI)
}
node.Status.Capacity[v1.ResourceNvidiaGPU] = *resource.NewQuantity(
int64(nvidiaGPUCapacity), resource.DecimalSI)
if node.Status.NodeInfo.BootID != "" &&
node.Status.NodeInfo.BootID != info.BootID {
// TODO: This requires a transaction, either both node status is updated

View File

@ -208,16 +208,14 @@ func TestUpdateNewNodeStatus(t *testing.T) {
KubeProxyVersion: version.Get().String(),
},
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(9900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(9900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
},
Addresses: []v1.NodeAddress{
{Type: v1.NodeLegacyHostIP, Address: "127.0.0.1"},
@ -482,16 +480,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
KubeProxyVersion: version.Get().String(),
},
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(20E9, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(20E9, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
},
Addresses: []v1.NodeAddress{
{Type: v1.NodeLegacyHostIP, Address: "127.0.0.1"},
@ -790,16 +786,14 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
KubeProxyVersion: version.Get().String(),
},
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(9900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(1800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(9900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
},
Addresses: []v1.NodeAddress{
{Type: v1.NodeLegacyHostIP, Address: "127.0.0.1"},

View File

@ -87,28 +87,33 @@ func (kl *Kubelet) getActivePods() []*v1.Pod {
// makeDevices determines the devices for the given container.
// Experimental.
func (kl *Kubelet) makeDevices(container *v1.Container) []kubecontainer.DeviceInfo {
if !kl.enableExperimentalNvidiaGPU {
return nil
func (kl *Kubelet) makeDevices(pod *v1.Pod, container *v1.Container) ([]kubecontainer.DeviceInfo, error) {
if container.Resources.Limits.NvidiaGPU().IsZero() {
return nil, nil
}
nvidiaGPULimit := container.Resources.Limits.NvidiaGPU()
if nvidiaGPULimit.Value() != 0 {
if nvidiaGPUPaths, err := kl.nvidiaGPUManager.AllocateGPUs(int(nvidiaGPULimit.Value())); err == nil {
devices := []kubecontainer.DeviceInfo{{PathOnHost: nvidia.NvidiaCtlDevice, PathInContainer: nvidia.NvidiaCtlDevice, Permissions: "mrw"},
{PathOnHost: nvidia.NvidiaUVMDevice, PathInContainer: nvidia.NvidiaUVMDevice, Permissions: "mrw"}}
for i, path := range nvidiaGPUPaths {
devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: path, PathInContainer: "/dev/nvidia" + strconv.Itoa(i), Permissions: "mrw"})
}
return devices
}
nvidiaGPUPaths, err := kl.gpuManager.AllocateGPU(pod, container)
if err != nil {
return nil, err
}
devices := []kubecontainer.DeviceInfo{
{
PathOnHost: nvidia.NvidiaCtlDevice,
PathInContainer: nvidia.NvidiaCtlDevice,
Permissions: "mrw",
},
{
PathOnHost: nvidia.NvidiaUVMDevice,
PathInContainer: nvidia.NvidiaUVMDevice,
Permissions: "mrw",
},
}
return nil
for i, path := range nvidiaGPUPaths {
devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: path, PathInContainer: "/dev/nvidia" + strconv.Itoa(i), Permissions: "mrw"})
}
return devices, nil
}
// makeMounts determines the mount points for the given container.
@ -296,7 +301,10 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai
opts.PortMappings = kubecontainer.MakePortMappings(container)
// TODO(random-liu): Move following convert functions into pkg/kubelet/container
opts.Devices = kl.makeDevices(container)
opts.Devices, err = kl.makeDevices(pod, container)
if err != nil {
return nil, err
}
opts.Mounts, err = makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes)
if err != nil {

View File

@ -27,7 +27,6 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
@ -1711,39 +1710,6 @@ func TestGetHostPortConflicts(t *testing.T) {
assert.True(t, hasHostPortConflicts(pods), "Should have port conflicts")
}
func TestMakeDevices(t *testing.T) {
testCases := []struct {
container *v1.Container
devices []kubecontainer.DeviceInfo
test string
}{
{
test: "no device",
container: &v1.Container{},
devices: nil,
},
{
test: "gpu",
container: &v1.Container{
Resources: v1.ResourceRequirements{
Limits: map[v1.ResourceName]resource.Quantity{
v1.ResourceNvidiaGPU: resource.MustParse("1000"),
},
},
},
devices: []kubecontainer.DeviceInfo{
{PathOnHost: "/dev/nvidia0", PathInContainer: "/dev/nvidia0", Permissions: "mrw"},
{PathOnHost: "/dev/nvidiactl", PathInContainer: "/dev/nvidiactl", Permissions: "mrw"},
{PathOnHost: "/dev/nvidia-uvm", PathInContainer: "/dev/nvidia-uvm", Permissions: "mrw"},
},
},
}
for _, test := range testCases {
assert.Equal(t, test.devices, makeDevices(test.container), "[test %q]", test.test)
}
}
func TestHasHostMountPVC(t *testing.T) {
tests := map[string]struct {
pvError error

View File

@ -49,6 +49,7 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/eviction"
"k8s.io/kubernetes/pkg/kubelet/gpu"
"k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/network"
@ -272,7 +273,7 @@ func newTestKubeletWithImageList(
kubelet.AddPodSyncLoopHandler(activeDeadlineHandler)
kubelet.AddPodSyncHandler(activeDeadlineHandler)
kubelet.gpuManager = gpu.NewGPUManagerStub()
return &TestKubelet{kubelet, fakeRuntime, mockCadvisor, fakeKubeClient, fakeMirrorClient, fakeClock, nil, plug}
}

View File

@ -150,7 +150,6 @@ func GetHollowKubeletConfig(
c.MaxContainerCount = 100
c.MaxOpenFiles = 1024
c.MaxPerPodContainerCount = 2
c.EnableExperimentalNvidiaGPU = false
c.RegisterNode = true
c.RegisterSchedulable = true
c.RegistryBurst = 10