mirror of https://github.com/k3s-io/k3s
Move gpu_util.go to e2e/framework/gpu
parent
0cd40a6d51
commit
7814865b40
|
@ -14,7 +14,6 @@ go_library(
|
||||||
"framework.go",
|
"framework.go",
|
||||||
"get-kubemark-resource-usage.go",
|
"get-kubemark-resource-usage.go",
|
||||||
"google_compute.go",
|
"google_compute.go",
|
||||||
"gpu_util.go",
|
|
||||||
"jobs_util.go",
|
"jobs_util.go",
|
||||||
"kubelet_stats.go",
|
"kubelet_stats.go",
|
||||||
"log_size_monitoring.go",
|
"log_size_monitoring.go",
|
||||||
|
@ -156,6 +155,7 @@ filegroup(
|
||||||
":package-srcs",
|
":package-srcs",
|
||||||
"//test/e2e/framework/config:all-srcs",
|
"//test/e2e/framework/config:all-srcs",
|
||||||
"//test/e2e/framework/ginkgowrapper:all-srcs",
|
"//test/e2e/framework/ginkgowrapper:all-srcs",
|
||||||
|
"//test/e2e/framework/gpu:all-srcs",
|
||||||
"//test/e2e/framework/ingress:all-srcs",
|
"//test/e2e/framework/ingress:all-srcs",
|
||||||
"//test/e2e/framework/metrics:all-srcs",
|
"//test/e2e/framework/metrics:all-srcs",
|
||||||
"//test/e2e/framework/podlogs:all-srcs",
|
"//test/e2e/framework/podlogs:all-srcs",
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||||
|
|
||||||
|
go_library(
|
||||||
|
name = "go_default_library",
|
||||||
|
srcs = ["gpu_util.go"],
|
||||||
|
importpath = "k8s.io/kubernetes/test/e2e/framework/gpu",
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = [
|
||||||
|
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||||
|
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||||
|
"//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
|
||||||
|
"//test/e2e/framework:go_default_library",
|
||||||
|
"//vendor/k8s.io/klog:go_default_library",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
filegroup(
|
||||||
|
name = "package-srcs",
|
||||||
|
srcs = glob(["**"]),
|
||||||
|
tags = ["automanaged"],
|
||||||
|
visibility = ["//visibility:private"],
|
||||||
|
)
|
||||||
|
|
||||||
|
filegroup(
|
||||||
|
name = "all-srcs",
|
||||||
|
srcs = [":package-srcs"],
|
||||||
|
tags = ["automanaged"],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
|
@ -14,13 +14,14 @@ See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package framework
|
package gpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/uuid"
|
"k8s.io/apimachinery/pkg/util/uuid"
|
||||||
"k8s.io/klog"
|
"k8s.io/klog"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -48,8 +49,8 @@ func NumberOfNVIDIAGPUs(node *v1.Node) int64 {
|
||||||
|
|
||||||
// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
|
// NVIDIADevicePlugin returns the official Google Device Plugin pod for NVIDIA GPU in GKE
|
||||||
func NVIDIADevicePlugin() *v1.Pod {
|
func NVIDIADevicePlugin() *v1.Pod {
|
||||||
ds, err := DsFromManifest(GPUDevicePluginDSYAML)
|
ds, err := framework.DsFromManifest(GPUDevicePluginDSYAML)
|
||||||
ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
p := &v1.Pod{
|
p := &v1.Pod{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
|
Name: "device-plugin-nvidia-gpu-" + string(uuid.NewUUID()),
|
||||||
|
@ -64,7 +65,7 @@ func NVIDIADevicePlugin() *v1.Pod {
|
||||||
|
|
||||||
// GetGPUDevicePluginImage returns the image of GPU device plugin.
|
// GetGPUDevicePluginImage returns the image of GPU device plugin.
|
||||||
func GetGPUDevicePluginImage() string {
|
func GetGPUDevicePluginImage() string {
|
||||||
ds, err := DsFromManifest(GPUDevicePluginDSYAML)
|
ds, err := framework.DsFromManifest(GPUDevicePluginDSYAML)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed to parse the device plugin image: %v", err)
|
klog.Errorf("Failed to parse the device plugin image: %v", err)
|
||||||
return ""
|
return ""
|
|
@ -37,6 +37,7 @@ go_library(
|
||||||
"//test/e2e/common:go_default_library",
|
"//test/e2e/common:go_default_library",
|
||||||
"//test/e2e/framework:go_default_library",
|
"//test/e2e/framework:go_default_library",
|
||||||
"//test/e2e/framework/config:go_default_library",
|
"//test/e2e/framework/config:go_default_library",
|
||||||
|
"//test/e2e/framework/gpu:go_default_library",
|
||||||
"//test/e2e/framework/metrics:go_default_library",
|
"//test/e2e/framework/metrics:go_default_library",
|
||||||
"//test/e2e/instrumentation/common:go_default_library",
|
"//test/e2e/instrumentation/common:go_default_library",
|
||||||
"//test/e2e/scheduling:go_default_library",
|
"//test/e2e/scheduling:go_default_library",
|
||||||
|
|
|
@ -24,11 +24,12 @@ import (
|
||||||
"github.com/onsi/ginkgo"
|
"github.com/onsi/ginkgo"
|
||||||
"golang.org/x/oauth2/google"
|
"golang.org/x/oauth2/google"
|
||||||
gcm "google.golang.org/api/monitoring/v3"
|
gcm "google.golang.org/api/monitoring/v3"
|
||||||
"k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework/gpu"
|
||||||
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
|
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
|
||||||
"k8s.io/kubernetes/test/e2e/scheduling"
|
"k8s.io/kubernetes/test/e2e/scheduling"
|
||||||
"k8s.io/kubernetes/test/utils/image"
|
"k8s.io/kubernetes/test/utils/image"
|
||||||
|
@ -88,7 +89,7 @@ func testStackdriverAcceleratorMonitoring(f *framework.Framework) {
|
||||||
Args: []string{"nvidia-smi && sleep infinity"},
|
Args: []string{"nvidia-smi && sleep infinity"},
|
||||||
Resources: v1.ResourceRequirements{
|
Resources: v1.ResourceRequirements{
|
||||||
Limits: v1.ResourceList{
|
Limits: v1.ResourceList{
|
||||||
framework.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
gpu.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -44,6 +44,7 @@ go_library(
|
||||||
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
|
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
|
||||||
"//test/e2e/common:go_default_library",
|
"//test/e2e/common:go_default_library",
|
||||||
"//test/e2e/framework:go_default_library",
|
"//test/e2e/framework:go_default_library",
|
||||||
|
"//test/e2e/framework/gpu:go_default_library",
|
||||||
"//test/e2e/framework/providers/gce:go_default_library",
|
"//test/e2e/framework/providers/gce:go_default_library",
|
||||||
"//test/utils:go_default_library",
|
"//test/utils:go_default_library",
|
||||||
"//test/utils/image:go_default_library",
|
"//test/utils/image:go_default_library",
|
||||||
|
|
|
@ -20,12 +20,13 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/uuid"
|
"k8s.io/apimachinery/pkg/util/uuid"
|
||||||
extensionsinternal "k8s.io/kubernetes/pkg/apis/extensions"
|
extensionsinternal "k8s.io/kubernetes/pkg/apis/extensions"
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework/gpu"
|
||||||
imageutils "k8s.io/kubernetes/test/utils/image"
|
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo"
|
. "github.com/onsi/ginkgo"
|
||||||
|
@ -114,7 +115,7 @@ func SetupNVIDIAGPUNode(f *framework.Framework, setupResourceGatherer bool) *fra
|
||||||
} else {
|
} else {
|
||||||
dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/daemonset.yaml"
|
dsYamlUrl = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/daemonset.yaml"
|
||||||
}
|
}
|
||||||
gpuResourceName = framework.NVIDIAGPUResourceName
|
gpuResourceName = gpu.NVIDIAGPUResourceName
|
||||||
|
|
||||||
framework.Logf("Using %v", dsYamlUrl)
|
framework.Logf("Using %v", dsYamlUrl)
|
||||||
// Creates the DaemonSet that installs Nvidia Drivers.
|
// Creates the DaemonSet that installs Nvidia Drivers.
|
||||||
|
|
|
@ -37,6 +37,7 @@ go_library(
|
||||||
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
|
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
|
||||||
"//test/e2e/common:go_default_library",
|
"//test/e2e/common:go_default_library",
|
||||||
"//test/e2e/framework:go_default_library",
|
"//test/e2e/framework:go_default_library",
|
||||||
|
"//test/e2e/framework/gpu:go_default_library",
|
||||||
"//test/e2e/framework/testfiles:go_default_library",
|
"//test/e2e/framework/testfiles:go_default_library",
|
||||||
"//test/e2e/scheduling:go_default_library",
|
"//test/e2e/scheduling:go_default_library",
|
||||||
"//test/utils/image:go_default_library",
|
"//test/utils/image:go_default_library",
|
||||||
|
|
|
@ -23,6 +23,7 @@ import (
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/api/resource"
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework/gpu"
|
||||||
"k8s.io/kubernetes/test/e2e/scheduling"
|
"k8s.io/kubernetes/test/e2e/scheduling"
|
||||||
imageutils "k8s.io/kubernetes/test/utils/image"
|
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||||
|
|
||||||
|
@ -78,7 +79,7 @@ func (t *NvidiaGPUUpgradeTest) startJob(f *framework.Framework) {
|
||||||
Command: []string{"/bin/sh", "-c", "./vectorAdd && sleep 60"},
|
Command: []string{"/bin/sh", "-c", "./vectorAdd && sleep 60"},
|
||||||
Resources: v1.ResourceRequirements{
|
Resources: v1.ResourceRequirements{
|
||||||
Limits: v1.ResourceList{
|
Limits: v1.ResourceList{
|
||||||
framework.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
gpu.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -47,6 +47,7 @@ go_library(
|
||||||
"//staging/src/k8s.io/kubelet/config/v1beta1:go_default_library",
|
"//staging/src/k8s.io/kubelet/config/v1beta1:go_default_library",
|
||||||
"//test/e2e/common:go_default_library",
|
"//test/e2e/common:go_default_library",
|
||||||
"//test/e2e/framework:go_default_library",
|
"//test/e2e/framework:go_default_library",
|
||||||
|
"//test/e2e/framework/gpu:go_default_library",
|
||||||
"//test/e2e/framework/metrics:go_default_library",
|
"//test/e2e/framework/metrics:go_default_library",
|
||||||
"//test/utils/image:go_default_library",
|
"//test/utils/image:go_default_library",
|
||||||
"//vendor/github.com/blang/semver:go_default_library",
|
"//vendor/github.com/blang/semver:go_default_library",
|
||||||
|
|
|
@ -21,10 +21,11 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
|
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework/gpu"
|
||||||
"k8s.io/kubernetes/test/e2e/framework/metrics"
|
"k8s.io/kubernetes/test/e2e/framework/metrics"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo"
|
. "github.com/onsi/ginkgo"
|
||||||
|
@ -46,15 +47,15 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
|
||||||
}
|
}
|
||||||
|
|
||||||
By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE")
|
By("Creating the Google Device Plugin pod for NVIDIA GPU in GKE")
|
||||||
devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(framework.NVIDIADevicePlugin())
|
devicePluginPod, err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(gpu.NVIDIADevicePlugin())
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
|
|
||||||
By("Waiting for GPUs to become available on the local node")
|
By("Waiting for GPUs to become available on the local node")
|
||||||
Eventually(func() bool {
|
Eventually(func() bool {
|
||||||
return framework.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
|
return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
|
||||||
}, 5*time.Minute, framework.Poll).Should(BeTrue())
|
}, 5*time.Minute, framework.Poll).Should(BeTrue())
|
||||||
|
|
||||||
if framework.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
|
if gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) < 2 {
|
||||||
Skip("Not enough GPUs to execute this test (at least two needed)")
|
Skip("Not enough GPUs to execute this test (at least two needed)")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -75,7 +76,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
|
||||||
It("checks that when Kubelet restarts exclusive GPU assignation to pods is kept.", func() {
|
It("checks that when Kubelet restarts exclusive GPU assignation to pods is kept.", func() {
|
||||||
By("Creating one GPU pod on a node with at least two GPUs")
|
By("Creating one GPU pod on a node with at least two GPUs")
|
||||||
podRECMD := "devs=$(ls /dev/ | egrep '^nvidia[0-9]+$') && echo gpu devices: $devs"
|
podRECMD := "devs=$(ls /dev/ | egrep '^nvidia[0-9]+$') && echo gpu devices: $devs"
|
||||||
p1 := f.PodClient().CreateSync(makeBusyboxPod(framework.NVIDIAGPUResourceName, podRECMD))
|
p1 := f.PodClient().CreateSync(makeBusyboxPod(gpu.NVIDIAGPUResourceName, podRECMD))
|
||||||
|
|
||||||
deviceIDRE := "gpu devices: (nvidia[0-9]+)"
|
deviceIDRE := "gpu devices: (nvidia[0-9]+)"
|
||||||
devId1 := parseLog(f, p1.Name, p1.Name, deviceIDRE)
|
devId1 := parseLog(f, p1.Name, p1.Name, deviceIDRE)
|
||||||
|
@ -94,9 +95,9 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
|
||||||
restartKubelet()
|
restartKubelet()
|
||||||
framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
|
framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout)
|
||||||
Eventually(func() bool {
|
Eventually(func() bool {
|
||||||
return framework.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
|
return gpu.NumberOfNVIDIAGPUs(getLocalNode(f)) > 0
|
||||||
}, 5*time.Minute, framework.Poll).Should(BeTrue())
|
}, 5*time.Minute, framework.Poll).Should(BeTrue())
|
||||||
p2 := f.PodClient().CreateSync(makeBusyboxPod(framework.NVIDIAGPUResourceName, podRECMD))
|
p2 := f.PodClient().CreateSync(makeBusyboxPod(gpu.NVIDIAGPUResourceName, podRECMD))
|
||||||
|
|
||||||
By("Checking that pods got a different GPU")
|
By("Checking that pods got a different GPU")
|
||||||
devId2 := parseLog(f, p2.Name, p2.Name, deviceIDRE)
|
devId2 := parseLog(f, p2.Name, p2.Name, deviceIDRE)
|
||||||
|
@ -109,7 +110,7 @@ var _ = framework.KubeDescribe("NVIDIA GPU Device Plugin [Feature:GPUDevicePlugi
|
||||||
Eventually(func() bool {
|
Eventually(func() bool {
|
||||||
node, err := f.ClientSet.CoreV1().Nodes().Get(framework.TestContext.NodeName, metav1.GetOptions{})
|
node, err := f.ClientSet.CoreV1().Nodes().Get(framework.TestContext.NodeName, metav1.GetOptions{})
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
return framework.NumberOfNVIDIAGPUs(node) <= 0
|
return gpu.NumberOfNVIDIAGPUs(node) <= 0
|
||||||
}, 10*time.Minute, framework.Poll).Should(BeTrue())
|
}, 10*time.Minute, framework.Poll).Should(BeTrue())
|
||||||
By("Checking that scheduled pods can continue to run even after we delete device plugin.")
|
By("Checking that scheduled pods can continue to run even after we delete device plugin.")
|
||||||
ensurePodContainerRestart(f, p1.Name, p1.Name)
|
ensurePodContainerRestart(f, p1.Name, p1.Name)
|
||||||
|
|
|
@ -30,6 +30,7 @@ import (
|
||||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
||||||
commontest "k8s.io/kubernetes/test/e2e/common"
|
commontest "k8s.io/kubernetes/test/e2e/common"
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework/gpu"
|
||||||
imageutils "k8s.io/kubernetes/test/utils/image"
|
imageutils "k8s.io/kubernetes/test/utils/image"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -52,7 +53,7 @@ var NodeImageWhiteList = sets.NewString(
|
||||||
imageutils.GetE2EImage(imageutils.Netexec),
|
imageutils.GetE2EImage(imageutils.Netexec),
|
||||||
imageutils.GetE2EImage(imageutils.Nonewprivs),
|
imageutils.GetE2EImage(imageutils.Nonewprivs),
|
||||||
imageutils.GetPauseImageName(),
|
imageutils.GetPauseImageName(),
|
||||||
framework.GetGPUDevicePluginImage(),
|
gpu.GetGPUDevicePluginImage(),
|
||||||
"gcr.io/kubernetes-e2e-test-images/node-perf/npb-is:1.0",
|
"gcr.io/kubernetes-e2e-test-images/node-perf/npb-is:1.0",
|
||||||
"gcr.io/kubernetes-e2e-test-images/node-perf/npb-ep:1.0",
|
"gcr.io/kubernetes-e2e-test-images/node-perf/npb-ep:1.0",
|
||||||
"gcr.io/kubernetes-e2e-test-images/node-perf/tf-wide-deep-amd64:1.0",
|
"gcr.io/kubernetes-e2e-test-images/node-perf/tf-wide-deep-amd64:1.0",
|
||||||
|
|
Loading…
Reference in New Issue