mirror of https://github.com/k3s-io/k3s
Merge pull request #50218 from dixudx/fix_GPU_resource_zero_limit_validation
Automatic merge from submit-queue fix GPU resource validation that incorrectly allows zero limits **What this PR does / why we need it**: The validation logic for GPUs is not run if limits is not set for GPUs. We need to check limits equals requests even if just request is set for GPUs. **Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #50182 **Special notes for your reviewer**: /assign @vishh /cc @jiayingz **Release note**: ```release-note fix GPU resource validation that incorrectly allows zero limits ```pull/6/head
commit
075d209ea4
|
@ -3,6 +3,7 @@ package(default_visibility = ["//visibility:public"])
|
|||
load(
|
||||
"@io_bazel_rules_go//go:def.bzl",
|
||||
"go_library",
|
||||
"go_test",
|
||||
)
|
||||
|
||||
go_library(
|
||||
|
@ -32,3 +33,15 @@ filegroup(
|
|||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
)
|
||||
|
||||
go_test(
|
||||
name = "go_default_test",
|
||||
srcs = ["validation_test.go"],
|
||||
library = ":go_default_library",
|
||||
tags = ["automanaged"],
|
||||
deps = [
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/util/validation/field:go_default_library",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -46,16 +46,6 @@ func ValidateResourceRequirements(requirements *v1.ResourceRequirements, fldPath
|
|||
// Validate resource quantity.
|
||||
allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
|
||||
|
||||
// Check that request <= limit.
|
||||
requestQuantity, exists := requirements.Requests[resourceName]
|
||||
if exists {
|
||||
// Ensure overcommit is allowed for the resource if request != limit
|
||||
if quantity.Cmp(requestQuantity) != 0 && !v1helper.IsOvercommitAllowed(resourceName) {
|
||||
allErrs = append(allErrs, field.Invalid(reqPath, requestQuantity.String(), fmt.Sprintf("must be equal to %s limit", resourceName)))
|
||||
} else if quantity.Cmp(requestQuantity) < 0 {
|
||||
allErrs = append(allErrs, field.Invalid(limPath, quantity.String(), fmt.Sprintf("must be greater than or equal to %s request", resourceName)))
|
||||
}
|
||||
}
|
||||
}
|
||||
for resourceName, quantity := range requirements.Requests {
|
||||
fldPath := reqPath.Key(string(resourceName))
|
||||
|
@ -63,6 +53,19 @@ func ValidateResourceRequirements(requirements *v1.ResourceRequirements, fldPath
|
|||
allErrs = append(allErrs, validateContainerResourceName(string(resourceName), fldPath)...)
|
||||
// Validate resource quantity.
|
||||
allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
|
||||
|
||||
// Check that request <= limit.
|
||||
limitQuantity, exists := requirements.Limits[resourceName]
|
||||
if exists {
|
||||
// For GPUs, not only requests can't exceed limits, they also can't be lower, i.e. must be equal.
|
||||
if quantity.Cmp(limitQuantity) != 0 && !v1helper.IsOvercommitAllowed(resourceName) {
|
||||
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s limit", resourceName)))
|
||||
} else if quantity.Cmp(limitQuantity) > 0 {
|
||||
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be less than or equal to %s limit", resourceName)))
|
||||
}
|
||||
} else if resourceName == v1.ResourceNvidiaGPU {
|
||||
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s request", v1.ResourceNvidiaGPU)))
|
||||
}
|
||||
}
|
||||
|
||||
return allErrs
|
||||
|
|
|
@ -0,0 +1,179 @@
|
|||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package validation
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/util/validation/field"
|
||||
)
|
||||
|
||||
func TestValidateResourceRequirements(t *testing.T) {
|
||||
successCase := []struct {
|
||||
Name string
|
||||
requirements v1.ResourceRequirements
|
||||
}{
|
||||
{
|
||||
Name: "GPU only setting Limits",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "GPU setting Limits equals Requests",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
|
||||
},
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Resources with GPU with Requests",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("1"),
|
||||
},
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Resources with only Limits",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
|
||||
v1.ResourceName("my.org/resource"): resource.MustParse("10"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Resources with only Requests",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
|
||||
v1.ResourceName("my.org/resource"): resource.MustParse("10"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Resources with Requests Less Than Limits",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("9"),
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse("9G"),
|
||||
v1.ResourceName("my.org/resource"): resource.MustParse("9"),
|
||||
},
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
|
||||
v1.ResourceName("my.org/resource"): resource.MustParse("9"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range successCase {
|
||||
if errs := ValidateResourceRequirements(&tc.requirements, field.NewPath("resources")); len(errs) != 0 {
|
||||
t.Errorf("%q unexpected error: %v", tc.Name, errs)
|
||||
}
|
||||
}
|
||||
|
||||
errorCase := []struct {
|
||||
Name string
|
||||
requirements v1.ResourceRequirements
|
||||
}{
|
||||
{
|
||||
Name: "GPU only setting Requests",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "GPU setting Limits less than Requests",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
|
||||
},
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("11"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "GPU setting Limits larger than Requests",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
|
||||
},
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("9"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Resources with Requests Larger Than Limits",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
|
||||
v1.ResourceName("my.org/resource"): resource.MustParse("10m"),
|
||||
},
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("9"),
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse("9G"),
|
||||
v1.ResourceName("my.org/resource"): resource.MustParse("9m"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Invalid Resources with Requests",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName("my.org"): resource.MustParse("10m"),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Invalid Resources with Limits",
|
||||
requirements: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceName("my.org"): resource.MustParse("9m"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range errorCase {
|
||||
if errs := ValidateResourceRequirements(&tc.requirements, field.NewPath("resources")); len(errs) == 0 {
|
||||
t.Errorf("%q expected error", tc.Name)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -3718,16 +3718,6 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
|
|||
// Validate resource quantity.
|
||||
allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
|
||||
|
||||
// Check that request <= limit.
|
||||
requestQuantity, exists := requirements.Requests[resourceName]
|
||||
if exists {
|
||||
// Ensure overcommit is allowed for the resource if request != limit
|
||||
if quantity.Cmp(requestQuantity) != 0 && !helper.IsOvercommitAllowed(resourceName) {
|
||||
allErrs = append(allErrs, field.Invalid(reqPath, requestQuantity.String(), fmt.Sprintf("must be equal to %s limit", resourceName)))
|
||||
} else if quantity.Cmp(requestQuantity) < 0 {
|
||||
allErrs = append(allErrs, field.Invalid(limPath, quantity.String(), fmt.Sprintf("must be greater than or equal to %s request", resourceName)))
|
||||
}
|
||||
}
|
||||
if resourceName == api.ResourceStorageOverlay && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
|
||||
allErrs = append(allErrs, field.Forbidden(limPath, "ResourceStorageOverlay field disabled by feature-gate for ResourceRequirements"))
|
||||
}
|
||||
|
@ -3738,6 +3728,19 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
|
|||
allErrs = append(allErrs, validateContainerResourceName(string(resourceName), fldPath)...)
|
||||
// Validate resource quantity.
|
||||
allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
|
||||
|
||||
// Check that request <= limit.
|
||||
limitQuantity, exists := requirements.Limits[resourceName]
|
||||
if exists {
|
||||
// For GPUs, not only requests can't exceed limits, they also can't be lower, i.e. must be equal.
|
||||
if quantity.Cmp(limitQuantity) != 0 && !helper.IsOvercommitAllowed(resourceName) {
|
||||
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s limit", api.ResourceNvidiaGPU)))
|
||||
} else if quantity.Cmp(limitQuantity) > 0 {
|
||||
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be less than or equal to %s limit", resourceName)))
|
||||
}
|
||||
} else if resourceName == api.ResourceNvidiaGPU {
|
||||
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s request", api.ResourceNvidiaGPU)))
|
||||
}
|
||||
}
|
||||
|
||||
return allErrs
|
||||
|
|
|
@ -3631,6 +3631,21 @@ func TestValidateContainers(t *testing.T) {
|
|||
ImagePullPolicy: "IfNotPresent",
|
||||
},
|
||||
},
|
||||
"Resource GPU invalid setting only request": {
|
||||
{
|
||||
Name: "gpu-resource-request-limit",
|
||||
Image: "image",
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
|
||||
api.ResourceName(api.ResourceNvidiaGPU): resource.MustParse("1"),
|
||||
},
|
||||
},
|
||||
TerminationMessagePolicy: "File",
|
||||
ImagePullPolicy: "IfNotPresent",
|
||||
},
|
||||
},
|
||||
"Request limit simple invalid": {
|
||||
{
|
||||
Name: "abc-123",
|
||||
|
@ -5045,7 +5060,7 @@ func TestValidatePod(t *testing.T) {
|
|||
},
|
||||
},
|
||||
"invalid opaque integer resource requirement: request must be <= limit": {
|
||||
expectedError: "must be greater than or equal to pod.alpha.kubernetes.io/opaque-int-resource-A",
|
||||
expectedError: "must be less than or equal to pod.alpha.kubernetes.io/opaque-int-resource-A",
|
||||
spec: api.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns"},
|
||||
Spec: api.PodSpec{
|
||||
|
|
|
@ -42,7 +42,7 @@ func TestPodConstraintsFunc(t *testing.T) {
|
|||
}},
|
||||
},
|
||||
},
|
||||
err: `spec.initContainers[0].resources.limits: Invalid value: "1m": must be greater than or equal to cpu request`,
|
||||
err: `spec.initContainers[0].resources.requests: Invalid value: "2m": must be less than or equal to cpu limit`,
|
||||
},
|
||||
"container resource invalid": {
|
||||
pod: &api.Pod{
|
||||
|
@ -55,7 +55,7 @@ func TestPodConstraintsFunc(t *testing.T) {
|
|||
}},
|
||||
},
|
||||
},
|
||||
err: `spec.containers[0].resources.limits: Invalid value: "1m": must be greater than or equal to cpu request`,
|
||||
err: `spec.containers[0].resources.requests: Invalid value: "2m": must be less than or equal to cpu limit`,
|
||||
},
|
||||
"init container resource missing": {
|
||||
pod: &api.Pod{
|
||||
|
|
Loading…
Reference in New Issue