Merge pull request #50218 from dixudx/fix_GPU_resource_zero_limit_validation

Automatic merge from submit-queue

fix GPU resource validation that incorrectly allows zero limits

**What this PR does / why we need it**:

The validation logic for GPUs is not run if limits is not set for GPUs.
We need to check limits equals requests even if just request is set for GPUs.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #50182

**Special notes for your reviewer**:
/assign @vishh 
/cc @jiayingz 

**Release note**:

```release-note
fix GPU resource validation that incorrectly allows zero limits
```
pull/6/head
Kubernetes Submit Queue 2017-08-17 19:57:40 -07:00 committed by GitHub
commit 075d209ea4
6 changed files with 236 additions and 23 deletions

View File

@ -3,6 +3,7 @@ package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
@ -32,3 +33,15 @@ filegroup(
srcs = [":package-srcs"],
tags = ["automanaged"],
)
go_test(
name = "go_default_test",
srcs = ["validation_test.go"],
library = ":go_default_library",
tags = ["automanaged"],
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/validation/field:go_default_library",
],
)

View File

@ -46,16 +46,6 @@ func ValidateResourceRequirements(requirements *v1.ResourceRequirements, fldPath
// Validate resource quantity.
allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
// Check that request <= limit.
requestQuantity, exists := requirements.Requests[resourceName]
if exists {
// Ensure overcommit is allowed for the resource if request != limit
if quantity.Cmp(requestQuantity) != 0 && !v1helper.IsOvercommitAllowed(resourceName) {
allErrs = append(allErrs, field.Invalid(reqPath, requestQuantity.String(), fmt.Sprintf("must be equal to %s limit", resourceName)))
} else if quantity.Cmp(requestQuantity) < 0 {
allErrs = append(allErrs, field.Invalid(limPath, quantity.String(), fmt.Sprintf("must be greater than or equal to %s request", resourceName)))
}
}
}
for resourceName, quantity := range requirements.Requests {
fldPath := reqPath.Key(string(resourceName))
@ -63,6 +53,19 @@ func ValidateResourceRequirements(requirements *v1.ResourceRequirements, fldPath
allErrs = append(allErrs, validateContainerResourceName(string(resourceName), fldPath)...)
// Validate resource quantity.
allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
// Check that request <= limit.
limitQuantity, exists := requirements.Limits[resourceName]
if exists {
// For GPUs, not only requests can't exceed limits, they also can't be lower, i.e. must be equal.
if quantity.Cmp(limitQuantity) != 0 && !v1helper.IsOvercommitAllowed(resourceName) {
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s limit", resourceName)))
} else if quantity.Cmp(limitQuantity) > 0 {
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be less than or equal to %s limit", resourceName)))
}
} else if resourceName == v1.ResourceNvidiaGPU {
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s request", v1.ResourceNvidiaGPU)))
}
}
return allErrs

View File

@ -0,0 +1,179 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/validation/field"
)
func TestValidateResourceRequirements(t *testing.T) {
successCase := []struct {
Name string
requirements v1.ResourceRequirements
}{
{
Name: "GPU only setting Limits",
requirements: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
},
},
},
{
Name: "GPU setting Limits equals Requests",
requirements: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
},
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
},
},
},
{
Name: "Resources with GPU with Requests",
requirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("1"),
},
Limits: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("1"),
},
},
},
{
Name: "Resources with only Limits",
requirements: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
v1.ResourceName("my.org/resource"): resource.MustParse("10"),
},
},
},
{
Name: "Resources with only Requests",
requirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
v1.ResourceName("my.org/resource"): resource.MustParse("10"),
},
},
},
{
Name: "Resources with Requests Less Than Limits",
requirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("9"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("9G"),
v1.ResourceName("my.org/resource"): resource.MustParse("9"),
},
Limits: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
v1.ResourceName("my.org/resource"): resource.MustParse("9"),
},
},
},
}
for _, tc := range successCase {
if errs := ValidateResourceRequirements(&tc.requirements, field.NewPath("resources")); len(errs) != 0 {
t.Errorf("%q unexpected error: %v", tc.Name, errs)
}
}
errorCase := []struct {
Name string
requirements v1.ResourceRequirements
}{
{
Name: "GPU only setting Requests",
requirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
},
},
},
{
Name: "GPU setting Limits less than Requests",
requirements: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
},
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("11"),
},
},
},
{
Name: "GPU setting Limits larger than Requests",
requirements: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("10"),
},
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceNvidiaGPU): resource.MustParse("9"),
},
},
},
{
Name: "Resources with Requests Larger Than Limits",
requirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"),
v1.ResourceName("my.org/resource"): resource.MustParse("10m"),
},
Limits: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("9"),
v1.ResourceName(v1.ResourceMemory): resource.MustParse("9G"),
v1.ResourceName("my.org/resource"): resource.MustParse("9m"),
},
},
},
{
Name: "Invalid Resources with Requests",
requirements: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName("my.org"): resource.MustParse("10m"),
},
},
},
{
Name: "Invalid Resources with Limits",
requirements: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName("my.org"): resource.MustParse("9m"),
},
},
},
}
for _, tc := range errorCase {
if errs := ValidateResourceRequirements(&tc.requirements, field.NewPath("resources")); len(errs) == 0 {
t.Errorf("%q expected error", tc.Name)
}
}
}

View File

@ -3718,16 +3718,6 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
// Validate resource quantity.
allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
// Check that request <= limit.
requestQuantity, exists := requirements.Requests[resourceName]
if exists {
// Ensure overcommit is allowed for the resource if request != limit
if quantity.Cmp(requestQuantity) != 0 && !helper.IsOvercommitAllowed(resourceName) {
allErrs = append(allErrs, field.Invalid(reqPath, requestQuantity.String(), fmt.Sprintf("must be equal to %s limit", resourceName)))
} else if quantity.Cmp(requestQuantity) < 0 {
allErrs = append(allErrs, field.Invalid(limPath, quantity.String(), fmt.Sprintf("must be greater than or equal to %s request", resourceName)))
}
}
if resourceName == api.ResourceStorageOverlay && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
allErrs = append(allErrs, field.Forbidden(limPath, "ResourceStorageOverlay field disabled by feature-gate for ResourceRequirements"))
}
@ -3738,6 +3728,19 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
allErrs = append(allErrs, validateContainerResourceName(string(resourceName), fldPath)...)
// Validate resource quantity.
allErrs = append(allErrs, ValidateResourceQuantityValue(string(resourceName), quantity, fldPath)...)
// Check that request <= limit.
limitQuantity, exists := requirements.Limits[resourceName]
if exists {
// For GPUs, not only requests can't exceed limits, they also can't be lower, i.e. must be equal.
if quantity.Cmp(limitQuantity) != 0 && !helper.IsOvercommitAllowed(resourceName) {
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s limit", api.ResourceNvidiaGPU)))
} else if quantity.Cmp(limitQuantity) > 0 {
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be less than or equal to %s limit", resourceName)))
}
} else if resourceName == api.ResourceNvidiaGPU {
allErrs = append(allErrs, field.Invalid(reqPath, quantity.String(), fmt.Sprintf("must be equal to %s request", api.ResourceNvidiaGPU)))
}
}
return allErrs

View File

@ -3631,6 +3631,21 @@ func TestValidateContainers(t *testing.T) {
ImagePullPolicy: "IfNotPresent",
},
},
"Resource GPU invalid setting only request": {
{
Name: "gpu-resource-request-limit",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName(api.ResourceNvidiaGPU): resource.MustParse("1"),
},
},
TerminationMessagePolicy: "File",
ImagePullPolicy: "IfNotPresent",
},
},
"Request limit simple invalid": {
{
Name: "abc-123",
@ -5045,7 +5060,7 @@ func TestValidatePod(t *testing.T) {
},
},
"invalid opaque integer resource requirement: request must be <= limit": {
expectedError: "must be greater than or equal to pod.alpha.kubernetes.io/opaque-int-resource-A",
expectedError: "must be less than or equal to pod.alpha.kubernetes.io/opaque-int-resource-A",
spec: api.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "123", Namespace: "ns"},
Spec: api.PodSpec{

View File

@ -42,7 +42,7 @@ func TestPodConstraintsFunc(t *testing.T) {
}},
},
},
err: `spec.initContainers[0].resources.limits: Invalid value: "1m": must be greater than or equal to cpu request`,
err: `spec.initContainers[0].resources.requests: Invalid value: "2m": must be less than or equal to cpu limit`,
},
"container resource invalid": {
pod: &api.Pod{
@ -55,7 +55,7 @@ func TestPodConstraintsFunc(t *testing.T) {
}},
},
},
err: `spec.containers[0].resources.limits: Invalid value: "1m": must be greater than or equal to cpu request`,
err: `spec.containers[0].resources.requests: Invalid value: "2m": must be less than or equal to cpu limit`,
},
"init container resource missing": {
pod: &api.Pod{