mirror of https://github.com/k3s-io/k3s
Allow system critical priority classes in API validation
parent
515ba9e8d4
commit
9592a9ecf4
|
@ -324,7 +324,6 @@ pkg/registry/storage/rest
|
|||
pkg/registry/storage/storageclass
|
||||
pkg/registry/storage/storageclass/storage
|
||||
pkg/routes
|
||||
pkg/scheduler/api
|
||||
pkg/security/apparmor
|
||||
pkg/security/podsecuritypolicy
|
||||
pkg/security/podsecuritypolicy/group
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduling
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
// SystemPriorityClasses define system priority classes that are auto-created at cluster bootstrapping.
|
||||
// Our API validation logic ensures that any priority class that has a system prefix or its value
|
||||
// is higher than HighestUserDefinablePriority is equal to one of these SystemPriorityClasses.
|
||||
var systemPriorityClasses = []*PriorityClass{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: SystemNodeCritical,
|
||||
},
|
||||
Value: SystemCriticalPriority + 1000,
|
||||
Description: "Used for system critical pods that must not be moved from their current node.",
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: SystemClusterCritical,
|
||||
},
|
||||
Value: SystemCriticalPriority,
|
||||
Description: "Used for system critical pods that must run in the cluster, but can be moved to another node if necessary.",
|
||||
},
|
||||
}
|
||||
|
||||
// SystemPriorityClasses returns the list of system priority classes.
|
||||
// NOTE: be careful not to modify any of elements of the returned array directly.
|
||||
func SystemPriorityClasses() []*PriorityClass {
|
||||
return systemPriorityClasses
|
||||
}
|
||||
|
||||
// IsKnownSystemPriorityClass checks that "pc" is equal to one of the system PriorityClasses.
|
||||
// It ignores "description", labels, annotations, etc. of the PriorityClass.
|
||||
func IsKnownSystemPriorityClass(pc *PriorityClass) (bool, error) {
|
||||
for _, spc := range systemPriorityClasses {
|
||||
if spc.Name == pc.Name {
|
||||
if spc.Value != pc.Value {
|
||||
return false, fmt.Errorf("value of %v PriorityClass must be %v", spc.Name, spc.Value)
|
||||
}
|
||||
if spc.GlobalDefault != pc.GlobalDefault {
|
||||
return false, fmt.Errorf("globalDefault of %v PriorityClass must be %v", spc.Name, spc.GlobalDefault)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, fmt.Errorf("%v is not a known system priority class", pc.Name)
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduling
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func TestIsKnownSystemPriorityClass(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
pc *PriorityClass
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "system priority class",
|
||||
pc: SystemPriorityClasses()[0],
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "non-system priority class",
|
||||
pc: &PriorityClass{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: SystemNodeCritical,
|
||||
},
|
||||
Value: SystemCriticalPriority, // This is the value of system cluster critical
|
||||
Description: "Used for system critical pods that must not be moved from their current node.",
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
if is, err := IsKnownSystemPriorityClass(test.pc); test.expected != is {
|
||||
t.Errorf("Test [%v]: Expected %v, but got %v. Error: %v", test.name, test.expected, is, err)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -17,22 +17,29 @@ limitations under the License.
|
|||
package validation
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/validation/field"
|
||||
apivalidation "k8s.io/kubernetes/pkg/apis/core/validation"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
)
|
||||
|
||||
// ValidatePriorityClassName checks whether the given priority class name is valid.
|
||||
func ValidatePriorityClassName(name string, prefix bool) []string {
|
||||
return apivalidation.NameIsDNSSubdomain(name, prefix)
|
||||
}
|
||||
|
||||
// ValidatePriorityClass tests whether required fields in the PriorityClass are
|
||||
// set correctly.
|
||||
func ValidatePriorityClass(pc *scheduling.PriorityClass) field.ErrorList {
|
||||
allErrs := field.ErrorList{}
|
||||
allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&pc.ObjectMeta, false, ValidatePriorityClassName, field.NewPath("metadata"))...)
|
||||
// The "Value" field can be any valid integer. So, no need to validate.
|
||||
allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&pc.ObjectMeta, false, apivalidation.NameIsDNSSubdomain, field.NewPath("metadata"))...)
|
||||
// If the priorityClass starts with a system prefix, it must be one of the
|
||||
// predefined system priority classes.
|
||||
if strings.HasPrefix(pc.Name, scheduling.SystemPriorityClassPrefix) {
|
||||
if is, err := scheduling.IsKnownSystemPriorityClass(pc); !is {
|
||||
allErrs = append(allErrs, field.Forbidden(field.NewPath("metadata", "name"), "priority class names with '"+scheduling.SystemPriorityClassPrefix+"' prefix are reserved for system use only. error: "+err.Error()))
|
||||
}
|
||||
} else if pc.Value > scheduling.HighestUserDefinablePriority {
|
||||
// Non-system critical priority classes are not allowed to have a value larger than HighestUserDefinablePriority.
|
||||
allErrs = append(allErrs, field.Forbidden(field.NewPath("value"), fmt.Sprintf("maximum allowed value of a user defined priority is %v", scheduling.HighestUserDefinablePriority)))
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import (
|
|||
)
|
||||
|
||||
func TestValidatePriorityClass(t *testing.T) {
|
||||
spcs := scheduling.SystemPriorityClasses()
|
||||
successCases := map[string]scheduling.PriorityClass{
|
||||
"no description": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "tier1", Namespace: ""},
|
||||
|
@ -36,6 +37,12 @@ func TestValidatePriorityClass(t *testing.T) {
|
|||
GlobalDefault: false,
|
||||
Description: "Used for the highest priority pods.",
|
||||
},
|
||||
"system node critical": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: spcs[0].Name, Namespace: ""},
|
||||
Value: spcs[0].Value,
|
||||
GlobalDefault: spcs[0].GlobalDefault,
|
||||
Description: "system priority class 0",
|
||||
},
|
||||
}
|
||||
|
||||
for k, v := range successCases {
|
||||
|
@ -53,6 +60,16 @@ func TestValidatePriorityClass(t *testing.T) {
|
|||
ObjectMeta: metav1.ObjectMeta{Name: "tier&1", Namespace: ""},
|
||||
Value: 100,
|
||||
},
|
||||
"incorrect system class name": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: spcs[0].Name, Namespace: ""},
|
||||
Value: 0,
|
||||
GlobalDefault: spcs[0].GlobalDefault,
|
||||
},
|
||||
"incorrect system class value": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "system-something", Namespace: ""},
|
||||
Value: spcs[0].Value,
|
||||
GlobalDefault: spcs[0].GlobalDefault,
|
||||
},
|
||||
}
|
||||
|
||||
for k, v := range errorCases {
|
||||
|
|
|
@ -17,11 +17,16 @@ limitations under the License.
|
|||
package storage
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
|
||||
"k8s.io/apiserver/pkg/registry/generic"
|
||||
genericregistry "k8s.io/apiserver/pkg/registry/generic/registry"
|
||||
"k8s.io/apiserver/pkg/registry/rest"
|
||||
schedulingapi "k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
"k8s.io/kubernetes/pkg/registry/scheduling/priorityclass"
|
||||
)
|
||||
|
||||
|
@ -33,9 +38,9 @@ type REST struct {
|
|||
// NewREST returns a RESTStorage object that will work against priority classes.
|
||||
func NewREST(optsGetter generic.RESTOptionsGetter) *REST {
|
||||
store := &genericregistry.Store{
|
||||
NewFunc: func() runtime.Object { return &schedulingapi.PriorityClass{} },
|
||||
NewListFunc: func() runtime.Object { return &schedulingapi.PriorityClassList{} },
|
||||
DefaultQualifiedResource: schedulingapi.Resource("priorityclasses"),
|
||||
NewFunc: func() runtime.Object { return &scheduling.PriorityClass{} },
|
||||
NewListFunc: func() runtime.Object { return &scheduling.PriorityClassList{} },
|
||||
DefaultQualifiedResource: scheduling.Resource("priorityclasses"),
|
||||
|
||||
CreateStrategy: priorityclass.Strategy,
|
||||
UpdateStrategy: priorityclass.Strategy,
|
||||
|
@ -56,3 +61,14 @@ var _ rest.ShortNamesProvider = &REST{}
|
|||
func (r *REST) ShortNames() []string {
|
||||
return []string{"pc"}
|
||||
}
|
||||
|
||||
// Delete ensures that system priority classes are not deleted.
|
||||
func (r *REST) Delete(ctx genericapirequest.Context, name string, options *metav1.DeleteOptions) (runtime.Object, bool, error) {
|
||||
for _, spc := range scheduling.SystemPriorityClasses() {
|
||||
if name == spc.Name {
|
||||
return nil, false, apierrors.NewForbidden(scheduling.Resource("priorityclasses"), spc.Name, errors.New("this is a system priority class and cannot be deleted"))
|
||||
}
|
||||
}
|
||||
|
||||
return r.Store.Delete(ctx, name, options)
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import (
|
|||
"k8s.io/apimachinery/pkg/fields"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
|
||||
"k8s.io/apiserver/pkg/registry/generic"
|
||||
genericregistrytest "k8s.io/apiserver/pkg/registry/generic/testing"
|
||||
etcdtesting "k8s.io/apiserver/pkg/storage/etcd/testing"
|
||||
|
@ -105,6 +106,22 @@ func TestDelete(t *testing.T) {
|
|||
test.TestDelete(validNewPriorityClass())
|
||||
}
|
||||
|
||||
// TestDeleteSystemPriorityClass checks that system priority classes cannot be deleted.
|
||||
func TestDeleteSystemPriorityClass(t *testing.T) {
|
||||
storage, server := newStorage(t)
|
||||
defer server.Terminate(t)
|
||||
defer storage.Store.DestroyFunc()
|
||||
key := "test/system-node-critical"
|
||||
ctx := genericapirequest.NewContext()
|
||||
pc := scheduling.SystemPriorityClasses()[0]
|
||||
if err := storage.Store.Storage.Create(ctx, key, pc, nil, 0); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if _, _, err := storage.Delete(ctx, pc.Name, nil); err == nil {
|
||||
t.Error("expected to receive an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGet(t *testing.T) {
|
||||
storage, server := newStorage(t)
|
||||
defer server.Terminate(t)
|
||||
|
|
|
@ -68,9 +68,7 @@ func (priorityClassStrategy) AllowCreateOnUpdate() bool {
|
|||
|
||||
// ValidateUpdate is the default update validation for an end user.
|
||||
func (priorityClassStrategy) ValidateUpdate(ctx genericapirequest.Context, obj, old runtime.Object) field.ErrorList {
|
||||
validationErrorList := validation.ValidatePriorityClass(obj.(*scheduling.PriorityClass))
|
||||
updateErrorList := validation.ValidatePriorityClassUpdate(obj.(*scheduling.PriorityClass), old.(*scheduling.PriorityClass))
|
||||
return append(validationErrorList, updateErrorList...)
|
||||
return validation.ValidatePriorityClassUpdate(obj.(*scheduling.PriorityClass), old.(*scheduling.PriorityClass))
|
||||
}
|
||||
|
||||
// AllowUnconditionalUpdate is the default update policy for PriorityClass objects.
|
||||
|
|
|
@ -68,22 +68,6 @@ func (p RESTStorageProvider) PostStartHook() (string, genericapiserver.PostStart
|
|||
}
|
||||
|
||||
func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc {
|
||||
priorityClasses := []*scheduling.PriorityClass{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: scheduling.SystemNodeCritical,
|
||||
},
|
||||
Value: scheduling.SystemCriticalPriority + 1000,
|
||||
Description: "Used for system critical pods that must not be moved from their current node.",
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: scheduling.SystemClusterCritical,
|
||||
},
|
||||
Value: scheduling.SystemCriticalPriority,
|
||||
Description: "Used for system critical pods that must run in the cluster, but can be moved to another node if necessary.",
|
||||
},
|
||||
}
|
||||
return func(hookContext genericapiserver.PostStartHookContext) error {
|
||||
// Adding system priority classes is important. If they fail to add, many critical system
|
||||
// components may fail and cluster may break.
|
||||
|
@ -94,7 +78,7 @@ func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc {
|
|||
return false, nil
|
||||
}
|
||||
|
||||
for _, pc := range priorityClasses {
|
||||
for _, pc := range scheduling.SystemPriorityClasses() {
|
||||
_, err := schedClientSet.PriorityClasses().Get(pc.Name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
|
@ -106,11 +90,12 @@ func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc {
|
|||
}
|
||||
} else {
|
||||
// Unable to get the priority class for reasons other than "not found".
|
||||
glog.Warningf("unable to get PriorityClass %v: %v. Retrying...", pc.Name, err)
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
glog.Infof("all system priority classes are created successfully.")
|
||||
glog.Infof("all system priority classes are created successfully or already exist.")
|
||||
return true, nil
|
||||
})
|
||||
// if we're never able to make it through initialization, kill the API server.
|
||||
|
|
|
@ -19,12 +19,10 @@ package priority
|
|||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apiserver/pkg/admission"
|
||||
"k8s.io/apiserver/pkg/authentication/user"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
api "k8s.io/kubernetes/pkg/apis/core"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
|
@ -188,16 +186,6 @@ func (p *priorityPlugin) validatePriorityClass(a admission.Attributes) error {
|
|||
if !ok {
|
||||
return errors.NewBadRequest("resource was marked with kind PriorityClass but was unable to be converted")
|
||||
}
|
||||
// API server adds system critical priority classes at bootstrapping. We should
|
||||
// not enforce restrictions on adding system level priority classes for API server.
|
||||
if userInfo := a.GetUserInfo(); userInfo == nil || userInfo.GetName() != user.APIServerUser {
|
||||
if pc.Value > scheduling.HighestUserDefinablePriority {
|
||||
return admission.NewForbidden(a, fmt.Errorf("maximum allowed value of a user defined priority is %v", scheduling.HighestUserDefinablePriority))
|
||||
}
|
||||
if strings.HasPrefix(pc.Name, scheduling.SystemPriorityClassPrefix) {
|
||||
return admission.NewForbidden(a, fmt.Errorf("priority class names with '"+scheduling.SystemPriorityClassPrefix+"' prefix are reserved for system use only"))
|
||||
}
|
||||
}
|
||||
// If the new PriorityClass tries to be the default priority, make sure that no other priority class is marked as default.
|
||||
if pc.GlobalDefault {
|
||||
dpc, err := p.getDefaultPriorityClass()
|
||||
|
|
|
@ -87,17 +87,6 @@ var systemClusterCritical = &scheduling.PriorityClass{
|
|||
}
|
||||
|
||||
func TestPriorityClassAdmission(t *testing.T) {
|
||||
var tooHighPriorityClass = &scheduling.PriorityClass{
|
||||
TypeMeta: metav1.TypeMeta{
|
||||
Kind: "PriorityClass",
|
||||
},
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "toohighclass",
|
||||
},
|
||||
Value: scheduling.HighestUserDefinablePriority + 1,
|
||||
Description: "Just a test priority class",
|
||||
}
|
||||
|
||||
var systemClass = &scheduling.PriorityClass{
|
||||
TypeMeta: metav1.TypeMeta{
|
||||
Kind: "PriorityClass",
|
||||
|
@ -131,21 +120,7 @@ func TestPriorityClassAdmission(t *testing.T) {
|
|||
true,
|
||||
},
|
||||
{
|
||||
"too high PriorityClass value",
|
||||
[]*scheduling.PriorityClass{},
|
||||
tooHighPriorityClass,
|
||||
nil,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"system name conflict",
|
||||
[]*scheduling.PriorityClass{},
|
||||
systemClass,
|
||||
nil,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"system name allowed for API server",
|
||||
"system name and value are allowed by admission controller",
|
||||
[]*scheduling.PriorityClass{},
|
||||
systemClass,
|
||||
&user.DefaultInfo{
|
||||
|
|
|
@ -325,11 +325,8 @@ var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", fu
|
|||
framework.ExpectNoError(err)
|
||||
})
|
||||
|
||||
// This test verifies that when a higher priority pod is created and no node with
|
||||
// enough resources is found, scheduler preempts a lower priority pod to schedule
|
||||
// the high priority pod.
|
||||
// This test verifies that system critical priorities are created automatically and resolved properly.
|
||||
It("validates critical system priorities are created and resolved", func() {
|
||||
var podRes v1.ResourceList
|
||||
// Create pods that use system critical priorities and
|
||||
By("Create pods that use critical system priorities.")
|
||||
systemPriorityClasses := []string{
|
||||
|
@ -339,9 +336,6 @@ var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", fu
|
|||
pod := createPausePod(f, pausePodConfig{
|
||||
Name: fmt.Sprintf("pod%d-%v", i, spc),
|
||||
PriorityClassName: spc,
|
||||
Resources: &v1.ResourceRequirements{
|
||||
Requests: podRes,
|
||||
},
|
||||
})
|
||||
Expect(pod.Spec.Priority).NotTo(BeNil())
|
||||
framework.Logf("Created pod: %v", pod.Name)
|
||||
|
|
Loading…
Reference in New Issue