mirror of https://github.com/k3s-io/k3s
Merge pull request #60519 from bsalamat/auto_prio_class
Automatic merge from submit-queue (batch tested with PRs 60519, 61099, 61218, 61166, 61714). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Automatically add system critical priority classes at cluster bootstrapping **What this PR does / why we need it**: We had two PriorityClasses that were hardcoded and special-cased in our code base. These two priority classes never existed in the API server. The priority admission controller had code to resolve these two names. This PR removes the hardcoded PriorityClasses and adds code to create these PriorityClasses automatically when the API server starts. **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes #60178 ref/ #57471 **Special notes for your reviewer**: **Release note**: ```release-note Automatically add system critical priority classes at cluster bootstrapping. ``` /sig scheduling pull/8/head
commit
71050b6f2d
|
@ -323,7 +323,6 @@ pkg/registry/storage/rest
|
|||
pkg/registry/storage/storageclass
|
||||
pkg/registry/storage/storageclass/storage
|
||||
pkg/routes
|
||||
pkg/scheduler/api
|
||||
pkg/security/apparmor
|
||||
pkg/security/podsecuritypolicy
|
||||
pkg/security/podsecuritypolicy/group
|
||||
|
|
|
@ -3,12 +3,14 @@ package(default_visibility = ["//visibility:public"])
|
|||
load(
|
||||
"@io_bazel_rules_go//go:def.bzl",
|
||||
"go_library",
|
||||
"go_test",
|
||||
)
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"doc.go",
|
||||
"helpers.go",
|
||||
"register.go",
|
||||
"types.go",
|
||||
"zz_generated.deepcopy.go",
|
||||
|
@ -39,3 +41,10 @@ filegroup(
|
|||
],
|
||||
tags = ["automanaged"],
|
||||
)
|
||||
|
||||
go_test(
|
||||
name = "go_default_test",
|
||||
srcs = ["helpers_test.go"],
|
||||
embed = [":go_default_library"],
|
||||
deps = ["//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library"],
|
||||
)
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduling
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
// systemPriorityClasses defines the system priority classes that are auto-created at cluster bootstrapping.
|
||||
// Our API validation logic ensures that any priority class that has a system prefix or whose value
|
||||
// is higher than HighestUserDefinablePriority is equal to one of these SystemPriorityClasses.
|
||||
var systemPriorityClasses = []*PriorityClass{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: SystemNodeCritical,
|
||||
},
|
||||
Value: SystemCriticalPriority + 1000,
|
||||
Description: "Used for system critical pods that must not be moved from their current node.",
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: SystemClusterCritical,
|
||||
},
|
||||
Value: SystemCriticalPriority,
|
||||
Description: "Used for system critical pods that must run in the cluster, but can be moved to another node if necessary.",
|
||||
},
|
||||
}
|
||||
|
||||
// SystemPriorityClasses returns the list of system priority classes.
|
||||
// NOTE: be careful not to modify any of elements of the returned array directly.
|
||||
func SystemPriorityClasses() []*PriorityClass {
|
||||
return systemPriorityClasses
|
||||
}
|
||||
|
||||
// IsKnownSystemPriorityClass checks that "pc" is equal to one of the system PriorityClasses.
|
||||
// It ignores "description", labels, annotations, etc. of the PriorityClass.
|
||||
func IsKnownSystemPriorityClass(pc *PriorityClass) (bool, error) {
|
||||
for _, spc := range systemPriorityClasses {
|
||||
if spc.Name == pc.Name {
|
||||
if spc.Value != pc.Value {
|
||||
return false, fmt.Errorf("value of %v PriorityClass must be %v", spc.Name, spc.Value)
|
||||
}
|
||||
if spc.GlobalDefault != pc.GlobalDefault {
|
||||
return false, fmt.Errorf("globalDefault of %v PriorityClass must be %v", spc.Name, spc.GlobalDefault)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, fmt.Errorf("%v is not a known system priority class", pc.Name)
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduling
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func TestIsKnownSystemPriorityClass(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
pc *PriorityClass
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "system priority class",
|
||||
pc: SystemPriorityClasses()[0],
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "non-system priority class",
|
||||
pc: &PriorityClass{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: SystemNodeCritical,
|
||||
},
|
||||
Value: SystemCriticalPriority, // This is the value of system cluster critical
|
||||
Description: "Used for system critical pods that must not be moved from their current node.",
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
if is, err := IsKnownSystemPriorityClass(test.pc); test.expected != is {
|
||||
t.Errorf("Test [%v]: Expected %v, but got %v. Error: %v", test.name, test.expected, is, err)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -23,9 +23,17 @@ const (
|
|||
// that do not specify any priority class and there is no priority class
|
||||
// marked as default.
|
||||
DefaultPriorityWhenNoDefaultClassExists = 0
|
||||
// HighestUserDefinablePriority is the highest priority for user defined priority classes. Priority values larger than 1 billion are reserved for Kubernetes system use.
|
||||
HighestUserDefinablePriority = int32(1000000000)
|
||||
// SystemCriticalPriority is the beginning of the range of priority values for critical system components.
|
||||
SystemCriticalPriority = 2 * HighestUserDefinablePriority
|
||||
// SystemPriorityClassPrefix is the prefix reserved for system priority class names. Other priority
|
||||
// classes are not allowed to start with this prefix.
|
||||
SystemPriorityClassPrefix = "system-"
|
||||
// NOTE: In order to avoid conflict of names with user-defined priority classes, all the names must
|
||||
// start with SystemPriorityClassPrefix.
|
||||
SystemClusterCritical = SystemPriorityClassPrefix + "cluster-critical"
|
||||
SystemNodeCritical = SystemPriorityClassPrefix + "node-critical"
|
||||
)
|
||||
|
||||
// +genclient
|
||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
package validation
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/validation/field"
|
||||
|
@ -24,22 +25,21 @@ import (
|
|||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
)
|
||||
|
||||
// ValidatePriorityClassName checks whether the given priority class name is valid.
|
||||
func ValidatePriorityClassName(name string, prefix bool) []string {
|
||||
var allErrs []string
|
||||
if strings.HasPrefix(name, scheduling.SystemPriorityClassPrefix) {
|
||||
allErrs = append(allErrs, "priority class names with '"+scheduling.SystemPriorityClassPrefix+"' prefix are reserved for system use only")
|
||||
}
|
||||
allErrs = append(allErrs, apivalidation.NameIsDNSSubdomain(name, prefix)...)
|
||||
return allErrs
|
||||
}
|
||||
|
||||
// ValidatePriorityClass tests whether required fields in the PriorityClass are
|
||||
// set correctly.
|
||||
func ValidatePriorityClass(pc *scheduling.PriorityClass) field.ErrorList {
|
||||
allErrs := field.ErrorList{}
|
||||
allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&pc.ObjectMeta, false, ValidatePriorityClassName, field.NewPath("metadata"))...)
|
||||
// The "Value" field can be any valid integer. So, no need to validate.
|
||||
allErrs = append(allErrs, apivalidation.ValidateObjectMeta(&pc.ObjectMeta, false, apivalidation.NameIsDNSSubdomain, field.NewPath("metadata"))...)
|
||||
// If the priorityClass starts with a system prefix, it must be one of the
|
||||
// predefined system priority classes.
|
||||
if strings.HasPrefix(pc.Name, scheduling.SystemPriorityClassPrefix) {
|
||||
if is, err := scheduling.IsKnownSystemPriorityClass(pc); !is {
|
||||
allErrs = append(allErrs, field.Forbidden(field.NewPath("metadata", "name"), "priority class names with '"+scheduling.SystemPriorityClassPrefix+"' prefix are reserved for system use only. error: "+err.Error()))
|
||||
}
|
||||
} else if pc.Value > scheduling.HighestUserDefinablePriority {
|
||||
// Non-system critical priority classes are not allowed to have a value larger than HighestUserDefinablePriority.
|
||||
allErrs = append(allErrs, field.Forbidden(field.NewPath("value"), fmt.Sprintf("maximum allowed value of a user defined priority is %v", scheduling.HighestUserDefinablePriority)))
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import (
|
|||
)
|
||||
|
||||
func TestValidatePriorityClass(t *testing.T) {
|
||||
spcs := scheduling.SystemPriorityClasses()
|
||||
successCases := map[string]scheduling.PriorityClass{
|
||||
"no description": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "tier1", Namespace: ""},
|
||||
|
@ -36,6 +37,12 @@ func TestValidatePriorityClass(t *testing.T) {
|
|||
GlobalDefault: false,
|
||||
Description: "Used for the highest priority pods.",
|
||||
},
|
||||
"system node critical": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: spcs[0].Name, Namespace: ""},
|
||||
Value: spcs[0].Value,
|
||||
GlobalDefault: spcs[0].GlobalDefault,
|
||||
Description: "system priority class 0",
|
||||
},
|
||||
}
|
||||
|
||||
for k, v := range successCases {
|
||||
|
@ -53,9 +60,15 @@ func TestValidatePriorityClass(t *testing.T) {
|
|||
ObjectMeta: metav1.ObjectMeta{Name: "tier&1", Namespace: ""},
|
||||
Value: 100,
|
||||
},
|
||||
"invalid system name": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: scheduling.SystemPriorityClassPrefix + "test"},
|
||||
Value: 100,
|
||||
"incorrect system class name": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: spcs[0].Name, Namespace: ""},
|
||||
Value: 0,
|
||||
GlobalDefault: spcs[0].GlobalDefault,
|
||||
},
|
||||
"incorrect system class value": {
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "system-something", Namespace: ""},
|
||||
Value: spcs[0].Value,
|
||||
GlobalDefault: spcs[0].GlobalDefault,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ go_library(
|
|||
importpath = "k8s.io/kubernetes/pkg/kubelet/types",
|
||||
deps = [
|
||||
"//pkg/apis/core:go_default_library",
|
||||
"//pkg/scheduler/api:go_default_library",
|
||||
"//pkg/apis/scheduling:go_default_library",
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
|
||||
|
|
|
@ -22,7 +22,7 @@ import (
|
|||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
kubeapi "k8s.io/kubernetes/pkg/apis/core"
|
||||
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -168,7 +168,7 @@ func IsCriticalPodBasedOnPriority(ns string, priority int32) bool {
|
|||
if ns != kubeapi.NamespaceSystem {
|
||||
return false
|
||||
}
|
||||
if priority >= schedulerapi.SystemCriticalPriority {
|
||||
if priority >= scheduling.SystemCriticalPriority {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
|
|
@ -17,6 +17,7 @@ go_test(
|
|||
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/endpoints/request:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/registry/generic:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/registry/generic/testing:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/storage/etcd/testing:go_default_library",
|
||||
|
@ -30,7 +31,10 @@ go_library(
|
|||
deps = [
|
||||
"//pkg/apis/scheduling:go_default_library",
|
||||
"//pkg/registry/scheduling/priorityclass:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/endpoints/request:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/registry/generic:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/registry/generic/registry:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/registry/rest:go_default_library",
|
||||
|
|
|
@ -17,11 +17,16 @@ limitations under the License.
|
|||
package storage
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
|
||||
"k8s.io/apiserver/pkg/registry/generic"
|
||||
genericregistry "k8s.io/apiserver/pkg/registry/generic/registry"
|
||||
"k8s.io/apiserver/pkg/registry/rest"
|
||||
schedulingapi "k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
"k8s.io/kubernetes/pkg/registry/scheduling/priorityclass"
|
||||
)
|
||||
|
||||
|
@ -33,9 +38,9 @@ type REST struct {
|
|||
// NewREST returns a RESTStorage object that will work against priority classes.
|
||||
func NewREST(optsGetter generic.RESTOptionsGetter) *REST {
|
||||
store := &genericregistry.Store{
|
||||
NewFunc: func() runtime.Object { return &schedulingapi.PriorityClass{} },
|
||||
NewListFunc: func() runtime.Object { return &schedulingapi.PriorityClassList{} },
|
||||
DefaultQualifiedResource: schedulingapi.Resource("priorityclasses"),
|
||||
NewFunc: func() runtime.Object { return &scheduling.PriorityClass{} },
|
||||
NewListFunc: func() runtime.Object { return &scheduling.PriorityClassList{} },
|
||||
DefaultQualifiedResource: scheduling.Resource("priorityclasses"),
|
||||
|
||||
CreateStrategy: priorityclass.Strategy,
|
||||
UpdateStrategy: priorityclass.Strategy,
|
||||
|
@ -56,3 +61,14 @@ var _ rest.ShortNamesProvider = &REST{}
|
|||
func (r *REST) ShortNames() []string {
|
||||
return []string{"pc"}
|
||||
}
|
||||
|
||||
// Delete ensures that system priority classes are not deleted.
|
||||
func (r *REST) Delete(ctx genericapirequest.Context, name string, options *metav1.DeleteOptions) (runtime.Object, bool, error) {
|
||||
for _, spc := range scheduling.SystemPriorityClasses() {
|
||||
if name == spc.Name {
|
||||
return nil, false, apierrors.NewForbidden(scheduling.Resource("priorityclasses"), spc.Name, errors.New("this is a system priority class and cannot be deleted"))
|
||||
}
|
||||
}
|
||||
|
||||
return r.Store.Delete(ctx, name, options)
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import (
|
|||
"k8s.io/apimachinery/pkg/fields"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
|
||||
"k8s.io/apiserver/pkg/registry/generic"
|
||||
genericregistrytest "k8s.io/apiserver/pkg/registry/generic/testing"
|
||||
etcdtesting "k8s.io/apiserver/pkg/storage/etcd/testing"
|
||||
|
@ -105,6 +106,22 @@ func TestDelete(t *testing.T) {
|
|||
test.TestDelete(validNewPriorityClass())
|
||||
}
|
||||
|
||||
// TestDeleteSystemPriorityClass checks that system priority classes cannot be deleted.
|
||||
func TestDeleteSystemPriorityClass(t *testing.T) {
|
||||
storage, server := newStorage(t)
|
||||
defer server.Terminate(t)
|
||||
defer storage.Store.DestroyFunc()
|
||||
key := "test/system-node-critical"
|
||||
ctx := genericapirequest.NewContext()
|
||||
pc := scheduling.SystemPriorityClasses()[0]
|
||||
if err := storage.Store.Storage.Create(ctx, key, pc, nil, 0); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if _, _, err := storage.Delete(ctx, pc.Name, nil); err == nil {
|
||||
t.Error("expected to receive an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGet(t *testing.T) {
|
||||
storage, server := newStorage(t)
|
||||
defer server.Terminate(t)
|
||||
|
|
|
@ -68,9 +68,7 @@ func (priorityClassStrategy) AllowCreateOnUpdate() bool {
|
|||
|
||||
// ValidateUpdate is the default update validation for an end user.
|
||||
func (priorityClassStrategy) ValidateUpdate(ctx genericapirequest.Context, obj, old runtime.Object) field.ErrorList {
|
||||
validationErrorList := validation.ValidatePriorityClass(obj.(*scheduling.PriorityClass))
|
||||
updateErrorList := validation.ValidatePriorityClassUpdate(obj.(*scheduling.PriorityClass), old.(*scheduling.PriorityClass))
|
||||
return append(validationErrorList, updateErrorList...)
|
||||
return validation.ValidatePriorityClassUpdate(obj.(*scheduling.PriorityClass), old.(*scheduling.PriorityClass))
|
||||
}
|
||||
|
||||
// AllowUnconditionalUpdate is the default update policy for PriorityClass objects.
|
||||
|
|
|
@ -13,7 +13,13 @@ go_library(
|
|||
"//pkg/api/legacyscheme:go_default_library",
|
||||
"//pkg/apis/scheduling:go_default_library",
|
||||
"//pkg/apis/scheduling/v1alpha1:go_default_library",
|
||||
"//pkg/client/clientset_generated/internalclientset/typed/scheduling/internalversion:go_default_library",
|
||||
"//pkg/registry/scheduling/priorityclass/storage:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/registry/generic:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/registry/rest:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/server:go_default_library",
|
||||
|
|
|
@ -17,6 +17,15 @@ limitations under the License.
|
|||
package rest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/apiserver/pkg/registry/generic"
|
||||
"k8s.io/apiserver/pkg/registry/rest"
|
||||
genericapiserver "k8s.io/apiserver/pkg/server"
|
||||
|
@ -24,11 +33,16 @@ import (
|
|||
"k8s.io/kubernetes/pkg/api/legacyscheme"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
schedulingapiv1alpha1 "k8s.io/kubernetes/pkg/apis/scheduling/v1alpha1"
|
||||
schedulingclient "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/scheduling/internalversion"
|
||||
priorityclassstore "k8s.io/kubernetes/pkg/registry/scheduling/priorityclass/storage"
|
||||
)
|
||||
|
||||
// PostStartHookName is the name returned by RESTStorageProvider.PostStartHook for the
// hook that adds the system priority classes.
const PostStartHookName = "scheduling/bootstrap-system-priority-classes"
|
||||
|
||||
// RESTStorageProvider provides the REST storage for the scheduling API group and the
// post-start hook that adds the system priority classes.
type RESTStorageProvider struct{}
|
||||
|
||||
var _ genericapiserver.PostStartHookProvider = RESTStorageProvider{}
|
||||
|
||||
func (p RESTStorageProvider) NewRESTStorage(apiResourceConfigSource serverstorage.APIResourceConfigSource, restOptionsGetter generic.RESTOptionsGetter) (genericapiserver.APIGroupInfo, bool) {
|
||||
apiGroupInfo := genericapiserver.NewDefaultAPIGroupInfo(scheduling.GroupName, legacyscheme.Registry, legacyscheme.Scheme, legacyscheme.ParameterCodec, legacyscheme.Codecs)
|
||||
|
||||
|
@ -49,6 +63,49 @@ func (p RESTStorageProvider) v1alpha1Storage(apiResourceConfigSource serverstora
|
|||
return storage
|
||||
}
|
||||
|
||||
func (p RESTStorageProvider) PostStartHook() (string, genericapiserver.PostStartHookFunc, error) {
|
||||
return PostStartHookName, AddSystemPriorityClasses(), nil
|
||||
}
|
||||
|
||||
func AddSystemPriorityClasses() genericapiserver.PostStartHookFunc {
|
||||
return func(hookContext genericapiserver.PostStartHookContext) error {
|
||||
// Adding system priority classes is important. If they fail to add, many critical system
|
||||
// components may fail and cluster may break.
|
||||
err := wait.Poll(1*time.Second, 30*time.Second, func() (done bool, err error) {
|
||||
schedClientSet, err := schedulingclient.NewForConfig(hookContext.LoopbackClientConfig)
|
||||
if err != nil {
|
||||
utilruntime.HandleError(fmt.Errorf("unable to initialize client: %v", err))
|
||||
return false, nil
|
||||
}
|
||||
|
||||
for _, pc := range scheduling.SystemPriorityClasses() {
|
||||
_, err := schedClientSet.PriorityClasses().Get(pc.Name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
_, err := schedClientSet.PriorityClasses().Create(pc)
|
||||
if err != nil {
|
||||
return false, err
|
||||
} else {
|
||||
glog.Infof("created PriorityClass %s with value %v", pc.Name, pc.Value)
|
||||
}
|
||||
} else {
|
||||
// Unable to get the priority class for reasons other than "not found".
|
||||
glog.Warningf("unable to get PriorityClass %v: %v. Retrying...", pc.Name, err)
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
glog.Infof("all system priority classes are created successfully or already exist.")
|
||||
return true, nil
|
||||
})
|
||||
// if we're never able to make it through initialization, kill the API server.
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to add default system priority classes: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (p RESTStorageProvider) GroupName() string {
|
||||
return scheduling.GroupName
|
||||
}
|
||||
|
|
|
@ -36,14 +36,6 @@ const (
|
|||
MaxPriority = 10
|
||||
// MaxWeight defines the max weight value.
|
||||
MaxWeight = MaxInt / MaxPriority
|
||||
// HighestUserDefinablePriority is the highest priority for user defined priority classes. Priority values larger than 1 billion are reserved for Kubernetes system use.
|
||||
HighestUserDefinablePriority = int32(1000000000)
|
||||
// SystemCriticalPriority is the beginning of the range of priority values for critical system components.
|
||||
SystemCriticalPriority = 2 * HighestUserDefinablePriority
|
||||
// NOTE: In order to avoid conflict of names with user-defined priority classes, all the names must
|
||||
// start with scheduling.SystemPriorityClassPrefix which is by default "system-".
|
||||
SystemClusterCritical = "system-cluster-critical"
|
||||
SystemNodeCritical = "system-node-critical"
|
||||
)
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
@ -298,12 +290,6 @@ type HostPriority struct {
|
|||
// HostPriorityList declares a []HostPriority type.
|
||||
type HostPriorityList []HostPriority
|
||||
|
||||
// SystemPriorityClasses defines special priority classes which are used by system critical pods that should not be preempted by workload pods.
|
||||
var SystemPriorityClasses = map[string]int32{
|
||||
SystemClusterCritical: SystemCriticalPriority,
|
||||
SystemNodeCritical: SystemCriticalPriority + 1000,
|
||||
}
|
||||
|
||||
func (h HostPriorityList) Len() int {
|
||||
return len(h)
|
||||
}
|
||||
|
|
|
@ -16,10 +16,10 @@ go_test(
|
|||
"//pkg/client/informers/informers_generated/internalversion:go_default_library",
|
||||
"//pkg/controller:go_default_library",
|
||||
"//pkg/features:go_default_library",
|
||||
"//pkg/scheduler/api:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/admission:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/authentication/user:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
|
||||
],
|
||||
)
|
||||
|
@ -37,7 +37,6 @@ go_library(
|
|||
"//pkg/features:go_default_library",
|
||||
"//pkg/kubeapiserver/admission:go_default_library",
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
"//pkg/scheduler/api:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
|
||||
"//vendor/k8s.io/apiserver/pkg/admission:go_default_library",
|
||||
|
|
|
@ -32,7 +32,6 @@ import (
|
|||
"k8s.io/kubernetes/pkg/features"
|
||||
kubeapiserveradmission "k8s.io/kubernetes/pkg/kubeapiserver/admission"
|
||||
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -154,7 +153,7 @@ func (p *priorityPlugin) admitPod(a admission.Attributes) error {
|
|||
if len(pod.Spec.PriorityClassName) == 0 &&
|
||||
utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
|
||||
kubelettypes.IsCritical(a.GetNamespace(), pod.Annotations) {
|
||||
pod.Spec.PriorityClassName = schedulerapi.SystemClusterCritical
|
||||
pod.Spec.PriorityClassName = scheduling.SystemClusterCritical
|
||||
}
|
||||
if len(pod.Spec.PriorityClassName) == 0 {
|
||||
var err error
|
||||
|
@ -163,22 +162,17 @@ func (p *priorityPlugin) admitPod(a admission.Attributes) error {
|
|||
return fmt.Errorf("failed to get default priority class: %v", err)
|
||||
}
|
||||
} else {
|
||||
// First try to resolve by system priority classes.
|
||||
priority, ok = schedulerapi.SystemPriorityClasses[pod.Spec.PriorityClassName]
|
||||
if !ok {
|
||||
// Now that we didn't find any system priority, try resolving by user defined priority classes.
|
||||
pc, err := p.lister.Get(pod.Spec.PriorityClassName)
|
||||
|
||||
if err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
return admission.NewForbidden(a, fmt.Errorf("no PriorityClass with name %v was found", pod.Spec.PriorityClassName))
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to get PriorityClass with name %s: %v", pod.Spec.PriorityClassName, err)
|
||||
// Try resolving the priority class name.
|
||||
pc, err := p.lister.Get(pod.Spec.PriorityClassName)
|
||||
if err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
return admission.NewForbidden(a, fmt.Errorf("no PriorityClass with name %v was found", pod.Spec.PriorityClassName))
|
||||
}
|
||||
|
||||
priority = pc.Value
|
||||
return fmt.Errorf("failed to get PriorityClass with name %s: %v", pod.Spec.PriorityClassName, err)
|
||||
}
|
||||
|
||||
priority = pc.Value
|
||||
}
|
||||
pod.Spec.Priority = &priority
|
||||
}
|
||||
|
@ -192,12 +186,6 @@ func (p *priorityPlugin) validatePriorityClass(a admission.Attributes) error {
|
|||
if !ok {
|
||||
return errors.NewBadRequest("resource was marked with kind PriorityClass but was unable to be converted")
|
||||
}
|
||||
if pc.Value > schedulerapi.HighestUserDefinablePriority {
|
||||
return admission.NewForbidden(a, fmt.Errorf("maximum allowed value of a user defined priority is %v", schedulerapi.HighestUserDefinablePriority))
|
||||
}
|
||||
if _, ok := schedulerapi.SystemPriorityClasses[pc.Name]; ok {
|
||||
return admission.NewForbidden(a, fmt.Errorf("the name of the priority class is a reserved name for system use only: %v", pc.Name))
|
||||
}
|
||||
// If the new PriorityClass tries to be the default priority, make sure that no other priority class is marked as default.
|
||||
if pc.GlobalDefault {
|
||||
dpc, err := p.getDefaultPriorityClass()
|
||||
|
|
|
@ -24,13 +24,13 @@ import (
|
|||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apiserver/pkg/admission"
|
||||
"k8s.io/apiserver/pkg/authentication/user"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
api "k8s.io/kubernetes/pkg/apis/core"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion"
|
||||
"k8s.io/kubernetes/pkg/controller"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
|
||||
)
|
||||
|
||||
func addPriorityClasses(ctrl *priorityPlugin, priorityClasses []*scheduling.PriorityClass) {
|
||||
|
@ -75,58 +75,58 @@ var nondefaultClass1 = &scheduling.PriorityClass{
|
|||
Description: "Just a test priority class",
|
||||
}
|
||||
|
||||
func TestPriorityClassAdmission(t *testing.T) {
|
||||
var tooHighPriorityClass = &scheduling.PriorityClass{
|
||||
TypeMeta: metav1.TypeMeta{
|
||||
Kind: "PriorityClass",
|
||||
},
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "toohighclass",
|
||||
},
|
||||
Value: schedulerapi.HighestUserDefinablePriority + 1,
|
||||
Description: "Just a test priority class",
|
||||
}
|
||||
var systemClusterCritical = &scheduling.PriorityClass{
|
||||
TypeMeta: metav1.TypeMeta{
|
||||
Kind: "PriorityClass",
|
||||
},
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: scheduling.SystemClusterCritical,
|
||||
},
|
||||
Value: scheduling.SystemCriticalPriority,
|
||||
GlobalDefault: true,
|
||||
}
|
||||
|
||||
func TestPriorityClassAdmission(t *testing.T) {
|
||||
var systemClass = &scheduling.PriorityClass{
|
||||
TypeMeta: metav1.TypeMeta{
|
||||
Kind: "PriorityClass",
|
||||
},
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: schedulerapi.SystemClusterCritical,
|
||||
Name: scheduling.SystemPriorityClassPrefix + "test",
|
||||
},
|
||||
Value: schedulerapi.HighestUserDefinablePriority + 1,
|
||||
Description: "Name conflicts with system priority class names",
|
||||
Value: scheduling.HighestUserDefinablePriority + 1,
|
||||
Description: "Name has system critical prefix",
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
existingClasses []*scheduling.PriorityClass
|
||||
newClass *scheduling.PriorityClass
|
||||
userInfo user.Info
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
"one default class",
|
||||
[]*scheduling.PriorityClass{},
|
||||
defaultClass1,
|
||||
nil,
|
||||
false,
|
||||
},
|
||||
{
|
||||
"more than one default classes",
|
||||
[]*scheduling.PriorityClass{defaultClass1},
|
||||
defaultClass2,
|
||||
nil,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"too high PriorityClass value",
|
||||
[]*scheduling.PriorityClass{},
|
||||
tooHighPriorityClass,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"system name conflict",
|
||||
"system name and value are allowed by admission controller",
|
||||
[]*scheduling.PriorityClass{},
|
||||
systemClass,
|
||||
true,
|
||||
&user.DefaultInfo{
|
||||
Name: user.APIServerUser,
|
||||
},
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -146,7 +146,7 @@ func TestPriorityClassAdmission(t *testing.T) {
|
|||
scheduling.Resource("priorityclasses").WithVersion("version"),
|
||||
"",
|
||||
admission.Create,
|
||||
nil,
|
||||
test.userInfo,
|
||||
)
|
||||
err := ctrl.Validate(attrs)
|
||||
glog.Infof("Got %v", err)
|
||||
|
@ -322,7 +322,7 @@ func TestPodAdmission(t *testing.T) {
|
|||
Name: containerName,
|
||||
},
|
||||
},
|
||||
PriorityClassName: schedulerapi.SystemClusterCritical,
|
||||
PriorityClassName: scheduling.SystemClusterCritical,
|
||||
},
|
||||
},
|
||||
// pod[5]: mirror Pod with a system priority class name
|
||||
|
@ -419,9 +419,9 @@ func TestPodAdmission(t *testing.T) {
|
|||
},
|
||||
{
|
||||
"pod with a system priority class",
|
||||
[]*scheduling.PriorityClass{},
|
||||
[]*scheduling.PriorityClass{systemClusterCritical},
|
||||
*pods[4],
|
||||
schedulerapi.SystemCriticalPriority,
|
||||
scheduling.SystemCriticalPriority,
|
||||
false,
|
||||
},
|
||||
{
|
||||
|
@ -440,9 +440,9 @@ func TestPodAdmission(t *testing.T) {
|
|||
},
|
||||
{
|
||||
"mirror pod with system priority class",
|
||||
[]*scheduling.PriorityClass{},
|
||||
[]*scheduling.PriorityClass{systemClusterCritical},
|
||||
*pods[5],
|
||||
schedulerapi.SystemCriticalPriority,
|
||||
scheduling.SystemCriticalPriority,
|
||||
false,
|
||||
},
|
||||
{
|
||||
|
@ -454,9 +454,9 @@ func TestPodAdmission(t *testing.T) {
|
|||
},
|
||||
{
|
||||
"pod with critical pod annotation",
|
||||
[]*scheduling.PriorityClass{},
|
||||
[]*scheduling.PriorityClass{systemClusterCritical},
|
||||
*pods[7],
|
||||
schedulerapi.SystemCriticalPriority,
|
||||
scheduling.SystemCriticalPriority,
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
|
|
@ -22,10 +22,10 @@ go_library(
|
|||
"//pkg/api/v1/pod:go_default_library",
|
||||
"//pkg/apis/core:go_default_library",
|
||||
"//pkg/apis/extensions:go_default_library",
|
||||
"//pkg/apis/scheduling:go_default_library",
|
||||
"//pkg/kubelet/apis:go_default_library",
|
||||
"//pkg/quota/evaluator/core:go_default_library",
|
||||
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
|
||||
"//pkg/scheduler/api:go_default_library",
|
||||
"//pkg/util/version:go_default_library",
|
||||
"//test/e2e/common:go_default_library",
|
||||
"//test/e2e/framework:go_default_library",
|
||||
|
|
|
@ -26,7 +26,7 @@ import (
|
|||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
|
||||
"k8s.io/kubernetes/pkg/apis/scheduling"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
|
@ -168,7 +168,7 @@ var _ = SIGDescribe("SchedulerPreemption [Serial] [Feature:PodPreemption]", func
|
|||
// Create a critical pod and make sure it is scheduled.
|
||||
runPausePod(f, pausePodConfig{
|
||||
Name: "critical-pod",
|
||||
PriorityClassName: schedulerapi.SystemClusterCritical,
|
||||
PriorityClassName: scheduling.SystemClusterCritical,
|
||||
Resources: &v1.ResourceRequirements{
|
||||
Requests: podRes,
|
||||
},
|
||||
|
@ -311,3 +311,34 @@ var _ = SIGDescribe("SchedulerPreemption [Serial] [Feature:PodPreemption]", func
|
|||
}
|
||||
})
|
||||
})
|
||||
|
||||
// PodPriorityResolution e2e suite: creates pause pods that reference the
// system critical priority class names and checks that each created pod
// carries a non-nil Spec.Priority.
var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", func() {
|
||||
var cs clientset.Interface
|
||||
var ns string
|
||||
f := framework.NewDefaultFramework("sched-pod-priority")
|
||||
|
||||
BeforeEach(func() {
|
||||
cs = f.ClientSet
|
||||
ns = f.Namespace.Name
|
||||
|
||||
// NOTE(review): presumably ensures no testing namespace other than ns is still around — confirm against the framework helper.
err := framework.CheckTestingNSDeletedExcept(cs, ns)
|
||||
framework.ExpectNoError(err)
|
||||
})
|
||||
|
||||
// This test verifies that system critical priorities are created automatically and resolved properly.
|
||||
It("validates critical system priorities are created and resolved", func() {
|
||||
// Create pods that use system critical priorities and check that each
// created pod has its Spec.Priority populated.
|
||||
By("Create pods that use critical system priorities.")
|
||||
systemPriorityClasses := []string{
|
||||
scheduling.SystemNodeCritical, scheduling.SystemClusterCritical,
|
||||
}
|
||||
for i, spc := range systemPriorityClasses {
|
||||
pod := createPausePod(f, pausePodConfig{
|
||||
Name: fmt.Sprintf("pod%d-%v", i, spc),
|
||||
PriorityClassName: spc,
|
||||
})
|
||||
// A nil Priority would mean the class name was not resolved to a value.
Expect(pod.Spec.Priority).NotTo(BeNil())
|
||||
framework.Logf("Created pod: %v", pod.Name)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue