Add priority admission controller

pull/6/head
Bobby (Babak) Salamat 2017-07-19 22:49:39 -07:00
parent 83ca86cddc
commit de8689c99b
7 changed files with 548 additions and 3 deletions

View File

@ -120,7 +120,7 @@ export FLANNEL_NET=${FLANNEL_NET:-"172.16.0.0/16"}
# Admission Controllers to invoke prior to persisting objects in cluster
# If we included ResourceQuota, we should keep it at the end of the list to prevent incrementing quota usage prematurely.
export ADMISSION_CONTROL=${ADMISSION_CONTROL:-"Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultTolerationSeconds,ResourceQuota"}
export ADMISSION_CONTROL=${ADMISSION_CONTROL:-"Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultTolerationSeconds,Priority,ResourceQuota"}
# Extra options to set on the Docker command line.
# This is useful for setting --insecure-registry for local registries.

View File

@ -234,7 +234,7 @@ fi
# Admission Controllers to invoke prior to persisting objects in cluster
# If we included ResourceQuota, we should keep it at the end of the list to prevent incrementing quota usage prematurely.
ADMISSION_CONTROL=Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota
ADMISSION_CONTROL=Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,Priority,ResourceQuota
# Optional: if set to true kube-up will automatically check for existing resources and clean them up.
KUBE_UP_AUTOMATIC_CLEANUP=${KUBE_UP_AUTOMATIC_CLEANUP:-false}

View File

@ -267,7 +267,7 @@ if [ ${ENABLE_IP_ALIASES} = true ]; then
fi
# If we included ResourceQuota, we should keep it at the end of the list to prevent incrementing quota usage prematurely.
ADMISSION_CONTROL="${KUBE_ADMISSION_CONTROL:-Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,PodPreset,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota}"
ADMISSION_CONTROL="${KUBE_ADMISSION_CONTROL:-Initializers,NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,PodPreset,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,Priority,ResourceQuota}"
# Optional: if set to true kube-up will automatically check for existing resources and clean them up.
KUBE_UP_AUTOMATIC_CLEANUP=${KUBE_UP_AUTOMATIC_CLEANUP:-false}

View File

@ -43,6 +43,7 @@ import (
"k8s.io/kubernetes/plugin/pkg/admission/podnodeselector"
"k8s.io/kubernetes/plugin/pkg/admission/podpreset"
"k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
podpriority "k8s.io/kubernetes/plugin/pkg/admission/priority"
"k8s.io/kubernetes/plugin/pkg/admission/resourcequota"
"k8s.io/kubernetes/plugin/pkg/admission/security/podsecuritypolicy"
"k8s.io/kubernetes/plugin/pkg/admission/securitycontext/scdeny"
@ -73,6 +74,7 @@ func RegisterAllAdmissionPlugins(plugins *admission.Plugins) {
podtolerationrestriction.Register(plugins)
resourcequota.Register(plugins)
podsecuritypolicy.Register(plugins)
podpriority.Register(plugins)
scdeny.Register(plugins)
serviceaccount.Register(plugins)
setdefault.Register(plugins)

View File

@ -18,6 +18,13 @@ package scheduling
import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
const (
// DefaultPriorityWhenNoDefaultClassExists is used to set priority of pods
// that do not specify any priority class and there is no priority class
// marked as default.
DefaultPriorityWhenNoDefaultClassExists = 0
)
// +genclient
// +genclient:nonNamespaced
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

View File

@ -0,0 +1,201 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package admission
import (
"fmt"
"io"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apiserver/pkg/admission"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/apis/scheduling"
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion"
schedulinglisters "k8s.io/kubernetes/pkg/client/listers/scheduling/internalversion"
kubeapiserveradmission "k8s.io/kubernetes/pkg/kubeapiserver/admission"
)
const (
pluginName = "Priority"
// HighestUserDefinablePriority is the highest priority for user defined priority classes. Priority values larger than 1 billion are reserved for Kubernetes system use.
HighestUserDefinablePriority = 1000000000
// SystemCriticalPriority is the beginning of the range of priority values for critical system components.
SystemCriticalPriority = 2 * HighestUserDefinablePriority
)
// SystemPriorityClasses defines special priority classes which are used by system critical pods that should not be preempted by workload pods.
var SystemPriorityClasses = map[string]int32{
"system-cluster-critical": SystemCriticalPriority,
"system-node-critical": SystemCriticalPriority + 1000,
}
// Register registers a plugin
func Register(plugins *admission.Plugins) {
plugins.Register(pluginName, func(config io.Reader) (admission.Interface, error) {
return NewPlugin(), nil
})
}
// priorityPlugin is an implementation of admission.Interface.
type priorityPlugin struct {
*admission.Handler
client internalclientset.Interface
lister schedulinglisters.PriorityClassLister
}
var _ = kubeapiserveradmission.WantsInternalKubeInformerFactory(&priorityPlugin{})
var _ = kubeapiserveradmission.WantsInternalKubeClientSet(&priorityPlugin{})
// NewPlugin creates a new priority admission plugin.
func NewPlugin() admission.Interface {
return &priorityPlugin{
Handler: admission.NewHandler(admission.Create, admission.Update),
}
}
func (p *priorityPlugin) Validate() error {
if p.client == nil {
return fmt.Errorf("%s requires a client", pluginName)
}
if p.lister == nil {
return fmt.Errorf("%s requires a lister", pluginName)
}
return nil
}
func (p *priorityPlugin) SetInternalKubeClientSet(client internalclientset.Interface) {
p.client = client
}
func (p *priorityPlugin) SetInternalKubeInformerFactory(f informers.SharedInformerFactory) {
priorityInformer := f.Scheduling().InternalVersion().PriorityClasses()
p.lister = priorityInformer.Lister()
p.SetReadyFunc(priorityInformer.Informer().HasSynced)
}
var (
podResource = api.Resource("pods")
priorityClassResource = api.Resource("priorityclasses")
)
// Admit checks Pods and PriorityClasses and admits or rejects them. It also resolved the priority of pods based on their PriorityClass.
func (p *priorityPlugin) Admit(a admission.Attributes) error {
operation := a.GetOperation()
// Ignore all calls to subresources or resources other than pods.
// Ignore all operations other than Create and Update.
if len(a.GetSubresource()) != 0 || (operation != admission.Create && operation != admission.Update) {
return nil
}
switch a.GetResource().GroupResource() {
case podResource:
return p.admitPod(a)
case priorityClassResource:
return p.admitPriorityClass(a)
default:
return nil
}
}
func (p *priorityPlugin) admitPod(a admission.Attributes) error {
operation := a.GetOperation()
pod, ok := a.GetObject().(*api.Pod)
if !ok {
return errors.NewBadRequest("Resource was marked with kind Pod but was unable to be converted")
}
if _, isMirrorPod := pod.Annotations[api.MirrorPodAnnotationKey]; isMirrorPod {
return nil
}
// Make sure that the user has not set `priority` at the time of pod creation.
if operation == admission.Create && pod.Spec.Priority != nil {
return admission.NewForbidden(a, fmt.Errorf("The integer value of priority must not be provided in pod spec. The system populates the value from the given PriorityClass name"))
}
var priority int32
if len(pod.Spec.PriorityClassName) == 0 {
dpc, err := p.findDefaultPriorityClass()
if err != nil {
return fmt.Errorf("Failed to get default priority class: %v", err)
}
if dpc != nil {
priority = dpc.Value
} else {
priority = scheduling.DefaultPriorityWhenNoDefaultClassExists
}
} else {
// First try to resolve by system priority classes.
priority, ok = SystemPriorityClasses[pod.Spec.PriorityClassName]
if !ok {
// Now that we didn't find any system priority, try resolving by user defined priority classes.
pc, err := p.lister.Get(pod.Spec.PriorityClassName)
if err != nil {
return fmt.Errorf("Failed to get default priority class %s: %v", pod.Spec.PriorityClassName, err)
}
if pc == nil {
return admission.NewForbidden(a, fmt.Errorf("No PriorityClass with name %v was found", pod.Spec.PriorityClassName))
}
priority = pc.Value
}
}
pod.Spec.Priority = &priority
return nil
}
func (p *priorityPlugin) admitPriorityClass(a admission.Attributes) error {
operation := a.GetOperation()
pc, ok := a.GetObject().(*scheduling.PriorityClass)
if !ok {
return errors.NewBadRequest("Resource was marked with kind Pod but was unable to be converted")
}
if pc.Value > HighestUserDefinablePriority {
return admission.NewForbidden(a, fmt.Errorf("Maximum allowed value of a user defined priority is %v", HighestUserDefinablePriority))
}
if _, ok := SystemPriorityClasses[pc.Name]; ok {
return admission.NewForbidden(a, fmt.Errorf("The name of the priority class is a reserved name for system use only: %v", pc.Name))
}
// If the new PriorityClass tries to be the default priority, make sure that no other priority class is marked as default.
if pc.GlobalDefault {
dpc, err := p.findDefaultPriorityClass()
if err != nil {
return fmt.Errorf("Failed to get default priority class: %v", err)
}
if dpc != nil {
// Throw an error if a second default priority class is being created, or an existing priority class is being marked as default, while another default already exists.
if operation == admission.Create || (operation == admission.Update && dpc.GetName() != pc.GetName()) {
return admission.NewForbidden(a, fmt.Errorf("PriorityClass %v is already marked as default. Only one default can exist", dpc.GetName()))
}
}
}
return nil
}
func (p *priorityPlugin) findDefaultPriorityClass() (*scheduling.PriorityClass, error) {
list, err := p.lister.List(labels.Everything())
if err != nil {
return nil, err
}
for _, pci := range list {
if pci.GlobalDefault {
return pci, nil
}
}
return nil, nil
}

View File

@ -0,0 +1,335 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package admission
import (
"testing"
"github.com/golang/glog"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apiserver/pkg/admission"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/apis/scheduling"
informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion"
"k8s.io/kubernetes/pkg/controller"
)
func addPriorityClasses(ctrl *priorityPlugin, priorityClasses []*scheduling.PriorityClass) {
informerFactory := informers.NewSharedInformerFactory(nil, controller.NoResyncPeriodFunc())
ctrl.SetInternalKubeInformerFactory(informerFactory)
// First add the existing classes to the cache.
for _, c := range priorityClasses {
informerFactory.Scheduling().InternalVersion().PriorityClasses().Informer().GetStore().Add(c)
}
}
var defaultClass1 = &scheduling.PriorityClass{
TypeMeta: metav1.TypeMeta{
Kind: "PriorityClass",
},
ObjectMeta: metav1.ObjectMeta{
Name: "default1",
},
Value: 1000,
GlobalDefault: true,
}
var defaultClass2 = &scheduling.PriorityClass{
TypeMeta: metav1.TypeMeta{
Kind: "PriorityClass",
},
ObjectMeta: metav1.ObjectMeta{
Name: "default2",
},
Value: 2000,
GlobalDefault: true,
}
var nondefaultClass1 = &scheduling.PriorityClass{
TypeMeta: metav1.TypeMeta{
Kind: "PriorityClass",
},
ObjectMeta: metav1.ObjectMeta{
Name: "nondefault1",
},
Value: 2000,
Description: "Just a test priority class",
}
func TestPriorityClassAdmission(t *testing.T) {
var tooHighPriorityClass = &scheduling.PriorityClass{
TypeMeta: metav1.TypeMeta{
Kind: "PriorityClass",
},
ObjectMeta: metav1.ObjectMeta{
Name: "toohighclass",
},
Value: HighestUserDefinablePriority + 1,
Description: "Just a test priority class",
}
var systemClass = &scheduling.PriorityClass{
TypeMeta: metav1.TypeMeta{
Kind: "PriorityClass",
},
ObjectMeta: metav1.ObjectMeta{
Name: "system-cluster-critical",
},
Value: HighestUserDefinablePriority + 1,
Description: "Name conflicts with system priority class names",
}
tests := []struct {
name string
existingClasses []*scheduling.PriorityClass
newClass *scheduling.PriorityClass
expectError bool
}{
{
"one default class",
[]*scheduling.PriorityClass{},
defaultClass1,
false,
},
{
"more than one default classes",
[]*scheduling.PriorityClass{defaultClass1},
defaultClass2,
true,
},
{
"too high PriorityClass value",
[]*scheduling.PriorityClass{},
tooHighPriorityClass,
true,
},
{
"system name conflict",
[]*scheduling.PriorityClass{},
systemClass,
true,
},
}
for _, test := range tests {
glog.V(4).Infof("starting test %q", test.name)
ctrl := NewPlugin().(*priorityPlugin)
// Add existing priority classes.
addPriorityClasses(ctrl, test.existingClasses)
// Now add the new class.
attrs := admission.NewAttributesRecord(
test.newClass,
nil,
api.Kind("PriorityClass").WithVersion("version"),
"",
"",
api.Resource("priorityclasses").WithVersion("version"),
"",
admission.Create,
nil,
)
err := ctrl.Admit(attrs)
glog.Infof("Got %v", err)
if err != nil && !test.expectError {
t.Errorf("Test %q: unexpected error received: %v", test.name, err)
}
if err == nil && test.expectError {
t.Errorf("Test %q: expected error and no error recevied", test.name)
}
}
}
var intPriority = int32(1000)
func TestPodAdmission(t *testing.T) {
containerName := "container"
pods := []*api.Pod{
// pod[0]: Pod with a proper priority class.
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-w-priorityclass",
Namespace: "namespace",
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: containerName,
},
},
PriorityClassName: "default1",
},
},
// pod[1]: Pod with no priority class
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-wo-priorityclass",
Namespace: "namespace",
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: containerName,
},
},
},
},
// pod[2]: Pod with non-existing priority class
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-w-non-existing-priorityclass",
Namespace: "namespace",
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: containerName,
},
},
PriorityClassName: "non-existing",
},
},
// pod[3]: Pod with integer value of priority
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-w-integer-priority",
Namespace: "namespace",
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: containerName,
},
},
PriorityClassName: "default1",
Priority: &intPriority,
},
},
// pod[4]: Pod with a system priority class name
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-w-system-priority",
Namespace: "namespace",
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: containerName,
},
},
PriorityClassName: "system-cluster-critical",
},
},
}
tests := []struct {
name string
existingClasses []*scheduling.PriorityClass
// Admission controller changes pod spec. So, we take an api.Pod instead of
// *api.Pod to avoid interfering with other tests.
pod api.Pod
expectedPriority int32
expectError bool
}{
{
"Pod with priority class",
[]*scheduling.PriorityClass{defaultClass1, nondefaultClass1},
*pods[0],
1000,
false,
},
{
"Pod without priority class",
[]*scheduling.PriorityClass{defaultClass1},
*pods[1],
1000,
false,
},
{
"pod without priority class and no existing priority class",
[]*scheduling.PriorityClass{},
*pods[1],
scheduling.DefaultPriorityWhenNoDefaultClassExists,
false,
},
{
"pod without priority class and no default class",
[]*scheduling.PriorityClass{nondefaultClass1},
*pods[1],
scheduling.DefaultPriorityWhenNoDefaultClassExists,
false,
},
{
"pod with a system priority class",
[]*scheduling.PriorityClass{},
*pods[4],
SystemCriticalPriority,
false,
},
{
"Pod with non-existing priority class",
[]*scheduling.PriorityClass{defaultClass1, nondefaultClass1},
*pods[2],
0,
true,
},
{
"pod with integer priority",
[]*scheduling.PriorityClass{},
*pods[3],
0,
true,
},
}
for _, test := range tests {
glog.V(4).Infof("starting test %q", test.name)
ctrl := NewPlugin().(*priorityPlugin)
// Add existing priority classes.
addPriorityClasses(ctrl, test.existingClasses)
// Create pod.
attrs := admission.NewAttributesRecord(
&test.pod,
nil,
api.Kind("Pod").WithVersion("version"),
test.pod.ObjectMeta.Namespace,
"",
api.Resource("pods").WithVersion("version"),
"",
admission.Create,
nil,
)
err := ctrl.Admit(attrs)
glog.Infof("Got %v", err)
if !test.expectError {
if err != nil {
t.Errorf("Test %q: unexpected error received: %v", test.name, err)
}
if *test.pod.Spec.Priority != test.expectedPriority {
t.Errorf("Test %q: expected priority is %d, but got %d.", test.name, test.expectedPriority, *test.pod.Spec.Priority)
}
}
if err == nil && test.expectError {
t.Errorf("Test %q: expected error and no error recevied", test.name)
}
}
}