Merge pull request #65593 from bsalamat/priority_admission

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Limit usage of system critical priority classes to the system namespace

**What this PR does / why we need it**:
Changes Priority admission controller to limit usage of system critical priority classes to the system namespace. This change is needed to mitigate the risk of creating many pods at system critical priority levels that could cause preemption of system critical components.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #

ref/ #65557

**Special notes for your reviewer**:

**Release note**:

```release-note
Limit the usage of system-node-critical and system-cluster-critical priority classes to kube-system namespace.
```

/sig scheduling
pull/8/head
Kubernetes Submit Queue 2018-07-02 01:09:06 -07:00 committed by GitHub
commit 7496c64b46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 67 additions and 10 deletions

View File

@ -38,6 +38,7 @@ go_library(
"//pkg/kubeapiserver/admission:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",

View File

@ -21,6 +21,7 @@ import (
"io"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apiserver/pkg/admission"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -134,6 +135,20 @@ func (p *priorityPlugin) Validate(a admission.Attributes) error {
}
}
// priorityClassPermittedInNamespace returns true if we allow the given priority class name in the
// given namespace. It currently checks that system priorities are created only in the system namespace.
func priorityClassPermittedInNamespace(priorityClassName string, namespace string) bool {
// Only allow system priorities in the system namespace. This is to prevent abuse or incorrect
// usage of these priorities. Pods created at these priorities could preempt system critical
// components.
for _, spc := range scheduling.SystemPriorityClasses() {
if spc.Name == priorityClassName && namespace != metav1.NamespaceSystem {
return false
}
}
return true
}
// admitPod makes sure a new pod does not set spec.Priority field. It also makes sure that the PriorityClassName exists if it is provided and resolves the pod priority from the PriorityClassName.
func (p *priorityPlugin) admitPod(a admission.Attributes) error {
operation := a.GetOperation()
@ -162,6 +177,11 @@ func (p *priorityPlugin) admitPod(a admission.Attributes) error {
return fmt.Errorf("failed to get default priority class: %v", err)
}
} else {
pcName := pod.Spec.PriorityClassName
if !priorityClassPermittedInNamespace(pcName, a.GetNamespace()) {
return admission.NewForbidden(a, fmt.Errorf("pods with %v priorityClass is not permitted in %v namespace", pcName, a.GetNamespace()))
}
// Try resolving the priority class name.
pc, err := p.lister.Get(pod.Spec.PriorityClassName)
if err != nil {

View File

@ -314,7 +314,7 @@ func TestPodAdmission(t *testing.T) {
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-w-system-priority",
Namespace: "namespace",
Namespace: metav1.NamespaceSystem,
},
Spec: api.PodSpec{
Containers: []api.Container{
@ -329,7 +329,7 @@ func TestPodAdmission(t *testing.T) {
{
ObjectMeta: metav1.ObjectMeta{
Name: "mirror-pod-w-system-priority",
Namespace: "namespace",
Namespace: metav1.NamespaceSystem,
Annotations: map[string]string{api.MirrorPodAnnotationKey: ""},
},
Spec: api.PodSpec{
@ -374,6 +374,21 @@ func TestPodAdmission(t *testing.T) {
},
},
},
// pod[8]: Pod with a system priority class name in non-system namespace
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-w-system-priority-in-nonsystem-namespace",
Namespace: "non-system-namespace",
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: containerName,
},
},
PriorityClassName: scheduling.SystemClusterCritical,
},
},
}
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
@ -459,6 +474,13 @@ func TestPodAdmission(t *testing.T) {
scheduling.SystemCriticalPriority,
false,
},
{
"pod with system critical priority in non-system namespace",
[]*scheduling.PriorityClass{systemClusterCritical},
*pods[8],
scheduling.SystemCriticalPriority,
true,
},
}
for _, test := range tests {
@ -485,8 +507,7 @@ func TestPodAdmission(t *testing.T) {
if !test.expectError {
if err != nil {
t.Errorf("Test %q: unexpected error received: %v", test.name, err)
}
if *test.pod.Spec.Priority != test.expectedPriority {
} else if *test.pod.Spec.Priority != test.expectedPriority {
t.Errorf("Test %q: expected priority is %d, but got %d.", test.name, test.expectedPriority, *test.pod.Spec.Priority)
}
}

View File

@ -47,6 +47,7 @@ var masterNodes sets.String
type pausePodConfig struct {
Name string
Namespace string
Affinity *v1.Affinity
Annotations, Labels, NodeSelector map[string]string
Resources *v1.ResourceRequirements
@ -602,9 +603,11 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
})
func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
var gracePeriod = int64(1)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: conf.Name,
Namespace: conf.Namespace,
Labels: conf.Labels,
Annotations: conf.Annotations,
OwnerReferences: conf.OwnerReferences,
@ -619,9 +622,10 @@ func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
Ports: conf.Ports,
},
},
Tolerations: conf.Tolerations,
NodeName: conf.NodeName,
PriorityClassName: conf.PriorityClassName,
Tolerations: conf.Tolerations,
NodeName: conf.NodeName,
PriorityClassName: conf.PriorityClassName,
TerminationGracePeriodSeconds: &gracePeriod,
},
}
if conf.Resources != nil {
@ -631,7 +635,11 @@ func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
}
func createPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(initPausePod(f, conf))
namespace := conf.Namespace
if len(namespace) == 0 {
namespace = f.Namespace.Name
}
pod, err := f.ClientSet.CoreV1().Pods(namespace).Create(initPausePod(f, conf))
framework.ExpectNoError(err)
return pod
}
@ -639,7 +647,7 @@ func createPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
func runPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
pod := createPausePod(f, conf)
framework.ExpectNoError(framework.WaitForPodRunningInNamespace(f.ClientSet, pod))
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(conf.Name, metav1.GetOptions{})
pod, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Get(conf.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
return pod
}

View File

@ -167,6 +167,7 @@ var _ = SIGDescribe("SchedulerPreemption [Serial] [Feature:PodPreemption]", func
// Create a critical pod and make sure it is scheduled.
runPausePod(f, pausePodConfig{
Name: "critical-pod",
Namespace: metav1.NamespaceSystem,
PriorityClassName: scheduling.SystemClusterCritical,
Resources: &v1.ResourceRequirements{
Requests: podRes,
@ -183,6 +184,9 @@ var _ = SIGDescribe("SchedulerPreemption [Serial] [Feature:PodPreemption]", func
framework.ExpectNoError(err)
Expect(livePod.DeletionTimestamp).To(BeNil())
}
// Clean-up the critical pod
err = f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Delete("critical-pod", metav1.NewDeleteOptions(0))
framework.ExpectNoError(err)
})
// This test verifies that when a high priority pod is pending and its
@ -334,10 +338,14 @@ var _ = SIGDescribe("PodPriorityResolution [Serial] [Feature:PodPreemption]", fu
for i, spc := range systemPriorityClasses {
pod := createPausePod(f, pausePodConfig{
Name: fmt.Sprintf("pod%d-%v", i, spc),
Namespace: metav1.NamespaceSystem,
PriorityClassName: spc,
})
Expect(pod.Spec.Priority).NotTo(BeNil())
framework.Logf("Created pod: %v", pod.Name)
// Clean-up the pod.
err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(pod.Name, metav1.NewDeleteOptions(0))
framework.ExpectNoError(err)
}
})
})

View File

@ -60,7 +60,6 @@ spec:
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
priorityClassName: system-cluster-critical
tolerations:
- key: "CriticalAddonsOnly"
operator: "Exists"