mirror of https://github.com/k3s-io/k3s
Merge pull request #35342 from timstclair/rejected
Automatic merge from submit-queue

[AppArmor] Hold bad AppArmor pods in pending rather than rejecting

Fixes https://github.com/kubernetes/kubernetes/issues/32837

Overview of the fix: If the Kubelet needs to reject a Pod for a reason that the control plane doesn't understand (e.g. which AppArmor profiles are installed on the node), then it might continuously try to run the pod on the same rejecting node. This change adds a concept of "soft rejection", in which the Pod is admitted but not allowed to run (and therefore held in a Pending state). This prevents the pod from being retried on other nodes, and also prevents the high churn of repeated rejections. It is consistent with how other missing local resources (e.g. volumes) are handled. A side effect of the change is that Pods which are not initially runnable will be retried. This is desired behavior, since it avoids a race condition when a new node is brought up but the AppArmor profiles have not yet been loaded on it.

```release-note
Pods with invalid AppArmor configurations will be held in a Pending state, rather than rejected (failed). Check the pod status message to find out why it is not running.
```

@kubernetes/sig-node @timothysc @rrati @davidopp
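To illustrate the idea behind the diff, here is a standalone sketch of "soft admission", not the Kubelet code itself: the type names loosely mirror the `lifecycle` package, while `syncPod`, `appArmorStub`, and the string-keyed pods are simplified stand-ins for this example only.

```go
package main

import "fmt"

// PodAdmitResult loosely mirrors lifecycle.PodAdmitResult: Admit says whether
// the pod may run; Reason and Message explain a rejection.
type PodAdmitResult struct {
	Admit   bool
	Reason  string
	Message string
}

// PodAdmitHandler is anything that can veto a pod. The real handlers receive
// *PodAdmitAttributes; a plain pod name keeps this sketch self-contained.
type PodAdmitHandler interface {
	Admit(pod string) PodAdmitResult
}

// kubelet keeps two handler lists: hard admission (a rejection fails the pod)
// and soft admission (a rejection holds the pod in Pending and is re-evaluated
// on every sync).
type kubelet struct {
	admitHandlers     []PodAdmitHandler
	softAdmitHandlers []PodAdmitHandler
}

// canRunPod consults the soft handlers; the first rejection wins.
func (kl *kubelet) canRunPod(pod string) PodAdmitResult {
	for _, h := range kl.softAdmitHandlers {
		if r := h.Admit(pod); !r.Admit {
			return r
		}
	}
	return PodAdmitResult{Admit: true}
}

// syncPod shows how a soft rejection surfaces: the pod is not started, its
// status reason is recorded, and an error tells the sync loop to back off.
func (kl *kubelet) syncPod(pod string) error {
	if runnable := kl.canRunPod(pod); !runnable.Admit {
		fmt.Printf("pod %q held Pending: %s (%s)\n", pod, runnable.Reason, runnable.Message)
		return fmt.Errorf("pod cannot be run: %s", runnable.Message)
	}
	fmt.Printf("pod %q started\n", pod)
	return nil
}

// appArmorStub stands in for the AppArmor validator: it rejects pods whose
// requested profile is not loaded on the node.
type appArmorStub struct{ loadedProfiles map[string]bool }

func (a appArmorStub) Admit(pod string) PodAdmitResult {
	if !a.loadedProfiles[pod] {
		return PodAdmitResult{Admit: false, Reason: "AppArmor", Message: "required profile is not loaded"}
	}
	return PodAdmitResult{Admit: true}
}

func main() {
	kl := &kubelet{
		softAdmitHandlers: []PodAdmitHandler{appArmorStub{loadedProfiles: map[string]bool{"good-pod": true}}},
	}
	_ = kl.syncPod("good-pod") // starts normally
	_ = kl.syncPod("bad-pod")  // held in Pending with Reason "AppArmor"; the sync loop retries later
}
```

The design point is the same one the diff below implements: a soft rejection never fails the pod, it only records a reason and keeps the pod Pending until a handler admits it.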
commit 649c0ddd0e
@@ -1426,6 +1426,10 @@ func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod, gra
 // NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted.
 func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
+    // Short circuit if there's nothing to kill.
+    if len(runningPod.Containers) == 0 {
+        return
+    }
     // Send the kills in parallel since they may take a long time.
     // There may be len(runningPod.Containers) or len(runningPod.Containers)-1 of result in the channel
     containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers))

@@ -757,7 +757,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
         return nil, fmt.Errorf("failed to initialize eviction manager: %v", err)
     }
     klet.evictionManager = evictionManager
-    klet.AddPodAdmitHandler(evictionAdmitHandler)
+    klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)

     // add sysctl admission
     runtimeSupport, err := sysctl.NewRuntimeAdmitHandler(klet.containerRuntime)
@@ -775,9 +775,9 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
     if err != nil {
         return nil, err
     }
-    klet.AddPodAdmitHandler(runtimeSupport)
-    klet.AddPodAdmitHandler(safeWhitelist)
-    klet.AddPodAdmitHandler(unsafeWhitelist)
+    klet.admitHandlers.AddPodAdmitHandler(runtimeSupport)
+    klet.admitHandlers.AddPodAdmitHandler(safeWhitelist)
+    klet.admitHandlers.AddPodAdmitHandler(unsafeWhitelist)

     // enable active deadline handler
     activeDeadlineHandler, err := newActiveDeadlineHandler(klet.statusManager, kubeDeps.Recorder, klet.clock)
@@ -787,14 +787,15 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
     klet.AddPodSyncLoopHandler(activeDeadlineHandler)
     klet.AddPodSyncHandler(activeDeadlineHandler)

-    klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime)
-    klet.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
-    klet.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay))
+    klet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay))
     // apply functional Option's
     for _, opt := range kubeDeps.Options {
         opt(klet)
     }
+
+    klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime)
+    klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))

     // Finally, put the most recent version of the config on the Kubelet, so
     // people can see how it was configured.
     klet.kubeletConfiguration = *kubeCfg
@@ -1052,7 +1053,13 @@ type Kubelet struct {

     // TODO: think about moving this to be centralized in PodWorkers in follow-on.
     // the list of handlers to call during pod admission.
-    lifecycle.PodAdmitHandlers
+    admitHandlers lifecycle.PodAdmitHandlers
+
+    // softAdmithandlers are applied to the pod after it is admitted by the Kubelet, but before it is
+    // run. A pod rejected by a softAdmitHandler will be left in a Pending state indefinitely. If a
+    // rejected pod should not be recreated, or the scheduler is not aware of the rejection rule, the
+    // admission rule should be applied by a softAdmitHandler.
+    softAdmitHandlers lifecycle.PodAdmitHandlers

     // the list of handlers to call during pod sync loop.
     lifecycle.PodSyncLoopHandlers
@@ -1386,17 +1393,42 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
         metrics.PodStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime))
     }

+    runnable := kl.canRunPod(pod)
+    if !runnable.Admit {
+        // Pod is not runnable; update the Pod and Container statuses to why.
+        apiPodStatus.Reason = runnable.Reason
+        apiPodStatus.Message = runnable.Message
+        // Waiting containers are not creating.
+        const waitingReason = "Blocked"
+        for _, cs := range apiPodStatus.InitContainerStatuses {
+            if cs.State.Waiting != nil {
+                cs.State.Waiting.Reason = waitingReason
+            }
+        }
+        for _, cs := range apiPodStatus.ContainerStatuses {
+            if cs.State.Waiting != nil {
+                cs.State.Waiting.Reason = waitingReason
+            }
+        }
+    }
+
     // Update status in the status manager
     kl.statusManager.SetPodStatus(pod, apiPodStatus)

     // Kill pod if it should not be running
-    if errOuter := canRunPod(pod); errOuter != nil || pod.DeletionTimestamp != nil || apiPodStatus.Phase == api.PodFailed {
-        if errInner := kl.killPod(pod, nil, podStatus, nil); errInner != nil {
-            errOuter = fmt.Errorf("error killing pod: %v", errInner)
-            utilruntime.HandleError(errOuter)
+    if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == api.PodFailed {
+        var syncErr error
+        if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
+            syncErr = fmt.Errorf("error killing pod: %v", err)
+            utilruntime.HandleError(syncErr)
+        } else {
+            if !runnable.Admit {
+                // There was no error killing the pod, but the pod cannot be run.
+                // Return an error to signal that the sync loop should back off.
+                syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
+            }
         }
-        // there was no error killing the pod, but the pod cannot be run, so we return that err (if any)
-        return errOuter
+        return syncErr
     }

     // Create Cgroups for the pod and apply resource parameters
@@ -1619,7 +1651,7 @@ func (kl *Kubelet) canAdmitPod(pods []*api.Pod, pod *api.Pod) (bool, string, str
     // TODO: move out of disk check into a pod admitter
     // TODO: out of resource eviction should have a pod admitter call-out
     attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods}
-    for _, podAdmitHandler := range kl.PodAdmitHandlers {
+    for _, podAdmitHandler := range kl.admitHandlers {
         if result := podAdmitHandler.Admit(attrs); !result.Admit {
             return false, result.Reason, result.Message
         }
@@ -1634,6 +1666,29 @@ func (kl *Kubelet) canAdmitPod(pods []*api.Pod, pod *api.Pod) (bool, string, str
     return true, "", ""
 }

+func (kl *Kubelet) canRunPod(pod *api.Pod) lifecycle.PodAdmitResult {
+    attrs := &lifecycle.PodAdmitAttributes{Pod: pod}
+    // Get "OtherPods". Rejected pods are failed, so only include admitted pods that are alive.
+    attrs.OtherPods = kl.filterOutTerminatedPods(kl.podManager.GetPods())
+
+    for _, handler := range kl.softAdmitHandlers {
+        if result := handler.Admit(attrs); !result.Admit {
+            return result
+        }
+    }
+
+    // TODO: Refactor as a soft admit handler.
+    if err := canRunPod(pod); err != nil {
+        return lifecycle.PodAdmitResult{
+            Admit:   false,
+            Reason:  "Forbidden",
+            Message: err.Error(),
+        }
+    }
+
+    return lifecycle.PodAdmitResult{Admit: true}
+}
+
 // syncLoop is the main loop for processing changes. It watches for changes from
 // three channels (file, apiserver, and http) and creates a union of them. For
 // any new change seen, will run a sync against desired state and running state. If

@@ -220,9 +220,9 @@ func newTestKubeletWithImageList(
     require.NoError(t, err, "Failed to initialize eviction manager")

     kubelet.evictionManager = evictionManager
-    kubelet.AddPodAdmitHandler(evictionAdmitHandler)
+    kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
     // Add this as cleanup predicate pod admitter
-    kubelet.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(kubelet.getNodeAnyWay))
+    kubelet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(kubelet.getNodeAnyWay))

     plug := &volumetest.FakeVolumePlugin{PluginName: "fake", Host: nil}
     kubelet.volumePluginMgr, err =
@@ -1823,7 +1823,7 @@ func TestHandlePodAdditionsInvokesPodAdmitHandlers(t *testing.T) {
     podToAdmit := pods[1]
     podsToReject := []*api.Pod{podToReject}

-    kl.AddPodAdmitHandler(&testPodAdmitHandler{podsToReject: podsToReject})
+    kl.admitHandlers.AddPodAdmitHandler(&testPodAdmitHandler{podsToReject: podsToReject})

     kl.HandlePodAdditions(pods)
     // Check pod status stored in the status map.

@@ -149,6 +149,11 @@ type appArmorAdmitHandler struct {
 }

 func (a *appArmorAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult {
+    // If the pod is already running or terminated, no need to recheck AppArmor.
+    if attrs.Pod.Status.Phase != api.PodPending {
+        return PodAdmitResult{Admit: true}
+    }
+
     err := a.Validate(attrs.Pod)
     if err == nil {
         return PodAdmitResult{Admit: true}

@@ -121,7 +121,7 @@ func TestRunOnce(t *testing.T) {
         t.Fatalf("failed to initialize eviction manager: %v", err)
     }
     kb.evictionManager = evictionManager
-    kb.AddPodAdmitHandler(evictionAdmitHandler)
+    kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
     if err := kb.setupDataDirs(); err != nil {
         t.Errorf("Failed to init data dirs: %v", err)
     }

@@ -108,5 +108,7 @@ go_test(
         "//vendor:github.com/onsi/gomega/gstruct",
         "//vendor:github.com/onsi/gomega/types",
         "//vendor:github.com/spf13/pflag",
+        "//vendor:k8s.io/client-go/pkg/api/errors",
+        "//vendor:k8s.io/client-go/pkg/api/unversioned",
     ],
 )

@@ -26,8 +26,11 @@ import (
     "strconv"
     "strings"

+    "k8s.io/client-go/pkg/api/errors"
+    "k8s.io/client-go/pkg/api/unversioned"
     "k8s.io/kubernetes/pkg/api"
     "k8s.io/kubernetes/pkg/security/apparmor"
+    "k8s.io/kubernetes/pkg/watch"
     "k8s.io/kubernetes/test/e2e/framework"

     "github.com/davecgh/go-spew/spew"
@@ -46,12 +49,11 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {
     f := framework.NewDefaultFramework("apparmor-test")

     It("should reject an unloaded profile", func() {
-        status := runAppArmorTest(f, apparmor.ProfileNamePrefix+"non-existant-profile")
-        Expect(status.Phase).To(Equal(api.PodFailed), "PodStatus: %+v", status)
-        Expect(status.Reason).To(Equal("AppArmor"), "PodStatus: %+v", status)
+        status := runAppArmorTest(f, false, apparmor.ProfileNamePrefix+"non-existant-profile")
+        expectSoftRejection(status)
     })
     It("should enforce a profile blocking writes", func() {
-        status := runAppArmorTest(f, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"deny-write")
+        status := runAppArmorTest(f, true, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"deny-write")
         if len(status.ContainerStatuses) == 0 {
             framework.Failf("Unexpected pod status: %s", spew.Sdump(status))
             return
@@ -61,7 +63,7 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {

     })
     It("should enforce a permissive profile", func() {
-        status := runAppArmorTest(f, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"audit-write")
+        status := runAppArmorTest(f, true, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"audit-write")
         if len(status.ContainerStatuses) == 0 {
             framework.Failf("Unexpected pod status: %s", spew.Sdump(status))
             return
@@ -75,9 +77,8 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {
     f := framework.NewDefaultFramework("apparmor-test")

     It("should reject a pod with an AppArmor profile", func() {
-        status := runAppArmorTest(f, apparmor.ProfileRuntimeDefault)
-        Expect(status.Phase).To(Equal(api.PodFailed), "PodStatus: %+v", status)
-        Expect(status.Reason).To(Equal("AppArmor"), "PodStatus: %+v", status)
+        status := runAppArmorTest(f, false, apparmor.ProfileRuntimeDefault)
+        expectSoftRejection(status)
     })
 })
 }
@@ -136,11 +137,31 @@ func loadTestProfiles() error {
     return nil
 }

-func runAppArmorTest(f *framework.Framework, profile string) api.PodStatus {
+func runAppArmorTest(f *framework.Framework, shouldRun bool, profile string) api.PodStatus {
     pod := createPodWithAppArmor(f, profile)
-    // The pod needs to start before it stops, so wait for the longer start timeout.
-    framework.ExpectNoError(framework.WaitTimeoutForPodNoLongerRunningInNamespace(
-        f.ClientSet, pod.Name, f.Namespace.Name, "", framework.PodStartTimeout))
+    if shouldRun {
+        // The pod needs to start before it stops, so wait for the longer start timeout.
+        framework.ExpectNoError(framework.WaitTimeoutForPodNoLongerRunningInNamespace(
+            f.ClientSet, pod.Name, f.Namespace.Name, "", framework.PodStartTimeout))
+    } else {
+        // Pod should remain in the pending state. Wait for the Reason to be set to "AppArmor".
+        w, err := f.PodClient().Watch(api.SingleObject(api.ObjectMeta{Name: pod.Name}))
+        framework.ExpectNoError(err)
+        _, err = watch.Until(framework.PodStartTimeout, w, func(e watch.Event) (bool, error) {
+            switch e.Type {
+            case watch.Deleted:
+                return false, errors.NewNotFound(unversioned.GroupResource{Resource: "pods"}, pod.Name)
+            }
+            switch t := e.Object.(type) {
+            case *api.Pod:
+                if t.Status.Reason == "AppArmor" {
+                    return true, nil
+                }
+            }
+            return false, nil
+        })
+        framework.ExpectNoError(err)
+    }
     p, err := f.PodClient().Get(pod.Name)
     framework.ExpectNoError(err)
     return p.Status
@@ -166,6 +187,14 @@ func createPodWithAppArmor(f *framework.Framework, profile string) *api.Pod {
     return f.PodClient().Create(pod)
 }

+func expectSoftRejection(status api.PodStatus) {
+    args := []interface{}{"PodStatus: %+v", status}
+    Expect(status.Phase).To(Equal(api.PodPending), args...)
+    Expect(status.Reason).To(Equal("AppArmor"), args...)
+    Expect(status.Message).To(ContainSubstring("AppArmor"), args...)
+    Expect(status.ContainerStatuses[0].State.Waiting.Reason).To(Equal("Blocked"), args...)
+}
+
 func isAppArmorEnabled() bool {
     // TODO(timstclair): Pass this through the image setup rather than hardcoding.
     if strings.Contains(framework.TestContext.NodeName, "-gci-dev-") {