diff --git a/pkg/kubelet/dockertools/docker_manager.go b/pkg/kubelet/dockertools/docker_manager.go
index 63a0f290c0..50ac79c280 100644
--- a/pkg/kubelet/dockertools/docker_manager.go
+++ b/pkg/kubelet/dockertools/docker_manager.go
@@ -1426,6 +1426,10 @@ func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod, gra
 
 // NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted.
 func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
+	// Short circuit if there's nothing to kill.
+	if len(runningPod.Containers) == 0 {
+		return
+	}
 	// Send the kills in parallel since they may take a long time.
 	// There may be len(runningPod.Containers) or len(runningPod.Containers)-1 of result in the channel
 	containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers))
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 3ddf5edf1b..50a60ed8a3 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -757,7 +757,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 		return nil, fmt.Errorf("failed to initialize eviction manager: %v", err)
 	}
 	klet.evictionManager = evictionManager
-	klet.AddPodAdmitHandler(evictionAdmitHandler)
+	klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
 
 	// add sysctl admission
 	runtimeSupport, err := sysctl.NewRuntimeAdmitHandler(klet.containerRuntime)
@@ -775,9 +775,9 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 	if err != nil {
 		return nil, err
 	}
-	klet.AddPodAdmitHandler(runtimeSupport)
-	klet.AddPodAdmitHandler(safeWhitelist)
-	klet.AddPodAdmitHandler(unsafeWhitelist)
+	klet.admitHandlers.AddPodAdmitHandler(runtimeSupport)
+	klet.admitHandlers.AddPodAdmitHandler(safeWhitelist)
+	klet.admitHandlers.AddPodAdmitHandler(unsafeWhitelist)
 
 	// enable active deadline handler
 	activeDeadlineHandler, err := newActiveDeadlineHandler(klet.statusManager, kubeDeps.Recorder, klet.clock)
@@ -787,14 +787,15 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
 	klet.AddPodSyncLoopHandler(activeDeadlineHandler)
 	klet.AddPodSyncHandler(activeDeadlineHandler)
 
-	klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime)
-	klet.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
-	klet.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay))
+	klet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(klet.getNodeAnyWay))
 
 	// apply functional Option's
 	for _, opt := range kubeDeps.Options {
 		opt(klet)
 	}
+	klet.appArmorValidator = apparmor.NewValidator(kubeCfg.ContainerRuntime)
+	klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewAppArmorAdmitHandler(klet.appArmorValidator))
+
 	// Finally, put the most recent version of the config on the Kubelet, so
 	// people can see how it was configured.
 	klet.kubeletConfiguration = *kubeCfg
@@ -1052,7 +1053,13 @@ type Kubelet struct {
 
 	// TODO: think about moving this to be centralized in PodWorkers in follow-on.
 	// the list of handlers to call during pod admission.
-	lifecycle.PodAdmitHandlers
+	admitHandlers lifecycle.PodAdmitHandlers
+
+	// softAdmitHandlers are applied to the pod after it is admitted by the Kubelet, but before it is
+	// run. A pod rejected by a softAdmitHandler will be left in a Pending state indefinitely. If a
+	// rejected pod should not be recreated, or the scheduler is not aware of the rejection rule, the
+	// admission rule should be applied by a softAdmitHandler.
+	softAdmitHandlers lifecycle.PodAdmitHandlers
 
 	// the list of handlers to call during pod sync loop.
 	lifecycle.PodSyncLoopHandlers
@@ -1386,17 +1393,42 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 		metrics.PodStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime))
 	}
 
+	runnable := kl.canRunPod(pod)
+	if !runnable.Admit {
+		// Pod is not runnable; update the Pod and Container statuses to reflect why.
+		apiPodStatus.Reason = runnable.Reason
+		apiPodStatus.Message = runnable.Message
+		// Waiting containers are not being created.
+		const waitingReason = "Blocked"
+		for _, cs := range apiPodStatus.InitContainerStatuses {
+			if cs.State.Waiting != nil {
+				cs.State.Waiting.Reason = waitingReason
+			}
+		}
+		for _, cs := range apiPodStatus.ContainerStatuses {
+			if cs.State.Waiting != nil {
+				cs.State.Waiting.Reason = waitingReason
+			}
+		}
+	}
+
 	// Update status in the status manager
 	kl.statusManager.SetPodStatus(pod, apiPodStatus)
 
 	// Kill pod if it should not be running
-	if errOuter := canRunPod(pod); errOuter != nil || pod.DeletionTimestamp != nil || apiPodStatus.Phase == api.PodFailed {
-		if errInner := kl.killPod(pod, nil, podStatus, nil); errInner != nil {
-			errOuter = fmt.Errorf("error killing pod: %v", errInner)
-			utilruntime.HandleError(errOuter)
+	if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == api.PodFailed {
+		var syncErr error
+		if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
+			syncErr = fmt.Errorf("error killing pod: %v", err)
+			utilruntime.HandleError(syncErr)
+		} else {
+			if !runnable.Admit {
+				// There was no error killing the pod, but the pod cannot be run.
+				// Return an error to signal that the sync loop should back off.
+				syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
+			}
 		}
-		// there was no error killing the pod, but the pod cannot be run, so we return that err (if any)
-		return errOuter
+		return syncErr
 	}
 
 	// Create Cgroups for the pod and apply resource parameters
@@ -1619,7 +1651,7 @@ func (kl *Kubelet) canAdmitPod(pods []*api.Pod, pod *api.Pod) (bool, string, str
 	// TODO: move out of disk check into a pod admitter
 	// TODO: out of resource eviction should have a pod admitter call-out
 	attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods}
-	for _, podAdmitHandler := range kl.PodAdmitHandlers {
+	for _, podAdmitHandler := range kl.admitHandlers {
 		if result := podAdmitHandler.Admit(attrs); !result.Admit {
 			return false, result.Reason, result.Message
 		}
@@ -1634,6 +1666,29 @@ func (kl *Kubelet) canAdmitPod(pods []*api.Pod, pod *api.Pod) (bool, string, str
 	return true, "", ""
 }
 
+func (kl *Kubelet) canRunPod(pod *api.Pod) lifecycle.PodAdmitResult {
+	attrs := &lifecycle.PodAdmitAttributes{Pod: pod}
+	// Get "OtherPods". Rejected pods are failed, so only include admitted pods that are alive.
+	attrs.OtherPods = kl.filterOutTerminatedPods(kl.podManager.GetPods())
+
+	for _, handler := range kl.softAdmitHandlers {
+		if result := handler.Admit(attrs); !result.Admit {
+			return result
+		}
+	}
+
+	// TODO: Refactor as a soft admit handler.
+	if err := canRunPod(pod); err != nil {
+		return lifecycle.PodAdmitResult{
+			Admit:   false,
+			Reason:  "Forbidden",
+			Message: err.Error(),
+		}
+	}
+
+	return lifecycle.PodAdmitResult{Admit: true}
+}
+
 // syncLoop is the main loop for processing changes. It watches for changes from
 // three channels (file, apiserver, and http) and creates a union of them. For
 // any new change seen, will run a sync against desired state and running state. If
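Note: with this change the kubelet runs two handler chains: admitHandlers at admission time (canAdmitPod) and softAdmitHandlers just before the pod is run (canRunPod above). As a minimal, hypothetical sketch (not part of this change), an additional soft admit handler could look like the following; the handler name, annotation key, and Reason string are invented for illustration. A rejection from such a handler leaves the pod Pending with the handler's Reason and Message, and syncPod marks its waiting containers with reason "Blocked".

package kubelet

import "k8s.io/kubernetes/pkg/kubelet/lifecycle"

// exampleSoftAdmitHandler is a hypothetical soft admit handler. It rejects any pod
// carrying an annotation this node cannot honor, leaving the pod Pending rather than Failed.
type exampleSoftAdmitHandler struct{}

func (h *exampleSoftAdmitHandler) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
	if _, ok := attrs.Pod.Annotations["example.alpha.kubernetes.io/blocked"]; ok {
		return lifecycle.PodAdmitResult{
			Admit:   false,
			Reason:  "ExampleBlocked", // surfaces as apiPodStatus.Reason in syncPod
			Message: "pod requests a feature this node cannot honor (illustrative only)",
		}
	}
	return lifecycle.PodAdmitResult{Admit: true}
}

// Registration would mirror the AppArmor handler wiring in NewMainKubelet above:
//
//	klet.softAdmitHandlers.AddPodAdmitHandler(&exampleSoftAdmitHandler{})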
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index c9787a885b..e17807222d 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -220,9 +220,9 @@ func newTestKubeletWithImageList(
 	require.NoError(t, err, "Failed to initialize eviction manager")
 	kubelet.evictionManager = evictionManager
-	kubelet.AddPodAdmitHandler(evictionAdmitHandler)
+	kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
 	// Add this as cleanup predicate pod admitter
-	kubelet.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(kubelet.getNodeAnyWay))
+	kubelet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(kubelet.getNodeAnyWay))
 
 	plug := &volumetest.FakeVolumePlugin{PluginName: "fake", Host: nil}
 	kubelet.volumePluginMgr, err =
@@ -1823,7 +1823,7 @@ func TestHandlePodAdditionsInvokesPodAdmitHandlers(t *testing.T) {
 	podToAdmit := pods[1]
 	podsToReject := []*api.Pod{podToReject}
-	kl.AddPodAdmitHandler(&testPodAdmitHandler{podsToReject: podsToReject})
+	kl.admitHandlers.AddPodAdmitHandler(&testPodAdmitHandler{podsToReject: podsToReject})
 
 	kl.HandlePodAdditions(pods)
 	// Check pod status stored in the status map.
diff --git a/pkg/kubelet/lifecycle/handlers.go b/pkg/kubelet/lifecycle/handlers.go
index c4d6f4dfdc..a0c4ee6189 100644
--- a/pkg/kubelet/lifecycle/handlers.go
+++ b/pkg/kubelet/lifecycle/handlers.go
@@ -149,6 +149,11 @@ type appArmorAdmitHandler struct {
 }
 
 func (a *appArmorAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult {
+	// If the pod is already running or terminated, no need to recheck AppArmor.
+	if attrs.Pod.Status.Phase != api.PodPending {
+		return PodAdmitResult{Admit: true}
+	}
+
 	err := a.Validate(attrs.Pod)
 	if err == nil {
 		return PodAdmitResult{Admit: true}
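To illustrate the early return added above: a pod that is already Running (for example, one rediscovered after a kubelet restart) is admitted without re-validating its AppArmor profile, while a Pending pod still goes through the validator. The snippet below is a hypothetical sketch, not part of this change; the "docker" runtime argument and the Example name are assumptions.

package lifecycle_test

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/security/apparmor"
)

func ExampleNewAppArmorAdmitHandler() {
	handler := lifecycle.NewAppArmorAdmitHandler(apparmor.NewValidator("docker"))

	// Already running: admitted without consulting the AppArmor validator.
	running := &api.Pod{Status: api.PodStatus{Phase: api.PodRunning}}
	fmt.Println(handler.Admit(&lifecycle.PodAdmitAttributes{Pod: running}).Admit) // true

	// Still pending: the profile is validated, so the result depends on the
	// node's AppArmor support and the pod's annotations.
	pending := &api.Pod{Status: api.PodStatus{Phase: api.PodPending}}
	result := handler.Admit(&lifecycle.PodAdmitAttributes{Pod: pending})
	fmt.Println(result.Admit, result.Reason)
}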
diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go
index 6e595ad9c7..c611f5ad0f 100644
--- a/pkg/kubelet/runonce_test.go
+++ b/pkg/kubelet/runonce_test.go
@@ -121,7 +121,7 @@ func TestRunOnce(t *testing.T) {
 		t.Fatalf("failed to initialize eviction manager: %v", err)
 	}
 	kb.evictionManager = evictionManager
-	kb.AddPodAdmitHandler(evictionAdmitHandler)
+	kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
 	if err := kb.setupDataDirs(); err != nil {
 		t.Errorf("Failed to init data dirs: %v", err)
 	}
diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD
index 131bf0d41c..8ef3c1faab 100644
--- a/test/e2e_node/BUILD
+++ b/test/e2e_node/BUILD
@@ -108,5 +108,7 @@ go_test(
         "//vendor:github.com/onsi/gomega/gstruct",
         "//vendor:github.com/onsi/gomega/types",
         "//vendor:github.com/spf13/pflag",
+        "//vendor:k8s.io/client-go/pkg/api/errors",
+        "//vendor:k8s.io/client-go/pkg/api/unversioned",
     ],
 )
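The apparmor_test.go changes below exercise pods that request AppArmor profiles through createPodWithAppArmor. For context, here is a rough sketch of how such a pod is expressed, assuming the beta per-container annotation key exported by pkg/security/apparmor (ContainerAnnotationKeyPrefix); the pod name, container name, image, and command are arbitrary and not taken from the test.

package e2e_node

import (
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/security/apparmor"
)

// exampleAppArmorPod is an illustrative helper (not part of the diff): it attaches an
// AppArmor profile to a single-container pod via the beta annotation the kubelet reads.
// Profile values look like "runtime/default" or "localhost/<loaded-profile-name>".
func exampleAppArmorPod(profile string) *api.Pod {
	return &api.Pod{
		ObjectMeta: api.ObjectMeta{
			Name: "example-apparmor-pod",
			Annotations: map[string]string{
				apparmor.ContainerAnnotationKeyPrefix + "test": profile,
			},
		},
		Spec: api.PodSpec{
			Containers: []api.Container{{
				Name:    "test",
				Image:   "busybox",
				Command: []string{"touch", "foo"},
			}},
			RestartPolicy: api.RestartPolicyNever,
		},
	}
}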
diff --git a/test/e2e_node/apparmor_test.go b/test/e2e_node/apparmor_test.go
index 2a18defd96..440ec9068f 100644
--- a/test/e2e_node/apparmor_test.go
+++ b/test/e2e_node/apparmor_test.go
@@ -26,8 +26,11 @@ import (
 	"strconv"
 	"strings"
 
+	"k8s.io/client-go/pkg/api/errors"
+	"k8s.io/client-go/pkg/api/unversioned"
 	"k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/security/apparmor"
+	"k8s.io/kubernetes/pkg/watch"
 	"k8s.io/kubernetes/test/e2e/framework"
 
 	"github.com/davecgh/go-spew/spew"
@@ -46,12 +49,11 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {
 		f := framework.NewDefaultFramework("apparmor-test")
 
 		It("should reject an unloaded profile", func() {
-			status := runAppArmorTest(f, apparmor.ProfileNamePrefix+"non-existant-profile")
-			Expect(status.Phase).To(Equal(api.PodFailed), "PodStatus: %+v", status)
-			Expect(status.Reason).To(Equal("AppArmor"), "PodStatus: %+v", status)
+			status := runAppArmorTest(f, false, apparmor.ProfileNamePrefix+"non-existant-profile")
+			expectSoftRejection(status)
 		})
 		It("should enforce a profile blocking writes", func() {
-			status := runAppArmorTest(f, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"deny-write")
+			status := runAppArmorTest(f, true, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"deny-write")
 			if len(status.ContainerStatuses) == 0 {
 				framework.Failf("Unexpected pod status: %s", spew.Sdump(status))
 				return
@@ -61,7 +63,7 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {
 		})
 
 		It("should enforce a permissive profile", func() {
-			status := runAppArmorTest(f, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"audit-write")
+			status := runAppArmorTest(f, true, apparmor.ProfileNamePrefix+apparmorProfilePrefix+"audit-write")
 			if len(status.ContainerStatuses) == 0 {
 				framework.Failf("Unexpected pod status: %s", spew.Sdump(status))
 				return
@@ -75,9 +77,8 @@ var _ = framework.KubeDescribe("AppArmor [Feature:AppArmor]", func() {
 		f := framework.NewDefaultFramework("apparmor-test")
 
 		It("should reject a pod with an AppArmor profile", func() {
-			status := runAppArmorTest(f, apparmor.ProfileRuntimeDefault)
-			Expect(status.Phase).To(Equal(api.PodFailed), "PodStatus: %+v", status)
-			Expect(status.Reason).To(Equal("AppArmor"), "PodStatus: %+v", status)
+			status := runAppArmorTest(f, false, apparmor.ProfileRuntimeDefault)
+			expectSoftRejection(status)
 		})
 	})
 }
@@ -136,11 +137,31 @@ func loadTestProfiles() error {
 	return nil
 }
 
-func runAppArmorTest(f *framework.Framework, profile string) api.PodStatus {
+func runAppArmorTest(f *framework.Framework, shouldRun bool, profile string) api.PodStatus {
 	pod := createPodWithAppArmor(f, profile)
-	// The pod needs to start before it stops, so wait for the longer start timeout.
-	framework.ExpectNoError(framework.WaitTimeoutForPodNoLongerRunningInNamespace(
-		f.ClientSet, pod.Name, f.Namespace.Name, "", framework.PodStartTimeout))
+	if shouldRun {
+		// The pod needs to start before it stops, so wait for the longer start timeout.
+		framework.ExpectNoError(framework.WaitTimeoutForPodNoLongerRunningInNamespace(
+			f.ClientSet, pod.Name, f.Namespace.Name, "", framework.PodStartTimeout))
+	} else {
+		// Pod should remain in the pending state. Wait for the Reason to be set to "AppArmor".
+		w, err := f.PodClient().Watch(api.SingleObject(api.ObjectMeta{Name: pod.Name}))
+		framework.ExpectNoError(err)
+		_, err = watch.Until(framework.PodStartTimeout, w, func(e watch.Event) (bool, error) {
+			switch e.Type {
+			case watch.Deleted:
+				return false, errors.NewNotFound(unversioned.GroupResource{Resource: "pods"}, pod.Name)
+			}
+			switch t := e.Object.(type) {
+			case *api.Pod:
+				if t.Status.Reason == "AppArmor" {
+					return true, nil
+				}
+			}
+			return false, nil
+		})
+		framework.ExpectNoError(err)
+	}
 	p, err := f.PodClient().Get(pod.Name)
 	framework.ExpectNoError(err)
 	return p.Status
@@ -166,6 +187,14 @@ func createPodWithAppArmor(f *framework.Framework, profile string) *api.Pod {
 	return f.PodClient().Create(pod)
 }
 
+func expectSoftRejection(status api.PodStatus) {
+	args := []interface{}{"PodStatus: %+v", status}
+	Expect(status.Phase).To(Equal(api.PodPending), args...)
+	Expect(status.Reason).To(Equal("AppArmor"), args...)
+	Expect(status.Message).To(ContainSubstring("AppArmor"), args...)
+	Expect(status.ContainerStatuses[0].State.Waiting.Reason).To(Equal("Blocked"), args...)
+}
+
 func isAppArmorEnabled() bool {
 	// TODO(timstclair): Pass this through the image setup rather than hardcoding.
 	if strings.Contains(framework.TestContext.NodeName, "-gci-dev-") {