Merge pull request #53167 from dashpole/fix_init_container

Automatic merge from submit-queue (batch tested with PRs 44596, 52708, 53163, 53167, 52692). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Do not GC exited containers in running pods

This fixes a regression introduced by #45896, and was identified by #52462.
This bug causes the kubelet to garbage collect exited containers in a running pod.
This manifests in strange and confusing state when viewing the cluster.  For example, it can show running pods as having no init container (see #52462), if that container has exited and been removed.

This PR solves this problem by only removing containers and sandboxes from terminated pods.
The important line change is:
` if cgc.podDeletionProvider.IsPodDeleted(podUID) || evictNonDeletedPods {` ---> 
`if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) {`

cc @MrHohn @yujuhong @kubernetes/sig-node-bugs 

```release-note
BugFix: Exited containers are not Garbage Collected by the kubelet while the pod is running
```
pull/6/head
Kubernetes Submit Queue 2017-09-28 21:15:41 -07:00 committed by GitHub
commit dcaf8e8203
5 changed files with 85 additions and 63 deletions

View File

@ -820,6 +820,16 @@ func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses))
}
// IsPodTerminated returns true if the pod with the provided UID is in a terminated state ("Failed" or "Succeeded")
// or if the pod has been deleted or removed
func (kl *Kubelet) IsPodTerminated(uid types.UID) bool {
pod, podFound := kl.podManager.GetPodByUID(uid)
if !podFound {
// A pod no longer known to the pod manager is treated as terminated.
return true
}
return kl.podIsTerminated(pod)
}
// IsPodDeleted returns true if the pod is deleted. For the pod to be deleted, either:
// 1. The pod object is deleted
// 2. The pod's status is evicted

View File

@ -43,18 +43,25 @@ func (f *fakeHTTP) Get(url string) (*http.Response, error) {
return nil, f.err
}
type fakePodDeletionProvider struct {
pods map[types.UID]struct{}
// fakePodStateProvider is a test double that reports pod deletion and
// termination state from in-memory sets of pod UIDs.
type fakePodStateProvider struct {
existingPods map[types.UID]struct{} // pods present here are considered not deleted
runningPods map[types.UID]struct{} // pods present here are considered not terminated
}
func newFakePodDeletionProvider() *fakePodDeletionProvider {
return &fakePodDeletionProvider{
pods: make(map[types.UID]struct{}),
// newFakePodStateProvider returns a fakePodStateProvider with empty
// (non-nil) pod sets, so tests can add entries directly.
func newFakePodStateProvider() *fakePodStateProvider {
return &fakePodStateProvider{
existingPods: make(map[types.UID]struct{}),
runningPods: make(map[types.UID]struct{}),
}
}
func (f *fakePodDeletionProvider) IsPodDeleted(uid types.UID) bool {
_, found := f.pods[uid]
// IsPodDeleted reports whether the pod with the given UID is considered
// deleted, i.e. it is absent from the set of existing pods.
func (f *fakePodStateProvider) IsPodDeleted(uid types.UID) bool {
	_, exists := f.existingPods[uid]
	return !exists
}
// IsPodTerminated reports whether the pod with the given UID is considered
// terminated, i.e. it is absent from the set of running pods.
func (f *fakePodStateProvider) IsPodTerminated(uid types.UID) bool {
	_, running := f.runningPods[uid]
	return !running
}
@ -79,7 +86,7 @@ func NewFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS
return nil, err
}
kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodDeletionProvider(), kubeRuntimeManager)
kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodStateProvider(), kubeRuntimeManager)
kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
kubeRuntimeManager.imagePuller = images.NewImageManager(
kubecontainer.FilterEventRecorder(recorder),

View File

@ -33,17 +33,17 @@ import (
// containerGC is the manager of garbage collection.
type containerGC struct {
client internalapi.RuntimeService
manager *kubeGenericRuntimeManager
podDeletionProvider podDeletionProvider
client internalapi.RuntimeService
manager *kubeGenericRuntimeManager
podStateProvider podStateProvider
}
// NewContainerGC creates a new containerGC.
func NewContainerGC(client internalapi.RuntimeService, podDeletionProvider podDeletionProvider, manager *kubeGenericRuntimeManager) *containerGC {
func NewContainerGC(client internalapi.RuntimeService, podStateProvider podStateProvider, manager *kubeGenericRuntimeManager) *containerGC {
return &containerGC{
client: client,
manager: manager,
podDeletionProvider: podDeletionProvider,
client: client,
manager: manager,
podStateProvider: podStateProvider,
}
}
@ -201,7 +201,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
}
// evict all containers that are evictable
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
// Separate containers by evict units.
evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
if err != nil {
@ -211,7 +211,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
// Remove deleted pod containers if all sources are ready.
if allSourcesReady {
for key, unit := range evictUnits {
if cgc.podDeletionProvider.IsPodDeleted(key.uid) || evictNonDeletedPods {
if cgc.podStateProvider.IsPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) {
cgc.removeOldestN(unit, len(unit)) // Remove all.
delete(evictUnits, key)
}
@ -253,7 +253,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
// 2. contains no containers.
// 3. belong to a non-existent (i.e., already removed) pod, or is not the
// most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {
containers, err := cgc.manager.getKubeletContainers(true)
if err != nil {
return err
@ -297,7 +297,7 @@ func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
}
for podUID, sandboxes := range sandboxesByPod {
if cgc.podDeletionProvider.IsPodDeleted(podUID) || evictNonDeletedPods {
if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) {
// Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is
// already an active one.
@ -323,7 +323,7 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
for _, dir := range dirs {
name := dir.Name()
podUID := types.UID(name)
if !cgc.podDeletionProvider.IsPodDeleted(podUID) {
if !cgc.podStateProvider.IsPodDeleted(podUID) {
continue
}
err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
@ -357,14 +357,14 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
// * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
// * gets evictable sandboxes which are not ready and contains no containers.
// * removes evictable sandboxes.
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
// Remove evictable containers
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil {
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods); err != nil {
return err
}
// Remove sandboxes with zero containers
if err := cgc.evictSandboxes(evictNonDeletedPods); err != nil {
if err := cgc.evictSandboxes(evictTerminatedPods); err != nil {
return err
}

View File

@ -34,11 +34,11 @@ func TestSandboxGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodGetter bool, createdAt int64) sandboxTemplate {
if withPodGetter {
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodStateProvider bool, createdAt int64) sandboxTemplate {
if withPodStateProvider {
// initialize the pod getter
podDeletionProvider.pods[pod.UID] = struct{}{}
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return sandboxTemplate{
pod: pod,
@ -67,7 +67,7 @@ func TestSandboxGC(t *testing.T) {
containers []containerTemplate // templates of containers
minAge time.Duration // sandboxMinGCAge
remain []int // template indexes of remaining sandboxes
evictNonDeletedPods bool
evictTerminatedPods bool
}{
{
description: "notready sandboxes without containers for deleted pods should be garbage collected.",
@ -76,7 +76,7 @@ func TestSandboxGC(t *testing.T) {
},
containers: []containerTemplate{},
remain: []int{},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "ready sandboxes without containers for deleted pods should not be garbage collected.",
@ -85,7 +85,7 @@ func TestSandboxGC(t *testing.T) {
},
containers: []containerTemplate{},
remain: []int{0},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "sandboxes for existing pods should not be garbage collected.",
@ -95,17 +95,17 @@ func TestSandboxGC(t *testing.T) {
},
containers: []containerTemplate{},
remain: []int{0, 1},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "non-running sandboxes for existing pods should be garbage collected if evictNonDeletedPods is set.",
description: "non-running sandboxes for existing pods should be garbage collected if evictTerminatedPods is set.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
},
containers: []containerTemplate{},
remain: []int{0},
evictNonDeletedPods: true,
evictTerminatedPods: true,
},
{
description: "sandbox with containers should not be garbage collected.",
@ -116,7 +116,7 @@ func TestSandboxGC(t *testing.T) {
{pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "multiple sandboxes should be handled properly.",
@ -136,7 +136,7 @@ func TestSandboxGC(t *testing.T) {
{pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0, 2},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
@ -145,7 +145,7 @@ func TestSandboxGC(t *testing.T) {
fakeRuntime.SetFakeSandboxes(fakeSandboxes)
fakeRuntime.SetFakeContainers(fakeContainers)
err := m.containerGC.evictSandboxes(test.evictNonDeletedPods)
err := m.containerGC.evictSandboxes(test.evictTerminatedPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListPodSandbox(nil)
assert.NoError(t, err)
@ -162,13 +162,15 @@ func TestContainerGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
makeGCContainer := func(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
container := makeTestContainer(containerName, "test-image")
pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
if podName == "running" {
podStateProvider.runningPods[pod.UID] = struct{}{}
}
if podName != "deleted" {
// initialize the pod getter, explicitly exclude deleted pod
podDeletionProvider.pods[pod.UID] = struct{}{}
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return containerTemplate{
pod: pod,
@ -185,7 +187,7 @@ func TestContainerGC(t *testing.T) {
containers []containerTemplate // templates of containers
policy *kubecontainer.ContainerGCPolicy // container gc policy
remain []int // template indexes of remaining containers
evictNonDeletedPods bool
evictTerminatedPods bool
}{
{
description: "all containers should be removed when max container limit is 0",
@ -194,7 +196,7 @@ func TestContainerGC(t *testing.T) {
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "max containers should be complied when no max per pod container limit is set",
@ -207,7 +209,7 @@ func TestContainerGC(t *testing.T) {
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "no containers should be removed if both max container and per pod container limits are not set",
@ -218,7 +220,7 @@ func TestContainerGC(t *testing.T) {
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "recently started containers should not be removed",
@ -228,7 +230,7 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "oldest containers should be removed when per pod container limit exceeded",
@ -238,7 +240,7 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "running containers should not be removed",
@ -248,7 +250,7 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{0, 1, 2},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "no containers should be removed when limits are not exceeded",
@ -257,7 +259,7 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "max container count should apply per (UID, container) pair",
@ -273,7 +275,7 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 3, 4, 6, 7},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "max limit should apply and try to keep from every pod",
@ -290,7 +292,7 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "oldest pods should be removed if limit exceeded",
@ -307,20 +309,20 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8, 9},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
{
description: "all non-running containers should be removed when evictNonDeletedPods is set",
description: "all non-running containers should be removed when evictTerminatedPods is set",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{5},
evictNonDeletedPods: true,
remain: []int{4, 5},
evictTerminatedPods: true,
},
{
description: "containers for deleted pods should be removed",
@ -333,7 +335,7 @@ func TestContainerGC(t *testing.T) {
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
evictNonDeletedPods: false,
evictTerminatedPods: false,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
@ -343,7 +345,7 @@ func TestContainerGC(t *testing.T) {
if test.policy == nil {
test.policy = &defaultGCPolicy
}
err := m.containerGC.evictContainers(*test.policy, true, test.evictNonDeletedPods)
err := m.containerGC.evictContainers(*test.policy, true, test.evictTerminatedPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListContainers(nil)
assert.NoError(t, err)
@ -361,11 +363,13 @@ func TestPodLogDirectoryGC(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
fakeOS := m.osInterface.(*containertest.FakeOS)
podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
// pod log directories without corresponding pods should be removed.
podDeletionProvider.pods["123"] = struct{}{}
podDeletionProvider.pods["456"] = struct{}{}
podStateProvider.existingPods["123"] = struct{}{}
podStateProvider.existingPods["456"] = struct{}{}
podStateProvider.runningPods["123"] = struct{}{}
podStateProvider.runningPods["456"] = struct{}{}
files := []string{"123", "456", "789", "012"}
removed := []string{filepath.Join(podLogsRootDirectory, "789"), filepath.Join(podLogsRootDirectory, "012")}

View File

@ -65,9 +65,10 @@ var (
ErrVersionNotSupported = errors.New("Runtime api version is not supported")
)
// podDeletionProvider can determine if a pod is deleted
type podDeletionProvider interface {
// podStateProvider can determine if a pod is deleted or terminated
type podStateProvider interface {
IsPodDeleted(kubetypes.UID) bool
IsPodTerminated(kubetypes.UID) bool
}
type kubeGenericRuntimeManager struct {
@ -127,7 +128,7 @@ func NewKubeGenericRuntimeManager(
seccompProfileRoot string,
containerRefManager *kubecontainer.RefManager,
machineInfo *cadvisorapi.MachineInfo,
podDeletionProvider podDeletionProvider,
podStateProvider podStateProvider,
osInterface kubecontainer.OSInterface,
runtimeHelper kubecontainer.RuntimeHelper,
httpClient types.HttpGetter,
@ -193,7 +194,7 @@ func NewKubeGenericRuntimeManager(
imagePullQPS,
imagePullBurst)
kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager)
kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podDeletionProvider, kubeRuntimeManager)
kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.versionCache = cache.NewObjectCache(
func() (interface{}, error) {