From de8ee94d14cc3752accd7e51bbe8981e0fa1ba1e Mon Sep 17 00:00:00 2001 From: Lantao Liu Date: Wed, 6 Feb 2019 16:13:43 -0800 Subject: [PATCH] Stop container in unknown state before recreate or remove. --- .../apis/cri/testing/fake_runtime_service.go | 6 + pkg/kubelet/kuberuntime/helpers.go | 9 +- .../kuberuntime/kuberuntime_container.go | 11 +- pkg/kubelet/kuberuntime/kuberuntime_gc.go | 17 ++ .../kuberuntime/kuberuntime_gc_test.go | 180 ++++++++++-------- .../kuberuntime/kuberuntime_manager.go | 22 ++- .../kuberuntime/kuberuntime_manager_test.go | 71 ++++++- 7 files changed, 226 insertions(+), 90 deletions(-) diff --git a/pkg/kubelet/apis/cri/testing/fake_runtime_service.go b/pkg/kubelet/apis/cri/testing/fake_runtime_service.go index ecdb4a3cc6..577a15fbc9 100644 --- a/pkg/kubelet/apis/cri/testing/fake_runtime_service.go +++ b/pkg/kubelet/apis/cri/testing/fake_runtime_service.go @@ -104,6 +104,12 @@ func (r *FakeRuntimeService) AssertCalls(calls []string) error { return nil } +func (r *FakeRuntimeService) GetCalls() []string { + r.Lock() + defer r.Unlock() + return append([]string{}, r.Called...) +} + func (r *FakeRuntimeService) InjectError(f string, err error) { r.Lock() defer r.Unlock() diff --git a/pkg/kubelet/kuberuntime/helpers.go b/pkg/kubelet/kuberuntime/helpers.go index 056e4d0ae7..ccabf8d004 100644 --- a/pkg/kubelet/kuberuntime/helpers.go +++ b/pkg/kubelet/kuberuntime/helpers.go @@ -141,12 +141,17 @@ func (m *kubeGenericRuntimeManager) getImageUser(image string) (*int64, string, return new(int64), "", nil } -// isContainerFailed returns true if container has exited and exitcode is not zero. -func isContainerFailed(status *kubecontainer.ContainerStatus) bool { +// isInitContainerFailed returns true if container has exited and exitcode is not zero +// or is in unknown state. 
+func isInitContainerFailed(status *kubecontainer.ContainerStatus) bool { if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 { return true } + if status.State == kubecontainer.ContainerStateUnknown { + return true + } + return false } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 4ca0980d4d..519de324de 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -635,9 +635,14 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod, for name := range initContainerNames { count := 0 for _, status := range podStatus.ContainerStatuses { - if status.Name != name || !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited { + if status.Name != name || !initContainerNames.Has(status.Name) || + (status.State != kubecontainer.ContainerStateExited && + status.State != kubecontainer.ContainerStateUnknown) { continue } + // Remove init containers in unknown state. It should have + // been stopped before pruneInitContainersBeforeStart is + // called. count++ // keep the first init container for this name if count == 1 { @@ -692,7 +697,7 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus * } // findNextInitContainerToRun returns the status of the last failed container, the -// next init container to start, or done if there are no further init containers. +// index of next init container to start, or done if there are no further init containers. // Status is only returned if an init container is failed, in which case next will // point to the current container. 
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) { @@ -704,7 +709,7 @@ func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- { container := &pod.Spec.InitContainers[i] status := podStatus.FindContainerStatusByName(container.Name) - if status != nil && isContainerFailed(status) { + if status != nil && isInitContainerFailed(status) { return status, container, false } } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/pkg/kubelet/kuberuntime/kuberuntime_gc.go index e2ba00d064..1bc77c1739 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_gc.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_gc.go @@ -55,6 +55,9 @@ type containerGCInfo struct { name string // Creation time for the container. createTime time.Time + // If true, the container is in unknown state. Garbage collector should try + // to stop containers before removal. unknown bool } // sandboxGCInfo is the internal information kept for sandboxes being considered for GC. @@ -122,6 +125,19 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int // Remove from oldest to newest (last to first). numToKeep := len(containers) - toRemove for i := len(containers) - 1; i >= numToKeep; i-- { + if containers[i].unknown { + // Containers in unknown state could be running, we should try + // to stop it before removal. 
+ id := kubecontainer.ContainerID{ + Type: cgc.manager.runtimeName, + ID: containers[i].id, + } + message := "Container is in unknown state, try killing it before removal" + if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil { + klog.Errorf("Failed to stop container %q: %v", containers[i].id, err) + continue + } + } if err := cgc.manager.removeContainer(containers[i].id); err != nil { klog.Errorf("Failed to remove container %q: %v", containers[i].id, err) } @@ -184,6 +200,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE id: container.Id, name: container.Metadata.Name, createTime: createdAt, + unknown: container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN, } key := evictUnit{ uid: labeledInfo.PodUID, diff --git a/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go b/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go index 49513d4423..8c565d5675 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go @@ -179,28 +179,29 @@ func TestSandboxGC(t *testing.T) { } } +func makeGCContainer(podStateProvider *fakePodStateProvider, podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate { + container := makeTestContainer(containerName, "test-image") + pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container}) + if podName == "running" { + podStateProvider.runningPods[pod.UID] = struct{}{} + } + if podName != "deleted" { + podStateProvider.existingPods[pod.UID] = struct{}{} + } + return containerTemplate{ + pod: pod, + container: &container, + attempt: attempt, + createdAt: createdAt, + state: state, + } +} + func TestContainerGC(t *testing.T) { fakeRuntime, _, m, err := createTestRuntimeManager() assert.NoError(t, err) podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) - makeGCContainer := func(podName, containerName string, attempt int, createdAt 
int64, state runtimeapi.ContainerState) containerTemplate { - container := makeTestContainer(containerName, "test-image") - pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container}) - if podName == "running" { - podStateProvider.runningPods[pod.UID] = struct{}{} - } - if podName != "deleted" { - podStateProvider.existingPods[pod.UID] = struct{}{} - } - return containerTemplate{ - pod: pod, - container: &container, - attempt: attempt, - createdAt: createdAt, - state: state, - } - } defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6} for c, test := range []struct { @@ -214,7 +215,7 @@ func TestContainerGC(t *testing.T) { { description: "all containers should be removed when max container limit is 0", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0}, remain: []int{}, @@ -224,11 +225,11 @@ func TestContainerGC(t *testing.T) { { description: "max containers should be complied when no max per pod container limit is set", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 2, 2, 
runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4}, remain: []int{0, 1, 2, 3}, @@ -238,9 +239,9 @@ func TestContainerGC(t *testing.T) { { description: "no containers should be removed if both max container and per pod container limits are not set", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1}, remain: []int{0, 1, 2}, @@ -250,9 +251,9 @@ func TestContainerGC(t *testing.T) { { description: "recently started containers should not be removed", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), + 
makeGCContainer(podStateProvider, "foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1, 2}, evictTerminatedPods: false, @@ -261,9 +262,9 @@ func TestContainerGC(t *testing.T) { { description: "oldest containers should be removed when per pod container limit exceeded", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1}, evictTerminatedPods: false, @@ -272,9 +273,9 @@ func TestContainerGC(t *testing.T) { { description: "running containers should not be removed", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), + makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), }, remain: []int{0, 1, 2}, evictTerminatedPods: false, @@ -283,8 +284,8 @@ func TestContainerGC(t *testing.T) { { description: "no containers should be removed when limits are not exceeded", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", 
"bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1}, evictTerminatedPods: false, @@ -293,15 +294,15 @@ func TestContainerGC(t *testing.T) { { description: "max container count should apply per (UID, container) pair", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo2", "bar", 1, 
1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1, 3, 4, 6, 7}, evictTerminatedPods: false, @@ -310,16 +311,16 @@ func TestContainerGC(t *testing.T) { { description: "max limit should apply and try to keep from every pod", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, 
runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 2, 4, 6, 8}, evictTerminatedPods: false, @@ -328,16 +329,16 @@ func TestContainerGC(t *testing.T) { { description: "oldest pods should be removed if limit exceeded", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, 
runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 2, 4, 6, 8, 9}, evictTerminatedPods: false, @@ -346,12 +347,12 @@ func TestContainerGC(t *testing.T) { { description: "all non-running containers should be removed when evictTerminatedPods is set", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), + makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), }, remain: []int{4, 5}, evictTerminatedPods: true, @@ -360,12 +361,12 @@ func TestContainerGC(t *testing.T) { { description: "containers for deleted pods should be removed", containers: []containerTemplate{ - makeGCContainer("foo", "bar", 1, 1, 
runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), // deleted pods still respect MinAge. - makeGCContainer("deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1, 2}, evictTerminatedPods: false, @@ -374,7 +375,7 @@ func TestContainerGC(t *testing.T) { { description: "containers for deleted pods may not be removed if allSourcesReady is set false ", containers: []containerTemplate{ - makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0}, evictTerminatedPods: true, @@ -440,3 +441,26 @@ func TestPodLogDirectoryGC(t *testing.T) { assert.NoError(t, err) assert.Empty(t, fakeOS.Removes) } + +func TestUnknownStateContainerGC(t *testing.T) { + fakeRuntime, _, m, err := createTestRuntimeManager() + assert.NoError(t, err) + + podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) + defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 0, MaxContainers: 0} + + fakeContainers := makeFakeContainers(t, m, []containerTemplate{ + 
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_UNKNOWN), + }) + fakeRuntime.SetFakeContainers(fakeContainers) + + err = m.containerGC.evictContainers(defaultGCPolicy, true, false) + assert.NoError(t, err) + + assert.Contains(t, fakeRuntime.GetCalls(), "StopContainer", "RemoveContainer", + "container in unknown state should be stopped before being removed") + + remain, err := fakeRuntime.ListContainers(nil) + assert.NoError(t, err) + assert.Empty(t, remain) +} diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 2d5fdf3071..a1de35184e 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -505,10 +505,19 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus) if !done { if next != nil { - initFailed := initLastStatus != nil && isContainerFailed(initLastStatus) + initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus) if initFailed && !shouldRestartOnFailure(pod) { changes.KillPod = true } else { + // Always try to stop containers in unknown state first. 
+ if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown { + changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{ + name: next.Name, + container: next, + message: fmt.Sprintf("Init container is in %q state, try killing it before restart", + initLastStatus.State), + } + } changes.NextInitContainerToStart = next } } @@ -540,6 +549,17 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container) klog.V(3).Infof(message) changes.ContainersToStart = append(changes.ContainersToStart, idx) + if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown { + // If container is in unknown state, we don't know whether it + // is actually running or not, always try killing it before + // restart to avoid having 2 running instances of the same container. + changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{ + name: containerStatus.Name, + container: &pod.Spec.Containers[idx], + message: fmt.Sprintf("Container is in %q state, try killing it before restart", + containerStatus.State), + } + } } continue } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index ffbb40329a..ac5952d49e 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -597,9 +597,10 @@ func TestPruneInitContainers(t *testing.T) { } templates := []containerTemplate{ + {pod: pod, container: &init1, attempt: 3, createdAt: 3, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pod, container: &init1, attempt: 2, createdAt: 2, state: runtimeapi.ContainerState_CONTAINER_EXITED}, - {pod: pod, container: &init1, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pod, container: &init2, attempt: 1, createdAt: 1, state: 
runtimeapi.ContainerState_CONTAINER_EXITED}, + {pod: pod, container: &init1, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_UNKNOWN}, {pod: pod, container: &init2, attempt: 0, createdAt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pod, container: &init1, attempt: 0, createdAt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, } @@ -928,6 +929,17 @@ func TestComputePodActions(t *testing.T) { ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{}, }, }, + "Kill and recreate the container if the container is in unknown state": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[1].State = kubecontainer.ContainerStateUnknown + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToKill: getKillMap(basePod, baseStatus, []int{1}), + ContainersToStart: []int{1}, + }, + }, } { pod, status := makeBasePodAndStatus() if test.mutatePodFn != nil { @@ -952,6 +964,17 @@ func getKillMap(pod *v1.Pod, status *kubecontainer.PodStatus, cIndexes []int) ma return m } +func getKillMapWithInitContainers(pod *v1.Pod, status *kubecontainer.PodStatus, cIndexes []int) map[kubecontainer.ContainerID]containerToKillInfo { + m := map[kubecontainer.ContainerID]containerToKillInfo{} + for _, i := range cIndexes { + m[status.ContainerStatuses[i].ID] = containerToKillInfo{ + container: &pod.Spec.InitContainers[i], + name: pod.Spec.InitContainers[i].Name, + } + } + return m +} + func verifyActions(t *testing.T, expected, actual *podActions, desc string) { if actual.ContainersToKill != nil { // Clear the message field since we don't need to verify the message. 
@@ -985,7 +1008,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { actions: podActions{ SandboxID: baseStatus.SandboxStatuses[0].Id, ContainersToStart: []int{0, 1, 2}, - ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "initialization in progress; do nothing": { @@ -1007,7 +1030,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { Attempt: uint32(1), NextInitContainerToStart: &basePod.Spec.InitContainers[0], ContainersToStart: []int{}, - ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "initialization failed; restart the last init container if RestartPolicy == Always": { @@ -1019,7 +1042,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { SandboxID: baseStatus.SandboxStatuses[0].Id, NextInitContainerToStart: &basePod.Spec.InitContainers[2], ContainersToStart: []int{}, - ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "initialization failed; restart the last init container if RestartPolicy == OnFailure": { @@ -1031,7 +1054,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { SandboxID: baseStatus.SandboxStatuses[0].Id, NextInitContainerToStart: &basePod.Spec.InitContainers[2], ContainersToStart: []int{}, - ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "initialization failed; kill pod if RestartPolicy == Never": { @@ -1043,7 +1066,43 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { KillPod: true, SandboxID: baseStatus.SandboxStatuses[0].Id, ContainersToStart: []int{}, - ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + 
}, + "init container state unknown; kill and recreate the last init container if RestartPolicy == Always": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + NextInitContainerToStart: &basePod.Spec.InitContainers[2], + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), + }, + }, + "init container state unknown; kill and recreate the last init container if RestartPolicy == OnFailure": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + NextInitContainerToStart: &basePod.Spec.InitContainers[2], + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), + }, + }, + "init container state unknown; kill pod if RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown + }, + actions: podActions{ + KillPod: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, } {