Stop container in unknown state before recreate or remove.

pull/564/head
Lantao Liu 2019-02-06 16:13:43 -08:00
parent 4992402fa5
commit de8ee94d14
7 changed files with 226 additions and 90 deletions

View File

@ -104,6 +104,12 @@ func (r *FakeRuntimeService) AssertCalls(calls []string) error {
return nil
}
func (r *FakeRuntimeService) GetCalls() []string {
r.Lock()
defer r.Unlock()
return append([]string{}, r.Called...)
}
func (r *FakeRuntimeService) InjectError(f string, err error) {
r.Lock()
defer r.Unlock()

View File

@ -141,12 +141,17 @@ func (m *kubeGenericRuntimeManager) getImageUser(image string) (*int64, string,
return new(int64), "", nil
}
// isContainerFailed returns true if container has exited and exitcode is not zero.
func isContainerFailed(status *kubecontainer.ContainerStatus) bool {
// isInitContainerFailed returns true if container has exited and exitcode is not zero
// or is in unknown state.
func isInitContainerFailed(status *kubecontainer.ContainerStatus) bool {
if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 {
return true
}
if status.State == kubecontainer.ContainerStateUnknown {
return true
}
return false
}

View File

@ -635,9 +635,14 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod,
for name := range initContainerNames {
count := 0
for _, status := range podStatus.ContainerStatuses {
if status.Name != name || !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited {
if status.Name != name || !initContainerNames.Has(status.Name) ||
(status.State != kubecontainer.ContainerStateExited &&
status.State != kubecontainer.ContainerStateUnknown) {
continue
}
// Remove init containers in unknown state. It should have
// been stopped before pruneInitContainersBeforeStart is
// called.
count++
// keep the first init container for this name
if count == 1 {
@ -692,7 +697,7 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus *
}
// findNextInitContainerToRun returns the status of the last failed container, the
// next init container to start, or done if there are no further init containers.
// index of next init container to start, or done if there are no further init containers.
// Status is only returned if an init container is failed, in which case next will
// point to the current container.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) {
@ -704,7 +709,7 @@ func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus)
for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
container := &pod.Spec.InitContainers[i]
status := podStatus.FindContainerStatusByName(container.Name)
if status != nil && isContainerFailed(status) {
if status != nil && isInitContainerFailed(status) {
return status, container, false
}
}

View File

@ -55,6 +55,9 @@ type containerGCInfo struct {
name string
// Creation time for the container.
createTime time.Time
// If true, the container is in unknown state. Garbage collector should try
// to stop containers before removal.
unknown bool
}
// sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
@ -122,6 +125,19 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int
// Remove from oldest to newest (last to first).
numToKeep := len(containers) - toRemove
for i := len(containers) - 1; i >= numToKeep; i-- {
if containers[i].unknown {
// Containers in known state could be running, we should try
// to stop it before removal.
id := kubecontainer.ContainerID{
Type: cgc.manager.runtimeName,
ID: containers[i].id,
}
message := "Container is in unknown state, try killing it before removal"
if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil {
klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
continue
}
}
if err := cgc.manager.removeContainer(containers[i].id); err != nil {
klog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
}
@ -184,6 +200,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
id: container.Id,
name: container.Metadata.Name,
createTime: createdAt,
unknown: container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN,
}
key := evictUnit{
uid: labeledInfo.PodUID,

View File

@ -179,28 +179,29 @@ func TestSandboxGC(t *testing.T) {
}
}
func makeGCContainer(podStateProvider *fakePodStateProvider, podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
container := makeTestContainer(containerName, "test-image")
pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
if podName == "running" {
podStateProvider.runningPods[pod.UID] = struct{}{}
}
if podName != "deleted" {
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return containerTemplate{
pod: pod,
container: &container,
attempt: attempt,
createdAt: createdAt,
state: state,
}
}
func TestContainerGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
makeGCContainer := func(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
container := makeTestContainer(containerName, "test-image")
pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
if podName == "running" {
podStateProvider.runningPods[pod.UID] = struct{}{}
}
if podName != "deleted" {
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return containerTemplate{
pod: pod,
container: &container,
attempt: attempt,
createdAt: createdAt,
state: state,
}
}
defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6}
for c, test := range []struct {
@ -214,7 +215,7 @@ func TestContainerGC(t *testing.T) {
{
description: "all containers should be removed when max container limit is 0",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{},
@ -224,11 +225,11 @@ func TestContainerGC(t *testing.T) {
{
description: "max containers should be complied when no max per pod container limit is set",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3},
@ -238,9 +239,9 @@ func TestContainerGC(t *testing.T) {
{
description: "no containers should be removed if both max container and per pod container limits are not set",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2},
@ -250,9 +251,9 @@ func TestContainerGC(t *testing.T) {
{
description: "recently started containers should not be removed",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
@ -261,9 +262,9 @@ func TestContainerGC(t *testing.T) {
{
description: "oldest containers should be removed when per pod container limit exceeded",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
evictTerminatedPods: false,
@ -272,9 +273,9 @@ func TestContainerGC(t *testing.T) {
{
description: "running containers should not be removed",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
@ -283,8 +284,8 @@ func TestContainerGC(t *testing.T) {
{
description: "no containers should be removed when limits are not exceeded",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
evictTerminatedPods: false,
@ -293,15 +294,15 @@ func TestContainerGC(t *testing.T) {
{
description: "max container count should apply per (UID, container) pair",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 3, 4, 6, 7},
evictTerminatedPods: false,
@ -310,16 +311,16 @@ func TestContainerGC(t *testing.T) {
{
description: "max limit should apply and try to keep from every pod",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8},
evictTerminatedPods: false,
@ -328,16 +329,16 @@ func TestContainerGC(t *testing.T) {
{
description: "oldest pods should be removed if limit exceeded",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8, 9},
evictTerminatedPods: false,
@ -346,12 +347,12 @@ func TestContainerGC(t *testing.T) {
{
description: "all non-running containers should be removed when evictTerminatedPods is set",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{4, 5},
evictTerminatedPods: true,
@ -360,12 +361,12 @@ func TestContainerGC(t *testing.T) {
{
description: "containers for deleted pods should be removed",
containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
// deleted pods still respect MinAge.
makeGCContainer("deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
@ -374,7 +375,7 @@ func TestContainerGC(t *testing.T) {
{
description: "containers for deleted pods may not be removed if allSourcesReady is set false ",
containers: []containerTemplate{
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0},
evictTerminatedPods: true,
@ -440,3 +441,26 @@ func TestPodLogDirectoryGC(t *testing.T) {
assert.NoError(t, err)
assert.Empty(t, fakeOS.Removes)
}
func TestUnknownStateContainerGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 0, MaxContainers: 0}
fakeContainers := makeFakeContainers(t, m, []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_UNKNOWN),
})
fakeRuntime.SetFakeContainers(fakeContainers)
err = m.containerGC.evictContainers(defaultGCPolicy, true, false)
assert.NoError(t, err)
assert.Contains(t, fakeRuntime.GetCalls(), "StopContainer", "RemoveContainer",
"container in unknown state should be stopped before being removed")
remain, err := fakeRuntime.ListContainers(nil)
assert.NoError(t, err)
assert.Empty(t, remain)
}

View File

@ -505,10 +505,19 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
if !done {
if next != nil {
initFailed := initLastStatus != nil && isContainerFailed(initLastStatus)
initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
if initFailed && !shouldRestartOnFailure(pod) {
changes.KillPod = true
} else {
// Always try to stop containers in unknown state first.
if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
name: next.Name,
container: next,
message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
initLastStatus.State),
}
}
changes.NextInitContainerToStart = next
}
}
@ -540,6 +549,17 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
klog.V(3).Infof(message)
changes.ContainersToStart = append(changes.ContainersToStart, idx)
if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
// If container is in unknown state, we don't know whether it
// is actually running or not, always try killing it before
// restart to avoid having 2 running instances of the same container.
changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
name: containerStatus.Name,
container: &pod.Spec.Containers[idx],
message: fmt.Sprintf("Container is in %q state, try killing it before restart",
containerStatus.State),
}
}
}
continue
}

View File

@ -597,9 +597,10 @@ func TestPruneInitContainers(t *testing.T) {
}
templates := []containerTemplate{
{pod: pod, container: &init1, attempt: 3, createdAt: 3, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init1, attempt: 2, createdAt: 2, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init1, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init2, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init1, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_UNKNOWN},
{pod: pod, container: &init2, attempt: 0, createdAt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init1, attempt: 0, createdAt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
}
@ -928,6 +929,17 @@ func TestComputePodActions(t *testing.T) {
ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{},
},
},
"Kill and recreate the container if the container is in unknown state": {
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever },
mutateStatusFn: func(status *kubecontainer.PodStatus) {
status.ContainerStatuses[1].State = kubecontainer.ContainerStateUnknown
},
actions: podActions{
SandboxID: baseStatus.SandboxStatuses[0].Id,
ContainersToKill: getKillMap(basePod, baseStatus, []int{1}),
ContainersToStart: []int{1},
},
},
} {
pod, status := makeBasePodAndStatus()
if test.mutatePodFn != nil {
@ -952,6 +964,17 @@ func getKillMap(pod *v1.Pod, status *kubecontainer.PodStatus, cIndexes []int) ma
return m
}
func getKillMapWithInitContainers(pod *v1.Pod, status *kubecontainer.PodStatus, cIndexes []int) map[kubecontainer.ContainerID]containerToKillInfo {
m := map[kubecontainer.ContainerID]containerToKillInfo{}
for _, i := range cIndexes {
m[status.ContainerStatuses[i].ID] = containerToKillInfo{
container: &pod.Spec.InitContainers[i],
name: pod.Spec.InitContainers[i].Name,
}
}
return m
}
func verifyActions(t *testing.T, expected, actual *podActions, desc string) {
if actual.ContainersToKill != nil {
// Clear the message field since we don't need to verify the message.
@ -985,7 +1008,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
actions: podActions{
SandboxID: baseStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{0, 1, 2},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
},
},
"initialization in progress; do nothing": {
@ -1007,7 +1030,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
Attempt: uint32(1),
NextInitContainerToStart: &basePod.Spec.InitContainers[0],
ContainersToStart: []int{},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
},
},
"initialization failed; restart the last init container if RestartPolicy == Always": {
@ -1019,7 +1042,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
SandboxID: baseStatus.SandboxStatuses[0].Id,
NextInitContainerToStart: &basePod.Spec.InitContainers[2],
ContainersToStart: []int{},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
},
},
"initialization failed; restart the last init container if RestartPolicy == OnFailure": {
@ -1031,7 +1054,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
SandboxID: baseStatus.SandboxStatuses[0].Id,
NextInitContainerToStart: &basePod.Spec.InitContainers[2],
ContainersToStart: []int{},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
},
},
"initialization failed; kill pod if RestartPolicy == Never": {
@ -1043,7 +1066,43 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
KillPod: true,
SandboxID: baseStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
},
},
"init container state unknown; kill and recreate the last init container if RestartPolicy == Always": {
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways },
mutateStatusFn: func(status *kubecontainer.PodStatus) {
status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown
},
actions: podActions{
SandboxID: baseStatus.SandboxStatuses[0].Id,
NextInitContainerToStart: &basePod.Spec.InitContainers[2],
ContainersToStart: []int{},
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}),
},
},
"init container state unknown; kill and recreate the last init container if RestartPolicy == OnFailure": {
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure },
mutateStatusFn: func(status *kubecontainer.PodStatus) {
status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown
},
actions: podActions{
SandboxID: baseStatus.SandboxStatuses[0].Id,
NextInitContainerToStart: &basePod.Spec.InitContainers[2],
ContainersToStart: []int{},
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}),
},
},
"init container state unknown; kill pod if RestartPolicy == Never": {
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever },
mutateStatusFn: func(status *kubecontainer.PodStatus) {
status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown
},
actions: podActions{
KillPod: true,
SandboxID: baseStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
},
},
} {