Stop container in unknown state before recreate or remove.

pull/564/head
Lantao Liu 2019-02-06 16:13:43 -08:00
parent 4992402fa5
commit de8ee94d14
7 changed files with 226 additions and 90 deletions

View File

@ -104,6 +104,12 @@ func (r *FakeRuntimeService) AssertCalls(calls []string) error {
return nil return nil
} }
func (r *FakeRuntimeService) GetCalls() []string {
r.Lock()
defer r.Unlock()
return append([]string{}, r.Called...)
}
func (r *FakeRuntimeService) InjectError(f string, err error) { func (r *FakeRuntimeService) InjectError(f string, err error) {
r.Lock() r.Lock()
defer r.Unlock() defer r.Unlock()

View File

@ -141,12 +141,17 @@ func (m *kubeGenericRuntimeManager) getImageUser(image string) (*int64, string,
return new(int64), "", nil return new(int64), "", nil
} }
// isContainerFailed returns true if container has exited and exitcode is not zero. // isInitContainerFailed returns true if container has exited and exitcode is not zero
func isContainerFailed(status *kubecontainer.ContainerStatus) bool { // or is in unknown state.
func isInitContainerFailed(status *kubecontainer.ContainerStatus) bool {
if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 { if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 {
return true return true
} }
if status.State == kubecontainer.ContainerStateUnknown {
return true
}
return false return false
} }

View File

@ -635,9 +635,14 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod,
for name := range initContainerNames { for name := range initContainerNames {
count := 0 count := 0
for _, status := range podStatus.ContainerStatuses { for _, status := range podStatus.ContainerStatuses {
if status.Name != name || !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited { if status.Name != name || !initContainerNames.Has(status.Name) ||
(status.State != kubecontainer.ContainerStateExited &&
status.State != kubecontainer.ContainerStateUnknown) {
continue continue
} }
// Remove init containers in unknown state. It should have
// been stopped before pruneInitContainersBeforeStart is
// called.
count++ count++
// keep the first init container for this name // keep the first init container for this name
if count == 1 { if count == 1 {
@ -692,7 +697,7 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus *
} }
// findNextInitContainerToRun returns the status of the last failed container, the // findNextInitContainerToRun returns the status of the last failed container, the
// next init container to start, or done if there are no further init containers. // index of next init container to start, or done if there are no further init containers.
// Status is only returned if an init container is failed, in which case next will // Status is only returned if an init container is failed, in which case next will
// point to the current container. // point to the current container.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) { func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) {
@ -704,7 +709,7 @@ func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus)
for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- { for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
container := &pod.Spec.InitContainers[i] container := &pod.Spec.InitContainers[i]
status := podStatus.FindContainerStatusByName(container.Name) status := podStatus.FindContainerStatusByName(container.Name)
if status != nil && isContainerFailed(status) { if status != nil && isInitContainerFailed(status) {
return status, container, false return status, container, false
} }
} }

View File

@ -55,6 +55,9 @@ type containerGCInfo struct {
name string name string
// Creation time for the container. // Creation time for the container.
createTime time.Time createTime time.Time
// If true, the container is in unknown state. Garbage collector should try
// to stop containers before removal.
unknown bool
} }
// sandboxGCInfo is the internal information kept for sandboxes being considered for GC. // sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
@ -122,6 +125,19 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int
// Remove from oldest to newest (last to first). // Remove from oldest to newest (last to first).
numToKeep := len(containers) - toRemove numToKeep := len(containers) - toRemove
for i := len(containers) - 1; i >= numToKeep; i-- { for i := len(containers) - 1; i >= numToKeep; i-- {
if containers[i].unknown {
// Containers in known state could be running, we should try
// to stop it before removal.
id := kubecontainer.ContainerID{
Type: cgc.manager.runtimeName,
ID: containers[i].id,
}
message := "Container is in unknown state, try killing it before removal"
if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil {
klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
continue
}
}
if err := cgc.manager.removeContainer(containers[i].id); err != nil { if err := cgc.manager.removeContainer(containers[i].id); err != nil {
klog.Errorf("Failed to remove container %q: %v", containers[i].id, err) klog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
} }
@ -184,6 +200,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
id: container.Id, id: container.Id,
name: container.Metadata.Name, name: container.Metadata.Name,
createTime: createdAt, createTime: createdAt,
unknown: container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN,
} }
key := evictUnit{ key := evictUnit{
uid: labeledInfo.PodUID, uid: labeledInfo.PodUID,

View File

@ -179,28 +179,29 @@ func TestSandboxGC(t *testing.T) {
} }
} }
func makeGCContainer(podStateProvider *fakePodStateProvider, podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
container := makeTestContainer(containerName, "test-image")
pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
if podName == "running" {
podStateProvider.runningPods[pod.UID] = struct{}{}
}
if podName != "deleted" {
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return containerTemplate{
pod: pod,
container: &container,
attempt: attempt,
createdAt: createdAt,
state: state,
}
}
func TestContainerGC(t *testing.T) { func TestContainerGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager() fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err) assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
makeGCContainer := func(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
container := makeTestContainer(containerName, "test-image")
pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
if podName == "running" {
podStateProvider.runningPods[pod.UID] = struct{}{}
}
if podName != "deleted" {
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return containerTemplate{
pod: pod,
container: &container,
attempt: attempt,
createdAt: createdAt,
state: state,
}
}
defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6} defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6}
for c, test := range []struct { for c, test := range []struct {
@ -214,7 +215,7 @@ func TestContainerGC(t *testing.T) {
{ {
description: "all containers should be removed when max container limit is 0", description: "all containers should be removed when max container limit is 0",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0}, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{}, remain: []int{},
@ -224,11 +225,11 @@ func TestContainerGC(t *testing.T) {
{ {
description: "max containers should be complied when no max per pod container limit is set", description: "max containers should be complied when no max per pod container limit is set",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4}, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3}, remain: []int{0, 1, 2, 3},
@ -238,9 +239,9 @@ func TestContainerGC(t *testing.T) {
{ {
description: "no containers should be removed if both max container and per pod container limits are not set", description: "no containers should be removed if both max container and per pod container limits are not set",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1}, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2}, remain: []int{0, 1, 2},
@ -250,9 +251,9 @@ func TestContainerGC(t *testing.T) {
{ {
description: "recently started containers should not be removed", description: "recently started containers should not be removed",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1, 2}, remain: []int{0, 1, 2},
evictTerminatedPods: false, evictTerminatedPods: false,
@ -261,9 +262,9 @@ func TestContainerGC(t *testing.T) {
{ {
description: "oldest containers should be removed when per pod container limit exceeded", description: "oldest containers should be removed when per pod container limit exceeded",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1}, remain: []int{0, 1},
evictTerminatedPods: false, evictTerminatedPods: false,
@ -272,9 +273,9 @@ func TestContainerGC(t *testing.T) {
{ {
description: "running containers should not be removed", description: "running containers should not be removed",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
}, },
remain: []int{0, 1, 2}, remain: []int{0, 1, 2},
evictTerminatedPods: false, evictTerminatedPods: false,
@ -283,8 +284,8 @@ func TestContainerGC(t *testing.T) {
{ {
description: "no containers should be removed when limits are not exceeded", description: "no containers should be removed when limits are not exceeded",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1}, remain: []int{0, 1},
evictTerminatedPods: false, evictTerminatedPods: false,
@ -293,15 +294,15 @@ func TestContainerGC(t *testing.T) {
{ {
description: "max container count should apply per (UID, container) pair", description: "max container count should apply per (UID, container) pair",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1, 3, 4, 6, 7}, remain: []int{0, 1, 3, 4, 6, 7},
evictTerminatedPods: false, evictTerminatedPods: false,
@ -310,16 +311,16 @@ func TestContainerGC(t *testing.T) {
{ {
description: "max limit should apply and try to keep from every pod", description: "max limit should apply and try to keep from every pod",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 2, 4, 6, 8}, remain: []int{0, 2, 4, 6, 8},
evictTerminatedPods: false, evictTerminatedPods: false,
@ -328,16 +329,16 @@ func TestContainerGC(t *testing.T) {
{ {
description: "oldest pods should be removed if limit exceeded", description: "oldest pods should be removed if limit exceeded",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 2, 4, 6, 8, 9}, remain: []int{0, 2, 4, 6, 8, 9},
evictTerminatedPods: false, evictTerminatedPods: false,
@ -346,12 +347,12 @@ func TestContainerGC(t *testing.T) {
{ {
description: "all non-running containers should be removed when evictTerminatedPods is set", description: "all non-running containers should be removed when evictTerminatedPods is set",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
}, },
remain: []int{4, 5}, remain: []int{4, 5},
evictTerminatedPods: true, evictTerminatedPods: true,
@ -360,12 +361,12 @@ func TestContainerGC(t *testing.T) {
{ {
description: "containers for deleted pods should be removed", description: "containers for deleted pods should be removed",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
// deleted pods still respect MinAge. // deleted pods still respect MinAge.
makeGCContainer("deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0, 1, 2}, remain: []int{0, 1, 2},
evictTerminatedPods: false, evictTerminatedPods: false,
@ -374,7 +375,7 @@ func TestContainerGC(t *testing.T) {
{ {
description: "containers for deleted pods may not be removed if allSourcesReady is set false ", description: "containers for deleted pods may not be removed if allSourcesReady is set false ",
containers: []containerTemplate{ containers: []containerTemplate{
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
}, },
remain: []int{0}, remain: []int{0},
evictTerminatedPods: true, evictTerminatedPods: true,
@ -440,3 +441,26 @@ func TestPodLogDirectoryGC(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Empty(t, fakeOS.Removes) assert.Empty(t, fakeOS.Removes)
} }
func TestUnknownStateContainerGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
defaultGCPolicy := kubecontainer.ContainerGCPolicy{MinAge: time.Hour, MaxPerPodContainer: 0, MaxContainers: 0}
fakeContainers := makeFakeContainers(t, m, []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_UNKNOWN),
})
fakeRuntime.SetFakeContainers(fakeContainers)
err = m.containerGC.evictContainers(defaultGCPolicy, true, false)
assert.NoError(t, err)
assert.Contains(t, fakeRuntime.GetCalls(), "StopContainer", "RemoveContainer",
"container in unknown state should be stopped before being removed")
remain, err := fakeRuntime.ListContainers(nil)
assert.NoError(t, err)
assert.Empty(t, remain)
}

View File

@ -505,10 +505,19 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus) initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
if !done { if !done {
if next != nil { if next != nil {
initFailed := initLastStatus != nil && isContainerFailed(initLastStatus) initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
if initFailed && !shouldRestartOnFailure(pod) { if initFailed && !shouldRestartOnFailure(pod) {
changes.KillPod = true changes.KillPod = true
} else { } else {
// Always try to stop containers in unknown state first.
if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
name: next.Name,
container: next,
message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
initLastStatus.State),
}
}
changes.NextInitContainerToStart = next changes.NextInitContainerToStart = next
} }
} }
@ -540,6 +549,17 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container) message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
klog.V(3).Infof(message) klog.V(3).Infof(message)
changes.ContainersToStart = append(changes.ContainersToStart, idx) changes.ContainersToStart = append(changes.ContainersToStart, idx)
if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
// If container is in unknown state, we don't know whether it
// is actually running or not, always try killing it before
// restart to avoid having 2 running instances of the same container.
changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
name: containerStatus.Name,
container: &pod.Spec.Containers[idx],
message: fmt.Sprintf("Container is in %q state, try killing it before restart",
containerStatus.State),
}
}
} }
continue continue
} }

View File

@ -597,9 +597,10 @@ func TestPruneInitContainers(t *testing.T) {
} }
templates := []containerTemplate{ templates := []containerTemplate{
{pod: pod, container: &init1, attempt: 3, createdAt: 3, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init1, attempt: 2, createdAt: 2, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pod, container: &init1, attempt: 2, createdAt: 2, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init1, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init2, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pod, container: &init2, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init1, attempt: 1, createdAt: 1, state: runtimeapi.ContainerState_CONTAINER_UNKNOWN},
{pod: pod, container: &init2, attempt: 0, createdAt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pod, container: &init2, attempt: 0, createdAt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
{pod: pod, container: &init1, attempt: 0, createdAt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, {pod: pod, container: &init1, attempt: 0, createdAt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
} }
@ -928,6 +929,17 @@ func TestComputePodActions(t *testing.T) {
ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{}, ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{},
}, },
}, },
"Kill and recreate the container if the container is in unknown state": {
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever },
mutateStatusFn: func(status *kubecontainer.PodStatus) {
status.ContainerStatuses[1].State = kubecontainer.ContainerStateUnknown
},
actions: podActions{
SandboxID: baseStatus.SandboxStatuses[0].Id,
ContainersToKill: getKillMap(basePod, baseStatus, []int{1}),
ContainersToStart: []int{1},
},
},
} { } {
pod, status := makeBasePodAndStatus() pod, status := makeBasePodAndStatus()
if test.mutatePodFn != nil { if test.mutatePodFn != nil {
@ -952,6 +964,17 @@ func getKillMap(pod *v1.Pod, status *kubecontainer.PodStatus, cIndexes []int) ma
return m return m
} }
func getKillMapWithInitContainers(pod *v1.Pod, status *kubecontainer.PodStatus, cIndexes []int) map[kubecontainer.ContainerID]containerToKillInfo {
m := map[kubecontainer.ContainerID]containerToKillInfo{}
for _, i := range cIndexes {
m[status.ContainerStatuses[i].ID] = containerToKillInfo{
container: &pod.Spec.InitContainers[i],
name: pod.Spec.InitContainers[i].Name,
}
}
return m
}
func verifyActions(t *testing.T, expected, actual *podActions, desc string) { func verifyActions(t *testing.T, expected, actual *podActions, desc string) {
if actual.ContainersToKill != nil { if actual.ContainersToKill != nil {
// Clear the message field since we don't need to verify the message. // Clear the message field since we don't need to verify the message.
@ -985,7 +1008,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
actions: podActions{ actions: podActions{
SandboxID: baseStatus.SandboxStatuses[0].Id, SandboxID: baseStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{0, 1, 2}, ContainersToStart: []int{0, 1, 2},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}), ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
}, },
}, },
"initialization in progress; do nothing": { "initialization in progress; do nothing": {
@ -1007,7 +1030,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
Attempt: uint32(1), Attempt: uint32(1),
NextInitContainerToStart: &basePod.Spec.InitContainers[0], NextInitContainerToStart: &basePod.Spec.InitContainers[0],
ContainersToStart: []int{}, ContainersToStart: []int{},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}), ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
}, },
}, },
"initialization failed; restart the last init container if RestartPolicy == Always": { "initialization failed; restart the last init container if RestartPolicy == Always": {
@ -1019,7 +1042,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
SandboxID: baseStatus.SandboxStatuses[0].Id, SandboxID: baseStatus.SandboxStatuses[0].Id,
NextInitContainerToStart: &basePod.Spec.InitContainers[2], NextInitContainerToStart: &basePod.Spec.InitContainers[2],
ContainersToStart: []int{}, ContainersToStart: []int{},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}), ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
}, },
}, },
"initialization failed; restart the last init container if RestartPolicy == OnFailure": { "initialization failed; restart the last init container if RestartPolicy == OnFailure": {
@ -1031,7 +1054,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
SandboxID: baseStatus.SandboxStatuses[0].Id, SandboxID: baseStatus.SandboxStatuses[0].Id,
NextInitContainerToStart: &basePod.Spec.InitContainers[2], NextInitContainerToStart: &basePod.Spec.InitContainers[2],
ContainersToStart: []int{}, ContainersToStart: []int{},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}), ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
}, },
}, },
"initialization failed; kill pod if RestartPolicy == Never": { "initialization failed; kill pod if RestartPolicy == Never": {
@ -1043,7 +1066,43 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
KillPod: true, KillPod: true,
SandboxID: baseStatus.SandboxStatuses[0].Id, SandboxID: baseStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{}, ContainersToStart: []int{},
ContainersToKill: getKillMap(basePod, baseStatus, []int{}), ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
},
},
"init container state unknown; kill and recreate the last init container if RestartPolicy == Always": {
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways },
mutateStatusFn: func(status *kubecontainer.PodStatus) {
status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown
},
actions: podActions{
SandboxID: baseStatus.SandboxStatuses[0].Id,
NextInitContainerToStart: &basePod.Spec.InitContainers[2],
ContainersToStart: []int{},
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}),
},
},
"init container state unknown; kill and recreate the last init container if RestartPolicy == OnFailure": {
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure },
mutateStatusFn: func(status *kubecontainer.PodStatus) {
status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown
},
actions: podActions{
SandboxID: baseStatus.SandboxStatuses[0].Id,
NextInitContainerToStart: &basePod.Spec.InitContainers[2],
ContainersToStart: []int{},
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}),
},
},
"init container state unknown; kill pod if RestartPolicy == Never": {
mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever },
mutateStatusFn: func(status *kubecontainer.PodStatus) {
status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown
},
actions: podActions{
KillPod: true,
SandboxID: baseStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}),
}, },
}, },
} { } {