Stop container in unknown state before recreate or remove.

k3s-v1.13.4
Lantao Liu 2019-02-06 16:13:43 -08:00 committed by Darren Shepherd
parent 5daeef1215
commit 8ab4edcd99
4 changed files with 64 additions and 15 deletions

View File

@ -141,12 +141,17 @@ func (m *kubeGenericRuntimeManager) getImageUser(image string) (*int64, string,
return new(int64), "", nil return new(int64), "", nil
} }
// isContainerFailed returns true if container has exited and exitcode is not zero. // isInitContainerFailed returns true if container has exited and exitcode is not zero
func isContainerFailed(status *kubecontainer.ContainerStatus) bool { // or is in unknown state.
func isInitContainerFailed(status *kubecontainer.ContainerStatus) bool {
if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 { if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 {
return true return true
} }
if status.State == kubecontainer.ContainerStateUnknown {
return true
}
return false return false
} }

View File

@ -634,9 +634,14 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod,
for name := range initContainerNames { for name := range initContainerNames {
count := 0 count := 0
for _, status := range podStatus.ContainerStatuses { for _, status := range podStatus.ContainerStatuses {
if status.Name != name || !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited { if status.Name != name || !initContainerNames.Has(status.Name) ||
(status.State != kubecontainer.ContainerStateExited &&
status.State != kubecontainer.ContainerStateUnknown) {
continue continue
} }
// Remove init containers in unknown state. They should have
// been stopped before pruneInitContainersBeforeStart is
// called.
count++ count++
// keep the first init container for this name // keep the first init container for this name
if count == 1 { if count == 1 {
@ -691,20 +696,21 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus *
} }
// findNextInitContainerToRun returns the status of the last failed container, the // findNextInitContainerToRun returns the status of the last failed container, the
// next init container to start, or done if there are no further init containers. // index of next init container to start, or done if there are no further init containers.
// Status is only returned if an init container is failed, in which case next will // Status is only returned if an init container is failed, in which case next will
// point to the current container. // point to the current container.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) { // next < 0 if no init container to run.
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next int, done bool) {
if len(pod.Spec.InitContainers) == 0 { if len(pod.Spec.InitContainers) == 0 {
return nil, nil, true return nil, -1, true
} }
// If there are failed containers, return the status of the last failed one. // If there are failed containers, return the status of the last failed one.
for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- { for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
container := &pod.Spec.InitContainers[i] container := &pod.Spec.InitContainers[i]
status := podStatus.FindContainerStatusByName(container.Name) status := podStatus.FindContainerStatusByName(container.Name)
if status != nil && isContainerFailed(status) { if status != nil && isInitContainerFailed(status) {
return status, container, false return status, i, false
} }
} }
@ -718,21 +724,21 @@ func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus)
// container is still running, return not done. // container is still running, return not done.
if status.State == kubecontainer.ContainerStateRunning { if status.State == kubecontainer.ContainerStateRunning {
return nil, nil, false return nil, -1, false
} }
if status.State == kubecontainer.ContainerStateExited { if status.State == kubecontainer.ContainerStateExited {
// all init containers successful // all init containers successful
if i == (len(pod.Spec.InitContainers) - 1) { if i == (len(pod.Spec.InitContainers) - 1) {
return nil, nil, true return nil, -1, true
} }
// all containers up to i successful, go to i+1 // all containers up to i successful, go to i+1
return nil, &pod.Spec.InitContainers[i+1], false return nil, i + 1, false
} }
} }
return nil, &pod.Spec.InitContainers[0], false return nil, 0, false
} }
// GetContainerLogs returns logs of a specific container. // GetContainerLogs returns logs of a specific container.

View File

@ -55,6 +55,9 @@ type containerGCInfo struct {
name string name string
// Creation time for the container. // Creation time for the container.
createTime time.Time createTime time.Time
// If true, the container is in unknown state. Garbage collector should try
// to stop containers before removal.
unknown bool
} }
// sandboxGCInfo is the internal information kept for sandboxes being considered for GC. // sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
@ -122,6 +125,19 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int
// Remove from oldest to newest (last to first). // Remove from oldest to newest (last to first).
numToKeep := len(containers) - toRemove numToKeep := len(containers) - toRemove
for i := len(containers) - 1; i >= numToKeep; i-- { for i := len(containers) - 1; i >= numToKeep; i-- {
if containers[i].unknown {
// Containers in unknown state could be running, we should try
// to stop them before removal.
id := kubecontainer.ContainerID{
Type: cgc.manager.runtimeName,
ID: containers[i].id,
}
message := "Container is in unknown state, try killing it before removal"
if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil {
klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
continue
}
}
if err := cgc.manager.removeContainer(containers[i].id); err != nil { if err := cgc.manager.removeContainer(containers[i].id); err != nil {
klog.Errorf("Failed to remove container %q: %v", containers[i].id, err) klog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
} }
@ -184,6 +200,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
id: container.Id, id: container.Id,
name: container.Metadata.Name, name: container.Metadata.Name,
createTime: createdAt, createTime: createdAt,
unknown: container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN,
} }
key := evictUnit{ key := evictUnit{
uid: labeledInfo.PodUID, uid: labeledInfo.PodUID,

View File

@ -486,12 +486,22 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
// Check initialization progress. // Check initialization progress.
initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus) initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
if !done { if !done {
if next != nil { if next >= 0 {
initFailed := initLastStatus != nil && isContainerFailed(initLastStatus) container := pod.Spec.InitContainers[next]
initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
if initFailed && !shouldRestartOnFailure(pod) { if initFailed && !shouldRestartOnFailure(pod) {
changes.KillPod = true changes.KillPod = true
} else { } else {
changes.NextInitContainerToStart = next // Always try to stop containers in unknown state first.
if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
name: container.Name,
container: &container,
message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
initLastStatus.State),
}
}
changes.NextInitContainerToStart = &container
} }
} }
// Initialization failed or still in progress. Skip inspecting non-init // Initialization failed or still in progress. Skip inspecting non-init
@ -522,6 +532,17 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container) message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
klog.V(3).Infof(message) klog.V(3).Infof(message)
changes.ContainersToStart = append(changes.ContainersToStart, idx) changes.ContainersToStart = append(changes.ContainersToStart, idx)
if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
// If container is in unknown state, we don't know whether it
// is actually running or not, always try killing it before
// restart to avoid having 2 running instances of the same container.
changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
name: containerStatus.Name,
container: &pod.Spec.Containers[idx],
message: fmt.Sprintf("Container is in %q state, try killing it before restart",
containerStatus.State),
}
}
} }
continue continue
} }