mirror of https://github.com/k3s-io/k3s
Stop container in unknown state before recreate or remove.
parent
5daeef1215
commit
8ab4edcd99
|
@ -141,12 +141,17 @@ func (m *kubeGenericRuntimeManager) getImageUser(image string) (*int64, string,
|
||||||
return new(int64), "", nil
|
return new(int64), "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// isContainerFailed returns true if container has exited and exitcode is not zero.
|
// isInitContainerFailed returns true if container has exited and exitcode is not zero
|
||||||
func isContainerFailed(status *kubecontainer.ContainerStatus) bool {
|
// or is in unknown state.
|
||||||
|
func isInitContainerFailed(status *kubecontainer.ContainerStatus) bool {
|
||||||
if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 {
|
if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if status.State == kubecontainer.ContainerStateUnknown {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -634,9 +634,14 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod,
|
||||||
for name := range initContainerNames {
|
for name := range initContainerNames {
|
||||||
count := 0
|
count := 0
|
||||||
for _, status := range podStatus.ContainerStatuses {
|
for _, status := range podStatus.ContainerStatuses {
|
||||||
if status.Name != name || !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited {
|
if status.Name != name || !initContainerNames.Has(status.Name) ||
|
||||||
|
(status.State != kubecontainer.ContainerStateExited &&
|
||||||
|
status.State != kubecontainer.ContainerStateUnknown) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// Remove init containers in unknown state. They should have
|
||||||
|
// been stopped before pruneInitContainersBeforeStart is
|
||||||
|
// called.
|
||||||
count++
|
count++
|
||||||
// keep the first init container for this name
|
// keep the first init container for this name
|
||||||
if count == 1 {
|
if count == 1 {
|
||||||
|
@ -691,20 +696,21 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus *
|
||||||
}
|
}
|
||||||
|
|
||||||
// findNextInitContainerToRun returns the status of the last failed container, the
|
// findNextInitContainerToRun returns the status of the last failed container, the
|
||||||
// next init container to start, or done if there are no further init containers.
|
// index of next init container to start, or done if there are no further init containers.
|
||||||
// Status is only returned if an init container is failed, in which case next will
|
// Status is only returned if an init container is failed, in which case next will
|
||||||
// point to the current container.
|
// point to the current container.
|
||||||
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next *v1.Container, done bool) {
|
// next < 0 if no init container to run.
|
||||||
|
func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.ContainerStatus, next int, done bool) {
|
||||||
if len(pod.Spec.InitContainers) == 0 {
|
if len(pod.Spec.InitContainers) == 0 {
|
||||||
return nil, nil, true
|
return nil, -1, true
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there are failed containers, return the status of the last failed one.
|
// If there are failed containers, return the status of the last failed one.
|
||||||
for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
|
for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
|
||||||
container := &pod.Spec.InitContainers[i]
|
container := &pod.Spec.InitContainers[i]
|
||||||
status := podStatus.FindContainerStatusByName(container.Name)
|
status := podStatus.FindContainerStatusByName(container.Name)
|
||||||
if status != nil && isContainerFailed(status) {
|
if status != nil && isInitContainerFailed(status) {
|
||||||
return status, container, false
|
return status, i, false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -718,21 +724,21 @@ func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus)
|
||||||
|
|
||||||
// container is still running, return not done.
|
// container is still running, return not done.
|
||||||
if status.State == kubecontainer.ContainerStateRunning {
|
if status.State == kubecontainer.ContainerStateRunning {
|
||||||
return nil, nil, false
|
return nil, -1, false
|
||||||
}
|
}
|
||||||
|
|
||||||
if status.State == kubecontainer.ContainerStateExited {
|
if status.State == kubecontainer.ContainerStateExited {
|
||||||
// all init containers successful
|
// all init containers successful
|
||||||
if i == (len(pod.Spec.InitContainers) - 1) {
|
if i == (len(pod.Spec.InitContainers) - 1) {
|
||||||
return nil, nil, true
|
return nil, -1, true
|
||||||
}
|
}
|
||||||
|
|
||||||
// all containers up to i successful, go to i+1
|
// all containers up to i successful, go to i+1
|
||||||
return nil, &pod.Spec.InitContainers[i+1], false
|
return nil, i + 1, false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, &pod.Spec.InitContainers[0], false
|
return nil, 0, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetContainerLogs returns logs of a specific container.
|
// GetContainerLogs returns logs of a specific container.
|
||||||
|
|
|
@ -55,6 +55,9 @@ type containerGCInfo struct {
|
||||||
name string
|
name string
|
||||||
// Creation time for the container.
|
// Creation time for the container.
|
||||||
createTime time.Time
|
createTime time.Time
|
||||||
|
// If true, the container is in unknown state. Garbage collector should try
|
||||||
|
// to stop containers before removal.
|
||||||
|
unknown bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
|
// sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
|
||||||
|
@ -122,6 +125,19 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int
|
||||||
// Remove from oldest to newest (last to first).
|
// Remove from oldest to newest (last to first).
|
||||||
numToKeep := len(containers) - toRemove
|
numToKeep := len(containers) - toRemove
|
||||||
for i := len(containers) - 1; i >= numToKeep; i-- {
|
for i := len(containers) - 1; i >= numToKeep; i-- {
|
||||||
|
if containers[i].unknown {
|
||||||
|
// Containers in unknown state could be running, we should try
|
||||||
|
// to stop them before removal.
|
||||||
|
id := kubecontainer.ContainerID{
|
||||||
|
Type: cgc.manager.runtimeName,
|
||||||
|
ID: containers[i].id,
|
||||||
|
}
|
||||||
|
message := "Container is in unknown state, try killing it before removal"
|
||||||
|
if err := cgc.manager.killContainer(nil, id, containers[i].name, message, nil); err != nil {
|
||||||
|
klog.Errorf("Failed to stop container %q: %v", containers[i].id, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
if err := cgc.manager.removeContainer(containers[i].id); err != nil {
|
if err := cgc.manager.removeContainer(containers[i].id); err != nil {
|
||||||
klog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
|
klog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
|
||||||
}
|
}
|
||||||
|
@ -184,6 +200,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
|
||||||
id: container.Id,
|
id: container.Id,
|
||||||
name: container.Metadata.Name,
|
name: container.Metadata.Name,
|
||||||
createTime: createdAt,
|
createTime: createdAt,
|
||||||
|
unknown: container.State == runtimeapi.ContainerState_CONTAINER_UNKNOWN,
|
||||||
}
|
}
|
||||||
key := evictUnit{
|
key := evictUnit{
|
||||||
uid: labeledInfo.PodUID,
|
uid: labeledInfo.PodUID,
|
||||||
|
|
|
@ -486,12 +486,22 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
|
||||||
// Check initialization progress.
|
// Check initialization progress.
|
||||||
initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
|
initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
|
||||||
if !done {
|
if !done {
|
||||||
if next != nil {
|
if next >= 0 {
|
||||||
initFailed := initLastStatus != nil && isContainerFailed(initLastStatus)
|
container := pod.Spec.InitContainers[next]
|
||||||
|
initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
|
||||||
if initFailed && !shouldRestartOnFailure(pod) {
|
if initFailed && !shouldRestartOnFailure(pod) {
|
||||||
changes.KillPod = true
|
changes.KillPod = true
|
||||||
} else {
|
} else {
|
||||||
changes.NextInitContainerToStart = next
|
// Always try to stop containers in unknown state first.
|
||||||
|
if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
|
||||||
|
changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
|
||||||
|
name: container.Name,
|
||||||
|
container: &container,
|
||||||
|
message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
|
||||||
|
initLastStatus.State),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
changes.NextInitContainerToStart = &container
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Initialization failed or still in progress. Skip inspecting non-init
|
// Initialization failed or still in progress. Skip inspecting non-init
|
||||||
|
@ -522,6 +532,17 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
|
||||||
message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
|
message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
|
||||||
klog.V(3).Infof(message)
|
klog.V(3).Infof(message)
|
||||||
changes.ContainersToStart = append(changes.ContainersToStart, idx)
|
changes.ContainersToStart = append(changes.ContainersToStart, idx)
|
||||||
|
if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
|
||||||
|
// If container is in unknown state, we don't know whether it
|
||||||
|
// is actually running or not, always try killing it before
|
||||||
|
// restart to avoid having 2 running instances of the same container.
|
||||||
|
changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
|
||||||
|
name: containerStatus.Name,
|
||||||
|
container: &pod.Spec.Containers[idx],
|
||||||
|
message: fmt.Sprintf("Container is in %q state, try killing it before restart",
|
||||||
|
containerStatus.State),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue