mirror of https://github.com/k3s-io/k3s
188 lines
6.9 KiB
Go
188 lines
6.9 KiB
Go
/*
|
|
Copyright 2017 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package server
|
|
|
|
import (
|
|
"time"
|
|
|
|
eventtypes "github.com/containerd/containerd/api/events"
|
|
"github.com/containerd/containerd/errdefs"
|
|
"github.com/docker/docker/pkg/signal"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/net/context"
|
|
"golang.org/x/sys/unix"
|
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
|
|
|
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
|
|
"github.com/containerd/cri/pkg/store"
|
|
containerstore "github.com/containerd/cri/pkg/store/container"
|
|
)
|
|
|
|
// killContainerTimeout is the timeout that we wait for the container to
|
|
// be SIGKILLed.
|
|
// The timeout is set to 1 min, because the default CRI operation timeout
|
|
// for StopContainer is (2 min + stop timeout). Set to 1 min, so that we
|
|
// have enough time for kill(all=true) and kill(all=false).
|
|
const killContainerTimeout = 1 * time.Minute
|
|
|
|
// StopContainer stops a running container with a grace period (i.e., timeout).
|
|
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
|
|
// Get container config from container store.
|
|
container, err := c.containerStore.Get(r.GetContainerId())
|
|
if err != nil {
|
|
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
|
|
}
|
|
|
|
if err := c.stopContainer(ctx, container, time.Duration(r.GetTimeout())*time.Second); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &runtime.StopContainerResponse{}, nil
|
|
}
|
|
|
|
// stopContainer stops a container based on the container metadata.
|
|
func (c *criService) stopContainer(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
|
|
id := container.ID
|
|
|
|
// Return without error if container is not running. This makes sure that
|
|
// stop only takes real action after the container is started.
|
|
state := container.Status.Get().State()
|
|
if state != runtime.ContainerState_CONTAINER_RUNNING &&
|
|
state != runtime.ContainerState_CONTAINER_UNKNOWN {
|
|
logrus.Infof("Container to stop %q must be in running or unknown state, current state %q",
|
|
id, criContainerStateToString(state))
|
|
return nil
|
|
}
|
|
|
|
task, err := container.Container.Task(ctx, nil)
|
|
if err != nil {
|
|
if !errdefs.IsNotFound(err) {
|
|
return errors.Wrapf(err, "failed to get task for container %q", id)
|
|
}
|
|
// Don't return for unknown state, some cleanup needs to be done.
|
|
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
|
|
return cleanupUnknownContainer(ctx, id, container)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Handle unknown state.
|
|
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
|
|
// Start an exit handler for containers in unknown state.
|
|
waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
|
|
defer waitCancel()
|
|
exitCh, err := task.Wait(waitCtx)
|
|
if err != nil {
|
|
if !errdefs.IsNotFound(err) {
|
|
return errors.Wrapf(err, "failed to wait for task for %q", id)
|
|
}
|
|
return cleanupUnknownContainer(ctx, id, container)
|
|
}
|
|
|
|
exitCtx, exitCancel := context.WithCancel(context.Background())
|
|
stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh)
|
|
defer func() {
|
|
exitCancel()
|
|
// This ensures that exit monitor is stopped before
|
|
// `Wait` is cancelled, so no exit event is generated
|
|
// because of the `Wait` cancellation.
|
|
<-stopCh
|
|
}()
|
|
}
|
|
|
|
// We only need to kill the task. The event handler will Delete the
|
|
// task from containerd after it handles the Exited event.
|
|
if timeout > 0 {
|
|
stopSignal := "SIGTERM"
|
|
if container.StopSignal != "" {
|
|
stopSignal = container.StopSignal
|
|
} else {
|
|
// The image may have been deleted, and the `StopSignal` field is
|
|
// just introduced to handle that.
|
|
// However, for containers created before the `StopSignal` field is
|
|
// introduced, still try to get the stop signal from the image config.
|
|
// If the image has been deleted, logging an error and using the
|
|
// default SIGTERM is still better than returning error and leaving
|
|
// the container unstoppable. (See issue #990)
|
|
// TODO(random-liu): Remove this logic when containerd 1.2 is deprecated.
|
|
image, err := c.imageStore.Get(container.ImageRef)
|
|
if err != nil {
|
|
if err != store.ErrNotExist {
|
|
return errors.Wrapf(err, "failed to get image %q", container.ImageRef)
|
|
}
|
|
logrus.Warningf("Image %q not found, stop container with signal %q", container.ImageRef, stopSignal)
|
|
} else {
|
|
if image.ImageSpec.Config.StopSignal != "" {
|
|
stopSignal = image.ImageSpec.Config.StopSignal
|
|
}
|
|
}
|
|
}
|
|
sig, err := signal.ParseSignal(stopSignal)
|
|
if err != nil {
|
|
return errors.Wrapf(err, "failed to parse stop signal %q", stopSignal)
|
|
}
|
|
logrus.Infof("Stop container %q with signal %v", id, sig)
|
|
if err = task.Kill(ctx, sig); err != nil && !errdefs.IsNotFound(err) {
|
|
return errors.Wrapf(err, "failed to stop container %q", id)
|
|
}
|
|
|
|
if err = c.waitContainerStop(ctx, container, timeout); err == nil || errors.Cause(err) == ctx.Err() {
|
|
// Do not SIGKILL container if the context is cancelled.
|
|
return err
|
|
}
|
|
logrus.WithError(err).Errorf("An error occurs during waiting for container %q to be stopped", id)
|
|
}
|
|
|
|
logrus.Infof("Kill container %q", id)
|
|
if err = task.Kill(ctx, unix.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
|
|
return errors.Wrapf(err, "failed to kill container %q", id)
|
|
}
|
|
|
|
// Wait for a fixed timeout until container stop is observed by event monitor.
|
|
if err = c.waitContainerStop(ctx, container, killContainerTimeout); err == nil {
|
|
return nil
|
|
}
|
|
return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id)
|
|
}
|
|
|
|
// waitContainerStop waits for container to be stopped until timeout exceeds or context is cancelled.
|
|
func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
|
|
timeoutTimer := time.NewTimer(timeout)
|
|
defer timeoutTimer.Stop()
|
|
select {
|
|
case <-ctx.Done():
|
|
return errors.Wrapf(ctx.Err(), "wait container %q is cancelled", container.ID)
|
|
case <-timeoutTimer.C:
|
|
return errors.Errorf("wait container %q stop timeout", container.ID)
|
|
case <-container.Stopped():
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// cleanupUnknownContainer cleanup stopped container in unknown state.
|
|
func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container) error {
|
|
// Reuse handleContainerExit to do the cleanup.
|
|
return handleContainerExit(ctx, &eventtypes.TaskExit{
|
|
ContainerID: id,
|
|
ID: id,
|
|
Pid: 0,
|
|
ExitStatus: unknownExitCode,
|
|
ExitedAt: time.Now(),
|
|
}, cntr)
|
|
}
|