2019-01-12 04:58:27 +00:00
|
|
|
/*
|
|
|
|
Copyright 2017 The Kubernetes Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package server
|
|
|
|
|
|
|
|
import (
|
|
|
|
"time"
|
|
|
|
|
2019-02-08 04:04:22 +00:00
|
|
|
"github.com/containerd/containerd"
|
|
|
|
eventtypes "github.com/containerd/containerd/api/events"
|
2019-01-12 04:58:27 +00:00
|
|
|
"github.com/containerd/containerd/errdefs"
|
|
|
|
"github.com/docker/docker/pkg/signal"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
"golang.org/x/net/context"
|
|
|
|
"golang.org/x/sys/unix"
|
|
|
|
runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
|
|
|
|
|
2019-01-31 22:42:07 +00:00
|
|
|
"github.com/containerd/cri/pkg/store"
|
2019-01-12 04:58:27 +00:00
|
|
|
containerstore "github.com/containerd/cri/pkg/store/container"
|
|
|
|
)
|
|
|
|
|
|
|
|
// killContainerTimeout bounds how long we wait for a container to exit
// once it has been sent SIGKILL.
// One minute is used because the default CRI operation timeout for
// StopContainer is (2 min + stop timeout); that budget leaves enough room
// for both kill(all=true) and kill(all=false).
const killContainerTimeout = time.Minute
|
|
|
|
|
|
|
|
// StopContainer stops a running container with a grace period (i.e., timeout).
|
|
|
|
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
|
|
|
|
// Get container config from container store.
|
|
|
|
container, err := c.containerStore.Get(r.GetContainerId())
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := c.stopContainer(ctx, container, time.Duration(r.GetTimeout())*time.Second); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return &runtime.StopContainerResponse{}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// stopContainer stops a container based on the container metadata.
|
|
|
|
func (c *criService) stopContainer(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
|
|
|
|
id := container.ID
|
|
|
|
|
|
|
|
// Return without error if container is not running. This makes sure that
|
|
|
|
// stop only takes real action after the container is started.
|
|
|
|
state := container.Status.Get().State()
|
2019-02-08 04:04:22 +00:00
|
|
|
if state != runtime.ContainerState_CONTAINER_RUNNING &&
|
|
|
|
state != runtime.ContainerState_CONTAINER_UNKNOWN {
|
|
|
|
logrus.Infof("Container to stop %q must be in running or unknown state, current state %q",
|
2019-01-12 04:58:27 +00:00
|
|
|
id, criContainerStateToString(state))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
task, err := container.Container.Task(ctx, nil)
|
|
|
|
if err != nil {
|
|
|
|
if !errdefs.IsNotFound(err) {
|
2019-02-08 04:04:22 +00:00
|
|
|
return errors.Wrapf(err, "failed to get task for container %q", id)
|
|
|
|
}
|
|
|
|
// Don't return for unknown state, some cleanup needs to be done.
|
|
|
|
if state != runtime.ContainerState_CONTAINER_UNKNOWN {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
// Task is an interface, explicitly set it to nil just in case.
|
|
|
|
task = nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle unknown state.
|
|
|
|
if state == runtime.ContainerState_CONTAINER_UNKNOWN {
|
|
|
|
status, err := getTaskStatus(ctx, task)
|
|
|
|
if err != nil {
|
|
|
|
return errors.Wrapf(err, "failed to get task status for %q", id)
|
|
|
|
}
|
|
|
|
switch status.Status {
|
|
|
|
case containerd.Running, containerd.Created:
|
|
|
|
// The task is still running, continue stopping the task.
|
|
|
|
case containerd.Stopped:
|
|
|
|
// The task has exited. If the task exited after containerd
|
|
|
|
// started, the event monitor will receive its exit event; if it
|
|
|
|
// exited before containerd started, the event monitor will never
|
|
|
|
// receive its exit event.
|
|
|
|
// However, we can't tell that because the task state was not
|
|
|
|
// successfully loaded during containerd start (container is
|
|
|
|
// in UNKNOWN state).
|
|
|
|
// So always do cleanup here, just in case that we've missed the
|
|
|
|
// exit event.
|
|
|
|
return cleanupUnknownContainer(ctx, id, status, container)
|
|
|
|
default:
|
|
|
|
return errors.Wrapf(err, "unsupported task status %q", status.Status)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We only need to kill the task. The event handler will Delete the
|
|
|
|
// task from containerd after it handles the Exited event.
|
|
|
|
if timeout > 0 {
|
2019-01-31 22:42:07 +00:00
|
|
|
stopSignal := "SIGTERM"
|
|
|
|
if container.StopSignal != "" {
|
|
|
|
stopSignal = container.StopSignal
|
|
|
|
} else {
|
|
|
|
// The image may have been deleted, and the `StopSignal` field is
|
|
|
|
// just introduced to handle that.
|
|
|
|
// However, for containers created before the `StopSignal` field is
|
|
|
|
// introduced, still try to get the stop signal from the image config.
|
|
|
|
// If the image has been deleted, logging an error and using the
|
|
|
|
// default SIGTERM is still better than returning error and leaving
|
|
|
|
// the container unstoppable. (See issue #990)
|
|
|
|
// TODO(random-liu): Remove this logic when containerd 1.2 is deprecated.
|
|
|
|
image, err := c.imageStore.Get(container.ImageRef)
|
2019-01-12 04:58:27 +00:00
|
|
|
if err != nil {
|
2019-01-31 22:42:07 +00:00
|
|
|
if err != store.ErrNotExist {
|
|
|
|
return errors.Wrapf(err, "failed to get image %q", container.ImageRef)
|
|
|
|
}
|
|
|
|
logrus.Warningf("Image %q not found, stop container with signal %q", container.ImageRef, stopSignal)
|
|
|
|
} else {
|
|
|
|
if image.ImageSpec.Config.StopSignal != "" {
|
|
|
|
stopSignal = image.ImageSpec.Config.StopSignal
|
|
|
|
}
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
}
|
2019-01-31 22:42:07 +00:00
|
|
|
sig, err := signal.ParseSignal(stopSignal)
|
|
|
|
if err != nil {
|
|
|
|
return errors.Wrapf(err, "failed to parse stop signal %q", stopSignal)
|
|
|
|
}
|
|
|
|
logrus.Infof("Stop container %q with signal %v", id, sig)
|
|
|
|
if err = task.Kill(ctx, sig); err != nil && !errdefs.IsNotFound(err) {
|
2019-01-12 04:58:27 +00:00
|
|
|
return errors.Wrapf(err, "failed to stop container %q", id)
|
|
|
|
}
|
|
|
|
|
2019-04-07 17:07:55 +00:00
|
|
|
if err = c.waitContainerStop(ctx, container, timeout); err == nil || errors.Cause(err) == ctx.Err() {
|
|
|
|
// Do not SIGKILL container if the context is cancelled.
|
|
|
|
return err
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
logrus.WithError(err).Errorf("An error occurs during waiting for container %q to be stopped", id)
|
|
|
|
}
|
|
|
|
|
|
|
|
logrus.Infof("Kill container %q", id)
|
|
|
|
if err = task.Kill(ctx, unix.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
|
|
|
|
return errors.Wrapf(err, "failed to kill container %q", id)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for a fixed timeout until container stop is observed by event monitor.
|
|
|
|
if err = c.waitContainerStop(ctx, container, killContainerTimeout); err == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id)
|
|
|
|
}
|
|
|
|
|
|
|
|
// waitContainerStop waits for container to be stopped until timeout exceeds or context is cancelled.
|
|
|
|
func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
|
|
|
|
timeoutTimer := time.NewTimer(timeout)
|
|
|
|
defer timeoutTimer.Stop()
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2019-04-07 17:07:55 +00:00
|
|
|
return errors.Wrapf(ctx.Err(), "wait container %q is cancelled", container.ID)
|
2019-01-12 04:58:27 +00:00
|
|
|
case <-timeoutTimer.C:
|
|
|
|
return errors.Errorf("wait container %q stop timeout", container.ID)
|
|
|
|
case <-container.Stopped():
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
2019-02-08 04:04:22 +00:00
|
|
|
|
|
|
|
// cleanupUnknownContainer cleanup stopped container in unknown state.
|
|
|
|
func cleanupUnknownContainer(ctx context.Context, id string, status containerd.Status,
|
|
|
|
cntr containerstore.Container) error {
|
|
|
|
// Reuse handleContainerExit to do the cleanup.
|
|
|
|
// NOTE(random-liu): If the task did exit after containerd started, both
|
|
|
|
// the event monitor and the cleanup function would update the container
|
|
|
|
// state. The final container state will be whatever being updated first.
|
|
|
|
// There is no way to completely avoid this race condition, and for best
|
|
|
|
// effort unknown state container cleanup, this seems acceptable.
|
|
|
|
return handleContainerExit(ctx, &eventtypes.TaskExit{
|
|
|
|
ContainerID: id,
|
|
|
|
ID: id,
|
|
|
|
Pid: 0,
|
|
|
|
ExitStatus: status.ExitStatus,
|
|
|
|
ExitedAt: status.ExitTime,
|
|
|
|
}, cntr)
|
|
|
|
}
|