k3s/vendor/github.com/containerd/cri/pkg/server/sandbox_run.go

737 lines
26 KiB
Go
Raw Normal View History

2019-01-12 04:58:27 +00:00
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
2019-09-27 21:51:53 +00:00
"encoding/json"
2019-01-12 04:58:27 +00:00
"fmt"
2019-09-27 21:51:53 +00:00
"math"
2019-01-12 04:58:27 +00:00
"os"
"strings"
"github.com/containerd/containerd"
containerdio "github.com/containerd/containerd/cio"
"github.com/containerd/containerd/errdefs"
2019-09-27 21:51:53 +00:00
"github.com/containerd/containerd/log"
2019-01-12 04:58:27 +00:00
"github.com/containerd/containerd/oci"
2019-09-27 21:51:53 +00:00
"github.com/containerd/containerd/plugin"
2019-01-12 04:58:27 +00:00
cni "github.com/containerd/go-cni"
"github.com/containerd/typeurl"
2019-04-07 17:07:55 +00:00
"github.com/davecgh/go-spew/spew"
2019-01-12 04:58:27 +00:00
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
2020-02-24 20:16:56 +00:00
"github.com/opencontainers/selinux/go-selinux/label"
2019-01-12 04:58:27 +00:00
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
"golang.org/x/sys/unix"
2019-08-30 18:33:25 +00:00
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
2019-09-27 21:51:53 +00:00
"k8s.io/kubernetes/pkg/util/bandwidth"
2019-01-12 04:58:27 +00:00
"github.com/containerd/cri/pkg/annotations"
criconfig "github.com/containerd/cri/pkg/config"
customopts "github.com/containerd/cri/pkg/containerd/opts"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
"github.com/containerd/cri/pkg/netns"
sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
"github.com/containerd/cri/pkg/util"
)
// init registers sandboxstore.Metadata with typeurl under the sandbox store
// package path, so the type can be marshalled by its type URL.
func init() {
	const (
		metadataTypePrefix = "github.com/containerd/cri/pkg/store/sandbox"
		metadataTypeName   = "Metadata"
	)
	typeurl.Register(&sandboxstore.Metadata{}, metadataTypePrefix, metadataTypeName)
}
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) {
config := r.GetConfig()
2019-09-27 21:51:53 +00:00
log.G(ctx).Debugf("Sandbox config %+v", config)
2019-01-12 04:58:27 +00:00
// Generate unique id and name for the sandbox and reserve the name.
id := util.GenerateID()
2019-09-27 21:51:53 +00:00
metadata := config.GetMetadata()
if metadata == nil {
return nil, errors.New("sandbox config must include metadata")
}
name := makeSandboxName(metadata)
log.G(ctx).Debugf("Generated id %q for sandbox %q", id, name)
2019-01-12 04:58:27 +00:00
// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
// same sandbox.
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
return nil, errors.Wrapf(err, "failed to reserve sandbox name %q", name)
}
defer func() {
// Release the name if the function returns with an error.
if retErr != nil {
c.sandboxNameIndex.ReleaseByName(name)
}
}()
// Create initial internal sandbox object.
sandbox := sandboxstore.NewSandbox(
sandboxstore.Metadata{
ID: id,
Name: name,
Config: config,
RuntimeHandler: r.GetRuntimeHandler(),
},
sandboxstore.Status{
2019-07-12 03:16:45 +00:00
State: sandboxstore.StateUnknown,
2019-01-12 04:58:27 +00:00
},
)
// Ensure sandbox container image snapshot.
2019-09-27 21:51:53 +00:00
image, err := c.ensureImageExists(ctx, c.config.SandboxImage, config)
2019-01-12 04:58:27 +00:00
if err != nil {
return nil, errors.Wrapf(err, "failed to get sandbox image %q", c.config.SandboxImage)
}
2019-08-30 18:33:25 +00:00
containerdImage, err := c.toContainerdImage(ctx, *image)
if err != nil {
return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
}
2019-09-27 21:51:53 +00:00
ociRuntime, err := c.getSandboxRuntime(config, r.GetRuntimeHandler())
if err != nil {
return nil, errors.Wrap(err, "failed to get sandbox runtime")
}
log.G(ctx).Debugf("Use OCI %+v for sandbox %q", ociRuntime, id)
2019-01-12 04:58:27 +00:00
securityContext := config.GetLinux().GetSecurityContext()
//Create Network Namespace if it is not in host network
hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
if !hostNet {
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
sandbox.NetNS, err = netns.NewNetNS()
if err != nil {
return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id)
}
sandbox.NetNSPath = sandbox.NetNS.GetPath()
defer func() {
if retErr != nil {
if err := sandbox.NetNS.Remove(); err != nil {
2019-09-27 21:51:53 +00:00
log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
2019-01-12 04:58:27 +00:00
}
sandbox.NetNSPath = ""
}
}()
// Setup network for sandbox.
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
// rely on the assumption that CRI shim will not be querying the network namespace to check the
// network states such as IP.
// In future runtime implementation should avoid relying on CRI shim implementation details.
// In this case however caching the IP will add a subtle performance enhancement by avoiding
// calls to network namespace of the pod to query the IP of the veth interface on every
// SandboxStatus request.
2019-10-22 17:27:11 +00:00
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
2019-01-12 04:58:27 +00:00
return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id)
}
defer func() {
if retErr != nil {
// Teardown network if an error is returned.
2019-10-22 17:27:11 +00:00
if err := c.teardownPodNetwork(ctx, sandbox); err != nil {
2019-09-27 21:51:53 +00:00
log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
2019-01-12 04:58:27 +00:00
}
}
}()
}
// Create sandbox container.
2019-09-27 21:51:53 +00:00
spec, err := c.generateSandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
2019-01-12 04:58:27 +00:00
if err != nil {
return nil, errors.Wrap(err, "failed to generate sandbox container spec")
}
2019-09-27 21:51:53 +00:00
log.G(ctx).Debugf("Sandbox container %q spec: %#+v", id, spew.NewFormatter(spec))
2020-02-24 20:16:56 +00:00
sandbox.ProcessLabel = spec.Process.SelinuxLabel
defer func() {
if retErr != nil {
_ = label.ReleaseLabel(sandbox.ProcessLabel)
}
}()
if securityContext.GetPrivileged() {
// If privileged don't set selinux label, but we still record the MCS label so that
// the unused label can be freed later.
spec.Process.SelinuxLabel = ""
}
2019-01-12 04:58:27 +00:00
var specOpts []oci.SpecOpts
userstr, err := generateUserString(
"",
securityContext.GetRunAsUser(),
securityContext.GetRunAsGroup(),
)
if err != nil {
return nil, errors.Wrap(err, "failed to generate user string")
}
2019-09-27 21:51:53 +00:00
if userstr == "" {
// Lastly, since no user override was passed via CRI try to set via OCI
// Image
userstr = image.ImageSpec.Config.User
}
2019-01-12 04:58:27 +00:00
if userstr != "" {
specOpts = append(specOpts, oci.WithUser(userstr))
}
seccompSpecOpts, err := generateSeccompSpecOpts(
securityContext.GetSeccompProfilePath(),
securityContext.GetPrivileged(),
c.seccompEnabled)
if err != nil {
return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
}
if seccompSpecOpts != nil {
specOpts = append(specOpts, seccompSpecOpts)
}
sandboxLabels := buildLabels(config.Labels, containerKindSandbox)
runtimeOpts, err := generateRuntimeOptions(ociRuntime, c.config)
if err != nil {
return nil, errors.Wrap(err, "failed to generate runtime options")
}
opts := []containerd.NewContainerOpts{
containerd.WithSnapshotter(c.config.ContainerdConfig.Snapshotter),
2019-08-30 18:33:25 +00:00
customopts.WithNewSnapshot(id, containerdImage),
2019-01-12 04:58:27 +00:00
containerd.WithSpec(spec, specOpts...),
containerd.WithContainerLabels(sandboxLabels),
containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata),
containerd.WithRuntime(ociRuntime.Type, runtimeOpts)}
container, err := c.client.NewContainer(ctx, id, opts...)
if err != nil {
return nil, errors.Wrap(err, "failed to create containerd container")
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
2019-09-27 21:51:53 +00:00
log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
2019-01-12 04:58:27 +00:00
}
}
}()
// Create sandbox container root directories.
sandboxRootDir := c.getSandboxRootDir(id)
if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil {
return nil, errors.Wrapf(err, "failed to create sandbox root directory %q",
sandboxRootDir)
}
defer func() {
if retErr != nil {
// Cleanup the sandbox root directory.
if err := c.os.RemoveAll(sandboxRootDir); err != nil {
2019-09-27 21:51:53 +00:00
log.G(ctx).WithError(err).Errorf("Failed to remove sandbox root directory %q",
2019-01-12 04:58:27 +00:00
sandboxRootDir)
}
}
}()
volatileSandboxRootDir := c.getVolatileSandboxRootDir(id)
if err := c.os.MkdirAll(volatileSandboxRootDir, 0755); err != nil {
return nil, errors.Wrapf(err, "failed to create volatile sandbox root directory %q",
volatileSandboxRootDir)
}
defer func() {
if retErr != nil {
// Cleanup the volatile sandbox root directory.
if err := c.os.RemoveAll(volatileSandboxRootDir); err != nil {
2019-09-27 21:51:53 +00:00
log.G(ctx).WithError(err).Errorf("Failed to remove volatile sandbox root directory %q",
2019-01-12 04:58:27 +00:00
volatileSandboxRootDir)
}
}
}()
2019-04-07 17:07:55 +00:00
// Setup sandbox /dev/shm, /etc/hosts, /etc/resolv.conf and /etc/hostname.
2019-01-12 04:58:27 +00:00
if err = c.setupSandboxFiles(id, config); err != nil {
return nil, errors.Wrapf(err, "failed to setup sandbox files")
}
defer func() {
if retErr != nil {
if err = c.unmountSandboxFiles(id, config); err != nil {
2019-09-27 21:51:53 +00:00
log.G(ctx).WithError(err).Errorf("Failed to unmount sandbox files in %q",
2019-01-12 04:58:27 +00:00
sandboxRootDir)
}
}
}()
// Update sandbox created timestamp.
info, err := container.Info(ctx)
if err != nil {
return nil, errors.Wrap(err, "failed to get sandbox container info")
}
2019-07-12 03:16:45 +00:00
// Create sandbox task in containerd.
2019-09-27 21:51:53 +00:00
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
2019-07-12 03:16:45 +00:00
id, name)
var taskOpts []containerd.NewTaskOpts
// TODO(random-liu): Remove this after shim v1 is deprecated.
2019-09-27 21:51:53 +00:00
if c.config.NoPivot && ociRuntime.Type == plugin.RuntimeRuncV1 {
2019-07-12 03:16:45 +00:00
taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
}
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
return nil, errors.Wrap(err, "failed to create containerd task")
2019-01-12 04:58:27 +00:00
}
defer func() {
if retErr != nil {
2019-07-12 03:16:45 +00:00
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Cleanup the sandbox container if an error is returned.
if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
2019-09-27 21:51:53 +00:00
log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
2019-07-12 03:16:45 +00:00
}
2019-01-12 04:58:27 +00:00
}
}()
2019-07-12 03:16:45 +00:00
// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return nil, errors.Wrap(err, "failed to wait for sandbox container task")
}
2019-01-12 04:58:27 +00:00
2019-07-12 03:16:45 +00:00
if err := task.Start(ctx); err != nil {
return nil, errors.Wrapf(err, "failed to start sandbox container task %q", id)
}
if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
2019-01-12 04:58:27 +00:00
// Set the pod sandbox as ready after successfully start sandbox container.
status.Pid = task.Pid()
status.State = sandboxstore.StateReady
2019-07-12 03:16:45 +00:00
status.CreatedAt = info.CreatedAt
2019-01-12 04:58:27 +00:00
return status, nil
}); err != nil {
2019-07-12 03:16:45 +00:00
return nil, errors.Wrap(err, "failed to update sandbox status")
}
// Add sandbox into sandbox store in INIT state.
sandbox.Container = container
if err := c.sandboxStore.Add(sandbox); err != nil {
return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox)
2019-01-12 04:58:27 +00:00
}
2019-07-12 03:16:45 +00:00
// start the monitor after adding sandbox into the store, this ensures
// that sandbox is in the store, when event monitor receives the TaskExit event.
//
// TaskOOM from containerd may come before sandbox is added to store,
// but we don't care about sandbox TaskOOM right now, so it is fine.
c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh)
2019-01-12 04:58:27 +00:00
return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
}
func (c *criService) generateSandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
2020-02-24 20:16:56 +00:00
imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (retSpec *runtimespec.Spec, retErr error) {
2019-01-12 04:58:27 +00:00
// Creates a spec Generator with the default spec.
// TODO(random-liu): [P1] Compare the default settings with docker and containerd default.
2019-09-27 21:51:53 +00:00
specOpts := []oci.SpecOpts{
customopts.WithoutRunMount,
customopts.WithoutDefaultSecuritySettings,
customopts.WithRelativeRoot(relativeRootfsPath),
oci.WithEnv(imageConfig.Env),
oci.WithRootFSReadonly(),
oci.WithHostname(config.GetHostname()),
2019-01-12 04:58:27 +00:00
}
if imageConfig.WorkingDir != "" {
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
2019-01-12 04:58:27 +00:00
}
if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 {
// Pause image must have entrypoint or cmd.
return nil, errors.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig)
}
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...))
2019-01-12 04:58:27 +00:00
// TODO(random-liu): [P2] Consider whether to add labels and annotations to the container.
// Set cgroups parent.
2019-04-09 17:34:40 +00:00
if c.config.DisableCgroup {
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, customopts.WithDisabledCgroups)
2019-04-09 17:34:40 +00:00
} else {
if config.GetLinux().GetCgroupParent() != "" {
2019-09-27 21:51:53 +00:00
cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id)
specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
2019-04-09 17:34:40 +00:00
}
2019-01-12 04:58:27 +00:00
}
2019-09-27 21:51:53 +00:00
2019-01-12 04:58:27 +00:00
// When cgroup parent is not set, containerd-shim will create container in a child cgroup
// of the cgroup itself is in.
// TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified.
// Set namespace options.
2019-09-27 21:51:53 +00:00
var (
securityContext = config.GetLinux().GetSecurityContext()
nsOptions = securityContext.GetNamespaceOptions()
)
2019-01-12 04:58:27 +00:00
if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE {
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace))
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace))
2019-01-12 04:58:27 +00:00
} else {
//TODO(Abhi): May be move this to containerd spec opts (WithLinuxSpaceOption)
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, oci.WithLinuxNamespace(
runtimespec.LinuxNamespace{
Type: runtimespec.NetworkNamespace,
Path: nsPath,
}))
2019-01-12 04:58:27 +00:00
}
if nsOptions.GetPid() == runtime.NamespaceMode_NODE {
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace))
2019-01-12 04:58:27 +00:00
}
if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
2019-01-12 04:58:27 +00:00
}
// It's fine to generate the spec before the sandbox /dev/shm
// is actually created.
sandboxDevShm := c.getSandboxDevShm(id)
if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
sandboxDevShm = devShm
}
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{
{
Source: sandboxDevShm,
Destination: devShm,
Type: "bind",
Options: []string{"rbind", "ro"},
},
}))
2019-01-12 04:58:27 +00:00
2020-02-24 20:16:56 +00:00
processLabel, mountLabel, err := initLabelsFromOpt(securityContext.GetSelinuxOptions())
2019-01-12 04:58:27 +00:00
if err != nil {
return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
}
2020-02-24 20:16:56 +00:00
defer func() {
if retErr != nil && processLabel != "" {
_ = label.ReleaseLabel(processLabel)
}
}()
2019-01-12 04:58:27 +00:00
supplementalGroups := securityContext.GetSupplementalGroups()
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts,
customopts.WithSelinuxLabels(processLabel, mountLabel),
customopts.WithSupplementalGroups(supplementalGroups),
)
2019-01-12 04:58:27 +00:00
// Add sysctls
sysctls := config.GetLinux().GetSysctls()
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, customopts.WithSysctls(sysctls))
2019-01-12 04:58:27 +00:00
// Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile
2019-04-09 17:34:40 +00:00
if !c.config.DisableCgroup {
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, customopts.WithDefaultSandboxShares)
2019-04-09 17:34:40 +00:00
}
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj))
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
runtimePodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
2019-04-09 17:34:40 +00:00
}
2019-01-12 04:58:27 +00:00
2019-09-27 21:51:53 +00:00
specOpts = append(specOpts,
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox),
customopts.WithAnnotation(annotations.SandboxID, id),
customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()),
)
2019-01-12 04:58:27 +00:00
2019-09-27 21:51:53 +00:00
return runtimeSpec(id, specOpts...)
2019-01-12 04:58:27 +00:00
}
2019-04-07 17:07:55 +00:00
// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts,
// /etc/resolv.conf and /etc/hostname.
2019-01-12 04:58:27 +00:00
func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
2019-04-07 17:07:55 +00:00
sandboxEtcHostname := c.getSandboxHostname(id)
hostname := config.GetHostname()
if hostname == "" {
var err error
hostname, err = c.os.Hostname()
if err != nil {
return errors.Wrap(err, "failed to get hostname")
}
}
if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil {
return errors.Wrapf(err, "failed to write hostname to %q", sandboxEtcHostname)
}
2019-01-12 04:58:27 +00:00
// TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet.
sandboxEtcHosts := c.getSandboxHosts(id)
if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil {
return errors.Wrapf(err, "failed to generate sandbox hosts file %q", sandboxEtcHosts)
}
// Set DNS options. Maintain a resolv.conf for the sandbox.
var err error
resolvContent := ""
if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
resolvContent, err = parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options)
if err != nil {
return errors.Wrapf(err, "failed to parse sandbox DNSConfig %+v", dnsConfig)
}
}
resolvPath := c.getResolvPath(id)
if resolvContent == "" {
// copy host's resolv.conf to resolvPath
err = c.os.CopyFile(resolvConfPath, resolvPath, 0644)
if err != nil {
return errors.Wrapf(err, "failed to copy host's resolv.conf to %q", resolvPath)
}
} else {
err = c.os.WriteFile(resolvPath, []byte(resolvContent), 0644)
if err != nil {
return errors.Wrapf(err, "failed to write resolv content to %q", resolvPath)
}
}
// Setup sandbox /dev/shm.
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
if _, err := c.os.Stat(devShm); err != nil {
return errors.Wrapf(err, "host %q is not available for host ipc", devShm)
}
} else {
sandboxDevShm := c.getSandboxDevShm(id)
if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil {
return errors.Wrap(err, "failed to create sandbox shm")
}
shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize)
if err := c.os.Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil {
return errors.Wrap(err, "failed to mount sandbox shm")
}
}
return nil
}
// parseDNSOptions parse DNS options into resolv.conf format content,
// if none option is specified, will return empty with no error.
func parseDNSOptions(servers, searches, options []string) (string, error) {
resolvContent := ""
if len(searches) > maxDNSSearches {
2019-09-27 21:51:53 +00:00
return "", errors.Errorf("DNSOption.Searches has more than %d domains", maxDNSSearches)
2019-01-12 04:58:27 +00:00
}
if len(searches) > 0 {
resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " "))
}
if len(servers) > 0 {
resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver "))
}
if len(options) > 0 {
resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " "))
}
return resolvContent, nil
}
// unmountSandboxFiles unmounts the sandbox /dev/shm; the files themselves are
// removed together with the sandbox root directory. Nothing is unmounted when
// the sandbox uses host IPC. An unmount error for which os.IsNotExist is true
// is *NOT* reported as a failure.
func (c *criService) unmountSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
	ipcMode := config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc()
	if ipcMode == runtime.NamespaceMode_NODE {
		return nil
	}

	shmPath, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/")
	if err != nil {
		return errors.Wrap(err, "failed to follow symlink")
	}
	err = c.os.Unmount(shmPath)
	if err == nil || os.IsNotExist(err) {
		return nil
	}
	return errors.Wrapf(err, "failed to unmount %q", shmPath)
}
2019-10-22 17:27:11 +00:00
// setupPodNetwork setups up the network for a pod
func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.Sandbox) error {
var (
id = sandbox.ID
config = sandbox.Config
path = sandbox.NetNSPath
)
2019-01-12 04:58:27 +00:00
if c.netPlugin == nil {
2019-10-22 17:27:11 +00:00
return errors.New("cni config not initialized")
2019-01-12 04:58:27 +00:00
}
labels := getPodCNILabels(id, config)
2019-09-27 21:51:53 +00:00
// Will return an error if the bandwidth limitation has the wrong unit
// or an unreasonable valure see validateBandwidthIsReasonable()
bandWidth, err := toCNIBandWidth(config.Annotations)
if err != nil {
2019-10-22 17:27:11 +00:00
return errors.Wrap(err, "failed to get bandwidth info from annotations")
2019-09-27 21:51:53 +00:00
}
result, err := c.netPlugin.Setup(ctx, id,
2019-01-12 04:58:27 +00:00
path,
cni.WithLabels(labels),
2019-09-27 21:51:53 +00:00
cni.WithCapabilityPortMap(toCNIPortMappings(config.GetPortMappings())),
cni.WithCapabilityBandWidth(*bandWidth),
)
2019-01-12 04:58:27 +00:00
if err != nil {
2019-10-22 17:27:11 +00:00
return err
2019-01-12 04:58:27 +00:00
}
2019-09-27 21:51:53 +00:00
logDebugCNIResult(ctx, id, result)
2019-01-12 04:58:27 +00:00
// Check if the default interface has IP config
if configs, ok := result.Interfaces[defaultIfName]; ok && len(configs.IPConfigs) > 0 {
2019-10-22 17:27:11 +00:00
sandbox.IP, sandbox.AdditionalIPs = selectPodIPs(configs.IPConfigs)
sandbox.CNIResult = result
return nil
2019-01-12 04:58:27 +00:00
}
// If it comes here then the result was invalid so destroy the pod network and return error
2019-10-22 17:27:11 +00:00
if err := c.teardownPodNetwork(ctx, *sandbox); err != nil {
2019-09-27 21:51:53 +00:00
log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
}
2019-10-22 17:27:11 +00:00
return errors.Errorf("failed to find network info for sandbox %q", id)
2019-09-27 21:51:53 +00:00
}
// toCNIBandWidth converts CRI annotations to CNI bandwidth.
func toCNIBandWidth(annotations map[string]string) (*cni.BandWidth, error) {
ingress, egress, err := bandwidth.ExtractPodBandwidthResources(annotations)
if err != nil {
return nil, errors.Errorf("reading pod bandwidth annotations: %v", err)
2019-01-12 04:58:27 +00:00
}
2019-09-27 21:51:53 +00:00
bandWidth := &cni.BandWidth{}
if ingress != nil {
bandWidth.IngressRate = uint64(ingress.Value())
bandWidth.IngressBurst = math.MaxUint32
}
if egress != nil {
bandWidth.EgressRate = uint64(egress.Value())
bandWidth.EgressBurst = math.MaxUint32
}
return bandWidth, nil
2019-01-12 04:58:27 +00:00
}
// toCNIPortMappings converts CRI port mappings to CNI port mappings. Entries
// without a positive host port, and entries whose protocol is neither TCP nor
// UDP, are dropped. The protocol name is lower-cased in the result.
func toCNIPortMappings(criPortMappings []*runtime.PortMapping) []cni.PortMapping {
	var converted []cni.PortMapping
	for _, pm := range criPortMappings {
		hasHostPort := pm.HostPort > 0
		supportedProtocol := pm.Protocol == runtime.Protocol_TCP || pm.Protocol == runtime.Protocol_UDP
		if !hasHostPort || !supportedProtocol {
			continue
		}
		entry := cni.PortMapping{
			HostPort:      pm.HostPort,
			ContainerPort: pm.ContainerPort,
			Protocol:      strings.ToLower(pm.Protocol.String()),
			HostIP:        pm.HostIp,
		}
		converted = append(converted, entry)
	}
	return converted
}
2019-10-22 17:27:11 +00:00
// selectPodIPs select an ip from the ip list. It prefers ipv4 more than ipv6
// and returns the additional ips
// TODO(random-liu): Revisit the ip order in the ipv6 beta stage. (cri#1278)
func selectPodIPs(ipConfigs []*cni.IPConfig) (string, []string) {
var (
additionalIPs []string
ip string
)
2019-01-12 04:58:27 +00:00
for _, c := range ipConfigs {
2019-10-22 17:27:11 +00:00
if c.IP.To4() != nil && ip == "" {
ip = c.IP.String()
} else {
additionalIPs = append(additionalIPs, c.IP.String())
2019-01-12 04:58:27 +00:00
}
}
2019-10-22 17:27:11 +00:00
if ip != "" {
return ip, additionalIPs
}
if len(ipConfigs) == 1 {
return additionalIPs[0], nil
}
return additionalIPs[0], additionalIPs[1:]
2019-01-12 04:58:27 +00:00
}
// untrustedWorkload reports whether the sandbox config carries the untrusted
// workload annotation with the exact value "true".
func untrustedWorkload(config *runtime.PodSandboxConfig) bool {
	value, found := config.GetAnnotations()[annotations.UntrustedWorkload]
	return found && value == "true"
}
// hostAccessingSandbox reports whether the sandbox configuration requires
// additional host access, i.e. whether its network, pid or ipc namespace mode
// is set to the node's namespace.
func hostAccessingSandbox(config *runtime.PodSandboxConfig) bool {
	nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()
	modes := []runtime.NamespaceMode{
		nsOpts.GetNetwork(),
		nsOpts.GetPid(),
		nsOpts.GetIpc(),
	}
	for _, mode := range modes {
		if mode == runtime.NamespaceMode_NODE {
			return true
		}
	}
	return false
}
// getSandboxRuntime returns the runtime configuration for sandbox.
// If the sandbox contains untrusted workload, runtime for untrusted workload will be returned,
// or else default runtime will be returned.
func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtimeHandler string) (criconfig.Runtime, error) {
if untrustedWorkload(config) {
// If the untrusted annotation is provided, runtimeHandler MUST be empty.
if runtimeHandler != "" && runtimeHandler != criconfig.RuntimeUntrusted {
return criconfig.Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed")
}
// If the untrusted workload is requesting access to the host/node, this request will fail.
//
// Note: If the workload is marked untrusted but requests privileged, this can be granted, as the
// runtime may support this. For example, in a virtual-machine isolated runtime, privileged
// is a supported option, granting the workload to access the entire guest VM instead of host.
if hostAccessingSandbox(config) {
return criconfig.Runtime{}, errors.New("untrusted workload with host access is not allowed")
}
runtimeHandler = criconfig.RuntimeUntrusted
}
if runtimeHandler == "" {
2019-09-27 21:51:53 +00:00
runtimeHandler = c.config.ContainerdConfig.DefaultRuntimeName
2019-01-12 04:58:27 +00:00
}
handler, ok := c.config.ContainerdConfig.Runtimes[runtimeHandler]
if !ok {
return criconfig.Runtime{}, errors.Errorf("no runtime for %q is configured", runtimeHandler)
}
return handler, nil
}
2019-09-27 21:51:53 +00:00
// logDebugCNIResult logs the CNI result for a sandbox as JSON at debug level.
// It does nothing when the global logrus level is below debug, so the result
// is only marshalled when it would actually be logged. A marshalling failure
// is logged as an error.
func logDebugCNIResult(ctx context.Context, sandboxID string, result *cni.CNIResult) {
	debugEnabled := logrus.GetLevel() >= logrus.DebugLevel
	if !debugEnabled {
		return
	}

	encoded, err := json.Marshal(result)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("Failed to marshal CNI result for sandbox %q: %v", sandboxID, err)
		return
	}
	log.G(ctx).Debugf("cni result for sandbox %q: %s", sandboxID, string(encoded))
}