2019-01-12 04:58:27 +00:00
|
|
|
/*
|
|
|
|
Copyright 2017 The Kubernetes Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package server
|
|
|
|
|
|
|
|
import (
|
|
|
|
"path/filepath"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/containerd/containerd"
|
|
|
|
"github.com/containerd/containerd/containers"
|
|
|
|
"github.com/containerd/containerd/contrib/apparmor"
|
|
|
|
"github.com/containerd/containerd/contrib/seccomp"
|
2019-09-27 21:51:53 +00:00
|
|
|
"github.com/containerd/containerd/log"
|
2019-01-12 04:58:27 +00:00
|
|
|
"github.com/containerd/containerd/oci"
|
2019-09-27 21:51:53 +00:00
|
|
|
"github.com/containerd/cri/pkg/annotations"
|
|
|
|
"github.com/containerd/cri/pkg/config"
|
|
|
|
customopts "github.com/containerd/cri/pkg/containerd/opts"
|
|
|
|
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
|
|
|
|
cio "github.com/containerd/cri/pkg/server/io"
|
|
|
|
containerstore "github.com/containerd/cri/pkg/store/container"
|
|
|
|
"github.com/containerd/cri/pkg/util"
|
2019-01-12 04:58:27 +00:00
|
|
|
"github.com/containerd/typeurl"
|
|
|
|
"github.com/davecgh/go-spew/spew"
|
|
|
|
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
|
|
|
|
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
|
2020-02-24 20:16:56 +00:00
|
|
|
"github.com/opencontainers/selinux/go-selinux/label"
|
2019-01-12 04:58:27 +00:00
|
|
|
"github.com/pkg/errors"
|
|
|
|
"golang.org/x/net/context"
|
2019-08-30 18:33:25 +00:00
|
|
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
2019-01-12 04:58:27 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName.
	// generateSeccompSpecOpts and generateApparmorSpecOpts reject any custom profile
	// that lacks this prefix and trim it before handing the name to containerd.
	profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
	// runtimeDefault indicates that we should use or create a runtime default profile.
	runtimeDefault = "runtime/default"
	// dockerDefault indicates that we should use or create a docker default profile.
	dockerDefault = "docker/default"
	// appArmorDefaultProfileName is the name to use when creating a default apparmor profile.
	appArmorDefaultProfileName = "cri-containerd.apparmor.d"
	// unconfinedProfile is a string indicating one should run a pod/container without a security profile.
	unconfinedProfile = "unconfined"
	// seccompDefaultProfile is the default seccomp profile. Both runtimeDefault and
	// dockerDefault are mapped to it by generateSeccompSpecOpts.
	seccompDefaultProfile = dockerDefault
)
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
typeurl.Register(&containerstore.Metadata{},
|
|
|
|
"github.com/containerd/cri/pkg/store/container", "Metadata")
|
|
|
|
}
|
|
|
|
|
|
|
|
// CreateContainer creates a new container in the given PodSandbox.
|
|
|
|
func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (_ *runtime.CreateContainerResponse, retErr error) {
|
|
|
|
config := r.GetConfig()
|
2019-09-27 21:51:53 +00:00
|
|
|
log.G(ctx).Debugf("Container config %+v", config)
|
2019-01-12 04:58:27 +00:00
|
|
|
sandboxConfig := r.GetSandboxConfig()
|
|
|
|
sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to find sandbox id %q", r.GetPodSandboxId())
|
|
|
|
}
|
|
|
|
sandboxID := sandbox.ID
|
|
|
|
s, err := sandbox.Container.Task(ctx, nil)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to get sandbox container task")
|
|
|
|
}
|
|
|
|
sandboxPid := s.Pid()
|
|
|
|
|
|
|
|
// Generate unique id and name for the container and reserve the name.
|
|
|
|
// Reserve the container name to avoid concurrent `CreateContainer` request creating
|
|
|
|
// the same container.
|
|
|
|
id := util.GenerateID()
|
2019-09-27 21:51:53 +00:00
|
|
|
metadata := config.GetMetadata()
|
|
|
|
if metadata == nil {
|
|
|
|
return nil, errors.New("container config must include metadata")
|
|
|
|
}
|
|
|
|
name := makeContainerName(metadata, sandboxConfig.GetMetadata())
|
|
|
|
log.G(ctx).Debugf("Generated id %q for container %q", id, name)
|
2019-01-12 04:58:27 +00:00
|
|
|
if err = c.containerNameIndex.Reserve(name, id); err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to reserve container name %q", name)
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
// Release the name if the function returns with an error.
|
|
|
|
if retErr != nil {
|
|
|
|
c.containerNameIndex.ReleaseByName(name)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
// Create initial internal container metadata.
|
|
|
|
meta := containerstore.Metadata{
|
|
|
|
ID: id,
|
|
|
|
Name: name,
|
|
|
|
SandboxID: sandboxID,
|
|
|
|
Config: config,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Prepare container image snapshot. For container, the image should have
|
|
|
|
// been pulled before creating the container, so do not ensure the image.
|
|
|
|
image, err := c.localResolve(config.GetImage().GetImage())
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to resolve image %q", config.GetImage().GetImage())
|
|
|
|
}
|
2019-08-30 18:33:25 +00:00
|
|
|
containerdImage, err := c.toContainerdImage(ctx, image)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
|
|
|
|
}
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
// Run container using the same runtime with sandbox.
|
|
|
|
sandboxInfo, err := sandbox.Container.Info(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to get sandbox %q info", sandboxID)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create container root directory.
|
|
|
|
containerRootDir := c.getContainerRootDir(id)
|
|
|
|
if err = c.os.MkdirAll(containerRootDir, 0755); err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to create container root directory %q",
|
|
|
|
containerRootDir)
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if retErr != nil {
|
|
|
|
// Cleanup the container root directory.
|
|
|
|
if err = c.os.RemoveAll(containerRootDir); err != nil {
|
2019-09-27 21:51:53 +00:00
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to remove container root directory %q",
|
2019-01-12 04:58:27 +00:00
|
|
|
containerRootDir)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
volatileContainerRootDir := c.getVolatileContainerRootDir(id)
|
|
|
|
if err = c.os.MkdirAll(volatileContainerRootDir, 0755); err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to create volatile container root directory %q",
|
|
|
|
volatileContainerRootDir)
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if retErr != nil {
|
|
|
|
// Cleanup the volatile container root directory.
|
|
|
|
if err = c.os.RemoveAll(volatileContainerRootDir); err != nil {
|
2019-09-27 21:51:53 +00:00
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to remove volatile container root directory %q",
|
2019-01-12 04:58:27 +00:00
|
|
|
volatileContainerRootDir)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
// Create container volumes mounts.
|
|
|
|
volumeMounts := c.generateVolumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config)
|
|
|
|
|
|
|
|
// Generate container runtime spec.
|
|
|
|
mounts := c.generateContainerMounts(sandboxID, config)
|
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
ociRuntime, err := c.getSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to get sandbox runtime")
|
|
|
|
}
|
|
|
|
log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id)
|
|
|
|
|
|
|
|
spec, err := c.generateContainerSpec(id, sandboxID, sandboxPid, config, sandboxConfig,
|
|
|
|
&image.ImageSpec.Config, append(mounts, volumeMounts...), ociRuntime)
|
2019-01-12 04:58:27 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to generate container %q spec", id)
|
|
|
|
}
|
|
|
|
|
2020-02-24 20:16:56 +00:00
|
|
|
meta.ProcessLabel = spec.Process.SelinuxLabel
|
|
|
|
if config.GetLinux().GetSecurityContext().GetPrivileged() {
|
|
|
|
// If privileged don't set the SELinux label but still record it on the container so
|
|
|
|
// the unused MCS label can be release later
|
|
|
|
spec.Process.SelinuxLabel = ""
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if retErr != nil {
|
|
|
|
_ = label.ReleaseLabel(spec.Process.SelinuxLabel)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
// Set snapshotter before any other options.
|
|
|
|
opts := []containerd.NewContainerOpts{
|
|
|
|
containerd.WithSnapshotter(c.config.ContainerdConfig.Snapshotter),
|
|
|
|
// Prepare container rootfs. This is always writeable even if
|
|
|
|
// the container wants a readonly rootfs since we want to give
|
|
|
|
// the runtime (runc) a chance to modify (e.g. to create mount
|
|
|
|
// points corresponding to spec.Mounts) before making the
|
|
|
|
// rootfs readonly (requested by spec.Root.Readonly).
|
2019-08-30 18:33:25 +00:00
|
|
|
customopts.WithNewSnapshot(id, containerdImage),
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(volumeMounts) > 0 {
|
|
|
|
mountMap := make(map[string]string)
|
|
|
|
for _, v := range volumeMounts {
|
2019-04-07 17:07:55 +00:00
|
|
|
mountMap[filepath.Clean(v.HostPath)] = v.ContainerPath
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
opts = append(opts, customopts.WithVolumes(mountMap))
|
|
|
|
}
|
|
|
|
meta.ImageRef = image.ID
|
2019-01-31 22:42:07 +00:00
|
|
|
meta.StopSignal = image.ImageSpec.Config.StopSignal
|
2019-01-12 04:58:27 +00:00
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
// Validate log paths and compose full container log path.
|
|
|
|
if sandboxConfig.GetLogDirectory() != "" && config.GetLogPath() != "" {
|
2019-04-07 17:07:55 +00:00
|
|
|
meta.LogPath = filepath.Join(sandboxConfig.GetLogDirectory(), config.GetLogPath())
|
2019-09-27 21:51:53 +00:00
|
|
|
log.G(ctx).Debugf("Composed container full log path %q using sandbox log dir %q and container log path %q",
|
|
|
|
meta.LogPath, sandboxConfig.GetLogDirectory(), config.GetLogPath())
|
|
|
|
} else {
|
|
|
|
log.G(ctx).Infof("Logging will be disabled due to empty log paths for sandbox (%q) or container (%q)",
|
|
|
|
sandboxConfig.GetLogDirectory(), config.GetLogPath())
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
containerIO, err := cio.NewContainerIO(id,
|
|
|
|
cio.WithNewFIFOs(volatileContainerRootDir, config.GetTty(), config.GetStdin()))
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to create container io")
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if retErr != nil {
|
|
|
|
if err := containerIO.Close(); err != nil {
|
2019-09-27 21:51:53 +00:00
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to close container io %q", id)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
var specOpts []oci.SpecOpts
|
|
|
|
securityContext := config.GetLinux().GetSecurityContext()
|
|
|
|
// Set container username. This could only be done by containerd, because it needs
|
|
|
|
// access to the container rootfs. Pass user name to containerd, and let it overwrite
|
|
|
|
// the spec for us.
|
|
|
|
userstr, err := generateUserString(
|
|
|
|
securityContext.GetRunAsUsername(),
|
|
|
|
securityContext.GetRunAsUser(),
|
2019-09-27 21:51:53 +00:00
|
|
|
securityContext.GetRunAsGroup())
|
|
|
|
|
2019-01-12 04:58:27 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to generate user string")
|
|
|
|
}
|
2019-09-27 21:51:53 +00:00
|
|
|
if userstr == "" {
|
|
|
|
// Lastly, since no user override was passed via CRI try to set via OCI
|
|
|
|
// Image
|
|
|
|
userstr = image.ImageSpec.Config.User
|
|
|
|
}
|
2019-01-12 04:58:27 +00:00
|
|
|
if userstr != "" {
|
|
|
|
specOpts = append(specOpts, oci.WithUser(userstr))
|
|
|
|
}
|
|
|
|
|
|
|
|
if securityContext.GetRunAsUsername() != "" {
|
|
|
|
userstr = securityContext.GetRunAsUsername()
|
|
|
|
} else {
|
|
|
|
// Even if RunAsUser is not set, we still call `GetValue` to get uid 0.
|
|
|
|
// Because it is still useful to get additional gids for uid 0.
|
|
|
|
userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10)
|
|
|
|
}
|
|
|
|
specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr))
|
|
|
|
|
|
|
|
apparmorSpecOpts, err := generateApparmorSpecOpts(
|
|
|
|
securityContext.GetApparmorProfile(),
|
|
|
|
securityContext.GetPrivileged(),
|
|
|
|
c.apparmorEnabled)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to generate apparmor spec opts")
|
|
|
|
}
|
|
|
|
if apparmorSpecOpts != nil {
|
|
|
|
specOpts = append(specOpts, apparmorSpecOpts)
|
|
|
|
}
|
|
|
|
|
|
|
|
seccompSpecOpts, err := generateSeccompSpecOpts(
|
|
|
|
securityContext.GetSeccompProfilePath(),
|
|
|
|
securityContext.GetPrivileged(),
|
|
|
|
c.seccompEnabled)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
|
|
|
|
}
|
|
|
|
if seccompSpecOpts != nil {
|
|
|
|
specOpts = append(specOpts, seccompSpecOpts)
|
|
|
|
}
|
|
|
|
containerLabels := buildLabels(config.Labels, containerKindContainer)
|
|
|
|
|
|
|
|
runtimeOptions, err := getRuntimeOptions(sandboxInfo)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to get runtime options")
|
|
|
|
}
|
|
|
|
opts = append(opts,
|
|
|
|
containerd.WithSpec(spec, specOpts...),
|
|
|
|
containerd.WithRuntime(sandboxInfo.Runtime.Name, runtimeOptions),
|
|
|
|
containerd.WithContainerLabels(containerLabels),
|
|
|
|
containerd.WithContainerExtension(containerMetadataExtension, &meta))
|
|
|
|
var cntr containerd.Container
|
|
|
|
if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to create containerd container")
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if retErr != nil {
|
|
|
|
deferCtx, deferCancel := ctrdutil.DeferContext()
|
|
|
|
defer deferCancel()
|
|
|
|
if err := cntr.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
|
2019-09-27 21:51:53 +00:00
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
status := containerstore.Status{CreatedAt: time.Now().UnixNano()}
|
|
|
|
container, err := containerstore.NewContainer(meta,
|
|
|
|
containerstore.WithStatus(status, containerRootDir),
|
|
|
|
containerstore.WithContainer(cntr),
|
|
|
|
containerstore.WithContainerIO(containerIO),
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to create internal container object for %q", id)
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if retErr != nil {
|
|
|
|
// Cleanup container checkpoint on error.
|
|
|
|
if err := container.Delete(); err != nil {
|
2019-09-27 21:51:53 +00:00
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to cleanup container checkpoint for %q", id)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
// Add container into container store.
|
|
|
|
if err := c.containerStore.Add(container); err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to add container %q into store", id)
|
|
|
|
}
|
|
|
|
|
|
|
|
return &runtime.CreateContainerResponse{ContainerId: id}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *criService) generateContainerSpec(id string, sandboxID string, sandboxPid uint32, config *runtime.ContainerConfig,
|
2019-09-27 21:51:53 +00:00
|
|
|
sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig, extraMounts []*runtime.Mount,
|
2020-02-24 20:16:56 +00:00
|
|
|
ociRuntime config.Runtime) (retSpec *runtimespec.Spec, retErr error) {
|
2019-01-12 04:58:27 +00:00
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts := []oci.SpecOpts{
|
|
|
|
customopts.WithoutRunMount,
|
|
|
|
customopts.WithoutDefaultSecuritySettings,
|
|
|
|
customopts.WithRelativeRoot(relativeRootfsPath),
|
|
|
|
customopts.WithProcessArgs(config, imageConfig),
|
2019-10-22 17:27:11 +00:00
|
|
|
oci.WithDefaultPathEnv,
|
2019-09-27 21:51:53 +00:00
|
|
|
// this will be set based on the security context below
|
|
|
|
oci.WithNewPrivileges,
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
if config.GetWorkingDir() != "" {
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
|
2019-01-12 04:58:27 +00:00
|
|
|
} else if imageConfig.WorkingDir != "" {
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if config.GetTty() {
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, oci.WithTTY)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Add HOSTNAME env.
|
2019-09-27 21:51:53 +00:00
|
|
|
var (
|
|
|
|
err error
|
|
|
|
hostname = sandboxConfig.GetHostname()
|
|
|
|
)
|
|
|
|
if hostname == "" {
|
|
|
|
if hostname, err = c.os.Hostname(); err != nil {
|
2019-01-12 04:58:27 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname}))
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
// Apply envs from image config first, so that envs from container config
|
|
|
|
// can override them.
|
2019-09-27 21:51:53 +00:00
|
|
|
env := imageConfig.Env
|
2019-01-12 04:58:27 +00:00
|
|
|
for _, e := range config.GetEnvs() {
|
2019-09-27 21:51:53 +00:00
|
|
|
env = append(env, e.GetKey()+"="+e.GetValue())
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, oci.WithEnv(env))
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
securityContext := config.GetLinux().GetSecurityContext()
|
2020-02-24 20:16:56 +00:00
|
|
|
labelOptions := toLabel(securityContext.GetSelinuxOptions())
|
|
|
|
if len(labelOptions) == 0 {
|
|
|
|
// Use pod level SELinux config
|
|
|
|
if sandbox, err := c.sandboxStore.Get(sandboxID); err == nil {
|
|
|
|
labelOptions, err = label.DupSecOpt(sandbox.ProcessLabel)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
processLabel, mountLabel, err := label.InitLabels(labelOptions)
|
2019-01-12 04:58:27 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
|
|
|
|
}
|
2020-02-24 20:16:56 +00:00
|
|
|
defer func() {
|
|
|
|
if retErr != nil {
|
|
|
|
_ = label.ReleaseLabel(processLabel)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
|
2019-01-12 04:58:27 +00:00
|
|
|
|
2019-08-30 18:33:25 +00:00
|
|
|
if !c.config.DisableProcMount {
|
|
|
|
// Apply masked paths if specified.
|
2019-09-27 21:51:53 +00:00
|
|
|
// If the container is privileged, this will be cleared later on.
|
|
|
|
specOpts = append(specOpts, oci.WithMaskedPaths(securityContext.GetMaskedPaths()))
|
2019-01-12 04:58:27 +00:00
|
|
|
|
2019-08-30 18:33:25 +00:00
|
|
|
// Apply readonly paths if specified.
|
2019-09-27 21:51:53 +00:00
|
|
|
// If the container is privileged, this will be cleared later on.
|
|
|
|
specOpts = append(specOpts, oci.WithReadonlyPaths(securityContext.GetReadonlyPaths()))
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if securityContext.GetPrivileged() {
|
|
|
|
if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() {
|
|
|
|
return nil, errors.New("no privileged container allowed in sandbox")
|
|
|
|
}
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, oci.WithPrivileged)
|
|
|
|
if !ociRuntime.PrivilegedWithoutHostDevices {
|
|
|
|
specOpts = append(specOpts, customopts.WithPrivilegedDevices)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
} else { // not privileged
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext))
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
2019-09-27 21:51:53 +00:00
|
|
|
|
2019-01-12 04:58:27 +00:00
|
|
|
// Clear all ambient capabilities. The implication of non-root + caps
|
|
|
|
// is not clearly defined in Kubernetes.
|
|
|
|
// See https://github.com/kubernetes/kubernetes/issues/56374
|
|
|
|
// Keep docker's behavior for now.
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts,
|
|
|
|
customopts.WithoutAmbientCaps,
|
|
|
|
customopts.WithSelinuxLabels(processLabel, mountLabel),
|
|
|
|
)
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
// TODO: Figure out whether we should set no new privilege for sandbox container by default
|
2019-09-27 21:51:53 +00:00
|
|
|
if securityContext.GetNoNewPrivs() {
|
|
|
|
specOpts = append(specOpts, oci.WithNoNewPrivileges)
|
|
|
|
}
|
2019-01-12 04:58:27 +00:00
|
|
|
// TODO(random-liu): [P1] Set selinux options (privileged or not).
|
2019-09-27 21:51:53 +00:00
|
|
|
if securityContext.GetReadonlyRootfs() {
|
|
|
|
specOpts = append(specOpts, oci.WithRootFSReadonly())
|
|
|
|
}
|
2019-01-12 04:58:27 +00:00
|
|
|
|
2019-04-09 17:34:40 +00:00
|
|
|
if c.config.DisableCgroup {
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, customopts.WithDisabledCgroups)
|
2019-04-09 17:34:40 +00:00
|
|
|
} else {
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources()))
|
2019-04-09 17:34:40 +00:00
|
|
|
if sandboxConfig.GetLinux().GetCgroupParent() != "" {
|
2019-09-27 21:51:53 +00:00
|
|
|
cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id)
|
|
|
|
specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
|
2019-04-09 17:34:40 +00:00
|
|
|
}
|
|
|
|
}
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
supplementalGroups := securityContext.GetSupplementalGroups()
|
2019-09-27 21:51:53 +00:00
|
|
|
|
|
|
|
for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
|
|
|
|
ociRuntime.PodAnnotations) {
|
|
|
|
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
specOpts = append(specOpts,
|
|
|
|
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
|
|
|
|
customopts.WithPodNamespaces(securityContext, sandboxPid),
|
|
|
|
customopts.WithSupplementalGroups(supplementalGroups),
|
|
|
|
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
|
|
|
|
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
|
|
|
|
)
|
2019-01-12 04:58:27 +00:00
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
return runtimeSpec(id, specOpts...)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// generateVolumeMounts sets up image volumes for container. Rely on the removal of container
|
|
|
|
// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts
|
|
|
|
// specified with the same destination.
|
|
|
|
func (c *criService) generateVolumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
|
|
|
|
if len(config.Volumes) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
var mounts []*runtime.Mount
|
|
|
|
for dst := range config.Volumes {
|
|
|
|
if isInCRIMounts(dst, criMounts) {
|
|
|
|
// Skip the image volume, if there is CRI defined volume mapping.
|
|
|
|
// TODO(random-liu): This should be handled by Kubelet in the future.
|
|
|
|
// Kubelet should decide what to use for image volume, and also de-duplicate
|
|
|
|
// the image volume and user mounts.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
volumeID := util.GenerateID()
|
|
|
|
src := filepath.Join(containerRootDir, "volumes", volumeID)
|
|
|
|
// addOCIBindMounts will create these volumes.
|
|
|
|
mounts = append(mounts, &runtime.Mount{
|
|
|
|
ContainerPath: dst,
|
|
|
|
HostPath: src,
|
|
|
|
// Use default mount propagation.
|
|
|
|
// TODO(random-liu): What about selinux relabel?
|
|
|
|
})
|
|
|
|
}
|
|
|
|
return mounts
|
|
|
|
}
|
|
|
|
|
|
|
|
// generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts
|
|
|
|
// and /etc/resolv.conf.
|
|
|
|
func (c *criService) generateContainerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
|
|
|
|
var mounts []*runtime.Mount
|
|
|
|
securityContext := config.GetLinux().GetSecurityContext()
|
2019-04-07 17:07:55 +00:00
|
|
|
if !isInCRIMounts(etcHostname, config.GetMounts()) {
|
|
|
|
// /etc/hostname is added since 1.1.6, 1.2.4 and 1.3.
|
|
|
|
// For in-place upgrade, the old sandbox doesn't have the hostname file,
|
|
|
|
// do not mount this in that case.
|
|
|
|
// TODO(random-liu): Remove the check and always mount this when
|
|
|
|
// containerd 1.1 and 1.2 are deprecated.
|
|
|
|
hostpath := c.getSandboxHostname(sandboxID)
|
|
|
|
if _, err := c.os.Stat(hostpath); err == nil {
|
|
|
|
mounts = append(mounts, &runtime.Mount{
|
|
|
|
ContainerPath: etcHostname,
|
|
|
|
HostPath: hostpath,
|
|
|
|
Readonly: securityContext.GetReadonlyRootfs(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-12 04:58:27 +00:00
|
|
|
if !isInCRIMounts(etcHosts, config.GetMounts()) {
|
|
|
|
mounts = append(mounts, &runtime.Mount{
|
|
|
|
ContainerPath: etcHosts,
|
|
|
|
HostPath: c.getSandboxHosts(sandboxID),
|
|
|
|
Readonly: securityContext.GetReadonlyRootfs(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mount sandbox resolv.config.
|
|
|
|
// TODO: Need to figure out whether we should always mount it as read-only
|
|
|
|
if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
|
|
|
|
mounts = append(mounts, &runtime.Mount{
|
|
|
|
ContainerPath: resolvConfPath,
|
|
|
|
HostPath: c.getResolvPath(sandboxID),
|
|
|
|
Readonly: securityContext.GetReadonlyRootfs(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
if !isInCRIMounts(devShm, config.GetMounts()) {
|
|
|
|
sandboxDevShm := c.getSandboxDevShm(sandboxID)
|
|
|
|
if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
|
|
|
|
sandboxDevShm = devShm
|
|
|
|
}
|
|
|
|
mounts = append(mounts, &runtime.Mount{
|
|
|
|
ContainerPath: devShm,
|
|
|
|
HostPath: sandboxDevShm,
|
|
|
|
Readonly: false,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
return mounts
|
|
|
|
}
|
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
// runtimeSpec returns a default runtime spec used in cri-containerd.
|
|
|
|
func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) {
|
2019-01-12 04:58:27 +00:00
|
|
|
// GenerateSpec needs namespace.
|
|
|
|
ctx := ctrdutil.NamespacedContext()
|
2019-09-27 21:51:53 +00:00
|
|
|
spec, err := oci.GenerateSpec(ctx, nil, &containers.Container{ID: id}, opts...)
|
2019-01-12 04:58:27 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return spec, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
|
|
|
|
func generateSeccompSpecOpts(seccompProf string, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
|
|
|
|
if privileged {
|
|
|
|
// Do not set seccomp profile when container is privileged
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
// Set seccomp profile
|
|
|
|
if seccompProf == runtimeDefault || seccompProf == dockerDefault {
|
|
|
|
// use correct default profile (Eg. if not configured otherwise, the default is docker/default)
|
|
|
|
seccompProf = seccompDefaultProfile
|
|
|
|
}
|
|
|
|
if !seccompEnabled {
|
|
|
|
if seccompProf != "" && seccompProf != unconfinedProfile {
|
|
|
|
return nil, errors.New("seccomp is not supported")
|
|
|
|
}
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
switch seccompProf {
|
|
|
|
case "", unconfinedProfile:
|
|
|
|
// Do not set seccomp profile.
|
|
|
|
return nil, nil
|
|
|
|
case dockerDefault:
|
|
|
|
// Note: WithDefaultProfile specOpts must be added after capabilities
|
|
|
|
return seccomp.WithDefaultProfile(), nil
|
|
|
|
default:
|
|
|
|
// Require and Trim default profile name prefix
|
|
|
|
if !strings.HasPrefix(seccompProf, profileNamePrefix) {
|
|
|
|
return nil, errors.Errorf("invalid seccomp profile %q", seccompProf)
|
|
|
|
}
|
|
|
|
return seccomp.WithProfile(strings.TrimPrefix(seccompProf, profileNamePrefix)), nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// generateApparmorSpecOpts generates containerd SpecOpts for apparmor.
|
|
|
|
func generateApparmorSpecOpts(apparmorProf string, privileged, apparmorEnabled bool) (oci.SpecOpts, error) {
|
|
|
|
if !apparmorEnabled {
|
|
|
|
// Should fail loudly if user try to specify apparmor profile
|
|
|
|
// but we don't support it.
|
|
|
|
if apparmorProf != "" && apparmorProf != unconfinedProfile {
|
|
|
|
return nil, errors.New("apparmor is not supported")
|
|
|
|
}
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
switch apparmorProf {
|
2019-09-27 21:51:53 +00:00
|
|
|
// Based on kubernetes#51746, default apparmor profile should be applied
|
|
|
|
// for when apparmor is not specified.
|
|
|
|
case runtimeDefault, "":
|
2019-01-12 04:58:27 +00:00
|
|
|
if privileged {
|
2019-09-27 21:51:53 +00:00
|
|
|
// Do not set apparmor profile when container is privileged
|
2019-01-12 04:58:27 +00:00
|
|
|
return nil, nil
|
|
|
|
}
|
2019-09-27 21:51:53 +00:00
|
|
|
// TODO (mikebrow): delete created apparmor default profile
|
2019-01-12 04:58:27 +00:00
|
|
|
return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil
|
2019-09-27 21:51:53 +00:00
|
|
|
case unconfinedProfile:
|
|
|
|
return nil, nil
|
2019-01-12 04:58:27 +00:00
|
|
|
default:
|
|
|
|
// Require and Trim default profile name prefix
|
|
|
|
if !strings.HasPrefix(apparmorProf, profileNamePrefix) {
|
|
|
|
return nil, errors.Errorf("invalid apparmor profile %q", apparmorProf)
|
|
|
|
}
|
|
|
|
return apparmor.WithProfile(strings.TrimPrefix(apparmorProf, profileNamePrefix)), nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-27 21:51:53 +00:00
|
|
|
// generateUserString generates valid user string based on OCI Image Spec
|
|
|
|
// v1.0.0.
|
|
|
|
//
|
|
|
|
// CRI defines that the following combinations are valid:
|
|
|
|
//
|
|
|
|
// (none) -> ""
|
|
|
|
// username -> username
|
|
|
|
// username, uid -> username
|
|
|
|
// username, uid, gid -> username:gid
|
|
|
|
// username, gid -> username:gid
|
|
|
|
// uid -> uid
|
|
|
|
// uid, gid -> uid:gid
|
|
|
|
// gid -> error
|
|
|
|
//
|
2019-01-12 04:58:27 +00:00
|
|
|
// TODO(random-liu): Add group name support in CRI.
|
|
|
|
func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) {
|
|
|
|
var userstr, groupstr string
|
|
|
|
if uid != nil {
|
|
|
|
userstr = strconv.FormatInt(uid.GetValue(), 10)
|
|
|
|
}
|
|
|
|
if username != "" {
|
|
|
|
userstr = username
|
|
|
|
}
|
|
|
|
if gid != nil {
|
|
|
|
groupstr = strconv.FormatInt(gid.GetValue(), 10)
|
|
|
|
}
|
|
|
|
if userstr == "" {
|
|
|
|
if groupstr != "" {
|
|
|
|
return "", errors.Errorf("user group %q is specified without user", groupstr)
|
|
|
|
}
|
|
|
|
return "", nil
|
|
|
|
}
|
|
|
|
if groupstr != "" {
|
|
|
|
userstr = userstr + ":" + groupstr
|
|
|
|
}
|
|
|
|
return userstr, nil
|
|
|
|
}
|