k3s/pkg/agent/containerd/containerd.go

403 lines
12 KiB
Go

package containerd
import (
"bufio"
"compress/bzip2"
"compress/gzip"
"context"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/reference/docker"
"github.com/klauspost/compress/zstd"
"github.com/natefinch/lumberjack"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/pierrec/lz4"
"github.com/pkg/errors"
"github.com/rancher/k3s/pkg/agent/templates"
util2 "github.com/rancher/k3s/pkg/agent/util"
"github.com/rancher/k3s/pkg/daemons/agent"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/untar"
"github.com/rancher/k3s/pkg/version"
"github.com/rancher/wrangler/pkg/merr"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"google.golang.org/grpc"
yaml "gopkg.in/yaml.v2"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/kubelet/util"
)
const (
maxMsgSize = 1024 * 1024 * 16
)
// Run configures and starts containerd as a child process. Once it is up, images are preloaded
// or pulled from files found in the agent images directory.
func Run(ctx context.Context, cfg *config.Node) error {
args := []string{
"containerd",
"-c", cfg.Containerd.Config,
"-a", cfg.Containerd.Address,
"--state", cfg.Containerd.State,
"--root", cfg.Containerd.Root,
}
if err := setupContainerdConfig(ctx, cfg); err != nil {
return err
}
if os.Getenv("CONTAINERD_LOG_LEVEL") != "" {
args = append(args, "-l", os.Getenv("CONTAINERD_LOG_LEVEL"))
}
stdOut := io.Writer(os.Stdout)
stdErr := io.Writer(os.Stderr)
if cfg.Containerd.Log != "" {
logrus.Infof("Logging containerd to %s", cfg.Containerd.Log)
stdOut = &lumberjack.Logger{
Filename: cfg.Containerd.Log,
MaxSize: 50,
MaxBackups: 3,
MaxAge: 28,
Compress: true,
}
stdErr = stdOut
}
go func() {
logrus.Infof("Running containerd %s", config.ArgString(args[1:]))
cmd := exec.Command(args[0], args[1:]...)
cmd.Stdout = stdOut
cmd.Stderr = stdErr
cmd.Env = os.Environ()
// elide NOTIFY_SOCKET to prevent spurious notifications to systemd
for i := range cmd.Env {
if strings.HasPrefix(cmd.Env[i], "NOTIFY_SOCKET=") {
cmd.Env = append(cmd.Env[:i], cmd.Env[i+1:]...)
break
}
}
addDeathSig(cmd)
if err := cmd.Run(); err != nil {
fmt.Fprintf(os.Stderr, "containerd: %s\n", err)
}
os.Exit(1)
}()
first := true
for {
conn, err := criConnection(ctx, cfg.Containerd.Address)
if err == nil {
conn.Close()
break
}
if first {
first = false
} else {
logrus.Infof("Waiting for containerd startup: %v", err)
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(time.Second):
}
}
logrus.Info("Containerd is now running")
return preloadImages(ctx, cfg)
}
// criConnection connects to a CRI socket at the given path.
func criConnection(ctx context.Context, address string) (*grpc.ClientConn, error) {
addr, dialer, err := util.GetAddressAndDialer("unix://" + address)
if err != nil {
return nil, err
}
conn, err := grpc.Dial(addr, grpc.WithInsecure(), grpc.WithTimeout(3*time.Second), grpc.WithContextDialer(dialer), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)))
if err != nil {
return nil, err
}
c := runtimeapi.NewRuntimeServiceClient(conn)
_, err = c.Version(ctx, &runtimeapi.VersionRequest{
Version: "0.1.0",
})
if err != nil {
conn.Close()
return nil, err
}
return conn, nil
}
// preloadImages reads the contents of the agent images directory, and attempts to
// import into containerd any files found there. Supported compressed types are decompressed, and
// any .txt files are processed as a list of images that should be pre-pulled from remote registries.
// If configured, imported images are retagged as being pulled from additional registries.
func preloadImages(ctx context.Context, cfg *config.Node) error {
fileInfo, err := os.Stat(cfg.Images)
if os.IsNotExist(err) {
return nil
} else if err != nil {
logrus.Errorf("Unable to find images in %s: %v", cfg.Images, err)
return nil
}
if !fileInfo.IsDir() {
return nil
}
fileInfos, err := ioutil.ReadDir(cfg.Images)
if err != nil {
logrus.Errorf("Unable to read images in %s: %v", cfg.Images, err)
return nil
}
client, err := containerd.New(cfg.Containerd.Address)
if err != nil {
return err
}
defer client.Close()
criConn, err := criConnection(ctx, cfg.Containerd.Address)
if err != nil {
return err
}
defer criConn.Close()
// Ensure that nothing else can modify the image store while we're importing,
// and that our images are imported into the k8s.io namespace
ctx, done, err := client.WithLease(namespaces.WithNamespace(ctx, "k8s.io"))
if err != nil {
return err
}
defer done(ctx)
for _, fileInfo := range fileInfos {
if fileInfo.IsDir() {
continue
}
start := time.Now()
filePath := filepath.Join(cfg.Images, fileInfo.Name())
if err := preloadFile(ctx, cfg, client, criConn, filePath); err != nil {
logrus.Errorf("Error encountered while importing %s: %v", filePath, err)
continue
}
logrus.Debugf("Imported images from %s in %s", filePath, time.Since(start))
}
return nil
}
// preloadFile handles loading images from a single tarball or pre-pull image list.
// This is in its own function so that we can ensure that the various readers are properly closed, as some
// decompressing readers need to be explicitly closed and others do not.
func preloadFile(ctx context.Context, cfg *config.Node, client *containerd.Client, criConn *grpc.ClientConn, filePath string) error {
file, err := os.Open(filePath)
if err != nil {
return err
}
defer file.Close()
var imageReader io.Reader
switch {
case util2.HasSuffixI(filePath, ".txt"):
return prePullImages(ctx, criConn, file)
case util2.HasSuffixI(filePath, ".tar"):
imageReader = file
case util2.HasSuffixI(filePath, ".tar.lz4"):
imageReader = lz4.NewReader(file)
case util2.HasSuffixI(filePath, ".tar.bz2", ".tbz"):
imageReader = bzip2.NewReader(file)
case util2.HasSuffixI(filePath, ".tar.gz", ".tgz"):
zr, err := gzip.NewReader(file)
if err != nil {
return err
}
defer zr.Close()
imageReader = zr
case util2.HasSuffixI(filePath, "tar.zst", ".tzst"):
zr, err := zstd.NewReader(file, zstd.WithDecoderMaxMemory(untar.MaxDecoderMemory))
if err != nil {
return err
}
defer zr.Close()
imageReader = zr
default:
return errors.New("unhandled file type")
}
logrus.Infof("Importing images from %s", filePath)
images, err := client.Import(ctx, imageReader, containerd.WithAllPlatforms(true))
if err != nil {
return err
}
return retagImages(ctx, client, images, cfg.AgentConfig.AirgapExtraRegistry)
}
// retagImages retags all listed images as having been pulled from the given remote registries.
// If duplicate images exist, they are overwritten. This is most useful when using a private registry
// for all images, as can be configured by the RKE2/Rancher system-default-registry setting.
func retagImages(ctx context.Context, client *containerd.Client, images []images.Image, registries []string) error {
var errs []error
imageService := client.ImageService()
for _, image := range images {
name, err := parseNamedTagged(image.Name)
if err != nil {
errs = append(errs, errors.Wrap(err, "failed to parse image name"))
continue
}
logrus.Infof("Imported %s", image.Name)
for _, registry := range registries {
image.Name = fmt.Sprintf("%s/%s:%s", registry, docker.Path(name), name.Tag())
if _, err = imageService.Create(ctx, image); err != nil {
if errdefs.IsAlreadyExists(err) {
if err = imageService.Delete(ctx, image.Name); err != nil {
errs = append(errs, errors.Wrap(err, "failed to delete existing image"))
continue
}
if _, err = imageService.Create(ctx, image); err != nil {
errs = append(errs, errors.Wrap(err, "failed to tag after deleting existing image"))
continue
}
} else {
errs = append(errs, errors.Wrap(err, "failed to tag image"))
continue
}
}
logrus.Infof("Tagged %s", image.Name)
}
}
return merr.NewErrors(errs...)
}
// parseNamedTagged parses and normalizes an image name, and converts the resulting reference
// to a type that exposes the tag.
func parseNamedTagged(name string) (docker.NamedTagged, error) {
ref, err := docker.ParseNormalizedNamed(name)
if err != nil {
return nil, err
}
tagged, ok := ref.(docker.NamedTagged)
if !ok {
return nil, fmt.Errorf("can't cast %T to NamedTagged", ref)
}
return tagged, nil
}
// prePullImages asks containerd to pull images in a given list, so that they
// are ready when the containers attempt to start later.
func prePullImages(ctx context.Context, conn *grpc.ClientConn, images io.Reader) error {
imageClient := runtimeapi.NewImageServiceClient(conn)
scanner := bufio.NewScanner(images)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
resp, err := imageClient.ImageStatus(ctx, &runtimeapi.ImageStatusRequest{
Image: &runtimeapi.ImageSpec{
Image: line,
},
})
if err == nil && resp.Image != nil {
continue
}
logrus.Infof("Pulling image %s...", line)
_, err = imageClient.PullImage(ctx, &runtimeapi.PullImageRequest{
Image: &runtimeapi.ImageSpec{
Image: line,
},
})
if err != nil {
logrus.Errorf("Failed to pull %s: %v", line, err)
}
}
return nil
}
// setupContainerdConfig generates the containerd.toml, using a template combined with various
// runtime configurations and registry mirror settings provided by the administrator.
func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
privRegistries, err := getPrivateRegistries(ctx, cfg)
if err != nil {
return err
}
isRunningInUserNS := system.RunningInUserNS()
_, _, hasCFS, hasPIDs := agent.CheckCgroups()
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
if disableCgroup {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
}
var containerdTemplate string
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: disableCgroup,
IsRunningInUserNS: isRunningInUserNS,
PrivateRegistryConfig: privRegistries,
}
selEnabled, selConfigured, err := selinuxStatus()
if err != nil {
return errors.Wrap(err, "failed to detect selinux")
}
switch {
case !cfg.SELinux && selEnabled:
logrus.Warn("SELinux is enabled on this host, but " + version.Program + " has not been started with --selinux - containerd SELinux support is disabled")
case cfg.SELinux && !selConfigured:
logrus.Warnf("SELinux is enabled for "+version.Program+" but process is not running in context '%s', "+version.Program+"-selinux policy may need to be applied", SELinuxContextType)
}
containerdTemplateBytes, err := ioutil.ReadFile(cfg.Containerd.Template)
if err == nil {
logrus.Infof("Using containerd template at %s", cfg.Containerd.Template)
containerdTemplate = string(containerdTemplateBytes)
} else if os.IsNotExist(err) {
containerdTemplate = templates.ContainerdConfigTemplate
} else {
return err
}
parsedTemplate, err := templates.ParseTemplateFromConfig(containerdTemplate, containerdConfig)
if err != nil {
return err
}
return util2.WriteFile(cfg.Containerd.Config, parsedTemplate)
}
// getPrivateRegistries loads the registry mirror configuration from registries.yaml
func getPrivateRegistries(ctx context.Context, cfg *config.Node) (*templates.Registry, error) {
privRegistries := &templates.Registry{}
privRegistryFile, err := ioutil.ReadFile(cfg.AgentConfig.PrivateRegistry)
if err != nil {
if os.IsNotExist(err) {
return nil, nil
}
return nil, err
}
logrus.Infof("Using registry config file at %s", cfg.AgentConfig.PrivateRegistry)
if err := yaml.Unmarshal(privRegistryFile, &privRegistries); err != nil {
return nil, err
}
return privRegistries, nil
}