mirror of https://github.com/k3s-io/k3s
403 lines
12 KiB
Go
403 lines
12 KiB
Go
package containerd
|
|
|
|
import (
|
|
"bufio"
|
|
"compress/bzip2"
|
|
"compress/gzip"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/containerd/containerd"
|
|
"github.com/containerd/containerd/errdefs"
|
|
"github.com/containerd/containerd/images"
|
|
"github.com/containerd/containerd/namespaces"
|
|
"github.com/containerd/containerd/reference/docker"
|
|
"github.com/klauspost/compress/zstd"
|
|
"github.com/natefinch/lumberjack"
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
|
"github.com/pierrec/lz4"
|
|
"github.com/pkg/errors"
|
|
"github.com/rancher/k3s/pkg/agent/templates"
|
|
util2 "github.com/rancher/k3s/pkg/agent/util"
|
|
"github.com/rancher/k3s/pkg/daemons/agent"
|
|
"github.com/rancher/k3s/pkg/daemons/config"
|
|
"github.com/rancher/k3s/pkg/untar"
|
|
"github.com/rancher/k3s/pkg/version"
|
|
"github.com/rancher/wrangler/pkg/merr"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
"google.golang.org/grpc"
|
|
yaml "gopkg.in/yaml.v2"
|
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
|
"k8s.io/kubernetes/pkg/kubelet/util"
|
|
)
|
|
|
|
// maxMsgSize is the receive-size limit, in bytes (16 MiB), applied to gRPC calls
// made over CRI connections created by this package.
const maxMsgSize = 16 << 20
|
|
|
|
// Run configures and starts containerd as a child process. Once it is up, images are preloaded
|
|
// or pulled from files found in the agent images directory.
|
|
func Run(ctx context.Context, cfg *config.Node) error {
|
|
args := []string{
|
|
"containerd",
|
|
"-c", cfg.Containerd.Config,
|
|
"-a", cfg.Containerd.Address,
|
|
"--state", cfg.Containerd.State,
|
|
"--root", cfg.Containerd.Root,
|
|
}
|
|
|
|
if err := setupContainerdConfig(ctx, cfg); err != nil {
|
|
return err
|
|
}
|
|
|
|
if os.Getenv("CONTAINERD_LOG_LEVEL") != "" {
|
|
args = append(args, "-l", os.Getenv("CONTAINERD_LOG_LEVEL"))
|
|
}
|
|
|
|
stdOut := io.Writer(os.Stdout)
|
|
stdErr := io.Writer(os.Stderr)
|
|
|
|
if cfg.Containerd.Log != "" {
|
|
logrus.Infof("Logging containerd to %s", cfg.Containerd.Log)
|
|
stdOut = &lumberjack.Logger{
|
|
Filename: cfg.Containerd.Log,
|
|
MaxSize: 50,
|
|
MaxBackups: 3,
|
|
MaxAge: 28,
|
|
Compress: true,
|
|
}
|
|
stdErr = stdOut
|
|
}
|
|
|
|
go func() {
|
|
logrus.Infof("Running containerd %s", config.ArgString(args[1:]))
|
|
cmd := exec.Command(args[0], args[1:]...)
|
|
cmd.Stdout = stdOut
|
|
cmd.Stderr = stdErr
|
|
cmd.Env = os.Environ()
|
|
// elide NOTIFY_SOCKET to prevent spurious notifications to systemd
|
|
for i := range cmd.Env {
|
|
if strings.HasPrefix(cmd.Env[i], "NOTIFY_SOCKET=") {
|
|
cmd.Env = append(cmd.Env[:i], cmd.Env[i+1:]...)
|
|
break
|
|
}
|
|
}
|
|
addDeathSig(cmd)
|
|
if err := cmd.Run(); err != nil {
|
|
fmt.Fprintf(os.Stderr, "containerd: %s\n", err)
|
|
}
|
|
os.Exit(1)
|
|
}()
|
|
|
|
first := true
|
|
for {
|
|
conn, err := criConnection(ctx, cfg.Containerd.Address)
|
|
if err == nil {
|
|
conn.Close()
|
|
break
|
|
}
|
|
if first {
|
|
first = false
|
|
} else {
|
|
logrus.Infof("Waiting for containerd startup: %v", err)
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-time.After(time.Second):
|
|
}
|
|
}
|
|
logrus.Info("Containerd is now running")
|
|
|
|
return preloadImages(ctx, cfg)
|
|
}
|
|
|
|
// criConnection connects to a CRI socket at the given path.
|
|
func criConnection(ctx context.Context, address string) (*grpc.ClientConn, error) {
|
|
addr, dialer, err := util.GetAddressAndDialer("unix://" + address)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
conn, err := grpc.Dial(addr, grpc.WithInsecure(), grpc.WithTimeout(3*time.Second), grpc.WithContextDialer(dialer), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
c := runtimeapi.NewRuntimeServiceClient(conn)
|
|
_, err = c.Version(ctx, &runtimeapi.VersionRequest{
|
|
Version: "0.1.0",
|
|
})
|
|
if err != nil {
|
|
conn.Close()
|
|
return nil, err
|
|
}
|
|
|
|
return conn, nil
|
|
}
|
|
|
|
// preloadImages reads the contents of the agent images directory, and attempts to
|
|
// import into containerd any files found there. Supported compressed types are decompressed, and
|
|
// any .txt files are processed as a list of images that should be pre-pulled from remote registries.
|
|
// If configured, imported images are retagged as being pulled from additional registries.
|
|
func preloadImages(ctx context.Context, cfg *config.Node) error {
|
|
fileInfo, err := os.Stat(cfg.Images)
|
|
if os.IsNotExist(err) {
|
|
return nil
|
|
} else if err != nil {
|
|
logrus.Errorf("Unable to find images in %s: %v", cfg.Images, err)
|
|
return nil
|
|
}
|
|
|
|
if !fileInfo.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
fileInfos, err := ioutil.ReadDir(cfg.Images)
|
|
if err != nil {
|
|
logrus.Errorf("Unable to read images in %s: %v", cfg.Images, err)
|
|
return nil
|
|
}
|
|
|
|
client, err := containerd.New(cfg.Containerd.Address)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer client.Close()
|
|
|
|
criConn, err := criConnection(ctx, cfg.Containerd.Address)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer criConn.Close()
|
|
|
|
// Ensure that nothing else can modify the image store while we're importing,
|
|
// and that our images are imported into the k8s.io namespace
|
|
ctx, done, err := client.WithLease(namespaces.WithNamespace(ctx, "k8s.io"))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer done(ctx)
|
|
|
|
for _, fileInfo := range fileInfos {
|
|
if fileInfo.IsDir() {
|
|
continue
|
|
}
|
|
|
|
start := time.Now()
|
|
filePath := filepath.Join(cfg.Images, fileInfo.Name())
|
|
|
|
if err := preloadFile(ctx, cfg, client, criConn, filePath); err != nil {
|
|
logrus.Errorf("Error encountered while importing %s: %v", filePath, err)
|
|
continue
|
|
}
|
|
logrus.Debugf("Imported images from %s in %s", filePath, time.Since(start))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// preloadFile handles loading images from a single tarball or pre-pull image list.
|
|
// This is in its own function so that we can ensure that the various readers are properly closed, as some
|
|
// decompressing readers need to be explicitly closed and others do not.
|
|
func preloadFile(ctx context.Context, cfg *config.Node, client *containerd.Client, criConn *grpc.ClientConn, filePath string) error {
|
|
file, err := os.Open(filePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer file.Close()
|
|
|
|
var imageReader io.Reader
|
|
switch {
|
|
case util2.HasSuffixI(filePath, ".txt"):
|
|
return prePullImages(ctx, criConn, file)
|
|
case util2.HasSuffixI(filePath, ".tar"):
|
|
imageReader = file
|
|
case util2.HasSuffixI(filePath, ".tar.lz4"):
|
|
imageReader = lz4.NewReader(file)
|
|
case util2.HasSuffixI(filePath, ".tar.bz2", ".tbz"):
|
|
imageReader = bzip2.NewReader(file)
|
|
case util2.HasSuffixI(filePath, ".tar.gz", ".tgz"):
|
|
zr, err := gzip.NewReader(file)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer zr.Close()
|
|
imageReader = zr
|
|
case util2.HasSuffixI(filePath, "tar.zst", ".tzst"):
|
|
zr, err := zstd.NewReader(file, zstd.WithDecoderMaxMemory(untar.MaxDecoderMemory))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer zr.Close()
|
|
imageReader = zr
|
|
default:
|
|
return errors.New("unhandled file type")
|
|
}
|
|
|
|
logrus.Infof("Importing images from %s", filePath)
|
|
|
|
images, err := client.Import(ctx, imageReader, containerd.WithAllPlatforms(true))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return retagImages(ctx, client, images, cfg.AgentConfig.AirgapExtraRegistry)
|
|
}
|
|
|
|
// retagImages retags all listed images as having been pulled from the given remote registries.
|
|
// If duplicate images exist, they are overwritten. This is most useful when using a private registry
|
|
// for all images, as can be configured by the RKE2/Rancher system-default-registry setting.
|
|
func retagImages(ctx context.Context, client *containerd.Client, images []images.Image, registries []string) error {
|
|
var errs []error
|
|
imageService := client.ImageService()
|
|
for _, image := range images {
|
|
name, err := parseNamedTagged(image.Name)
|
|
if err != nil {
|
|
errs = append(errs, errors.Wrap(err, "failed to parse image name"))
|
|
continue
|
|
}
|
|
logrus.Infof("Imported %s", image.Name)
|
|
for _, registry := range registries {
|
|
image.Name = fmt.Sprintf("%s/%s:%s", registry, docker.Path(name), name.Tag())
|
|
if _, err = imageService.Create(ctx, image); err != nil {
|
|
if errdefs.IsAlreadyExists(err) {
|
|
if err = imageService.Delete(ctx, image.Name); err != nil {
|
|
errs = append(errs, errors.Wrap(err, "failed to delete existing image"))
|
|
continue
|
|
}
|
|
if _, err = imageService.Create(ctx, image); err != nil {
|
|
errs = append(errs, errors.Wrap(err, "failed to tag after deleting existing image"))
|
|
continue
|
|
}
|
|
} else {
|
|
errs = append(errs, errors.Wrap(err, "failed to tag image"))
|
|
continue
|
|
}
|
|
}
|
|
logrus.Infof("Tagged %s", image.Name)
|
|
}
|
|
}
|
|
return merr.NewErrors(errs...)
|
|
}
|
|
|
|
// parseNamedTagged parses and normalizes an image name, and converts the resulting reference
|
|
// to a type that exposes the tag.
|
|
func parseNamedTagged(name string) (docker.NamedTagged, error) {
|
|
ref, err := docker.ParseNormalizedNamed(name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tagged, ok := ref.(docker.NamedTagged)
|
|
if !ok {
|
|
return nil, fmt.Errorf("can't cast %T to NamedTagged", ref)
|
|
}
|
|
return tagged, nil
|
|
}
|
|
|
|
// prePullImages asks containerd to pull images in a given list, so that they
|
|
// are ready when the containers attempt to start later.
|
|
func prePullImages(ctx context.Context, conn *grpc.ClientConn, images io.Reader) error {
|
|
imageClient := runtimeapi.NewImageServiceClient(conn)
|
|
scanner := bufio.NewScanner(images)
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
resp, err := imageClient.ImageStatus(ctx, &runtimeapi.ImageStatusRequest{
|
|
Image: &runtimeapi.ImageSpec{
|
|
Image: line,
|
|
},
|
|
})
|
|
if err == nil && resp.Image != nil {
|
|
continue
|
|
}
|
|
|
|
logrus.Infof("Pulling image %s...", line)
|
|
_, err = imageClient.PullImage(ctx, &runtimeapi.PullImageRequest{
|
|
Image: &runtimeapi.ImageSpec{
|
|
Image: line,
|
|
},
|
|
})
|
|
if err != nil {
|
|
logrus.Errorf("Failed to pull %s: %v", line, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// setupContainerdConfig generates the containerd.toml, using a template combined with various
|
|
// runtime configurations and registry mirror settings provided by the administrator.
|
|
func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
|
|
privRegistries, err := getPrivateRegistries(ctx, cfg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
isRunningInUserNS := system.RunningInUserNS()
|
|
_, _, hasCFS, hasPIDs := agent.CheckCgroups()
|
|
// "/sys/fs/cgroup" is namespaced
|
|
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
|
|
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
|
|
if disableCgroup {
|
|
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
|
|
}
|
|
|
|
var containerdTemplate string
|
|
containerdConfig := templates.ContainerdConfig{
|
|
NodeConfig: cfg,
|
|
DisableCgroup: disableCgroup,
|
|
IsRunningInUserNS: isRunningInUserNS,
|
|
PrivateRegistryConfig: privRegistries,
|
|
}
|
|
|
|
selEnabled, selConfigured, err := selinuxStatus()
|
|
if err != nil {
|
|
return errors.Wrap(err, "failed to detect selinux")
|
|
}
|
|
switch {
|
|
case !cfg.SELinux && selEnabled:
|
|
logrus.Warn("SELinux is enabled on this host, but " + version.Program + " has not been started with --selinux - containerd SELinux support is disabled")
|
|
case cfg.SELinux && !selConfigured:
|
|
logrus.Warnf("SELinux is enabled for "+version.Program+" but process is not running in context '%s', "+version.Program+"-selinux policy may need to be applied", SELinuxContextType)
|
|
}
|
|
|
|
containerdTemplateBytes, err := ioutil.ReadFile(cfg.Containerd.Template)
|
|
if err == nil {
|
|
logrus.Infof("Using containerd template at %s", cfg.Containerd.Template)
|
|
containerdTemplate = string(containerdTemplateBytes)
|
|
} else if os.IsNotExist(err) {
|
|
containerdTemplate = templates.ContainerdConfigTemplate
|
|
} else {
|
|
return err
|
|
}
|
|
parsedTemplate, err := templates.ParseTemplateFromConfig(containerdTemplate, containerdConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return util2.WriteFile(cfg.Containerd.Config, parsedTemplate)
|
|
}
|
|
|
|
// getPrivateRegistries loads the registry mirror configuration from registries.yaml
|
|
func getPrivateRegistries(ctx context.Context, cfg *config.Node) (*templates.Registry, error) {
|
|
privRegistries := &templates.Registry{}
|
|
privRegistryFile, err := ioutil.ReadFile(cfg.AgentConfig.PrivateRegistry)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
logrus.Infof("Using registry config file at %s", cfg.AgentConfig.PrivateRegistry)
|
|
if err := yaml.Unmarshal(privRegistryFile, &privRegistries); err != nil {
|
|
return nil, err
|
|
}
|
|
return privRegistries, nil
|
|
}
|