k3s/pkg/daemons/agent/agent_linux.go

191 lines
6.3 KiB
Go

//go:build linux
// +build linux
package agent
import (
"net"
"os"
"path/filepath"
"strings"
"github.com/k3s-io/k3s/pkg/cgroups"
"github.com/k3s-io/k3s/pkg/daemons/config"
"github.com/k3s-io/k3s/pkg/util"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
utilsnet "k8s.io/utils/net"
)
const socketPrefix = "unix://"
func createRootlessConfig(argsMap map[string]string, controllers map[string]bool) {
argsMap["feature-gates=KubeletInUserNamespace"] = "true"
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
if controllers["cpu"] && controllers["pids"] && cgroupfsWritable {
logrus.Info("cgroup v2 controllers are delegated for rootless.")
return
}
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
}
func kubeProxyArgs(cfg *config.Agent) map[string]string {
bindAddress := "127.0.0.1"
isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{cfg.NodeIP}[0]))
if isIPv6 {
bindAddress = "::1"
}
argsMap := map[string]string{
"proxy-mode": "iptables",
"healthz-bind-address": bindAddress,
"kubeconfig": cfg.KubeConfigKubeProxy,
"cluster-cidr": util.JoinIPNets(cfg.ClusterCIDRs),
"conntrack-max-per-core": "0",
"conntrack-tcp-timeout-established": "0s",
"conntrack-tcp-timeout-close-wait": "0s",
}
if cfg.NodeName != "" {
argsMap["hostname-override"] = cfg.NodeName
}
return argsMap
}
func kubeletArgs(cfg *config.Agent) map[string]string {
bindAddress := "127.0.0.1"
isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{cfg.NodeIP}[0]))
if isIPv6 {
bindAddress = "::1"
}
argsMap := map[string]string{
"healthz-bind-address": bindAddress,
"read-only-port": "0",
"cluster-domain": cfg.ClusterDomain,
"kubeconfig": cfg.KubeConfigKubelet,
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
"fail-swap-on": "false",
"cgroup-driver": "cgroupfs",
"authentication-token-webhook": "true",
"anonymous-auth": "false",
"authorization-mode": modes.ModeWebhook,
}
if cfg.PodManifests != "" && argsMap["pod-manifest-path"] == "" {
argsMap["pod-manifest-path"] = cfg.PodManifests
}
if err := os.MkdirAll(argsMap["pod-manifest-path"], 0755); err != nil {
logrus.Errorf("Failed to mkdir %s: %v", argsMap["pod-manifest-path"], err)
}
if cfg.RootDir != "" {
argsMap["root-dir"] = cfg.RootDir
argsMap["cert-dir"] = filepath.Join(cfg.RootDir, "pki")
}
if len(cfg.ClusterDNS) > 0 {
argsMap["cluster-dns"] = util.JoinIPs(cfg.ClusterDNSs)
}
if cfg.ResolvConf != "" {
argsMap["resolv-conf"] = cfg.ResolvConf
}
if cfg.RuntimeSocket != "" {
argsMap["serialize-image-pulls"] = "false"
if strings.Contains(cfg.RuntimeSocket, "containerd") {
argsMap["containerd"] = cfg.RuntimeSocket
}
// cadvisor wants the containerd CRI socket without the prefix, but kubelet wants it with the prefix
if strings.HasPrefix(cfg.RuntimeSocket, socketPrefix) {
argsMap["container-runtime-endpoint"] = cfg.RuntimeSocket
} else {
argsMap["container-runtime-endpoint"] = socketPrefix + cfg.RuntimeSocket
}
}
if cfg.PauseImage != "" {
argsMap["pod-infra-container-image"] = cfg.PauseImage
}
if cfg.ImageServiceSocket != "" {
if strings.HasPrefix(cfg.ImageServiceSocket, socketPrefix) {
argsMap["image-service-endpoint"] = cfg.ImageServiceSocket
} else {
argsMap["image-service-endpoint"] = socketPrefix + cfg.ImageServiceSocket
}
}
if cfg.ListenAddress != "" {
argsMap["address"] = cfg.ListenAddress
}
if cfg.ClientCA != "" {
argsMap["anonymous-auth"] = "false"
argsMap["client-ca-file"] = cfg.ClientCA
}
if cfg.ServingKubeletCert != "" && cfg.ServingKubeletKey != "" {
argsMap["tls-cert-file"] = cfg.ServingKubeletCert
argsMap["tls-private-key-file"] = cfg.ServingKubeletKey
}
if cfg.NodeName != "" {
argsMap["hostname-override"] = cfg.NodeName
}
// If the embedded CCM is disabled, don't assume that dual-stack node IPs are safe.
// When using an external CCM, the user wants dual-stack node IPs, they will need to set the node-ip kubelet arg directly.
// This should be fine since most cloud providers have their own way of finding node IPs that doesn't depend on the kubelet
// setting them.
if cfg.DisableCCM {
dualStack, err := utilsnet.IsDualStackIPs(cfg.NodeIPs)
if err == nil && !dualStack {
argsMap["node-ip"] = cfg.NodeIP
}
} else {
// Cluster is using the embedded CCM, we know that the feature-gate will be enabled there as well.
argsMap["feature-gates"] = util.AddFeatureGate(argsMap["feature-gates"], "CloudDualStackNodeIPs=true")
if nodeIPs := util.JoinIPs(cfg.NodeIPs); nodeIPs != "" {
argsMap["node-ip"] = util.JoinIPs(cfg.NodeIPs)
}
}
kubeletRoot, runtimeRoot, controllers := cgroups.CheckCgroups()
if !controllers["cpu"] {
logrus.Warn("Disabling CPU quotas due to missing cpu controller or cpu.cfs_period_us")
argsMap["cpu-cfs-quota"] = "false"
}
if !controllers["pids"] {
logrus.Fatal("pids cgroup controller not found")
}
if kubeletRoot != "" {
argsMap["kubelet-cgroups"] = kubeletRoot
}
if runtimeRoot != "" {
argsMap["runtime-cgroups"] = runtimeRoot
}
argsMap["node-labels"] = strings.Join(cfg.NodeLabels, ",")
if len(cfg.NodeTaints) > 0 {
argsMap["register-with-taints"] = strings.Join(cfg.NodeTaints, ",")
}
if !cfg.DisableCCM {
argsMap["cloud-provider"] = "external"
}
if ImageCredProvAvailable(cfg) {
logrus.Infof("Kubelet image credential provider bin dir and configuration file found.")
argsMap["feature-gates"] = util.AddFeatureGate(argsMap["feature-gates"], "KubeletCredentialProviders=true")
argsMap["image-credential-provider-bin-dir"] = cfg.ImageCredProvBinDir
argsMap["image-credential-provider-config"] = cfg.ImageCredProvConfig
}
if cfg.Rootless {
createRootlessConfig(argsMap, controllers)
}
if cfg.Systemd {
argsMap["cgroup-driver"] = "systemd"
}
if cfg.ProtectKernelDefaults {
argsMap["protect-kernel-defaults"] = "true"
}
if !cfg.DisableServiceLB {
argsMap["allowed-unsafe-sysctls"] = "net.ipv4.ip_forward,net.ipv6.conf.all.forwarding"
}
return argsMap
}