mirror of https://github.com/k3s-io/k3s
Add systemd cgroup controller support
Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
pull/5518/head
parent
1caae63140
commit
333311c7ee
|
@ -45,18 +45,22 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
|
|||
}
|
||||
|
||||
isRunningInUserNS := userns.RunningInUserNS()
|
||||
_, _, hasCFS, hasPIDs := cgroups.CheckCgroups()
|
||||
_, _, controllers := cgroups.CheckCgroups()
|
||||
// "/sys/fs/cgroup" is namespaced
|
||||
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
|
||||
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
|
||||
disableCgroup := isRunningInUserNS && (!controllers["cpu"] || !controllers["pids"] || !cgroupfsWritable)
|
||||
if disableCgroup {
|
||||
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
|
||||
}
|
||||
|
||||
systemdCgroup := controllers["cpuset"] && os.Getenv("NOTIFY_SOCKET") != ""
|
||||
cfg.AgentConfig.Systemd = systemdCgroup
|
||||
|
||||
var containerdTemplate string
|
||||
containerdConfig := templates.ContainerdConfig{
|
||||
NodeConfig: cfg,
|
||||
DisableCgroup: disableCgroup,
|
||||
SystemdCgroup: systemdCgroup,
|
||||
IsRunningInUserNS: isRunningInUserNS,
|
||||
PrivateRegistryConfig: privRegistries.Registry,
|
||||
ExtraRuntimes: findNvidiaContainerRuntimes(os.DirFS(string(os.PathSeparator))),
|
||||
|
|
|
@ -45,6 +45,7 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
|
|||
containerdConfig := templates.ContainerdConfig{
|
||||
NodeConfig: cfg,
|
||||
DisableCgroup: true,
|
||||
SystemdCgroup: false,
|
||||
IsRunningInUserNS: false,
|
||||
PrivateRegistryConfig: privRegistries.Registry,
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ type ContainerdRuntimeConfig struct {
|
|||
type ContainerdConfig struct {
|
||||
NodeConfig *config.Node
|
||||
DisableCgroup bool
|
||||
SystemdCgroup bool
|
||||
IsRunningInUserNS bool
|
||||
PrivateRegistryConfig *registries.Registry
|
||||
ExtraRuntimes map[string]ContainerdRuntimeConfig
|
||||
|
|
|
@ -81,6 +81,9 @@ enable_keychain = true
|
|||
[plugins.cri.containerd.runtimes.runc]
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
|
||||
[plugins.cri.containerd.runtimes.runc.options]
|
||||
SystemdCgroup = {{ .SystemdCgroup }}
|
||||
|
||||
{{ if .PrivateRegistryConfig }}
|
||||
{{ if .PrivateRegistryConfig.Mirrors }}
|
||||
[plugins.cri.registry.mirrors]{{end}}
|
||||
|
|
|
@ -65,34 +65,30 @@ func validateCgroupsV2() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
||||
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
|
||||
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
|
||||
controllers = make(map[string]bool)
|
||||
|
||||
// For Unified (v2) cgroups we can directly check to see what controllers are mounted
|
||||
// under the unified hierarchy.
|
||||
if cgroupsModeV2 {
|
||||
m, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
|
||||
if err != nil {
|
||||
return "", "", false, false
|
||||
return
|
||||
}
|
||||
controllers, err := m.Controllers()
|
||||
enabledControllers, err := m.Controllers()
|
||||
if err != nil {
|
||||
return "", "", false, false
|
||||
return
|
||||
}
|
||||
// Intentionally using an expressionless switch to match the logic below
|
||||
for _, controller := range controllers {
|
||||
switch {
|
||||
case controller == "cpu":
|
||||
hasCFS = true
|
||||
case controller == "pids":
|
||||
hasPIDs = true
|
||||
}
|
||||
for _, controller := range enabledControllers {
|
||||
controllers[controller] = true
|
||||
}
|
||||
}
|
||||
|
||||
f, err := os.Open("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", "", false, false
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
|
@ -102,10 +98,10 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
|||
if len(parts) < 3 {
|
||||
continue
|
||||
}
|
||||
controllers := strings.Split(parts[1], ",")
|
||||
enabledControllers := strings.Split(parts[1], ",")
|
||||
// For v1 or hybrid, controller can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}
|
||||
// For v2, controllers = {""} (only contains a single empty string)
|
||||
for _, controller := range controllers {
|
||||
// For v2, controllers = {""} (only contains a single empty string) so this section is not used.
|
||||
for _, controller := range enabledControllers {
|
||||
switch {
|
||||
case controller == "name=systemd" || cgroupsModeV2:
|
||||
// If we detect that we are running under a `.scope` unit with systemd
|
||||
|
@ -128,10 +124,10 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
|||
// can fail if we use the comma-separated name. Instead, we check for the controller using the symlink.
|
||||
p := filepath.Join("/sys/fs/cgroup", controller, parts[2], "cpu.cfs_period_us")
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
hasCFS = true
|
||||
controllers[controller] = true
|
||||
}
|
||||
case controller == "pids":
|
||||
hasPIDs = true
|
||||
default:
|
||||
controllers[controller] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -146,7 +142,7 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
|||
// a host PID scenario but we don't support this.
|
||||
g, err := os.Open("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", "", false, false
|
||||
return
|
||||
}
|
||||
defer g.Close()
|
||||
scan = bufio.NewScanner(g)
|
||||
|
@ -170,5 +166,5 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
|||
}
|
||||
}
|
||||
}
|
||||
return kubeletRoot, runtimeRoot, hasCFS, hasPIDs
|
||||
return
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
//go:build windows
|
||||
// +build windows
|
||||
|
||||
package cgroups
|
||||
|
@ -6,6 +7,6 @@ func Validate() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
||||
return "", "", false, false
|
||||
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
|
||||
return
|
||||
}
|
||||
|
|
|
@ -18,17 +18,15 @@ import (
|
|||
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
|
||||
)
|
||||
|
||||
func createRootlessConfig(argsMap map[string]string, hasCFS, hasPIDs bool) {
|
||||
func createRootlessConfig(argsMap map[string]string, controllers map[string]bool) {
|
||||
argsMap["feature-gates=KubeletInUserNamespace"] = "true"
|
||||
// "/sys/fs/cgroup" is namespaced
|
||||
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
|
||||
if hasCFS && hasPIDs && cgroupfsWritable {
|
||||
if controllers["cpu"] && controllers["pids"] && cgroupfsWritable {
|
||||
logrus.Info("cgroup v2 controllers are delegated for rootless.")
|
||||
// cgroupfs v2, delegated for rootless by systemd
|
||||
argsMap["cgroup-driver"] = "cgroupfs"
|
||||
} else {
|
||||
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
|
||||
return
|
||||
}
|
||||
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
|
||||
}
|
||||
|
||||
func checkRuntimeEndpoint(cfg *config.Agent, argsMap map[string]string) {
|
||||
|
@ -67,14 +65,13 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
|
|||
bindAddress = "::1"
|
||||
}
|
||||
argsMap := map[string]string{
|
||||
"healthz-bind-address": bindAddress,
|
||||
"read-only-port": "0",
|
||||
"cluster-domain": cfg.ClusterDomain,
|
||||
"kubeconfig": cfg.KubeConfigKubelet,
|
||||
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
|
||||
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
|
||||
"fail-swap-on": "false",
|
||||
//"cgroup-root": "/k3s",
|
||||
"healthz-bind-address": bindAddress,
|
||||
"read-only-port": "0",
|
||||
"cluster-domain": cfg.ClusterDomain,
|
||||
"kubeconfig": cfg.KubeConfigKubelet,
|
||||
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
|
||||
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
|
||||
"fail-swap-on": "false",
|
||||
"cgroup-driver": "cgroupfs",
|
||||
"authentication-token-webhook": "true",
|
||||
"anonymous-auth": "false",
|
||||
|
@ -138,13 +135,13 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
|
|||
if err != nil || defaultIP.String() != cfg.NodeIP {
|
||||
argsMap["node-ip"] = cfg.NodeIP
|
||||
}
|
||||
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := cgroups.CheckCgroups()
|
||||
if !hasCFS {
|
||||
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
|
||||
kubeletRoot, runtimeRoot, controllers := cgroups.CheckCgroups()
|
||||
if !controllers["cpu"] {
|
||||
logrus.Warn("Disabling CPU quotas due to missing cpu controller or cpu.cfs_period_us")
|
||||
argsMap["cpu-cfs-quota"] = "false"
|
||||
}
|
||||
if !hasPIDs {
|
||||
logrus.Fatal("PIDS cgroup support not found")
|
||||
if !controllers["pids"] {
|
||||
logrus.Fatal("pids cgroup controller not found")
|
||||
}
|
||||
if kubeletRoot != "" {
|
||||
argsMap["kubelet-cgroups"] = kubeletRoot
|
||||
|
@ -172,7 +169,11 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
|
|||
}
|
||||
|
||||
if cfg.Rootless {
|
||||
createRootlessConfig(argsMap, hasCFS, hasCFS)
|
||||
createRootlessConfig(argsMap, controllers)
|
||||
}
|
||||
|
||||
if cfg.Systemd {
|
||||
argsMap["cgroup-driver"] = "systemd"
|
||||
}
|
||||
|
||||
if cfg.ProtectKernelDefaults {
|
||||
|
|
|
@ -90,6 +90,7 @@ type Agent struct {
|
|||
ExtraKubeProxyArgs []string
|
||||
PauseImage string
|
||||
Snapshotter string
|
||||
Systemd bool
|
||||
CNIPlugin bool
|
||||
NodeTaints []string
|
||||
NodeLabels []string
|
||||
|
|
Loading…
Reference in New Issue