mirror of https://github.com/k3s-io/k3s
rootless: enable resource limitation (requires cgroup v2, systemd)
Now rootless mode can be used with cgroup v2 resource limitations. A pod is executed in a cgroup like "/user.slice/user-1001.slice/user@1001.service/k3s-rootless.service/kubepods/podd0eb6921-c81a-4214-b36c-d3b9bb212fac/63b5a253a1fd4627da16bfce9bec58d72144cf30fe833e0ca9a6d60ebf837475". This is accomplished by running `kubelet` in a cgroup namespace, and enabling the `cgroupfs` driver for the cgroup hierarchy delegated by systemd. To enable cgroup v2 resource limitation, `k3s server --rootless` needs to be launched as a `systemctl --user` service. Please see the comment lines in `k3s-rootless.service` for the usage. Running `k3s server --rootless` via a terminal is not supported. When it really needs to be launched via a terminal, `systemd-run --user -p Delegate=yes --tty` needs to be prepended to create a systemd scope. Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp> pull/3106/head
parent
11ef43011a
commit
6e8284e3d4
|
@ -0,0 +1,45 @@
|
||||||
|
# systemd unit file for k3s (rootless)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# - [Optional] Enable cgroup v2 delegation, see https://rootlesscontaine.rs/getting-started/common/cgroup2/ .
|
||||||
|
# This step is optional, but highly recommended for enabling CPU and memory resource limitation.
|
||||||
|
#
|
||||||
|
# - Copy this file as `~/.config/systemd/user/k3s-rootless.service`.
|
||||||
|
# Installing this file as a system-wide service (`/etc/systemd/...`) is not supported.
|
||||||
|
# Depending on the path of the `k3s` binary, you might need to modify the `ExecStart=/usr/local/bin/k3s ...` line of this file.
|
||||||
|
#
|
||||||
|
# - Run `systemctl --user daemon-reload`
|
||||||
|
#
|
||||||
|
# - Run `systemctl --user enable --now k3s-rootless`
|
||||||
|
#
|
||||||
|
# - Run `KUBECONFIG=~/.kube/k3s.yaml kubectl get pods -A`, and make sure the pods are running.
|
||||||
|
#
|
||||||
|
# Troubleshooting:
|
||||||
|
# - See `systemctl --user status k3s-rootless` to check the daemon status
|
||||||
|
# - See `journalctl --user -f -u k3s-rootless` to see the daemon log
|
||||||
|
# - See also https://rootlesscontaine.rs/
|
||||||
|
|
||||||
|
[Unit]
|
||||||
|
Description=k3s (Rootless)
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||||
|
# NOTE: Don't try to run `k3s server --rootless` on a terminal, as it doesn't enable cgroup v2 delegation.
|
||||||
|
# If you really need to try it on a terminal, prepend `systemd-run --user -p Delegate=yes --tty` to create a systemd scope.
|
||||||
|
ExecStart=/usr/local/bin/k3s server --rootless
|
||||||
|
ExecReload=/bin/kill -s HUP $MAINPID
|
||||||
|
TimeoutSec=0
|
||||||
|
RestartSec=2
|
||||||
|
Restart=always
|
||||||
|
StartLimitBurst=3
|
||||||
|
StartLimitInterval=60s
|
||||||
|
LimitNOFILE=infinity
|
||||||
|
LimitNPROC=infinity
|
||||||
|
LimitCORE=infinity
|
||||||
|
TasksMax=infinity
|
||||||
|
Delegate=yes
|
||||||
|
Type=simple
|
||||||
|
KillMode=mixed
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=default.target
|
|
@ -26,11 +26,13 @@ import (
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"github.com/rancher/k3s/pkg/agent/templates"
|
"github.com/rancher/k3s/pkg/agent/templates"
|
||||||
util2 "github.com/rancher/k3s/pkg/agent/util"
|
util2 "github.com/rancher/k3s/pkg/agent/util"
|
||||||
|
"github.com/rancher/k3s/pkg/daemons/agent"
|
||||||
"github.com/rancher/k3s/pkg/daemons/config"
|
"github.com/rancher/k3s/pkg/daemons/config"
|
||||||
"github.com/rancher/k3s/pkg/untar"
|
"github.com/rancher/k3s/pkg/untar"
|
||||||
"github.com/rancher/k3s/pkg/version"
|
"github.com/rancher/k3s/pkg/version"
|
||||||
"github.com/rancher/wrangler/pkg/merr"
|
"github.com/rancher/wrangler/pkg/merr"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
yaml "gopkg.in/yaml.v2"
|
yaml "gopkg.in/yaml.v2"
|
||||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
||||||
|
@ -336,10 +338,21 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
isRunningInUserNS := system.RunningInUserNS()
|
||||||
|
_, _, hasCFS, hasPIDs := agent.CheckCgroups()
|
||||||
|
// "/sys/fs/cgroup" is namespaced
|
||||||
|
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
|
||||||
|
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
|
||||||
|
if disableCgroup {
|
||||||
|
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
|
||||||
|
}
|
||||||
|
|
||||||
var containerdTemplate string
|
var containerdTemplate string
|
||||||
containerdConfig := templates.ContainerdConfig{
|
containerdConfig := templates.ContainerdConfig{
|
||||||
NodeConfig: cfg,
|
NodeConfig: cfg,
|
||||||
IsRunningInUserNS: system.RunningInUserNS(),
|
DisableCgroup: disableCgroup,
|
||||||
|
IsRunningInUserNS: isRunningInUserNS,
|
||||||
PrivateRegistryConfig: privRegistries,
|
PrivateRegistryConfig: privRegistries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ import (
|
||||||
|
|
||||||
type ContainerdConfig struct {
|
type ContainerdConfig struct {
|
||||||
NodeConfig *config.Node
|
NodeConfig *config.Node
|
||||||
|
DisableCgroup bool
|
||||||
IsRunningInUserNS bool
|
IsRunningInUserNS bool
|
||||||
PrivateRegistryConfig *Registry
|
PrivateRegistryConfig *Registry
|
||||||
}
|
}
|
||||||
|
@ -22,8 +23,10 @@ const ContainerdConfigTemplate = `
|
||||||
stream_server_port = "10010"
|
stream_server_port = "10010"
|
||||||
enable_selinux = {{ .NodeConfig.SELinux }}
|
enable_selinux = {{ .NodeConfig.SELinux }}
|
||||||
|
|
||||||
{{- if .IsRunningInUserNS }}
|
{{- if .DisableCgroup}}
|
||||||
disable_cgroup = true
|
disable_cgroup = true
|
||||||
|
{{end}}
|
||||||
|
{{- if .IsRunningInUserNS }}
|
||||||
disable_apparmor = true
|
disable_apparmor = true
|
||||||
restrict_oom_score_adj = true
|
restrict_oom_score_adj = true
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|
|
@ -15,6 +15,7 @@ import (
|
||||||
"github.com/rancher/k3s/pkg/daemons/executor"
|
"github.com/rancher/k3s/pkg/daemons/executor"
|
||||||
"github.com/rancher/k3s/pkg/version"
|
"github.com/rancher/k3s/pkg/version"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
"k8s.io/apimachinery/pkg/util/net"
|
"k8s.io/apimachinery/pkg/util/net"
|
||||||
"k8s.io/component-base/logs"
|
"k8s.io/component-base/logs"
|
||||||
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
|
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
|
||||||
|
@ -128,7 +129,7 @@ func startKubelet(cfg *config.Agent) error {
|
||||||
if err != nil || defaultIP.String() != cfg.NodeIP {
|
if err != nil || defaultIP.String() != cfg.NodeIP {
|
||||||
argsMap["node-ip"] = cfg.NodeIP
|
argsMap["node-ip"] = cfg.NodeIP
|
||||||
}
|
}
|
||||||
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := checkCgroups()
|
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := CheckCgroups()
|
||||||
if !hasCFS {
|
if !hasCFS {
|
||||||
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
|
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
|
||||||
argsMap["cpu-cfs-quota"] = "false"
|
argsMap["cpu-cfs-quota"] = "false"
|
||||||
|
@ -158,11 +159,20 @@ func startKubelet(cfg *config.Agent) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg.Rootless {
|
if cfg.Rootless {
|
||||||
// flags are from https://github.com/rootless-containers/usernetes/blob/v20190826.0/boot/kubelet.sh
|
// "/sys/fs/cgroup" is namespaced
|
||||||
argsMap["cgroup-driver"] = "none"
|
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
|
||||||
argsMap["feature-gates=SupportNoneCgroupDriver"] = "true"
|
if hasCFS && hasPIDs && cgroupfsWritable {
|
||||||
argsMap["cgroups-per-qos"] = "false"
|
logrus.Info("cgroup v2 controllers are delegated for rootless.")
|
||||||
argsMap["enforce-node-allocatable"] = ""
|
// cgroupfs v2, delegated for rootless by systemd
|
||||||
|
argsMap["cgroup-driver"] = "cgroupfs"
|
||||||
|
} else {
|
||||||
|
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Setting cgroup driver to \"none\".")
|
||||||
|
// flags are from https://github.com/rootless-containers/usernetes/blob/v20190826.0/boot/kubelet.sh
|
||||||
|
argsMap["cgroup-driver"] = "none"
|
||||||
|
argsMap["feature-gates=SupportNoneCgroupDriver"] = "true"
|
||||||
|
argsMap["cgroups-per-qos"] = "false"
|
||||||
|
argsMap["enforce-node-allocatable"] = ""
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg.ProtectKernelDefaults {
|
if cfg.ProtectKernelDefaults {
|
||||||
|
@ -182,7 +192,7 @@ func addFeatureGate(current, new string) string {
|
||||||
return current + "," + new
|
return current + "," + new
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
|
||||||
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
|
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
|
||||||
|
|
||||||
// For Unified (v2) cgroups we can directly check to see what controllers are mounted
|
// For Unified (v2) cgroups we can directly check to see what controllers are mounted
|
||||||
|
|
|
@ -8,8 +8,10 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/child"
|
"github.com/rootless-containers/rootlesskit/pkg/child"
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/copyup/tmpfssymlink"
|
"github.com/rootless-containers/rootlesskit/pkg/copyup/tmpfssymlink"
|
||||||
|
@ -17,12 +19,14 @@ import (
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/parent"
|
"github.com/rootless-containers/rootlesskit/pkg/parent"
|
||||||
portbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
|
portbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
pipeFD = "_K3S_ROOTLESS_FD"
|
pipeFD = "_K3S_ROOTLESS_FD"
|
||||||
childEnv = "_K3S_ROOTLESS_SOCK"
|
childEnv = "_K3S_ROOTLESS_SOCK"
|
||||||
Sock = ""
|
evacuateCgroup2Env = "_K3S_ROOTLESS_EVACUATE_CGROUP2" // boolean
|
||||||
|
Sock = ""
|
||||||
)
|
)
|
||||||
|
|
||||||
func Rootless(stateDir string) error {
|
func Rootless(stateDir string) error {
|
||||||
|
@ -61,6 +65,9 @@ func Rootless(stateDir string) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
os.Setenv(childEnv, filepath.Join(parentOpt.StateDir, parent.StateFileAPISock))
|
os.Setenv(childEnv, filepath.Join(parentOpt.StateDir, parent.StateFileAPISock))
|
||||||
|
if parentOpt.EvacuateCgroup2 != "" {
|
||||||
|
os.Setenv(evacuateCgroup2Env, "1")
|
||||||
|
}
|
||||||
if err := parent.Parent(*parentOpt); err != nil {
|
if err := parent.Parent(*parentOpt); err != nil {
|
||||||
logrus.Fatal(err)
|
logrus.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -128,8 +135,26 @@ func createParentOpt(stateDir string) (*parent.Opt, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
opt := &parent.Opt{
|
opt := &parent.Opt{
|
||||||
StateDir: stateDir,
|
StateDir: stateDir,
|
||||||
CreatePIDNS: true,
|
CreatePIDNS: true,
|
||||||
|
CreateCgroupNS: true,
|
||||||
|
CreateUTSNS: true,
|
||||||
|
CreateIPCNS: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
selfCgroupMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if selfCgroup2 := selfCgroupMap[""]; selfCgroup2 == "" {
|
||||||
|
logrus.Warnf("enabling cgroup2 is highly recommended, see https://rootlesscontaine.rs/getting-started/common/cgroup2/")
|
||||||
|
} else {
|
||||||
|
selfCgroup2Dir := filepath.Join("/sys/fs/cgroup", selfCgroup2)
|
||||||
|
if unix.Access(selfCgroup2Dir, unix.W_OK) == nil {
|
||||||
|
opt.EvacuateCgroup2 = "k3s_evac"
|
||||||
|
} else {
|
||||||
|
logrus.Warn("cannot set cgroup2 evacuation, make sure to run k3s as a systemd unit")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mtu := 0
|
mtu := 0
|
||||||
|
@ -177,5 +202,12 @@ func createChildOpt() (*child.Opt, error) {
|
||||||
opt.CopyUpDriver = tmpfssymlink.NewChildDriver()
|
opt.CopyUpDriver = tmpfssymlink.NewChildDriver()
|
||||||
opt.MountProcfs = true
|
opt.MountProcfs = true
|
||||||
opt.Reaper = true
|
opt.Reaper = true
|
||||||
|
if v := os.Getenv(evacuateCgroup2Env); v != "" {
|
||||||
|
var err error
|
||||||
|
opt.EvacuateCgroup2, err = strconv.ParseBool(v)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
return opt, nil
|
return opt, nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue