mirror of https://github.com/k3s-io/k3s
Handle conntrack-related sysctls in supervisor agent setup
Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
pull/3340/head
parent
424d75ad43
commit
7e175e8ad4
2
go.mod
2
go.mod
|
@ -26,6 +26,7 @@ replace (
|
|||
// LOOK TO scripts/download FOR THE VERSION OF runc THAT WE ARE BUILDING/SHIPPING
|
||||
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0-rc93.0.20210414171415-3397a09ee932
|
||||
github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20210316141917-a8c4a9ee0f6b
|
||||
github.com/rancher/k3s/pkg/data => ./pkg/data
|
||||
go.etcd.io/etcd => github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201208200253-50621aee4aea
|
||||
golang.org/x/crypto => golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83
|
||||
golang.org/x/net => golang.org/x/net v0.0.0-20210224082022-3d97a244fca7
|
||||
|
@ -78,6 +79,7 @@ require (
|
|||
github.com/go-bindata/go-bindata v3.1.2+incompatible
|
||||
github.com/go-sql-driver/mysql v1.4.1
|
||||
github.com/golangplus/testing v1.0.0 // indirect
|
||||
github.com/google/cadvisor v0.39.0
|
||||
github.com/google/tcpproxy v0.0.0-20180808230851-dfa16c61dad2
|
||||
github.com/google/uuid v1.2.0
|
||||
github.com/gorilla/mux v1.8.0
|
||||
|
|
|
@ -38,7 +38,10 @@ import (
|
|||
v1 "k8s.io/client-go/kubernetes/typed/core/v1"
|
||||
"k8s.io/client-go/tools/clientcmd"
|
||||
"k8s.io/controller-manager/app"
|
||||
app2 "k8s.io/kubernetes/cmd/kube-proxy/app"
|
||||
kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config"
|
||||
utilsnet "k8s.io/utils/net"
|
||||
utilpointer "k8s.io/utils/pointer"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -86,7 +89,12 @@ func run(ctx context.Context, cfg cmds.Agent, proxy proxy.Proxy) error {
|
|||
return errors.Wrap(err, "failed to validate node-ip")
|
||||
}
|
||||
|
||||
syssetup.Configure(dualCluster || dualService || dualNode)
|
||||
enableIPv6 := dualCluster || dualService || dualNode
|
||||
conntrackConfig, err := getConntrackConfig(nodeConfig)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to validate kube-proxy conntrack configuration")
|
||||
}
|
||||
syssetup.Configure(enableIPv6, conntrackConfig)
|
||||
|
||||
if err := setupCriCtlConfig(cfg, nodeConfig); err != nil {
|
||||
return err
|
||||
|
@ -138,6 +146,49 @@ func run(ctx context.Context, cfg cmds.Agent, proxy proxy.Proxy) error {
|
|||
return ctx.Err()
|
||||
}
|
||||
|
||||
// getConntrackConfig uses the kube-proxy code to parse the user-provided kube-proxy-arg values, and
|
||||
// extract the conntrack settings so that K3s can set them itself. This allows us to soft-fail when
|
||||
// running K3s in Docker, where kube-proxy is no longer allowed to set conntrack sysctls on newer kernels.
|
||||
// When running rootless, we do not attempt to set conntrack sysctls - this behavior is copied from kubeadm.
|
||||
func getConntrackConfig(nodeConfig *daemonconfig.Node) (*kubeproxyconfig.KubeProxyConntrackConfiguration, error) {
|
||||
ctConfig := &kubeproxyconfig.KubeProxyConntrackConfiguration{
|
||||
MaxPerCore: utilpointer.Int32Ptr(0),
|
||||
Min: utilpointer.Int32Ptr(0),
|
||||
TCPEstablishedTimeout: &metav1.Duration{},
|
||||
TCPCloseWaitTimeout: &metav1.Duration{},
|
||||
}
|
||||
|
||||
if nodeConfig.AgentConfig.Rootless {
|
||||
return ctConfig, nil
|
||||
}
|
||||
|
||||
cmd := app2.NewProxyCommand()
|
||||
if err := cmd.ParseFlags(daemonconfig.GetArgsList(map[string]string{}, nodeConfig.AgentConfig.ExtraKubeProxyArgs)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
maxPerCore, err := cmd.Flags().GetInt32("conntrack-max-per-core")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ctConfig.MaxPerCore = &maxPerCore
|
||||
min, err := cmd.Flags().GetInt32("conntrack-min")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ctConfig.Min = &min
|
||||
establishedTimeout, err := cmd.Flags().GetDuration("conntrack-tcp-timeout-established")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ctConfig.TCPEstablishedTimeout.Duration = establishedTimeout
|
||||
closeWaitTimeout, err := cmd.Flags().GetDuration("conntrack-tcp-timeout-close-wait")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ctConfig.TCPCloseWaitTimeout.Duration = closeWaitTimeout
|
||||
return ctConfig, nil
|
||||
}
|
||||
|
||||
func coreClient(cfg string) (kubernetes.Interface, error) {
|
||||
restConfig, err := clientcmd.BuildConfigFromFlags("", cfg)
|
||||
if err != nil {
|
||||
|
|
|
@ -3,11 +3,16 @@
|
|||
package syssetup
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/google/cadvisor/machine"
|
||||
"github.com/google/cadvisor/utils/sysfs"
|
||||
"github.com/sirupsen/logrus"
|
||||
kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config"
|
||||
"k8s.io/kubernetes/pkg/util/sysctl"
|
||||
)
|
||||
|
||||
func loadKernelModule(moduleName string) {
|
||||
|
@ -16,18 +21,14 @@ func loadKernelModule(moduleName string) {
|
|||
return
|
||||
}
|
||||
|
||||
if err := exec.Command("modprobe", moduleName).Run(); err != nil {
|
||||
logrus.Warn("Failed to start " + moduleName + " module")
|
||||
if err := exec.Command("modprobe", "--", moduleName).Run(); err != nil {
|
||||
logrus.Warnf("Failed to load kernel module %v with modprobe", moduleName)
|
||||
}
|
||||
}
|
||||
|
||||
func enableSystemControl(file string) {
|
||||
if err := ioutil.WriteFile(file, []byte("1"), 0640); err != nil {
|
||||
logrus.Warnf("Failed to write value 1 at "+file+": %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Configure(enableIPv6 bool) {
|
||||
// Configure loads required kernel modules and sets sysctls required for other components to
|
||||
// function properly.
|
||||
func Configure(enableIPv6 bool, config *kubeproxyconfig.KubeProxyConntrackConfiguration) {
|
||||
loadKernelModule("overlay")
|
||||
loadKernelModule("nf_conntrack")
|
||||
loadKernelModule("br_netfilter")
|
||||
|
@ -39,12 +40,66 @@ func Configure(enableIPv6 bool) {
|
|||
// Kernel is inconsistent about how devconf is configured for
|
||||
// new network namespaces between ipv4 and ipv6. Make sure to
|
||||
// enable forwarding on all and default for both ipv4 and ipv6.
|
||||
enableSystemControl("/proc/sys/net/ipv4/conf/all/forwarding")
|
||||
enableSystemControl("/proc/sys/net/ipv4/conf/default/forwarding")
|
||||
enableSystemControl("/proc/sys/net/bridge/bridge-nf-call-iptables")
|
||||
sysctls := map[string]int{
|
||||
"net/ipv4/conf/all/forwarding": 1,
|
||||
"net/ipv4/conf/default/forwarding": 1,
|
||||
"net/bridge/bridge-nf-call-iptables": 1,
|
||||
}
|
||||
|
||||
if enableIPv6 {
|
||||
enableSystemControl("/proc/sys/net/ipv6/conf/all/forwarding")
|
||||
enableSystemControl("/proc/sys/net/ipv6/conf/default/forwarding")
|
||||
enableSystemControl("/proc/sys/net/bridge/bridge-nf-call-ip6tables")
|
||||
sysctls["net/ipv6/conf/all/forwarding"] = 1
|
||||
sysctls["net/ipv6/conf/default/forwarding"] = 1
|
||||
sysctls["net/bridge/bridge-nf-call-ip6tables"] = 1
|
||||
}
|
||||
|
||||
if conntrackMax := getConntrackMax(config); conntrackMax > 0 {
|
||||
sysctls["net/netfilter/nf_conntrack_max"] = conntrackMax
|
||||
}
|
||||
if config.TCPEstablishedTimeout.Duration > 0 {
|
||||
sysctls["net/netfilter/nf_conntrack_tcp_timeout_established"] = int(config.TCPEstablishedTimeout.Duration / time.Second)
|
||||
}
|
||||
if config.TCPCloseWaitTimeout.Duration > 0 {
|
||||
sysctls["net/netfilter/nf_conntrack_tcp_timeout_close_wait"] = int(config.TCPCloseWaitTimeout.Duration / time.Second)
|
||||
}
|
||||
|
||||
sys := sysctl.New()
|
||||
for entry, value := range sysctls {
|
||||
if val, _ := sys.GetSysctl(entry); val != value {
|
||||
logrus.Infof("Set sysctl '%v' to %v", entry, value)
|
||||
if err := sys.SetSysctl(entry, value); err != nil {
|
||||
logrus.Errorf("Failed to set sysctl: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// getConntrackMax is cribbed from kube-proxy, as recent kernels no longer allow non-init namespaces
|
||||
// to set conntrack-related sysctls.
|
||||
// ref: https://github.com/kubernetes/kubernetes/blob/v1.21.1/cmd/kube-proxy/app/server.go#L780
|
||||
// ref: https://github.com/kubernetes-sigs/kind/issues/2240
|
||||
func getConntrackMax(config *kubeproxyconfig.KubeProxyConntrackConfiguration) int {
|
||||
if config.MaxPerCore != nil && *config.MaxPerCore > 0 {
|
||||
floor := 0
|
||||
if config.Min != nil {
|
||||
floor = int(*config.Min)
|
||||
}
|
||||
scaled := int(*config.MaxPerCore) * detectNumCPU()
|
||||
if scaled > floor {
|
||||
logrus.Debugf("getConntrackMax: using scaled conntrack-max-per-core")
|
||||
return scaled
|
||||
}
|
||||
logrus.Debugf("getConntrackMax: using conntrack-min")
|
||||
return floor
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// detectNumCPU is also cribbed from kube-proxy
|
||||
func detectNumCPU() int {
|
||||
// try get numCPU from /sys firstly due to a known issue (https://github.com/kubernetes/kubernetes/issues/99225)
|
||||
_, numCPU, err := machine.GetTopology(sysfs.NewRealSysFs())
|
||||
if err != nil || numCPU < 1 {
|
||||
return runtime.NumCPU()
|
||||
}
|
||||
return numCPU
|
||||
}
|
||||
|
|
|
@ -49,6 +49,9 @@ func startKubeProxy(cfg *config.Agent) error {
|
|||
"healthz-bind-address": "127.0.0.1",
|
||||
"kubeconfig": cfg.KubeConfigKubeProxy,
|
||||
"cluster-cidr": util.JoinIPNets(cfg.ClusterCIDRs),
|
||||
"conntrack-max-per-core": "0",
|
||||
"conntrack-tcp-timeout-established": "0s",
|
||||
"conntrack-tcp-timeout-close-wait": "0s",
|
||||
}
|
||||
if cfg.NodeName != "" {
|
||||
argsMap["hostname-override"] = cfg.NodeName
|
||||
|
|
|
@ -8,17 +8,20 @@ RUNC_VERSION=v1.0.0-rc94
|
|||
ROOT_VERSION=v0.8.1
|
||||
TRAEFIK_VERSION=9.18.2 # appVersion: 2.4.8
|
||||
CHARTS_DIR=build/static/charts
|
||||
RUNC_DIR=build/src/github.com/opencontainers/runc
|
||||
DATA_DIR=build/data
|
||||
export TZ=UTC
|
||||
|
||||
umask 022
|
||||
rm -rf ${CHARTS_DIR}
|
||||
rm -rf ${RUNC_DIR}
|
||||
mkdir -p ${CHARTS_DIR}
|
||||
mkdir -p ${DATA_DIR}
|
||||
|
||||
curl --compressed -sfL https://github.com/k3s-io/k3s-root/releases/download/${ROOT_VERSION}/k3s-root-${ARCH}.tar | tar xf -
|
||||
|
||||
git clone --depth=1 https://github.com/opencontainers/runc build/src/github.com/opencontainers/runc || true
|
||||
pushd build/src/github.com/opencontainers/runc
|
||||
git clone --depth=1 https://github.com/opencontainers/runc ${RUNC_DIR} || true
|
||||
pushd ${RUNC_DIR}
|
||||
git fetch --all --tags
|
||||
git checkout ${RUNC_VERSION} -b k3s
|
||||
popd
|
||||
|
|
|
@ -554,6 +554,7 @@ github.com/golang/snappy
|
|||
# github.com/google/btree v1.0.0
|
||||
github.com/google/btree
|
||||
# github.com/google/cadvisor v0.39.0
|
||||
## explicit
|
||||
github.com/google/cadvisor/accelerators
|
||||
github.com/google/cadvisor/cache/memory
|
||||
github.com/google/cadvisor/collector
|
||||
|
@ -3195,6 +3196,7 @@ sigs.k8s.io/yaml
|
|||
# github.com/matryer/moq => github.com/rancher/moq v0.0.0-20190404221404-ee5226d43009
|
||||
# github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0-rc93.0.20210414171415-3397a09ee932
|
||||
# github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20210316141917-a8c4a9ee0f6b
|
||||
# github.com/rancher/k3s/pkg/data => ./pkg/data
|
||||
# go.etcd.io/etcd => github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201208200253-50621aee4aea
|
||||
# golang.org/x/crypto => golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83
|
||||
# golang.org/x/net => golang.org/x/net v0.0.0-20210224082022-3d97a244fca7
|
||||
|
|
Loading…
Reference in New Issue