mirror of https://github.com/k3s-io/k3s
Merge pull request #31996 from mtaufen/detect-swap-enabled
Automatic merge from submit-queue Fail kubelet creation if swap enabled and configured with memory eviction thresholds /cc @vishh @derekwaynecarr **Release note**: ``` This adds an opt-in Kubelet flag (--fail-swap-on) that causes the Kubelet to fail to start if swap is enabled on the node. This is a temporary opt-in, and the Kubelet will by default fail with swap enabled starting in 1.6.0. The KubeletConfiguration equivalent to the flag is FailSwapOn. ```pull/6/head
commit
31fbb771a2
|
@ -78,6 +78,9 @@ func NewKubeletServer() *KubeletServer {
|
|||
|
||||
// AddFlags adds flags for a specific KubeletServer to the specified FlagSet
|
||||
func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
||||
// TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled.
|
||||
fs.BoolVar(&s.ExperimentalFailSwapOn, "experimental-fail-swap-on", s.ExperimentalFailSwapOn, "Makes the Kubelet fail to start if swap is enabled on the node. This is a temporary opton to maintain legacy behavior, failing due to swap enabled will happen by default in v1.6.")
|
||||
|
||||
fs.Var(&s.KubeConfig, "kubeconfig", "Path to a kubeconfig file, specifying how to connect to the API server. --api-servers will be used for the location unless --require-kubeconfig is set.")
|
||||
fs.BoolVar(&s.RequireKubeConfig, "require-kubeconfig", s.RequireKubeConfig, "If true the Kubelet will exit if there are configuration errors, and will ignore the value of --api-servers in favor of the server defined in the kubeconfig file.")
|
||||
|
||||
|
|
|
@ -434,17 +434,22 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
|
|||
if s.SystemCgroups != "" && s.CgroupRoot == "" {
|
||||
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
|
||||
}
|
||||
kubeDeps.ContainerManager, err = cm.NewContainerManager(kubeDeps.Mounter, kubeDeps.CAdvisorInterface, cm.NodeConfig{
|
||||
RuntimeCgroupsName: s.RuntimeCgroups,
|
||||
SystemCgroupsName: s.SystemCgroups,
|
||||
KubeletCgroupsName: s.KubeletCgroups,
|
||||
ContainerRuntime: s.ContainerRuntime,
|
||||
CgroupsPerQOS: s.CgroupsPerQOS,
|
||||
CgroupRoot: s.CgroupRoot,
|
||||
CgroupDriver: s.CgroupDriver,
|
||||
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
||||
EnableCRI: s.EnableCRI,
|
||||
})
|
||||
kubeDeps.ContainerManager, err = cm.NewContainerManager(
|
||||
kubeDeps.Mounter,
|
||||
kubeDeps.CAdvisorInterface,
|
||||
cm.NodeConfig{
|
||||
RuntimeCgroupsName: s.RuntimeCgroups,
|
||||
SystemCgroupsName: s.SystemCgroups,
|
||||
KubeletCgroupsName: s.KubeletCgroups,
|
||||
ContainerRuntime: s.ContainerRuntime,
|
||||
CgroupsPerQOS: s.CgroupsPerQOS,
|
||||
CgroupRoot: s.CgroupRoot,
|
||||
CgroupDriver: s.CgroupDriver,
|
||||
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
||||
EnableCRI: s.EnableCRI,
|
||||
},
|
||||
s.ExperimentalFailSwapOn)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -202,6 +202,7 @@ external-etcd-keyfile
|
|||
external-hostname
|
||||
external-ip
|
||||
extra-peer-dirs
|
||||
experimental-fail-swap-on
|
||||
failover-timeout
|
||||
failure-domains
|
||||
fake-clientset
|
||||
|
|
|
@ -465,6 +465,9 @@ type KubeletConfiguration struct {
|
|||
// Enable Container Runtime Interface (CRI) integration.
|
||||
// +optional
|
||||
EnableCRI bool `json:"enableCRI,omitempty"`
|
||||
// TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled.
|
||||
// Tells the Kubelet to fail to start if swap is enabled on the node.
|
||||
ExperimentalFailSwapOn bool `json:"experimentalFailSwapOn,omitempty"`
|
||||
}
|
||||
|
||||
type KubeletAuthorizationMode string
|
||||
|
|
|
@ -504,6 +504,9 @@ type KubeletConfiguration struct {
|
|||
// Enable Container Runtime Interface (CRI) integration.
|
||||
// +optional
|
||||
EnableCRI bool `json:"enableCRI,omitempty"`
|
||||
// TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled.
|
||||
// Tells the Kubelet to fail to start if swap is enabled on the node.
|
||||
ExperimentalFailSwapOn bool `json:"experimentalFailSwapOn,omitempty"`
|
||||
}
|
||||
|
||||
type KubeletAuthorizationMode string
|
||||
|
|
|
@ -407,6 +407,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
|
|||
out.AllowedUnsafeSysctls = *(*[]string)(unsafe.Pointer(&in.AllowedUnsafeSysctls))
|
||||
out.FeatureGates = in.FeatureGates
|
||||
out.EnableCRI = in.EnableCRI
|
||||
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -575,6 +576,7 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
|
|||
out.AllowedUnsafeSysctls = *(*[]string)(unsafe.Pointer(&in.AllowedUnsafeSysctls))
|
||||
out.FeatureGates = in.FeatureGates
|
||||
out.EnableCRI = in.EnableCRI
|
||||
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -461,6 +461,7 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
|
|||
}
|
||||
out.FeatureGates = in.FeatureGates
|
||||
out.EnableCRI = in.EnableCRI
|
||||
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
|
|
@ -392,6 +392,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
|
|||
}
|
||||
out.FeatureGates = in.FeatureGates
|
||||
out.EnableCRI = in.EnableCRI
|
||||
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3003,8 +3003,15 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
|||
Format: "",
|
||||
},
|
||||
},
|
||||
"experimentalFailSwapOn": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Tells the Kubelet to fail to start if swap is enabled on the node.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginMTU", "networkPluginDir", "cniConfDir", "cniBinDir", "volumePluginDir", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "experimentalMounterPath", "experimentalMounterRootfsPath", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates"},
|
||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginMTU", "networkPluginDir", "cniConfDir", "cniBinDir", "volumePluginDir", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "experimentalMounterPath", "experimentalMounterRootfsPath", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates", "experimentalFailSwapOn"},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
|
@ -14757,8 +14764,15 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
|||
Format: "",
|
||||
},
|
||||
},
|
||||
"experimentalFailSwapOn": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Tells the Kubelet to fail to start if swap is enabled on the node.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "experimentalMounterPath", "experimentalMounterRootfsPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates"},
|
||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "experimentalMounterPath", "experimentalMounterRootfsPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit", "featureGates", "experimentalFailSwapOn"},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
|
|
|
@ -19,9 +19,11 @@ limitations under the License.
|
|||
package cm
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
@ -164,12 +166,44 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
|
|||
// TODO(vmarmol): Add limits to the system containers.
|
||||
// Takes the absolute name of the specified containers.
|
||||
// Empty container name disables use of the specified container.
|
||||
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig) (ContainerManager, error) {
|
||||
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool) (ContainerManager, error) {
|
||||
subsystems, err := GetCgroupSubsystems()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err)
|
||||
}
|
||||
|
||||
// Check whether swap is enabled. The Kubelet does not support running with swap enabled.
|
||||
cmd := exec.Command("cat", "/proc/swaps")
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var buf []string
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
for scanner.Scan() { // Splits on newlines by default
|
||||
buf = append(buf, scanner.Text())
|
||||
}
|
||||
if err := cmd.Wait(); err != nil { // Clean up
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TODO(#34726:1.8.0): Remove the opt-in for failing when swap is enabled.
|
||||
// Running with swap enabled should be considered an error, but in order to maintain legacy
|
||||
// behavior we have to require an opt-in to this error for a period of time.
|
||||
|
||||
// If there is more than one line (table headers) in /proc/swaps, swap is enabled and we should error out.
|
||||
if len(buf) > 1 {
|
||||
if failSwapOn {
|
||||
return nil, fmt.Errorf("Running with swap on is not supported, please disable swap! /proc/swaps contained: %v", buf)
|
||||
}
|
||||
glog.Warningf("Running with swap on is not supported, please disable swap! " +
|
||||
"This will be a fatal error by default starting in K8s v1.6! " +
|
||||
"In the meantime, you can opt-in to making this a fatal error by enabling --experimental-fail-swap-on.")
|
||||
}
|
||||
|
||||
// Check if Cgroup-root actually exists on the node
|
||||
if nodeConfig.CgroupsPerQOS {
|
||||
// this does default to / when enabled, but this tests against regressions.
|
||||
|
|
Loading…
Reference in New Issue