mirror of https://github.com/k3s-io/k3s
Merge pull request #38258 from derekwaynecarr/kernel-memcg-flag
Automatic merge from submit-queue (batch tested with PRs 38318, 38258) kernel memcg notification enabled via experimental flag Kubelet integrates with kernel memcg notification API if and only if enabled via experimental flag.pull/6/head
commit
ca049360e2
|
@ -249,6 +249,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
|||
fs.DurationVar(&s.EvictionPressureTransitionPeriod.Duration, "eviction-pressure-transition-period", s.EvictionPressureTransitionPeriod.Duration, "Duration for which the kubelet has to wait before transitioning out of an eviction pressure condition.")
|
||||
fs.Int32Var(&s.EvictionMaxPodGracePeriod, "eviction-max-pod-grace-period", s.EvictionMaxPodGracePeriod, "Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. If negative, defer to pod specified value.")
|
||||
fs.StringVar(&s.EvictionMinimumReclaim, "eviction-minimum-reclaim", s.EvictionMinimumReclaim, "A set of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.")
|
||||
fs.BoolVar(&s.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", s.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
|
||||
fs.Int32Var(&s.PodsPerCore, "pods-per-core", s.PodsPerCore, "Number of Pods per core that can run on this Kubelet. The total number of Pods on this Kubelet cannot exceed max-pods, so max-pods will be used if this calculation results in a larger number of Pods allowed on the Kubelet. A value of 0 disables this limit.")
|
||||
fs.BoolVar(&s.ProtectKernelDefaults, "protect-kernel-defaults", s.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")
|
||||
|
||||
|
|
|
@ -202,6 +202,7 @@ experimental-nvidia-gpus
|
|||
experimental-prefix
|
||||
experimental-cri
|
||||
experimental-check-node-capabilities-before-mount
|
||||
experimental-kernel-memcg-notification
|
||||
external-etcd-cafile
|
||||
external-etcd-certfile
|
||||
external-etcd-endpoints
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -430,6 +430,9 @@ type KubeletConfiguration struct {
|
|||
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
|
||||
// +optional
|
||||
EvictionMinimumReclaim string `json:"evictionMinimumReclaim,omitempty"`
|
||||
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
|
||||
// +optional
|
||||
ExperimentalKernelMemcgNotification bool `json:"experimentalKernelMemcgNotification"`
|
||||
// Maximum number of pods per core. Cannot exceed MaxPods
|
||||
PodsPerCore int32 `json:"podsPerCore"`
|
||||
// enableControllerAttachDetach enables the Attach/Detach controller to
|
||||
|
|
|
@ -374,6 +374,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
|
|||
if obj.EvictionPressureTransitionPeriod == zeroDuration {
|
||||
obj.EvictionPressureTransitionPeriod = metav1.Duration{Duration: 5 * time.Minute}
|
||||
}
|
||||
if obj.ExperimentalKernelMemcgNotification == nil {
|
||||
obj.ExperimentalKernelMemcgNotification = boolVar(false)
|
||||
}
|
||||
if obj.SystemReserved == nil {
|
||||
obj.SystemReserved = make(map[string]string)
|
||||
}
|
||||
|
|
|
@ -472,6 +472,8 @@ type KubeletConfiguration struct {
|
|||
EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod"`
|
||||
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
|
||||
EvictionMinimumReclaim string `json:"evictionMinimumReclaim"`
|
||||
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
|
||||
ExperimentalKernelMemcgNotification *bool `json:"experimentalKernelMemcgNotification"`
|
||||
// Maximum number of pods per core. Cannot exceed MaxPods
|
||||
PodsPerCore int32 `json:"podsPerCore"`
|
||||
// enableControllerAttachDetach enables the Attach/Detach controller to
|
||||
|
|
|
@ -391,6 +391,9 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
|
|||
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
|
||||
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
|
||||
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
|
||||
if err := api.Convert_Pointer_bool_To_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.PodsPerCore = in.PodsPerCore
|
||||
if err := api.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
|
||||
return err
|
||||
|
@ -561,6 +564,9 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
|
|||
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
|
||||
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
|
||||
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
|
||||
if err := api.Convert_bool_To_Pointer_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.PodsPerCore = in.PodsPerCore
|
||||
if err := api.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
|
||||
return err
|
||||
|
|
|
@ -414,6 +414,13 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
|
|||
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
|
||||
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
|
||||
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
|
||||
if in.ExperimentalKernelMemcgNotification != nil {
|
||||
in, out := &in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
} else {
|
||||
out.ExperimentalKernelMemcgNotification = nil
|
||||
}
|
||||
out.PodsPerCore = in.PodsPerCore
|
||||
if in.EnableControllerAttachDetach != nil {
|
||||
in, out := &in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach
|
||||
|
|
|
@ -369,6 +369,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
|
|||
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
|
||||
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
|
||||
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
|
||||
out.ExperimentalKernelMemcgNotification = in.ExperimentalKernelMemcgNotification
|
||||
out.PodsPerCore = in.PodsPerCore
|
||||
out.EnableControllerAttachDetach = in.EnableControllerAttachDetach
|
||||
if in.SystemReserved != nil {
|
||||
|
|
|
@ -2918,6 +2918,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
|||
Format: "",
|
||||
},
|
||||
},
|
||||
"experimentalKernelMemcgNotification": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"podsPerCore": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Maximum number of pods per core. Cannot exceed MaxPods",
|
||||
|
@ -14806,6 +14813,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
|||
Format: "",
|
||||
},
|
||||
},
|
||||
"experimentalKernelMemcgNotification": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"podsPerCore": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Maximum number of pods per core. Cannot exceed MaxPods",
|
||||
|
@ -14919,7 +14933,7 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
|||
},
|
||||
},
|
||||
},
|
||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
|
||||
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
|
|
|
@ -204,7 +204,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
|
|||
}
|
||||
|
||||
// attempt to create a threshold notifier to improve eviction response time
|
||||
if !m.notifiersInitialized {
|
||||
if m.config.KernelMemcgNotification && !m.notifiersInitialized {
|
||||
glog.Infof("eviction manager attempting to integrate with kernel memcg notification api")
|
||||
m.notifiersInitialized = true
|
||||
// start soft memory notification
|
||||
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {
|
||||
|
|
|
@ -69,6 +69,8 @@ type Config struct {
|
|||
MaxPodGracePeriodSeconds int64
|
||||
// Thresholds define the set of conditions monitored to trigger eviction.
|
||||
Thresholds []Threshold
|
||||
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
|
||||
KernelMemcgNotification bool
|
||||
}
|
||||
|
||||
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity
|
||||
|
|
|
@ -354,6 +354,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
|
|||
PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
|
||||
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
|
||||
Thresholds: thresholds,
|
||||
KernelMemcgNotification: kubeCfg.ExperimentalKernelMemcgNotification,
|
||||
}
|
||||
|
||||
reservation, err := ParseReservation(kubeCfg.KubeReserved, kubeCfg.SystemReserved)
|
||||
|
|
Loading…
Reference in New Issue