Merge pull request #38258 from derekwaynecarr/kernel-memcg-flag

Automatic merge from submit-queue (batch tested with PRs 38318, 38258)

kernel memcg notification enabled via experimental flag

Kubelet integrates with kernel memcg notification API if and only if enabled via experimental flag.
pull/6/head
Kubernetes Submit Queue 2016-12-07 15:00:10 -08:00 committed by GitHub
commit ca049360e2
13 changed files with 3215 additions and 3132 deletions

View File

@ -249,6 +249,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&s.EvictionPressureTransitionPeriod.Duration, "eviction-pressure-transition-period", s.EvictionPressureTransitionPeriod.Duration, "Duration for which the kubelet has to wait before transitioning out of an eviction pressure condition.")
fs.Int32Var(&s.EvictionMaxPodGracePeriod, "eviction-max-pod-grace-period", s.EvictionMaxPodGracePeriod, "Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met. If negative, defer to pod specified value.")
fs.StringVar(&s.EvictionMinimumReclaim, "eviction-minimum-reclaim", s.EvictionMinimumReclaim, "A set of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.")
fs.BoolVar(&s.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", s.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
fs.Int32Var(&s.PodsPerCore, "pods-per-core", s.PodsPerCore, "Number of Pods per core that can run on this Kubelet. The total number of Pods on this Kubelet cannot exceed max-pods, so max-pods will be used if this calculation results in a larger number of Pods allowed on the Kubelet. A value of 0 disables this limit.")
fs.BoolVar(&s.ProtectKernelDefaults, "protect-kernel-defaults", s.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")

View File

@ -202,6 +202,7 @@ experimental-nvidia-gpus
experimental-prefix
experimental-cri
experimental-check-node-capabilities-before-mount
experimental-kernel-memcg-notification
external-etcd-cafile
external-etcd-certfile
external-etcd-endpoints

File diff suppressed because it is too large Load Diff

View File

@ -430,6 +430,9 @@ type KubeletConfiguration struct {
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
// +optional
EvictionMinimumReclaim string `json:"evictionMinimumReclaim,omitempty"`
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
// +optional
ExperimentalKernelMemcgNotification bool `json:"experimentalKernelMemcgNotification"`
// Maximum number of pods per core. Cannot exceed MaxPods
PodsPerCore int32 `json:"podsPerCore"`
// enableControllerAttachDetach enables the Attach/Detach controller to

View File

@ -374,6 +374,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.EvictionPressureTransitionPeriod == zeroDuration {
obj.EvictionPressureTransitionPeriod = metav1.Duration{Duration: 5 * time.Minute}
}
if obj.ExperimentalKernelMemcgNotification == nil {
obj.ExperimentalKernelMemcgNotification = boolVar(false)
}
if obj.SystemReserved == nil {
obj.SystemReserved = make(map[string]string)
}

View File

@ -472,6 +472,8 @@ type KubeletConfiguration struct {
EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod"`
// Comma-delimited list of minimum reclaims (e.g. imagefs.available=2Gi) that describes the minimum amount of resource the kubelet will reclaim when performing a pod eviction if that resource is under pressure.
EvictionMinimumReclaim string `json:"evictionMinimumReclaim"`
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
ExperimentalKernelMemcgNotification *bool `json:"experimentalKernelMemcgNotification"`
// Maximum number of pods per core. Cannot exceed MaxPods
PodsPerCore int32 `json:"podsPerCore"`
// enableControllerAttachDetach enables the Attach/Detach controller to

View File

@ -391,6 +391,9 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if err := api.Convert_Pointer_bool_To_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
return err
}
out.PodsPerCore = in.PodsPerCore
if err := api.Convert_Pointer_bool_To_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err
@ -561,6 +564,9 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if err := api.Convert_bool_To_Pointer_bool(&in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification, s); err != nil {
return err
}
out.PodsPerCore = in.PodsPerCore
if err := api.Convert_bool_To_Pointer_bool(&in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach, s); err != nil {
return err

View File

@ -414,6 +414,13 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
if in.ExperimentalKernelMemcgNotification != nil {
in, out := &in.ExperimentalKernelMemcgNotification, &out.ExperimentalKernelMemcgNotification
*out = new(bool)
**out = **in
} else {
out.ExperimentalKernelMemcgNotification = nil
}
out.PodsPerCore = in.PodsPerCore
if in.EnableControllerAttachDetach != nil {
in, out := &in.EnableControllerAttachDetach, &out.EnableControllerAttachDetach

View File

@ -369,6 +369,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
out.EvictionPressureTransitionPeriod = in.EvictionPressureTransitionPeriod
out.EvictionMaxPodGracePeriod = in.EvictionMaxPodGracePeriod
out.EvictionMinimumReclaim = in.EvictionMinimumReclaim
out.ExperimentalKernelMemcgNotification = in.ExperimentalKernelMemcgNotification
out.PodsPerCore = in.PodsPerCore
out.EnableControllerAttachDetach = in.EnableControllerAttachDetach
if in.SystemReserved != nil {

View File

@ -2918,6 +2918,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
Format: "",
},
},
"experimentalKernelMemcgNotification": {
SchemaProps: spec.SchemaProps{
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
Type: []string{"boolean"},
Format: "",
},
},
"podsPerCore": {
SchemaProps: spec.SchemaProps{
Description: "Maximum number of pods per core. Cannot exceed MaxPods",
@ -14806,6 +14813,13 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
Format: "",
},
},
"experimentalKernelMemcgNotification": {
SchemaProps: spec.SchemaProps{
Description: "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.",
Type: []string{"boolean"},
Format: "",
},
},
"podsPerCore": {
SchemaProps: spec.SchemaProps{
Description: "Maximum number of pods per core. Cannot exceed MaxPods",
@ -14919,7 +14933,7 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
},
},
},
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
Required: []string{"TypeMeta", "podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "reconcileCIDR", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
},
},
Dependencies: []string{

View File

@ -204,7 +204,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
}
// attempt to create a threshold notifier to improve eviction response time
if !m.notifiersInitialized {
if m.config.KernelMemcgNotification && !m.notifiersInitialized {
glog.Infof("eviction manager attempting to integrate with kernel memcg notification api")
m.notifiersInitialized = true
// start soft memory notification
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {

View File

@ -69,6 +69,8 @@ type Config struct {
MaxPodGracePeriodSeconds int64
// Thresholds define the set of conditions monitored to trigger eviction.
Thresholds []Threshold
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
KernelMemcgNotification bool
}
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity

View File

@ -354,6 +354,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
Thresholds: thresholds,
KernelMemcgNotification: kubeCfg.ExperimentalKernelMemcgNotification,
}
reservation, err := ParseReservation(kubeCfg.KubeReserved, kubeCfg.SystemReserved)