diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index 47f9eda3db..1e69d5804a 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -560,7 +560,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig fs.Int32Var(&c.MaxPods, "max-pods", c.MaxPods, "Number of Pods that can run on this Kubelet.") fs.StringVar(&c.PodCIDR, "pod-cidr", c.PodCIDR, "The CIDR to use for pod IP addresses, only used in standalone mode. In cluster mode, this is obtained from the master. For IPv6, the maximum number of IP's allocated is 65536") - fs.Int64Var(&c.PodPidsLimit, "pod-max-pids", c.PodPidsLimit, " Set the maximum number of processes per pod.") + fs.Int64Var(&c.PodPidsLimit, "pod-max-pids", c.PodPidsLimit, "Set the maximum number of processes per pod. If -1, the kubelet defaults to the node allocatable pid capacity.") fs.StringVar(&c.ResolverConfig, "resolv-conf", c.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.") fs.BoolVar(&c.CPUCFSQuota, "cpu-cfs-quota", c.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits") diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 1cc34889ea..8a9412c586 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -236,8 +236,9 @@ const ( // Implement IPVS-based in-cluster service load balancing SupportIPVSProxyMode utilfeature.Feature = "SupportIPVSProxyMode" - // owner: @dims + // owner: @dims, @derekwaynecarr // alpha: v1.10 + // beta: v1.14 // // Implement support for limiting pids in pods SupportPodPidsLimit utilfeature.Feature = "SupportPodPidsLimit" @@ -433,7 +434,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS StorageObjectInUseProtection: {Default: true, PreRelease: utilfeature.GA}, ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha}, SupportIPVSProxyMode: {Default: true, PreRelease: utilfeature.GA}, - SupportPodPidsLimit: {Default: false, PreRelease: utilfeature.Alpha}, + SupportPodPidsLimit: {Default: true, PreRelease: utilfeature.Beta}, HyperVContainer: {Default: false, PreRelease: utilfeature.Alpha}, ScheduleDaemonSetPods: {Default: true, PreRelease: utilfeature.Beta}, TokenRequest: {Default: true, PreRelease: utilfeature.Beta}, diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index 941d837486..7061a62e0c 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -218,7 +218,7 @@ type KubeletConfiguration struct { // The CIDR to use for pod IP addresses, only used in standalone mode. // In cluster mode, this is obtained from the master. PodCIDR string - // PodPidsLimit is the maximum number of pids in any pod. + // The maximum number of processes per pod. If -1, the kubelet defaults to the node allocatable pid capacity. PodPidsLimit int64 // ResolverConfig is the resolver configuration file used as the basis // for the container DNS resolution configuration. diff --git a/pkg/kubelet/apis/config/v1beta1/defaults.go b/pkg/kubelet/apis/config/v1beta1/defaults.go index 0aebb8447b..8c1568e91e 100644 --- a/pkg/kubelet/apis/config/v1beta1/defaults.go +++ b/pkg/kubelet/apis/config/v1beta1/defaults.go @@ -158,7 +158,8 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura if obj.MaxPods == 0 { obj.MaxPods = 110 } - if obj.PodPidsLimit == nil { + // default nil or negative value to -1 (implies node allocatable pid limit) + if obj.PodPidsLimit == nil || *obj.PodPidsLimit < int64(0) { temp := int64(-1) obj.PodPidsLimit = &temp } diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index f1c8136936..977fb5b29f 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -257,7 +257,9 @@ func (m *cgroupManagerImpl) Exists(name CgroupName) bool { // in https://github.com/opencontainers/runc/issues/1440 // once resolved, we can remove this code. whitelistControllers := sets.NewString("cpu", "cpuacct", "cpuset", "memory", "systemd") - + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) { + whitelistControllers.Insert("pids") + } var missingPaths []string // If even one cgroup path doesn't exist, then the cgroup doesn't exist. for controller, path := range cgroupPaths { @@ -377,7 +379,11 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont if resourceConfig.CpuPeriod != nil { resources.CpuPeriod = *resourceConfig.CpuPeriod } - + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) { + if resourceConfig.PodPidsLimit != nil { + resources.PidsLimit = *resourceConfig.PodPidsLimit + } + } // if huge pages are enabled, we set them in libcontainer if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.HugePages) { // for each page size enumerated, set that value diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD index 51c902c0e3..98cb572379 100644 --- a/test/e2e_node/BUILD +++ b/test/e2e_node/BUILD @@ -97,6 +97,7 @@ go_test( "mirror_pod_test.go", "node_container_manager_test.go", "node_perf_test.go", + "pids_test.go", "pods_container_manager_test.go", "resource_usage_test.go", "restart_test.go", diff --git a/test/e2e_node/pids_test.go b/test/e2e_node/pids_test.go new file mode 100644 index 0000000000..534c758148 --- /dev/null +++ b/test/e2e_node/pids_test.go @@ -0,0 +1,150 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e_node + +import ( + "fmt" + "time" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/uuid" + + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/pkg/kubelet/cm" + "k8s.io/kubernetes/test/e2e/framework" + imageutils "k8s.io/kubernetes/test/utils/image" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +// makePodToVerifyPids returns a pod that verifies specified cgroup with pids +func makePodToVerifyPids(baseName string, pidsLimit resource.Quantity) *apiv1.Pod { + // convert the cgroup name to its literal form + cgroupFsName := "" + cgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup, baseName) + if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" { + cgroupFsName = cgroupName.ToSystemd() + } else { + cgroupFsName = cgroupName.ToCgroupfs() + } + + // this command takes the expected value and compares it against the actual value for the pod cgroup pids.max + command := fmt.Sprintf("expected=%v; actual=$(cat /tmp/pids/%v/pids.max); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", pidsLimit.Value(), cgroupFsName) + framework.Logf("Pod to run command: %v", command) + pod := &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod" + string(uuid.NewUUID()), + }, + Spec: apiv1.PodSpec{ + RestartPolicy: apiv1.RestartPolicyNever, + Containers: []apiv1.Container{ + { + Image: busyboxImage, + Name: "container" + string(uuid.NewUUID()), + Command: []string{"sh", "-c", command}, + VolumeMounts: []apiv1.VolumeMount{ + { + Name: "sysfscgroup", + MountPath: "/tmp", + }, + }, + }, + }, + Volumes: []apiv1.Volume{ + { + Name: "sysfscgroup", + VolumeSource: apiv1.VolumeSource{ + HostPath: &apiv1.HostPathVolumeSource{Path: "/sys/fs/cgroup"}, + }, + }, + }, + }, + } + return pod +} + +// enablePodPidsLimitInKubelet enables pod pid limit feature for kubelet with a sensible default test limit +func enablePodPidsLimitInKubelet(f *framework.Framework) *kubeletconfig.KubeletConfiguration { + oldCfg, err := getCurrentKubeletConfig() + framework.ExpectNoError(err) + newCfg := oldCfg.DeepCopy() + if newCfg.FeatureGates == nil { + newCfg.FeatureGates = make(map[string]bool) + newCfg.FeatureGates["SupportPodPidsLimit"] = true + } + newCfg.PodPidsLimit = int64(1024) + // Update the Kubelet configuration. + framework.ExpectNoError(setKubeletConfiguration(f, newCfg)) + + // Wait for the Kubelet to be ready. + Eventually(func() bool { + nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet) + return len(nodeList.Items) == 1 + }, time.Minute, time.Second).Should(BeTrue()) + + return oldCfg +} + +func runPodPidsLimitTests(f *framework.Framework) { + It("should set pids.max for Pod", func() { + By("by creating a G pod") + pod := f.PodClient().Create(&apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod" + string(uuid.NewUUID()), + Namespace: f.Namespace.Name, + }, + Spec: apiv1.PodSpec{ + Containers: []apiv1.Container{ + { + Image: imageutils.GetPauseImageName(), + Name: "container" + string(uuid.NewUUID()), + Resources: apiv1.ResourceRequirements{ + Limits: apiv1.ResourceList{ + apiv1.ResourceName("cpu"): resource.MustParse("10m"), + apiv1.ResourceName("memory"): resource.MustParse("100Mi"), + }, + }, + }, + }, + }, + }) + podUID := string(pod.UID) + By("checking if the expected pids settings were applied") + verifyPod := makePodToVerifyPids("pod"+podUID, resource.MustParse("1024")) + f.PodClient().Create(verifyPod) + err := framework.WaitForPodSuccessInNamespace(f.ClientSet, verifyPod.Name, f.Namespace.Name) + Expect(err).NotTo(HaveOccurred()) + }) +} + +// Serial because the test updates kubelet configuration. +var _ = SIGDescribe("PodPidsLimit [Serial] [Feature:SupportPodPidsLimit][NodeFeature:SupportPodPidsLimit]", func() { + f := framework.NewDefaultFramework("pids-limit-test") + Context("With config updated with pids feature enabled", func() { + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + if initialConfig.FeatureGates == nil { + initialConfig.FeatureGates = make(map[string]bool) + } + initialConfig.FeatureGates["SupportPodPidsLimit"] = true + initialConfig.PodPidsLimit = int64(1024) + }) + runPodPidsLimitTests(f) + }) +})