Merge pull request #73651 from RobertKrawitz/node_pids_limit

Support total process ID limiting for nodes
Kubernetes Prow Robot committed 2019-02-13 17:31:18 -08:00 (via GitHub)
commit 888ff4097a
19 changed files with 211 additions and 41 deletions


@@ -68,6 +68,7 @@ go_library(
"//pkg/kubelet/kubeletconfig/configfiles:go_default_library",
"//pkg/kubelet/server:go_default_library",
"//pkg/kubelet/server/streaming:go_default_library",
"//pkg/kubelet/stats/pidlimit:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/util/configz:go_default_library",
"//pkg/util/filesystem:go_default_library",


@@ -590,8 +590,8 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
fs.BoolVar(&c.ProtectKernelDefaults, "protect-kernel-defaults", c.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")
// Node Allocatable Flags
fs.Var(flag.NewMapStringString(&c.SystemReserved), "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.Var(flag.NewMapStringString(&c.KubeReserved), "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi) pairs that describe resources reserved for kubernetes system components. Currently cpu, memory and local ephemeral storage for root file system are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.Var(flag.NewMapStringString(&c.SystemReserved), "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi,pid=100) pairs that describe resources reserved for non-kubernetes components. Currently only cpu, memory, and pid (process IDs) are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.Var(flag.NewMapStringString(&c.KubeReserved), "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi,pid=100) pairs that describe resources reserved for kubernetes system components. Currently cpu, memory, local ephemeral storage for root file system, and pid (process IDs) are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.StringSliceVar(&c.EnforceNodeAllocatable, "enforce-node-allocatable", c.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptable options are 'none', 'pods', 'system-reserved', and 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' and '--kube-reserved-cgroup' must also be set, respectively. If 'none' is specified, no additional options should be set. See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/ for more details.")
fs.StringVar(&c.SystemReservedCgroup, "system-reserved-cgroup", c.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
fs.StringVar(&c.KubeReservedCgroup, "kube-reserved-cgroup", c.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")
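With this change the reserved-resource flags accept a pid=<count> pair alongside cpu, memory, and ephemeral-storage. A minimal standalone sketch of the key=value list format these flags consume (illustrative only, not the kubelet's actual flag.NewMapStringString plumbing):

package main

import (
	"fmt"
	"strings"
)

// parsePairs splits a flag value such as "cpu=200m,memory=500Mi,pid=100"
// into a map, mimicking what --system-reserved / --kube-reserved produce.
func parsePairs(s string) (map[string]string, error) {
	m := map[string]string{}
	for _, pair := range strings.Split(s, ",") {
		kv := strings.SplitN(pair, "=", 2)
		if len(kv) != 2 {
			return nil, fmt.Errorf("malformed pair %q", pair)
		}
		m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
	}
	return m, nil
}

func main() {
	m, err := parsePairs("cpu=200m,memory=500Mi,pid=100")
	fmt.Println(m, err) // map[cpu:200m memory:500Mi pid:100] <nil>
}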


@@ -82,6 +82,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/kubeletconfig/configfiles"
"k8s.io/kubernetes/pkg/kubelet/server"
"k8s.io/kubernetes/pkg/kubelet/server/streaming"
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/util/configz"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
@@ -1152,8 +1153,9 @@ func parseResourceList(m map[string]string) (v1.ResourceList, error) {
rl := make(v1.ResourceList)
for k, v := range m {
switch v1.ResourceName(k) {
// CPU, memory and local storage resources are supported.
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage:
// CPU, memory, local storage, and PID resources are supported.
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage, pidlimit.PIDs:
if v1.ResourceName(k) != pidlimit.PIDs || utilfeature.DefaultFeatureGate.Enabled(features.SupportNodePidsLimit) {
q, err := resource.ParseQuantity(v)
if err != nil {
return nil, err
@@ -1162,6 +1164,7 @@ func parseResourceList(m map[string]string) (v1.ResourceList, error) {
return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
}
rl[v1.ResourceName(k)] = q
}
default:
return nil, fmt.Errorf("cannot reserve %q resource", k)
}
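Note the shape of the gate check: it sits inside the case arm, so when SupportNodePidsLimit is off a pid entry is silently skipped rather than rejected as unknown. A self-contained sketch of that control flow, with the feature gate stubbed as a plain boolean:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// parseReserved mirrors the gating in parseResourceList: "pid" is only
// honored when the (stubbed) SupportNodePidsLimit gate is enabled.
func parseReserved(m map[string]string, nodePidsLimitEnabled bool) (map[string]resource.Quantity, error) {
	rl := map[string]resource.Quantity{}
	for k, v := range m {
		switch k {
		case "cpu", "memory", "ephemeral-storage", "pid":
			if k == "pid" && !nodePidsLimitEnabled {
				continue // gate off: the pid entry is dropped, not an error
			}
			q, err := resource.ParseQuantity(v)
			if err != nil {
				return nil, err
			}
			if q.Sign() == -1 {
				return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
			}
			rl[k] = q
		default:
			return nil, fmt.Errorf("cannot reserve %q resource", k)
		}
	}
	return rl, nil
}

func main() {
	rl, _ := parseReserved(map[string]string{"cpu": "200m", "pid": "100"}, true)
	q := rl["pid"]
	fmt.Println(q.Value()) // 100
}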


@@ -405,6 +405,12 @@ const (
//
// Enables the AWS EBS in-tree driver to AWS EBS CSI Driver migration feature.
CSIMigrationAWS utilfeature.Feature = "CSIMigrationAWS"
// owner: @RobertKrawitz
// alpha: v1.14
//
// Implement support for limiting pids in nodes
SupportNodePidsLimit utilfeature.Feature = "SupportNodePidsLimit"
)
func init() {
@@ -450,6 +456,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS
ResourceLimitsPriorityFunction: {Default: false, PreRelease: utilfeature.Alpha},
SupportIPVSProxyMode: {Default: true, PreRelease: utilfeature.GA},
SupportPodPidsLimit: {Default: true, PreRelease: utilfeature.Beta},
SupportNodePidsLimit: {Default: false, PreRelease: utilfeature.Alpha},
HyperVContainer: {Default: false, PreRelease: utilfeature.Alpha},
ScheduleDaemonSetPods: {Default: true, PreRelease: utilfeature.Beta},
TokenRequest: {Default: true, PreRelease: utilfeature.Beta},
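SupportNodePidsLimit lands alpha and off by default, so pid reservations are inert until the gate is enabled (the sibling SupportPodPidsLimit gate is already beta and on). A toy emulation of the default-plus-override lookup — not the real k8s.io/apiserver featuregate machinery:

package main

import "fmt"

// Illustrative defaults mirroring the table above.
var gateDefaults = map[string]bool{
	"SupportPodPidsLimit":  true,  // Beta, on by default
	"SupportNodePidsLimit": false, // Alpha, off by default
}

// enabled resolves a gate: explicit overrides win, then the default applies.
func enabled(gate string, overrides map[string]bool) bool {
	if v, ok := overrides[gate]; ok {
		return v
	}
	return gateDefaults[gate]
}

func main() {
	// e.g. --feature-gates=SupportNodePidsLimit=true
	overrides := map[string]bool{"SupportNodePidsLimit": true}
	fmt.Println(enabled("SupportNodePidsLimit", overrides)) // true
	fmt.Println(enabled("SupportNodePidsLimit", nil))       // false
}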


@@ -291,12 +291,12 @@ type KubeletConfiguration struct {
/* the following fields are meant for Node Allocatable */
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G,pids=100) pairs
// that describe resources reserved for non-kubernetes components.
// Currently only cpu and memory are supported.
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.
SystemReserved map[string]string
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs
// A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G,pids=100) pairs
// that describe resources reserved for kubernetes system components.
// Currently cpu, memory and local ephemeral storage for root file system are supported.
// See http://kubernetes.io/docs/user-guide/compute-resources for more detail.


@@ -73,6 +73,7 @@ go_library(
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/kubelet/stats/pidlimit:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/util/mount:go_default_library",
"//pkg/util/oom:go_default_library",


@@ -257,7 +257,7 @@ func (m *cgroupManagerImpl) Exists(name CgroupName) bool {
// in https://github.com/opencontainers/runc/issues/1440
// once resolved, we can remove this code.
whitelistControllers := sets.NewString("cpu", "cpuacct", "cpuset", "memory", "systemd")
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) {
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportNodePidsLimit) {
whitelistControllers.Insert("pids")
}
var missingPaths []string
@@ -325,10 +325,11 @@ func getSupportedSubsystems() map[subsystem]bool {
supportedSubsystems := map[subsystem]bool{
&cgroupfs.MemoryGroup{}: true,
&cgroupfs.CpuGroup{}: true,
&cgroupfs.PidsGroup{}: true,
}
// not all hosts support hugetlb cgroup, and in the absent of hugetlb, we will fail silently by reporting no capacity.
supportedSubsystems[&cgroupfs.HugetlbGroup{}] = false
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) {
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportNodePidsLimit) {
supportedSubsystems[&cgroupfs.PidsGroup{}] = true
}
return supportedSubsystems
@@ -377,9 +378,9 @@ func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcont
if resourceConfig.CpuPeriod != nil {
resources.CpuPeriod = *resourceConfig.CpuPeriod
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) {
if resourceConfig.PodPidsLimit != nil {
resources.PidsLimit = *resourceConfig.PodPidsLimit
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportNodePidsLimit) {
if resourceConfig.PidsLimit != nil {
resources.PidsLimit = *resourceConfig.PidsLimit
}
}
// if huge pages are enabled, we set them in libcontainer
@@ -431,8 +432,8 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PidsLimit != nil {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PidsLimit
}
if err := setSupportedSubsystems(libcontainerCgroupConfig); err != nil {
@@ -461,8 +462,8 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters != nil && cgroupConfig.ResourceParameters.PidsLimit != nil {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PidsLimit
}
// get the manager with the specified cgroup configuration
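Enabling either pids gate whitelists the "pids" cgroup controller, and a configured PidsLimit flows through libcontainer to the cgroup's pids.max file. A hedged sketch of that end state, writing directly to an assumed cgroup v1 hierarchy at /sys/fs/cgroup/pids (the kubelet itself always goes through libcontainer):

package main

import (
	"fmt"
	"io/ioutil"
	"path/filepath"
)

// setPidsMax writes a pids limit straight into a cgroup's pids.max file,
// illustrating the state libcontainer's pids controller produces.
func setPidsMax(cgroupPath string, limit int64) error {
	f := filepath.Join("/sys/fs/cgroup/pids", cgroupPath, "pids.max")
	return ioutil.WriteFile(f, []byte(fmt.Sprintf("%d\n", limit)), 0644)
}

func main() {
	// Requires root and a mounted pids controller; fails cleanly otherwise.
	if err := setPidsMax("kubepods", 31030); err != nil {
		fmt.Println("write failed:", err)
	}
}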


@@ -53,6 +53,7 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/kubelet/util/pluginwatcher"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
@@ -123,6 +124,8 @@ type containerManagerImpl struct {
cgroupManager CgroupManager
// Capacity of this node.
capacity v1.ResourceList
// Capacity of this node, including internal resources.
internalCapacity v1.ResourceList
// Absolute cgroupfs path to a cgroup that Kubelet needs to place all pods under.
// This path include a top level container for enforcing Node Allocatable.
cgroupRoot CgroupName
@@ -219,6 +222,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
}
var capacity = v1.ResourceList{}
var internalCapacity = v1.ResourceList{}
// It is safe to invoke `MachineInfo` on cAdvisor before logically initializing cAdvisor here because
// machine info is computed and cached once as part of cAdvisor object creation.
// But `RootFsInfo` and `ImagesFsInfo` are not available at this moment so they will be called later during manager starts
@@ -227,6 +231,15 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
return nil, err
}
capacity = cadvisor.CapacityFromMachineInfo(machineInfo)
for k, v := range capacity {
internalCapacity[k] = v
}
pidlimits, err := pidlimit.Stats()
if err == nil && pidlimits != nil && pidlimits.MaxPID != nil {
internalCapacity[pidlimit.PIDs] = *resource.NewQuantity(
int64(*pidlimits.MaxPID),
resource.DecimalSI)
}
// Turn CgroupRoot from a string (in cgroupfs path format) to internal CgroupName
cgroupRoot := ParseCgroupfsToCgroupName(nodeConfig.CgroupRoot)
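internalCapacity starts as a copy of capacity and then gains resources the scheduler never sees; the PID capacity comes from the kernel's pid_max. A self-contained sketch of that derivation, reading the same /proc file the new pidlimit package consults:

package main

import (
	"fmt"
	"io/ioutil"
	"strconv"
	"strings"

	"k8s.io/apimachinery/pkg/api/resource"
)

// pidCapacity derives the node's PID capacity from the kernel limit,
// as NewContainerManager does via pidlimit.Stats().
func pidCapacity() (*resource.Quantity, error) {
	content, err := ioutil.ReadFile("/proc/sys/kernel/pid_max")
	if err != nil {
		return nil, err
	}
	maxPid, err := strconv.ParseInt(strings.TrimSpace(string(content)), 10, 64)
	if err != nil {
		return nil, err
	}
	return resource.NewQuantity(maxPid, resource.DecimalSI), nil
}

func main() {
	q, err := pidCapacity()
	fmt.Println(q, err) // e.g. 32768 <nil> with the common kernel default
}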
@@ -264,6 +277,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
subsystems: subsystems,
cgroupManager: cgroupManager,
capacity: capacity,
internalCapacity: internalCapacity,
cgroupRoot: cgroupRoot,
recorder: recorder,
qosContainerManager: qosContainerManager,


@@ -28,6 +28,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/kubelet/events"
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
@@ -40,7 +41,7 @@ func (cm *containerManagerImpl) createNodeAllocatableCgroups() error {
cgroupConfig := &CgroupConfig{
Name: cm.cgroupRoot,
// The default limits for cpu shares can be very low which can lead to CPU starvation for pods.
ResourceParameters: getCgroupConfig(cm.capacity),
ResourceParameters: getCgroupConfig(cm.internalCapacity),
}
if cm.cgroupManager.Exists(cgroupConfig.Name) {
return nil
@@ -58,10 +59,10 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
// We need to update limits on node allocatable cgroup no matter what because
// default cpu shares on cgroups are low and can cause cpu starvation.
nodeAllocatable := cm.capacity
nodeAllocatable := cm.internalCapacity
// Use Node Allocatable limits instead of capacity if the user requested enforcing node allocatable.
if cm.CgroupsPerQOS && nc.EnforceNodeAllocatable.Has(kubetypes.NodeAllocatableEnforcementKey) {
nodeAllocatable = cm.getNodeAllocatableAbsolute()
nodeAllocatable = cm.getNodeAllocatableInternalAbsolute()
}
klog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc)
@@ -130,7 +131,7 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.
if cgroupConfig.ResourceParameters == nil {
return fmt.Errorf("%q cgroup is not config properly", cgroupConfig.Name)
}
klog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
klog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares, %d bytes of memory, and %d processes", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory, cgroupConfig.ResourceParameters.PidsLimit)
if !cgroupManager.Exists(cgroupConfig.Name) {
return fmt.Errorf("%q cgroup does not exist", cgroupConfig.Name)
}
@@ -157,6 +158,10 @@ func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
val := MilliCPUToShares(q.MilliValue())
rc.CpuShares = &val
}
if q, exists := rl[pidlimit.PIDs]; exists {
val := q.Value()
rc.PidsLimit = &val
}
rc.HugePageLimit = HugePageLimits(rl)
return &rc
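The copy through a local (val := q.Value(); rc.PidsLimit = &val) is deliberate: map elements in Go are not addressable, so the value must land in a fresh int64 before its address is stored. A minimal demonstration, with a hypothetical stand-in for ResourceConfig:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// resourceConfigSketch stands in for the kubelet's ResourceConfig.
type resourceConfigSketch struct {
	PidsLimit *int64
}

func main() {
	rl := map[string]resource.Quantity{"pid": resource.MustParse("1000")}
	var rc resourceConfigSketch
	if q, exists := rl["pid"]; exists {
		val := q.Value() // copy first; taking the address of rl["pid"] would not compile
		rc.PidsLimit = &val
	}
	fmt.Println(*rc.PidsLimit) // 1000
}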
@@ -166,8 +171,12 @@ func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
// Note that not all resources that are available on the node are included in the returned list of resources.
// Returns a ResourceList.
func (cm *containerManagerImpl) getNodeAllocatableAbsolute() v1.ResourceList {
return cm.getNodeAllocatableAbsoluteImpl(cm.capacity)
}
func (cm *containerManagerImpl) getNodeAllocatableAbsoluteImpl(capacity v1.ResourceList) v1.ResourceList {
result := make(v1.ResourceList)
for k, v := range cm.capacity {
for k, v := range capacity {
value := *(v.Copy())
if cm.NodeConfig.SystemReserved != nil {
value.Sub(cm.NodeConfig.SystemReserved[k])
@@ -182,7 +191,13 @@ func (cm *containerManagerImpl) getNodeAllocatableAbsolute() v1.ResourceList {
result[k] = value
}
return result
}
// getNodeAllocatableInternalAbsolute is similar to getNodeAllocatableAbsolute except that
// it also includes internal resources (currently process IDs). It is intended for setting
// up top level cgroups only.
func (cm *containerManagerImpl) getNodeAllocatableInternalAbsolute() v1.ResourceList {
return cm.getNodeAllocatableAbsoluteImpl(cm.internalCapacity)
}
// GetNodeAllocatableReservation returns amount of compute or storage resource that have to be reserved on this node from scheduling.
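getNodeAllocatableAbsoluteImpl subtracts both reservations from whichever capacity list it is handed; fed internalCapacity, it yields the PID limit enforced on the node allocatable cgroup. The arithmetic for the pid resource, sketched with assumed example values:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// allocatable = capacity - systemReserved - kubeReserved
	capacity := resource.MustParse("32768")      // e.g. kernel pid_max
	systemReserved := resource.MustParse("1000") // --system-reserved=pid=1000
	kubeReserved := resource.MustParse("738")    // --kube-reserved=pid=738

	allocatable := capacity.DeepCopy()
	allocatable.Sub(systemReserved)
	allocatable.Sub(kubeReserved)
	fmt.Println(allocatable.Value()) // 31030
}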


@@ -87,7 +87,7 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
ResourceParameters: ResourceConfigForPod(pod, m.enforceCPULimits, m.cpuCFSQuotaPeriod),
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && m.podPidsLimit > 0 {
containerConfig.ResourceParameters.PodPidsLimit = &m.podPidsLimit
containerConfig.ResourceParameters.PidsLimit = &m.podPidsLimit
}
if err := m.cgroupManager.Create(containerConfig); err != nil {
return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)


@@ -34,7 +34,7 @@ type ResourceConfig struct {
// HugePageLimit map from page size (in bytes) to limit (in bytes)
HugePageLimit map[int64]int64
// Maximum number of pids
PodPidsLimit *int64
PidsLimit *int64
}
// CgroupName is the abstract name of a cgroup prior to any driver specific conversion.


@@ -10,8 +10,6 @@ go_library(
"helper.go",
"log_metrics_provider.go",
"stats_provider.go",
"stats_provider_linux.go",
"stats_provider_unsupported.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/stats",
visibility = ["//visibility:public"],
@@ -26,6 +24,7 @@ go_library(
"//pkg/kubelet/leaky:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/stats/pidlimit:go_default_library",
"//pkg/kubelet/status:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/volume:go_default_library",
@@ -52,7 +51,10 @@ filegroup(
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
srcs = [
":package-srcs",
"//pkg/kubelet/stats/pidlimit:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@@ -0,0 +1,65 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"pidlimit.go",
"pidlimit_linux.go",
"pidlimit_unsupported.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/stats/pidlimit",
visibility = ["//visibility:public"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:android": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:darwin": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:linux": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
"@io_bazel_rules_go//go/platform:nacl": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:plan9": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:solaris": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"@io_bazel_rules_go//go/platform:windows": [
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
],
"//conditions:default": [],
}),
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@@ -0,0 +1,26 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package pidlimit
import (
"k8s.io/api/core/v1"
)
const (
// PIDs is the (internal) name for this resource
PIDs v1.ResourceName = "pid"
)


@@ -16,7 +16,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package stats
package pidlimit
import (
"io/ioutil"
@@ -28,7 +28,8 @@ import (
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
func (p *StatsProvider) RlimitStats() (*statsapi.RlimitStats, error) {
// Stats provides basic information about max and current process count
func Stats() (*statsapi.RlimitStats, error) {
rlimit := &statsapi.RlimitStats{}
if content, err := ioutil.ReadFile("/proc/sys/kernel/pid_max"); err == nil {
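The hunk is cut off after the pid_max read; the rest of Stats() must also fill in a current-process count for RlimitStats. One plausible way to obtain that number on Linux is sysinfo(2) — a hedged sketch, not necessarily what this file does:

//go:build linux

package main

import (
	"fmt"
	"syscall"
)

func main() {
	// sysinfo(2) reports, among other things, the number of current
	// processes/threads; a candidate source for NumOfRunningProcesses.
	var info syscall.Sysinfo_t
	if err := syscall.Sysinfo(&info); err != nil {
		fmt.Println("sysinfo failed:", err)
		return
	}
	fmt.Println("running processes:", info.Procs)
}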


@@ -16,12 +16,13 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package stats
package pidlimit
import (
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
func (p *StatsProvider) RlimitStats() (*statsapi.RlimitStats, error) {
// Stats provides basic information about max and current process count
func Stats() (*statsapi.RlimitStats, error) {
return nil, nil
}
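Note that this stub returns (nil, nil) — no stats and no error — so callers must nil-check the result as well as the error. The capacity code in NewContainerManager does exactly that (err == nil && pidlimits != nil && pidlimits.MaxPID != nil) before recording a PID capacity.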


@@ -28,6 +28,7 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
"k8s.io/kubernetes/pkg/kubelet/status"
)
@@ -96,6 +97,11 @@ type rlimitStatsProvider interface {
RlimitStats() (*statsapi.RlimitStats, error)
}
// RlimitStats returns base information about process count
func (p *StatsProvider) RlimitStats() (*statsapi.RlimitStats, error) {
return pidlimit.Stats()
}
// GetCgroupStats returns the stats of the cgroup with the cgroupName. Note that
// this function doesn't generate filesystem stats.
func (p *StatsProvider) GetCgroupStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, *statsapi.NetworkStats, error) {


@@ -165,6 +165,7 @@ go_test(
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//cmd/kubeadm/app/util/system:go_default_library",
"//pkg/kubelet/stats/pidlimit:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/yaml:go_default_library",


@@ -29,8 +29,10 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/features"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
"k8s.io/kubernetes/test/e2e/framework"
. "github.com/onsi/ginkgo"
@@ -38,14 +40,17 @@ import (
)
func setDesiredConfiguration(initialConfig *kubeletconfig.KubeletConfiguration) {
initialConfig.FeatureGates[string(features.SupportNodePidsLimit)] = true
initialConfig.EnforceNodeAllocatable = []string{"pods", kubeReservedCgroup, systemReservedCgroup}
initialConfig.SystemReserved = map[string]string{
string(v1.ResourceCPU): "100m",
string(v1.ResourceMemory): "100Mi",
string(pidlimit.PIDs): "1000",
}
initialConfig.KubeReserved = map[string]string{
string(v1.ResourceCPU): "100m",
string(v1.ResourceMemory): "100Mi",
string(pidlimit.PIDs): "738",
}
initialConfig.EvictionHard = map[string]string{"memory.available": "100Mi"}
// Necessary for allocatable cgroup creation.
@@ -81,8 +86,8 @@ func expectFileValToEqual(filePath string, expectedValue, delta int64) error {
return nil
}
func getAllocatableLimits(cpu, memory string, capacity v1.ResourceList) (*resource.Quantity, *resource.Quantity) {
var allocatableCPU, allocatableMemory *resource.Quantity
func getAllocatableLimits(cpu, memory, pids string, capacity v1.ResourceList) (*resource.Quantity, *resource.Quantity, *resource.Quantity) {
var allocatableCPU, allocatableMemory, allocatablePIDs *resource.Quantity
// Total cpu reservation is 200m.
for k, v := range capacity {
if k == v1.ResourceCPU {
@@ -94,7 +99,13 @@ func getAllocatableLimits(cpu, memory string, capacity v1.ResourceList) (*resour
allocatableMemory.Sub(resource.MustParse(memory))
}
}
return allocatableCPU, allocatableMemory
// Process IDs are not a node allocatable, so we have to do this ad hoc
pidlimits, err := pidlimit.Stats()
if err == nil && pidlimits != nil && pidlimits.MaxPID != nil {
allocatablePIDs = resource.NewQuantity(int64(*pidlimits.MaxPID), resource.DecimalSI)
allocatablePIDs.Sub(resource.MustParse(pids))
}
return allocatableCPU, allocatableMemory, allocatablePIDs
}
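Worked example of what the test below expects: the configuration reserves pid=1000 for system and pid=738 for kube, hence the combined "1738" later passed to getAllocatableLimits. Assuming the common pid_max default of 32768 (the test reads the real value via pidlimit.Stats()), the kubepods cgroup should get pids.max = 32768 - (1000 + 738) = 31030, while the kube- and system-reserved cgroups get pids.max of 738 and 1000 respectively.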
const (
@@ -189,7 +200,7 @@ func runTest(f *framework.Framework) error {
}
node := nodeList.Items[0]
capacity := node.Status.Capacity
allocatableCPU, allocatableMemory := getAllocatableLimits("200m", "200Mi", capacity)
allocatableCPU, allocatableMemory, allocatablePIDs := getAllocatableLimits("200m", "200Mi", "1738", capacity)
// Total Memory reservation is 200Mi excluding eviction thresholds.
// Expect CPU shares on node allocatable cgroup to equal allocatable.
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], "kubepods", "cpu.shares"), int64(cm.MilliCPUToShares(allocatableCPU.MilliValue())), 10); err != nil {
@@ -199,11 +210,16 @@ func runTest(f *framework.Framework) error {
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], "kubepods", "memory.limit_in_bytes"), allocatableMemory.Value(), 0); err != nil {
return err
}
// Expect PID limit on node allocatable cgroup to equal allocatable.
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], "kubepods", "pids.max"), allocatablePIDs.Value(), 0); err != nil {
return err
}
// Check that Allocatable reported to scheduler includes eviction thresholds.
schedulerAllocatable := node.Status.Allocatable
// Memory allocatable should take into account eviction thresholds.
allocatableCPU, allocatableMemory = getAllocatableLimits("200m", "300Mi", capacity)
// Process IDs are not a scheduler resource and as such cannot be tested here.
allocatableCPU, allocatableMemory, _ = getAllocatableLimits("200m", "300Mi", "1738", capacity)
// Expect allocatable to include all resources in capacity.
if len(schedulerAllocatable) != len(capacity) {
return fmt.Errorf("Expected all resources in capacity to be found in allocatable")
@@ -232,6 +248,11 @@ func runTest(f *framework.Framework) error {
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupManager.Name(kubeReservedCgroupName), "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil {
return err
}
// Expect process ID limit kube reserved cgroup to equal configured value `738`.
kubeReservedPIDs := resource.MustParse(currentConfig.KubeReserved[string(pidlimit.PIDs)])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], cgroupManager.Name(kubeReservedCgroupName), "pids.max"), kubeReservedPIDs.Value(), 0); err != nil {
return err
}
systemReservedCgroupName := cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup)
if !cgroupManager.Exists(systemReservedCgroupName) {
return fmt.Errorf("Expected system reserved cgroup Does not exist")
@@ -246,5 +267,10 @@ func runTest(f *framework.Framework) error {
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupManager.Name(systemReservedCgroupName), "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil {
return err
}
// Expect process ID limit system reserved cgroup to equal configured value `1000`.
systemReservedPIDs := resource.MustParse(currentConfig.SystemReserved[string(pidlimit.PIDs)])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], cgroupManager.Name(systemReservedCgroupName), "pids.max"), systemReservedPIDs.Value(), 0); err != nil {
return err
}
return nil
}