Merge pull request #72844 from dashpole/fork_bomb_test

Fix PidPressure, and add fork-bomb e2e-node test
Authored by Kubernetes Prow Robot on 2019-01-16 15:59:01 -08:00; committed by GitHub
commit 5d19fda5e8
6 changed files with 98 additions and 22 deletions

View File

@@ -63,6 +63,7 @@ var OpForSignal = map[Signal]ThresholdOperator{
 	SignalNodeFsInodesFree: OpLessThan,
 	SignalImageFsAvailable: OpLessThan,
 	SignalImageFsInodesFree: OpLessThan,
+	SignalPIDAvailable: OpLessThan,
 }

 // ThresholdValue is a value holder that abstracts literal versus percentage based quantity

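The new SignalPIDAvailable entry pairs the pid.available eviction signal with OpLessThan, i.e. the signal fires when the observed number of available PIDs drops below the configured threshold. A minimal standalone sketch of that comparison (illustrative only; the function name and sample numbers below are not from the kubelet):

package main

import "fmt"

// pidPressure is an illustrative stand-in for the kubelet's threshold check:
// with OpLessThan, pid.available signals pressure when the observed number of
// available PIDs drops below the configured threshold.
func pidPressure(availablePIDs, threshold int64) bool {
	return availablePIDs < threshold
}

func main() {
	const threshold = 22000 // hypothetical pid.available hard-eviction threshold
	fmt.Println(pidPressure(31000, threshold)) // false: plenty of PIDs left
	fmt.Println(pidPressure(12000, threshold)) // true: PID pressure, eviction can begin
}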
View File

@@ -157,7 +157,7 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
 	return lifecycle.PodAdmitResult{
 		Admit: false,
 		Reason: Reason,
-		Message: fmt.Sprintf(nodeLowMessageFmt, m.nodeConditions),
+		Message: fmt.Sprintf(nodeConditionMessageFmt, m.nodeConditions),
 	}
 }

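With this change, pods rejected at admission while the node is under pressure report the node condition itself instead of the resource-starvation message. A quick illustration of the new format string (the condition list below is hypothetical, and the real code passes the manager's node condition slice rather than strings):

package main

import "fmt"

const nodeConditionMessageFmt = "The node had condition: %v. "

func main() {
	// Hypothetical set of node conditions active at admission time.
	nodeConditions := []string{"PIDPressure"}
	fmt.Printf(nodeConditionMessageFmt+"\n", nodeConditions)
	// Output: The node had condition: [PIDPressure].
}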
View File

@@ -40,6 +40,8 @@ const (
 	Reason = "Evicted"
 	// nodeLowMessageFmt is the message for evictions due to resource pressure.
 	nodeLowMessageFmt = "The node was low on resource: %v. "
+	// nodeConditionMessageFmt is the message for evictions due to resource pressure.
+	nodeConditionMessageFmt = "The node had condition: %v. "
 	// containerMessageFmt provides additional information for containers exceeding requests
 	containerMessageFmt = "Container %s was using %s, which exceeds its request of %s. "
 	// containerEphemeralStorageMessageFmt provides additional information for containers which have exceeded their ES limit
@@ -50,6 +52,8 @@
 	emptyDirMessageFmt = "Usage of EmptyDir volume %q exceeds the limit %q. "
 	// inodes, number. internal to this module, used to account for local disk inode consumption.
 	resourceInodes v1.ResourceName = "inodes"
+	// resourcePids, number. internal to this module, used to account for local pid consumption.
+	resourcePids v1.ResourceName = "pids"
 	// OffendingContainersKey is the key in eviction event annotations for the list of container names which exceeded their requests
 	OffendingContainersKey = "offending_containers"
 	// OffendingContainersUsageKey is the key in eviction event annotations for the list of usage of containers which exceeded their requests
@@ -84,6 +88,7 @@ func init() {
 	signalToResource[evictionapi.SignalImageFsInodesFree] = resourceInodes
 	signalToResource[evictionapi.SignalNodeFsAvailable] = v1.ResourceEphemeralStorage
 	signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceInodes
+	signalToResource[evictionapi.SignalPIDAvailable] = resourcePids
 }

 // validSignal returns true if the signal is supported.
@@ -674,6 +679,11 @@ func rankMemoryPressure(pods []*v1.Pod, stats statsFunc) {
 	orderedBy(exceedMemoryRequests(stats), priority, memory(stats)).Sort(pods)
 }

+// rankPIDPressure orders the input pods by priority in response to PID pressure.
+func rankPIDPressure(pods []*v1.Pod, stats statsFunc) {
+	orderedBy(priority).Sort(pods)
+}
+
 // rankDiskPressureFunc returns a rankFunc that measures the specified fs stats.
 func rankDiskPressureFunc(fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) rankFunc {
 	return func(pods []*v1.Pod, stats statsFunc) {
@@ -987,6 +997,7 @@ func buildSignalToRankFunc(withImageFs bool) map[evictionapi.Signal]rankFunc {
 	signalToRankFunc := map[evictionapi.Signal]rankFunc{
 		evictionapi.SignalMemoryAvailable: rankMemoryPressure,
 		evictionapi.SignalAllocatableMemoryAvailable: rankMemoryPressure,
+		evictionapi.SignalPIDAvailable: rankPIDPressure,
 	}
 	// usage of an imagefs is optional
 	if withImageFs {

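rankPIDPressure orders pods by priority alone: unlike memory or disk, there is no per-pod PID usage statistic to break ties with, so the lowest-priority pods are evicted first. A rough standalone sketch of that ordering (the pod struct and sort below are illustrative, not the eviction package's orderedBy machinery):

package main

import (
	"fmt"
	"sort"
)

// pod is a hypothetical stand-in for *v1.Pod carrying only what the ranking needs.
type pod struct {
	name     string
	priority int32
}

// rankByPriority mirrors the idea behind rankPIDPressure: under PID pressure there
// is no per-pod pids usage to compare, so pods are ordered by priority alone,
// lowest priority first (evicted first).
func rankByPriority(pods []pod) {
	sort.SliceStable(pods, func(i, j int) bool {
		return pods[i].priority < pods[j].priority
	})
}

func main() {
	pods := []pod{{"high-priority-app", 999999999}, {"fork-bomb", 0}, {"batch-job", 100}}
	rankByPriority(pods)
	fmt.Println(pods) // [{fork-bomb 0} {batch-job 100} {high-priority-app 999999999}]
}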
View File

@@ -943,13 +943,13 @@ func TestSortByEvictionPriority(t *testing.T) {
 			expected: []evictionapi.Threshold{},
 		},
 		{
-			name: "memory first, PID last",
+			name: "memory first",
 			thresholds: []evictionapi.Threshold{
 				{
-					Signal: evictionapi.SignalPIDAvailable,
+					Signal: evictionapi.SignalNodeFsAvailable,
 				},
 				{
-					Signal: evictionapi.SignalNodeFsAvailable,
+					Signal: evictionapi.SignalPIDAvailable,
 				},
 				{
 					Signal: evictionapi.SignalMemoryAvailable,
@@ -968,13 +968,13 @@
 			},
 		},
 		{
-			name: "allocatable memory first, PID last",
+			name: "allocatable memory first",
 			thresholds: []evictionapi.Threshold{
 				{
-					Signal: evictionapi.SignalPIDAvailable,
+					Signal: evictionapi.SignalNodeFsAvailable,
 				},
 				{
-					Signal: evictionapi.SignalNodeFsAvailable,
+					Signal: evictionapi.SignalPIDAvailable,
 				},
 				{
 					Signal: evictionapi.SignalAllocatableMemoryAvailable,

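The renamed test cases reflect that thresholds are now only reordered to put memory signals first; pid.available is no longer pinned to the end and simply keeps its relative position. A simplified sketch of that ordering rule, assuming the comparator only promotes memory-backed signals (the signal strings shown are the usual eviction signal names):

package main

import (
	"fmt"
	"sort"
)

// isMemory reports whether an eviction signal name is one of the memory signals.
// This comparator is a simplified stand-in for the package's eviction-priority sort.
func isMemory(signal string) bool {
	return signal == "memory.available" || signal == "allocatableMemory.available"
}

func main() {
	thresholds := []string{"nodefs.available", "pid.available", "memory.available"}
	// Only memory signals are promoted to the front; everything else,
	// including pid.available, keeps its relative order.
	sort.SliceStable(thresholds, func(i, j int) bool {
		return isMemory(thresholds[i]) && !isMemory(thresholds[j])
	})
	fmt.Println(thresholds) // [memory.available nodefs.available pid.available]
}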
View File

@@ -123,6 +123,7 @@ go_test(
         "//pkg/kubelet/cm/cpuset:go_default_library",
         "//pkg/kubelet/container:go_default_library",
         "//pkg/kubelet/eviction:go_default_library",
+        "//pkg/kubelet/eviction/api:go_default_library",
         "//pkg/kubelet/images:go_default_library",
         "//pkg/kubelet/kubeletconfig:go_default_library",
         "//pkg/kubelet/kubeletconfig/status:go_default_library",

View File

@@ -33,6 +33,7 @@ import (
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
 	"k8s.io/kubernetes/pkg/kubelet/eviction"
+	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -78,7 +79,7 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive][NodeF
 		if inodesFree <= inodesConsumed {
 			framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
 		}
-		initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
+		initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
 		initialConfig.EvictionMinimumReclaim = map[string]string{}
 	})
 	runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
@@ -114,7 +115,7 @@ var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive][N
 		if inodesFree <= inodesConsumed {
 			framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
 		}
-		initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
+		initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
 		initialConfig.EvictionMinimumReclaim = map[string]string{}
 	})
 	// Consume enough inodes to induce disk pressure,
@@ -173,7 +174,7 @@ var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive
 		diskConsumed := resource.MustParse("100Mi")
 		summary := eventuallyGetSummary()
 		availableBytes := *(summary.Node.Fs.AvailableBytes)
-		initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+		initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
 		initialConfig.EvictionMinimumReclaim = map[string]string{}
 	})
 	runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
@@ -205,14 +206,14 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
 		if availableBytes <= uint64(diskConsumed.Value()) {
 			framework.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
 		}
-		initialConfig.EvictionSoft = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
-		initialConfig.EvictionSoftGracePeriod = map[string]string{"nodefs.available": "1m"}
+		initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+		initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "1m"}
 		// Defer to the pod default grace period
 		initialConfig.EvictionMaxPodGracePeriod = 30
 		initialConfig.EvictionMinimumReclaim = map[string]string{}
 		// Ensure that pods are not evicted because of the eviction-hard threshold
 		// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-		initialConfig.EvictionHard = map[string]string{"memory.available": "0%"}
+		initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
 	})
 	runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
 		{
@@ -234,7 +235,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 	Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
 			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-			initialConfig.EvictionHard = map[string]string{"memory.available": "0%"}
+			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
 		})
 		sizeLimit := resource.MustParse("100Mi")
 		useOverLimit := 101 /* Mb */
@@ -297,7 +298,7 @@ var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [
 		if availableBytes <= uint64(memoryConsumed.Value()) {
 			framework.Skipf("Too little memory free on the host for the PriorityMemoryEvictionOrdering test to run")
 		}
-		initialConfig.EvictionHard = map[string]string{"memory.available": fmt.Sprintf("%d", availableBytes-uint64(memoryConsumed.Value()))}
+		initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): fmt.Sprintf("%d", availableBytes-uint64(memoryConsumed.Value()))}
 		initialConfig.EvictionMinimumReclaim = map[string]string{}
 	})
 	BeforeEach(func() {
@@ -354,7 +355,7 @@ var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Ser
 		if availableBytes <= uint64(diskConsumed.Value()) {
 			framework.Skipf("Too little disk free on the host for the PriorityLocalStorageEvictionOrdering test to run")
 		}
-		initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+		initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
 		initialConfig.EvictionMinimumReclaim = map[string]string{}
 	})
 	BeforeEach(func() {
@@ -392,6 +393,47 @@ var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Ser
 	})
 })

+// PriorityPidEvictionOrdering tests that the node emits pid pressure in response to a fork bomb, and evicts pods by priority
+var _ = framework.KubeDescribe("PriorityPidEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
+	f := framework.NewDefaultFramework("pidpressure-eviction-test")
+	pressureTimeout := 2 * time.Minute
+	expectedNodeCondition := v1.NodePIDPressure
+	expectedStarvedResource := noStarvedResource
+
+	highPriorityClassName := f.BaseName + "-high-priority"
+	highPriority := int32(999999999)
+
+	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
+			pidsConsumed := int64(10000)
+			summary := eventuallyGetSummary()
+			availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
+			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
+			initialConfig.EvictionMinimumReclaim = map[string]string{}
+		})
+		BeforeEach(func() {
+			_, err := f.ClientSet.SchedulingV1beta1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
+			Expect(err == nil || errors.IsAlreadyExists(err)).To(BeTrue())
+		})
+		AfterEach(func() {
+			err := f.ClientSet.SchedulingV1beta1().PriorityClasses().Delete(highPriorityClassName, &metav1.DeleteOptions{})
+			Expect(err).NotTo(HaveOccurred())
+		})
+		specs := []podEvictSpec{
+			{
+				evictionPriority: 1,
+				pod: pidConsumingPod("fork-bomb-container", 12000),
+			},
+			{
+				evictionPriority: 0,
+				pod: innocentPod(),
+			},
+		}
+		specs[1].pod.Spec.PriorityClassName = highPriorityClassName
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
+	})
+})
+
 // Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
 type podEvictSpec struct {
 	// P0 should never be evicted, P1 shouldn't evict before P2, etc.
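The hard threshold above is derived from the node's PID rlimit: available PIDs are MaxPID minus the currently running processes, and the test sets pid.available to that figure minus a 10000-PID cushion, so a fork bomb that tries to create about 12000 processes pushes the node past the threshold. A worked example with hypothetical node figures:

package main

import "fmt"

func main() {
	// Hypothetical node figures, mirroring the arithmetic in the test above.
	var maxPID int64 = 32768         // summary.Node.Rlimit.MaxPID
	var runningProcesses int64 = 768 // summary.Node.Rlimit.NumOfRunningProcesses
	pidsConsumed := int64(10000)     // PID headroom the test leaves for itself

	availablePids := maxPID - runningProcesses
	threshold := availablePids - pidsConsumed
	// The fork-bomb pod tries to create ~12000 processes, more than the 10000-PID
	// headroom, so available PIDs drop below the threshold and the node reports
	// PIDPressure.
	fmt.Printf("pid.available hard eviction threshold: %d\n", threshold) // 22000
}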
@@ -722,6 +764,17 @@ func logMemoryMetrics() {
 	}
 }

+func logPidMetrics() {
+	summary, err := getNodeSummary()
+	if err != nil {
+		framework.Logf("Error getting summary: %v", err)
+		return
+	}
+	if summary.Node.Rlimit != nil && summary.Node.Rlimit.MaxPID != nil && summary.Node.Rlimit.NumOfRunningProcesses != nil {
+		framework.Logf("Node.Rlimit.MaxPID: %d, Node.Rlimit.RunningProcesses: %d", *summary.Node.Rlimit.MaxPID, *summary.Node.Rlimit.NumOfRunningProcesses)
+	}
+}
+
 func eventuallyGetSummary() (s *stats.Summary) {
 	Eventually(func() error {
 		summary, err := getNodeSummary()
@@ -764,23 +817,33 @@ const (
 )

 func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
+	path := ""
+	if volumeSource != nil {
+		path = volumeMountPath
+	}
 	// Each iteration creates an empty file
-	return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, "touch %s${i}.txt; sleep 0.001")
+	return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, fmt.Sprintf("touch %s${i}.txt; sleep 0.001;", filepath.Join(path, "file")))
 }

 func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
+	path := ""
+	if volumeSource != nil {
+		path = volumeMountPath
+	}
 	// Each iteration writes 1 Mb, so do diskConsumedMB iterations.
-	return podWithCommand(volumeSource, resources, diskConsumedMB, name, "dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null")
+	return podWithCommand(volumeSource, resources, diskConsumedMB, name, fmt.Sprintf("dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null;", filepath.Join(path, "file")))
 }

+func pidConsumingPod(name string, numProcesses int) *v1.Pod {
+	// Each iteration forks once, but creates two processes
+	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses/2, name, "(while true; do sleep 5; done)&")
+}
+
 // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
-// If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
 func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
-	path := ""
 	volumeMounts := []v1.VolumeMount{}
 	volumes := []v1.Volume{}
 	if volumeSource != nil {
-		path = volumeMountPath
 		volumeMounts = []v1.VolumeMount{{MountPath: volumeMountPath, Name: volumeName}}
 		volumes = []v1.Volume{{Name: volumeName, VolumeSource: *volumeSource}}
 	}
@@ -795,7 +858,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
 				Command: []string{
 					"sh",
 					"-c",
-					fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done", iterations, fmt.Sprintf(command, filepath.Join(path, "file"))),
+					fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while true; do sleep 5; done", iterations, command),
 				},
 				Resources: resources,
 				VolumeMounts: volumeMounts,
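The loop template no longer appends a semicolon after each iteration's command; instead every caller supplies its own terminator (a trailing ';' for the touch and dd commands, the '&' that backgrounds the fork bomb). That avoids generating ')&;', which the shell would reject as a syntax error. A small sketch of the command the fork-bomb pod ends up running, mirroring the Sprintf above:

package main

import "fmt"

func main() {
	// The per-iteration command now ends with its own terminator (";" for
	// touch/dd, "&" for the fork bomb), so the loop template no longer adds one.
	iterations := 6000 // numProcesses/2 for pidConsumingPod("fork-bomb-container", 12000)
	command := "(while true; do sleep 5; done)&"
	script := fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while true; do sleep 5; done", iterations, command)
	fmt.Println(script)
	// i=0; while [ $i -lt 6000 ]; do (while true; do sleep 5; done)& i=$(($i+1)); done; while true; do sleep 5; done
}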