/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
	"fmt"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"k8s.io/api/core/v1"
	schedulerapi "k8s.io/api/scheduling/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/test/e2e/framework"
	imageutils "k8s.io/kubernetes/test/utils/image"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)
// Eviction Policy is described here:
// https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/kubelet-eviction.md

const (
	postTestConditionMonitoringPeriod = 1 * time.Minute
	evictionPollInterval              = 2 * time.Second
	pressureDisappearTimeout          = 1 * time.Minute
	longPodDeletionTimeout            = 10 * time.Minute
	// Pressure conditions often surface after evictions because the kubelet only updates
	// node conditions periodically. We wait this period after evictions to make sure that
	// delay has passed.
	pressureDelay     = 20 * time.Second
	testContextFmt    = "when we run containers that should cause %s"
	noPressure        = v1.NodeConditionType("NoPressure")
	lotsOfDisk        = 10240      // 10 Gb in Mb
	lotsOfFiles       = 1000000000 // 1 billion
	resourceInodes    = v1.ResourceName("inodes")
	noStarvedResource = v1.ResourceName("none")
)
// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Node disk pressure is induced by consuming all inodes on the node.
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("inode-eviction-test")
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := resourceInodes
	pressureTimeout := 15 * time.Minute
	inodesConsumed := uint64(200000)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
			summary := eventuallyGetSummary()
			inodesFree := *summary.Node.Fs.InodesFree
			if inodesFree <= inodesConsumed {
				framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
			}
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
			},
			{
				evictionPriority: 1,
				pod:              inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		})
	})
})
// ImageGCNoEviction tests that the node does not evict pods when inodes are consumed by images.
// Disk pressure is induced by consuming inodes; image garbage collection is expected to reclaim
// enough of them to avoid an eviction.
var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("image-gc-eviction-test")
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := resourceInodes
	inodesConsumed := uint64(100000)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
			summary := eventuallyGetSummary()
			inodesFree := *summary.Node.Fs.InodesFree
			if inodesFree <= inodesConsumed {
				framework.Skipf("Too few inodes free on the host for the ImageGCNoEviction test to run")
			}
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		// Consume enough inodes to induce disk pressure,
		// but expect that image garbage collection can reduce it enough to avoid an eviction.
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
			{
				evictionPriority: 0,
				pod:              inodeConsumingPod("container-inode", 110000, nil),
			},
		})
	})
})
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
	expectedNodeCondition := v1.NodeMemoryPressure
	expectedStarvedResource := v1.ResourceMemory
	pressureTimeout := 10 * time.Minute
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
			kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
			// The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb
			// We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb
			kubeReserved.Sub(resource.MustParse("300Mi"))
			initialConfig.KubeReserved = map[string]string{
				string(v1.ResourceMemory): kubeReserved.String(),
			}
			initialConfig.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableEnforcementKey}
			initialConfig.CgroupsPerQOS = true
		})
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		})
	})
})
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Disk pressure is induced by running pods which consume disk space.
var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := v1.ResourceEphemeralStorage
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			diskConsumed := resource.MustParse("200Mi")
			summary := eventuallyGetSummary()
			availableBytes := *(summary.Node.Fs.AvailableBytes)
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		})
	})
})
// LocalStorageSoftEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Disk pressure is induced by running pods which consume disk space in excess of the soft eviction threshold.
// Note: This test's purpose is to test Soft Evictions. Local storage was chosen since it is the least costly to run.
var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := v1.ResourceEphemeralStorage
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			diskConsumed := resource.MustParse("200Mi")
			summary := eventuallyGetSummary()
			availableBytes := *(summary.Node.Fs.AvailableBytes)
			if availableBytes <= uint64(diskConsumed.Value()) {
				framework.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
			}
			initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
			initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "1m"}
			// Defer to the pod default grace period
			initialConfig.EvictionMaxPodGracePeriod = 30
			initialConfig.EvictionMinimumReclaim = map[string]string{}
			// Ensure that pods are not evicted because of the eviction-hard threshold.
			// Setting a threshold to 0% disables it; a non-empty map overrides the default value (necessary due to omitempty).
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
		})
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		})
	})
})
// LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions.
var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Feature:LocalStorageCapacityIsolation][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("localstorage-eviction-test")
	evictionTestTimeout := 10 * time.Minute
	Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			// Setting a threshold to 0% disables it; a non-empty map overrides the default value (necessary due to omitempty).
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
		})
		sizeLimit := resource.MustParse("100Mi")
		useOverLimit := 101 /* Mb */
		useUnderLimit := 99 /* Mb */
		containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
		runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
			{
				evictionPriority: 1, // This pod should be evicted because of an emptyDir (default storage type) usage violation.
				pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
					EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
				}, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 1, // This pod should be evicted because of a memory-backed emptyDir usage violation.
				pod: diskConsumingPod("emptydir-memory-sizelimit", useOverLimit, &v1.VolumeSource{
					EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sizeLimit},
				}, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 1, // This pod should cross the container limit by writing to its writable layer.
				pod:              diskConsumingPod("container-disk-limit", useOverLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
			},
			{
				evictionPriority: 1, // This pod should hit the container limit by writing to an emptyDir.
				pod: diskConsumingPod("container-emptydir-disk-limit", useOverLimit, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}},
					v1.ResourceRequirements{Limits: containerLimit}),
			},
			{
				evictionPriority: 0, // This pod should not be evicted because it uses less than its limit.
				pod: diskConsumingPod("emptydir-disk-below-sizelimit", useUnderLimit, &v1.VolumeSource{
					EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
				}, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0, // This pod should not be evicted because it uses less than its limit.
				pod:              diskConsumingPod("container-disk-below-sizelimit", useUnderLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
			},
		})
	})
})
// PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
// It verifies that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
// the higher-priority pod.
var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
	expectedNodeCondition := v1.NodeMemoryPressure
	expectedStarvedResource := v1.ResourceMemory
	pressureTimeout := 10 * time.Minute
	highPriorityClassName := f.BaseName + "-high-priority"
	highPriority := int32(999999999)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			memoryConsumed := resource.MustParse("600Mi")
			summary := eventuallyGetSummary()
			availableBytes := *(summary.Node.Memory.AvailableBytes)
			if availableBytes <= uint64(memoryConsumed.Value()) {
				framework.Skipf("Too little memory free on the host for the PriorityMemoryEvictionOrdering test to run")
			}
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): fmt.Sprintf("%d", availableBytes-uint64(memoryConsumed.Value()))}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		BeforeEach(func() {
			_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
			Expect(err == nil || errors.IsAlreadyExists(err)).To(BeTrue())
		})
		AfterEach(func() {
			err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(highPriorityClassName, &metav1.DeleteOptions{})
			Expect(err).NotTo(HaveOccurred())
		})
		specs := []podEvictSpec{
			{
				evictionPriority: 2,
				pod:              getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 1,
				pod:              getMemhogPod("high-priority-memory-hog-pod", "high-priority-memory-hog", v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod: getMemhogPod("guaranteed-pod", "guaranteed-pod", v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceMemory: resource.MustParse("300Mi"),
					},
					Limits: v1.ResourceList{
						v1.ResourceMemory: resource.MustParse("300Mi"),
					},
				}),
			},
		}
		specs[1].pod.Spec.PriorityClassName = highPriorityClassName
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, specs)
	})
})
// PriorityLocalStorageEvictionOrdering tests that the node responds to node disk pressure by evicting pods.
// It verifies that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
// the higher-priority pod.
var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
	expectedNodeCondition := v1.NodeDiskPressure
	expectedStarvedResource := v1.ResourceEphemeralStorage
	pressureTimeout := 10 * time.Minute
	highPriorityClassName := f.BaseName + "-high-priority"
	highPriority := int32(999999999)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			diskConsumed := resource.MustParse("350Mi")
			summary := eventuallyGetSummary()
			availableBytes := *(summary.Node.Fs.AvailableBytes)
			if availableBytes <= uint64(diskConsumed.Value()) {
				framework.Skipf("Too little disk free on the host for the PriorityLocalStorageEvictionOrdering test to run")
			}
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		BeforeEach(func() {
			_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
			Expect(err == nil || errors.IsAlreadyExists(err)).To(BeTrue())
		})
		AfterEach(func() {
			err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(highPriorityClassName, &metav1.DeleteOptions{})
			Expect(err).NotTo(HaveOccurred())
		})
		specs := []podEvictSpec{
			{
				evictionPriority: 2,
				pod:              diskConsumingPod("best-effort-disk", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 1,
				pod:              diskConsumingPod("high-priority-disk", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				// Only require 99% accuracy (297/300 Mb) because on some OS distributions, the file itself (excluding its contents) consumes disk space.
				pod: diskConsumingPod("guaranteed-disk", 297 /* Mb */, nil, v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceEphemeralStorage: resource.MustParse("300Mi"),
					},
					Limits: v1.ResourceList{
						v1.ResourceEphemeralStorage: resource.MustParse("300Mi"),
					},
				}),
			},
		}
		specs[1].pod.Spec.PriorityClassName = highPriorityClassName
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, specs)
	})
})
// PriorityPidEvictionOrdering tests that the node emits pid pressure in response to a fork bomb, and evicts pods by priority.
var _ = framework.KubeDescribe("PriorityPidEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
	f := framework.NewDefaultFramework("pidpressure-eviction-test")
	pressureTimeout := 2 * time.Minute
	expectedNodeCondition := v1.NodePIDPressure
	expectedStarvedResource := noStarvedResource
	highPriorityClassName := f.BaseName + "-high-priority"
	highPriority := int32(999999999)
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			pidsConsumed := int64(10000)
			summary := eventuallyGetSummary()
			availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
			initialConfig.EvictionMinimumReclaim = map[string]string{}
		})
		BeforeEach(func() {
			_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(&schedulerapi.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority})
			Expect(err == nil || errors.IsAlreadyExists(err)).To(BeTrue())
		})
		AfterEach(func() {
			err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(highPriorityClassName, &metav1.DeleteOptions{})
			Expect(err).NotTo(HaveOccurred())
		})
		specs := []podEvictSpec{
			{
				evictionPriority: 1,
				pod:              pidConsumingPod("fork-bomb-container", 12000),
			},
			{
				evictionPriority: 0,
				pod:              innocentPod(),
			},
		}
		specs[1].pod.Spec.PriorityClassName = highPriorityClassName
		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
	})
})
// podEvictSpec is used by runEvictionTest; it specifies a pod, and when that pod should be evicted relative to the other pods.
type podEvictSpec struct {
	// P0 should never be evicted, P1 shouldn't evict before P2, etc.
	// If two are ranked at P1, either is permitted to fail before the other.
	// The test ends when all pods other than P0 have been evicted.
	evictionPriority int
	pod              *v1.Pod
}
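
// For illustration, a minimal sketch (not an actual test in this file) of how a spec list is wired into
// runEvictionTest using the helpers defined below; the evictionPriority 1 pod is expected to be evicted
// and the evictionPriority 0 pod is expected to survive:
//
//	runEvictionTest(f, pressureTimeout, v1.NodeDiskPressure, resourceInodes, logInodeMetrics, []podEvictSpec{
//		{evictionPriority: 1, pod: inodeConsumingPod("inode-hog", lotsOfFiles, nil)},
//		{evictionPriority: 0, pod: innocentPod()},
//	})
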
// runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
// It ensures that the desired expectedNodeCondition is actually triggered.
// It ensures that evictionPriority 0 pods are not evicted.
// It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.).
// It ensures that all pods with non-zero evictionPriority are eventually evicted.
// runEvictionTest then cleans up the testing environment by deleting the provided pods, and ensures that expectedNodeCondition no longer exists.
func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, expectedStarvedResource v1.ResourceName, logFunc func(), testSpecs []podEvictSpec) {
	// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
	Context("", func() {
		BeforeEach(func() {
			// Reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure.
			reduceAllocatableMemoryUsage()
			// Nodes do not immediately report local storage capacity.
			// Sleep so that pods requesting local storage do not fail to schedule.
			time.Sleep(30 * time.Second)
			By("setting up pods to be used by tests")
			pods := []*v1.Pod{}
			for _, spec := range testSpecs {
				pods = append(pods, spec.pod)
			}
			f.PodClient().CreateBatch(pods)
		})
It ( "should eventually evict all of the correct pods" , func ( ) {
By ( fmt . Sprintf ( "Waiting for node to have NodeCondition: %s" , expectedNodeCondition ) )
Eventually ( func ( ) error {
logFunc ( )
if expectedNodeCondition == noPressure || hasNodeCondition ( f , expectedNodeCondition ) {
return nil
}
return fmt . Errorf ( "NodeCondition: %s not encountered" , expectedNodeCondition )
} , pressureTimeout , evictionPollInterval ) . Should ( BeNil ( ) )
By ( "Waiting for evictions to occur" )
Eventually ( func ( ) error {
if expectedNodeCondition != noPressure {
if hasNodeCondition ( f , expectedNodeCondition ) {
framework . Logf ( "Node has %s" , expectedNodeCondition )
} else {
framework . Logf ( "Node does NOT have %s" , expectedNodeCondition )
}
}
2018-05-15 23:08:46 +00:00
logKubeletLatencyMetrics ( kubeletmetrics . EvictionStatsAgeKey )
2017-09-23 00:18:19 +00:00
logFunc ( )
return verifyEvictionOrdering ( f , testSpecs )
} , pressureTimeout , evictionPollInterval ) . Should ( BeNil ( ) )
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
// evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this delay
By ( "making sure pressure from test has surfaced before continuing" )
time . Sleep ( pressureDelay )
By ( fmt . Sprintf ( "Waiting for NodeCondition: %s to no longer exist on the node" , expectedNodeCondition ) )
Eventually ( func ( ) error {
logFunc ( )
2018-05-15 23:08:46 +00:00
logKubeletLatencyMetrics ( kubeletmetrics . EvictionStatsAgeKey )
2017-09-23 00:18:19 +00:00
if expectedNodeCondition != noPressure && hasNodeCondition ( f , expectedNodeCondition ) {
return fmt . Errorf ( "Conditions havent returned to normal, node still has %s" , expectedNodeCondition )
}
return nil
} , pressureDissapearTimeout , evictionPollInterval ) . Should ( BeNil ( ) )
By ( "checking for stable, pressure-free condition without unexpected pod failures" )
Consistently ( func ( ) error {
if expectedNodeCondition != noPressure && hasNodeCondition ( f , expectedNodeCondition ) {
return fmt . Errorf ( "%s dissappeared and then reappeared" , expectedNodeCondition )
}
logFunc ( )
2018-05-15 23:08:46 +00:00
logKubeletLatencyMetrics ( kubeletmetrics . EvictionStatsAgeKey )
2017-09-23 00:18:19 +00:00
return verifyEvictionOrdering ( f , testSpecs )
} , postTestConditionMonitoringPeriod , evictionPollInterval ) . Should ( BeNil ( ) )
2018-05-23 23:12:54 +00:00
By ( "checking for correctly formatted eviction events" )
verifyEvictionEvents ( f , testSpecs , expectedStarvedResource )
2017-09-23 00:18:19 +00:00
} )
		AfterEach(func() {
			By("deleting pods")
			for _, spec := range testSpecs {
				By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
				f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
			}
			reduceAllocatableMemoryUsage()
			if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
				// The disk eviction test may cause the prepulled images to be evicted,
				// so prepull those images again to ensure this test does not affect subsequent tests.
				PrePullAllImages()
			}
			By("making sure we can start a new pod after the test")
			podName := "test-admit-pod"
			f.PodClient().CreateSync(&v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: podName,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: imageutils.GetPauseImageName(),
							Name:  podName,
						},
					},
				},
			})
			if CurrentGinkgoTestDescription().Failed {
				if framework.TestContext.DumpLogsOnFailure {
					logPodEvents(f)
					logNodeEvents(f)
				}
			}
		})
	})
}
// verifyEvictionOrdering returns an error if all non-zero-priority pods have not been evicted, nil otherwise.
// This function panics (via Expect) if eviction ordering is violated, or if a priority-zero pod fails.
func verifyEvictionOrdering(f *framework.Framework, testSpecs []podEvictSpec) error {
	// Gather current information
	updatedPodList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(metav1.ListOptions{})
	if err != nil {
		return err
	}
	updatedPods := updatedPodList.Items
	for _, p := range updatedPods {
		framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
	}
	By("checking eviction ordering and ensuring important pods don't fail")
	done := true
	for _, priorityPodSpec := range testSpecs {
		var priorityPod v1.Pod
		for _, p := range updatedPods {
			if p.Name == priorityPodSpec.pod.Name {
				priorityPod = p
			}
		}
		Expect(priorityPod).NotTo(BeNil())
		Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodSucceeded),
			fmt.Sprintf("pod: %s succeeded unexpectedly", priorityPod.Name))
		// Check eviction ordering.
		// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round,
		// but never alright for a priority 1 pod to fail while the priority 2 pod is still running.
		for _, lowPriorityPodSpec := range testSpecs {
			var lowPriorityPod v1.Pod
			for _, p := range updatedPods {
				if p.Name == lowPriorityPodSpec.pod.Name {
					lowPriorityPod = p
				}
			}
			Expect(lowPriorityPod).NotTo(BeNil())
			if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
				Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
					fmt.Sprintf("priority %d pod: %s failed before priority %d pod: %s",
						priorityPodSpec.evictionPriority, priorityPodSpec.pod.Name, lowPriorityPodSpec.evictionPriority, lowPriorityPodSpec.pod.Name))
			}
		}
		if priorityPod.Status.Phase == v1.PodFailed {
			Expect(priorityPod.Status.Reason).To(Equal(eviction.Reason), "pod %s failed; expected Status.Reason to be %s, but got %s",
				priorityPod.Name, eviction.Reason, priorityPod.Status.Reason)
		}
		// EvictionPriority 0 pods should not fail
		if priorityPodSpec.evictionPriority == 0 {
			Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
				fmt.Sprintf("priority 0 pod: %s failed", priorityPod.Name))
		}
		// If a pod that is not evictionPriority 0 has not been evicted, we are not done
		if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
			done = false
		}
	}
	if done {
		return nil
	}
	return fmt.Errorf("pods that should be evicted are still running")
}
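// verifyEvictionEvents checks that each evicted pod in testSpecs produced exactly one eviction event, and that the
// event's annotations identify the starved resource (and, for memory, the offending containers and their usage).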
func verifyEvictionEvents(f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
	for _, spec := range testSpecs {
		pod := spec.pod
		if spec.evictionPriority != 0 {
			selector := fields.Set{
				"involvedObject.kind":      "Pod",
				"involvedObject.name":      pod.Name,
				"involvedObject.namespace": f.Namespace.Name,
				"reason":                   eviction.Reason,
			}.AsSelector().String()
			podEvictEvents, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(metav1.ListOptions{FieldSelector: selector})
			Expect(err).To(BeNil(), "Unexpected error getting events during eviction test: %v", err)
			Expect(len(podEvictEvents.Items)).To(Equal(1), "Expected to find 1 eviction event for pod %s, got %d", pod.Name, len(podEvictEvents.Items))
			event := podEvictEvents.Items[0]
			if expectedStarvedResource != noStarvedResource {
				// Check the eviction.StarvedResourceKey
				starved, found := event.Annotations[eviction.StarvedResourceKey]
				Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
					pod.Name, expectedStarvedResource)
				starvedResource := v1.ResourceName(starved)
				Expect(starvedResource).To(Equal(expectedStarvedResource), "Expected the starved_resource annotation on pod %s to contain %s, but got %s instead",
					pod.Name, expectedStarvedResource, starvedResource)
				// We only check these keys for memory, because ephemeral storage evictions may be due to volume usage, in which case these values are not present
				if expectedStarvedResource == v1.ResourceMemory {
					// Check the eviction.OffendingContainersKey
					offendersString, found := event.Annotations[eviction.OffendingContainersKey]
					Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
						pod.Name)
					offendingContainers := strings.Split(offendersString, ",")
					Expect(len(offendingContainers)).To(Equal(1), "Expected to find the offending container in the %s annotation, but no container was found",
						eviction.OffendingContainersKey)
					Expect(offendingContainers[0]).To(Equal(pod.Spec.Containers[0].Name), "Expected to find the offending container: %s in the %s annotation, but found %s instead",
						pod.Spec.Containers[0].Name, eviction.OffendingContainersKey, offendingContainers[0])
					// Check the eviction.OffendingContainersUsageKey
					offendingUsageString, found := event.Annotations[eviction.OffendingContainersUsageKey]
					Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
						pod.Name)
					offendingContainersUsage := strings.Split(offendingUsageString, ",")
					Expect(len(offendingContainersUsage)).To(Equal(1), "Expected to find the offending container's usage in the %s annotation, but found %+v",
						eviction.OffendingContainersUsageKey, offendingContainersUsage)
					usageQuantity, err := resource.ParseQuantity(offendingContainersUsage[0])
					Expect(err).To(BeNil(), "Expected to be able to parse pod %s's %s annotation as a quantity, but got err: %v", pod.Name, eviction.OffendingContainersUsageKey, err)
					request := pod.Spec.Containers[0].Resources.Requests[starvedResource]
					Expect(usageQuantity.Cmp(request)).To(Equal(1), "Expected the offending container's usage (%s) in pod %s to exceed its request (%s)",
						usageQuantity.String(), pod.Name, request.String())
				}
			}
		}
	}
}
// hasNodeCondition returns true if the node has the given node condition, false otherwise.
func hasNodeCondition(f *framework.Framework, expectedNodeCondition v1.NodeConditionType) bool {
	localNodeStatus := getLocalNode(f).Status
	_, actualNodeCondition := nodeutil.GetNodeCondition(&localNodeStatus, expectedNodeCondition)
	Expect(actualNodeCondition).NotTo(BeNil())
	return actualNodeCondition.Status == v1.ConditionTrue
}
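// logInodeMetrics logs inode capacity and usage from the node summary API for the image filesystem,
// the root filesystem, and each pod's containers and volumes.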
func logInodeMetrics() {
	summary, err := getNodeSummary()
	if err != nil {
		framework.Logf("Error getting summary: %v", err)
		return
	}
	if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.Inodes != nil && summary.Node.Runtime.ImageFs.InodesFree != nil {
		framework.Logf("imageFsInfo.Inodes: %d, imageFsInfo.InodesFree: %d", *summary.Node.Runtime.ImageFs.Inodes, *summary.Node.Runtime.ImageFs.InodesFree)
	}
	if summary.Node.Fs != nil && summary.Node.Fs.Inodes != nil && summary.Node.Fs.InodesFree != nil {
		framework.Logf("rootFsInfo.Inodes: %d, rootFsInfo.InodesFree: %d", *summary.Node.Fs.Inodes, *summary.Node.Fs.InodesFree)
	}
	for _, pod := range summary.Pods {
		framework.Logf("Pod: %s", pod.PodRef.Name)
		for _, container := range pod.Containers {
			if container.Rootfs != nil && container.Rootfs.InodesUsed != nil {
				framework.Logf("--- summary Container: %s inodeUsage: %d", container.Name, *container.Rootfs.InodesUsed)
			}
		}
		for _, volume := range pod.VolumeStats {
			if volume.FsStats.InodesUsed != nil {
				framework.Logf("--- summary Volume: %s inodeUsage: %d", volume.Name, *volume.FsStats.InodesUsed)
			}
		}
	}
}
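// logDiskMetrics logs disk capacity and usage from the node summary API for the image filesystem,
// the root filesystem, and each pod's containers and volumes.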
func logDiskMetrics() {
	summary, err := getNodeSummary()
	if err != nil {
		framework.Logf("Error getting summary: %v", err)
		return
	}
	if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.CapacityBytes != nil && summary.Node.Runtime.ImageFs.AvailableBytes != nil {
		framework.Logf("imageFsInfo.CapacityBytes: %d, imageFsInfo.AvailableBytes: %d", *summary.Node.Runtime.ImageFs.CapacityBytes, *summary.Node.Runtime.ImageFs.AvailableBytes)
	}
	if summary.Node.Fs != nil && summary.Node.Fs.CapacityBytes != nil && summary.Node.Fs.AvailableBytes != nil {
		framework.Logf("rootFsInfo.CapacityBytes: %d, rootFsInfo.AvailableBytes: %d", *summary.Node.Fs.CapacityBytes, *summary.Node.Fs.AvailableBytes)
	}
	for _, pod := range summary.Pods {
		framework.Logf("Pod: %s", pod.PodRef.Name)
		for _, container := range pod.Containers {
			if container.Rootfs != nil && container.Rootfs.UsedBytes != nil {
				framework.Logf("--- summary Container: %s UsedBytes: %d", container.Name, *container.Rootfs.UsedBytes)
			}
		}
		for _, volume := range pod.VolumeStats {
			if volume.FsStats.UsedBytes != nil {
				framework.Logf("--- summary Volume: %s UsedBytes: %d", volume.Name, *volume.FsStats.UsedBytes)
			}
		}
	}
}
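// logMemoryMetrics logs memory working set and availability from the node summary API for the node,
// the allocatable (pods) system container, and each pod's containers.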
func logMemoryMetrics() {
	summary, err := getNodeSummary()
	if err != nil {
		framework.Logf("Error getting summary: %v", err)
		return
	}
	if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
		framework.Logf("Node.Memory.WorkingSetBytes: %d, Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
	}
	for _, sysContainer := range summary.Node.SystemContainers {
		if sysContainer.Name == stats.SystemContainerPods && sysContainer.Memory != nil && sysContainer.Memory.WorkingSetBytes != nil && sysContainer.Memory.AvailableBytes != nil {
			framework.Logf("Allocatable.Memory.WorkingSetBytes: %d, Allocatable.Memory.AvailableBytes: %d", *sysContainer.Memory.WorkingSetBytes, *sysContainer.Memory.AvailableBytes)
		}
	}
	for _, pod := range summary.Pods {
		framework.Logf("Pod: %s", pod.PodRef.Name)
		for _, container := range pod.Containers {
			if container.Memory != nil && container.Memory.WorkingSetBytes != nil {
				framework.Logf("--- summary Container: %s WorkingSetBytes: %d", container.Name, *container.Memory.WorkingSetBytes)
			}
		}
	}
}
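// logPidMetrics logs the node's process ID limit and the number of running processes from the node summary API.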
func logPidMetrics() {
	summary, err := getNodeSummary()
	if err != nil {
		framework.Logf("Error getting summary: %v", err)
		return
	}
	if summary.Node.Rlimit != nil && summary.Node.Rlimit.MaxPID != nil && summary.Node.Rlimit.NumOfRunningProcesses != nil {
		framework.Logf("Node.Rlimit.MaxPID: %d, Node.Rlimit.RunningProcesses: %d", *summary.Node.Rlimit.MaxPID, *summary.Node.Rlimit.NumOfRunningProcesses)
	}
}
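// eventuallyGetSummary polls the kubelet summary API until the node filesystem stats needed by these tests
// (inodes free and available bytes) are populated, and returns the resulting summary.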
func eventuallyGetSummary() (s *stats.Summary) {
	Eventually(func() error {
		summary, err := getNodeSummary()
		if err != nil {
			return err
		}
		if summary == nil || summary.Node.Fs == nil || summary.Node.Fs.InodesFree == nil || summary.Node.Fs.AvailableBytes == nil {
			return fmt.Errorf("some part of data is nil")
		}
		s = summary
		return nil
	}, time.Minute, evictionPollInterval).Should(BeNil())
	return
}
// innocentPod returns a pod that does not use any resources.
func innocentPod() *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "innocent-pod"},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Image: busyboxImage,
					Name:  "innocent-container",
					Command: []string{
						"sh",
						"-c",
						"while true; do sleep 5; done",
					},
				},
			},
		},
	}
}

const (
	volumeMountPath = "/test-mnt"
	volumeName      = "test-volume"
)
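// inodeConsumingPod returns a pod that consumes inodes by creating numFiles empty files,
// either in the provided volume or in the container's writable layer when volumeSource is nil.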
func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
	path := ""
	if volumeSource != nil {
		path = volumeMountPath
	}
	// Each iteration creates an empty file
	return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, fmt.Sprintf("touch %s${i}.txt; sleep 0.001;", filepath.Join(path, "file")))
}
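// diskConsumingPod returns a pod that consumes approximately diskConsumedMB megabytes of disk,
// either in the provided volume or in the container's writable layer when volumeSource is nil.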
func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
	path := ""
	if volumeSource != nil {
		path = volumeMountPath
	}
	// Each iteration writes 1 Mb, so do diskConsumedMB iterations.
	return podWithCommand(volumeSource, resources, diskConsumedMB, name, fmt.Sprintf("dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null; sleep .1;", filepath.Join(path, "file")))
}

// pidConsumingPod returns a pod that consumes approximately numProcesses process IDs.
func pidConsumingPod(name string, numProcesses int) *v1.Pod {
	// Each iteration forks once, but creates two processes
	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses/2, name, "(while true; do sleep 5; done)&")
}
// podWithCommand returns a pod that runs the provided command for the given number of iterations,
// with the provided volumeSource and resourceRequirements.
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
	volumeMounts := []v1.VolumeMount{}
	volumes := []v1.Volume{}
	if volumeSource != nil {
		volumeMounts = []v1.VolumeMount{{MountPath: volumeMountPath, Name: volumeName}}
		volumes = []v1.Volume{{Name: volumeName, VolumeSource: *volumeSource}}
	}
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod", name)},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Image: busyboxImage,
					Name:  fmt.Sprintf("%s-container", name),
					Command: []string{
						"sh",
						"-c",
						fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while true; do sleep 5; done", iterations, command),
					},
					Resources:    resources,
					VolumeMounts: volumeMounts,
				},
			},
			Volumes: volumes,
		},
	}
}
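// getMemhogPod returns a pod that continuously allocates memory using the stress image, targeting either
// 80% of its memory limit (when one is set) or the value exposed via the downward API.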
func getMemhogPod(podName string, ctnName string, res v1.ResourceRequirements) *v1.Pod {
	env := []v1.EnvVar{
		{
			Name: "MEMORY_LIMIT",
			ValueFrom: &v1.EnvVarSource{
				ResourceFieldRef: &v1.ResourceFieldSelector{
					Resource: "limits.memory",
				},
			},
		},
	}
	// If there is a limit specified, pass 80% of it for -mem-total, otherwise use the downward API
	// to pass limits.memory, which will be the total memory available.
	// This helps prevent a guaranteed pod from triggering an OOM kill due to its low memory limit,
	// which would cause the test to fail inappropriately.
	var memLimit string
	if limit, ok := res.Limits[v1.ResourceMemory]; ok {
		memLimit = strconv.Itoa(int(
			float64(limit.Value()) * 0.8))
	} else {
		memLimit = "$(MEMORY_LIMIT)"
	}
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Name:            ctnName,
					Image:           "k8s.gcr.io/stress:v1",
					ImagePullPolicy: "Always",
					Env:             env,
					// 60 min timeout * 60s / tick per 10s = 360 ticks before timeout => ~11.11Mi/tick
					// to fill ~4Gi of memory, so initial ballpark 12Mi/tick.
					// We might see flakes due to timeout if the total memory on the nodes increases.
					Args:      []string{"-mem-alloc-size", "12Mi", "-mem-alloc-sleep", "10s", "-mem-total", memLimit},
					Resources: res,
				},
			},
		},
	}
}