mirror of https://github.com/k3s-io/k3s
refactor tests, and add soft eviction test
parent
5654307632
commit
828c2d9630
@@ -76,23 +76,19 @@ go_library(
go_test(
    name = "go_default_test",
    srcs = [
        "allocatable_eviction_test.go",
        "apparmor_test.go",
        "cpu_manager_test.go",
        "critical_pod_test.go",
        "disk_eviction_test.go",
        "docker_test.go",
        "dockershim_checkpoint_test.go",
        "dynamic_kubelet_config_test.go",
        "e2e_node_suite_test.go",
        "eviction_test.go",
        "garbage_collector_test.go",
        "gke_environment_test.go",
        "image_id_test.go",
        "inode_eviction_test.go",
        "kubelet_test.go",
        "lifecycle_hook_test.go",
        "local_storage_allocatable_eviction_test.go",
        "local_storage_isolation_eviction_test.go",
        "log_path_test.go",
        "memory_eviction_test.go",
        "mirror_pod_test.go",
@@ -157,7 +153,6 @@ go_test(
        "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/yaml:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/watch:go_default_library",
        "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
    ] + select({
@@ -1,97 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
    "fmt"
    "time"

    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
    "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
    "k8s.io/kubernetes/pkg/kubelet/cm"
    "k8s.io/kubernetes/test/e2e/framework"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
)

// Eviction Policy is described here:
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md

var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
    f := framework.NewDefaultFramework("memory-allocatable-eviction-test")

    podTestSpecs := []podTestSpec{
        {
            evictionPriority: 1, // This pod should be evicted before the innocent pod
            pod: getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
        },
        {
            evictionPriority: 0, // This pod should never be evicted
            pod: getInnocentPod(),
        },
    }
    evictionTestTimeout := 10 * time.Minute
    testCondition := "Memory Pressure"

    Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
            // Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
            kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
            // The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb
            // We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb
            kubeReserved.Sub(resource.MustParse("300Mi"))
            initialConfig.KubeReserved = kubeletconfig.ConfigurationMap(map[string]string{string(v1.ResourceMemory): kubeReserved.String()})
            initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
            initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
            initialConfig.CgroupsPerQOS = true
        })
        // Place the remainder of the test within a context so that the kubelet config is set before and after the test.
        Context("With kubeconfig updated", func() {
            runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure)
        })
    })
})

// Returns TRUE if the node has Memory Pressure, FALSE otherwise
func hasMemoryPressure(f *framework.Framework, testCondition string) (bool, error) {
    localNodeStatus := getLocalNode(f).Status
    _, pressure := nodeutil.GetNodeCondition(&localNodeStatus, v1.NodeMemoryPressure)
    Expect(pressure).NotTo(BeNil())
    hasPressure := pressure.Status == v1.ConditionTrue
    By(fmt.Sprintf("checking if pod has %s: %v", testCondition, hasPressure))

    // Additional Logging relating to Memory
    summary, err := getNodeSummary()
    if err != nil {
        return false, err
    }
    if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
        framework.Logf("Node.Memory.WorkingSetBytes: %d, summary.Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
    }
    for _, pod := range summary.Pods {
        framework.Logf("Pod: %s", pod.PodRef.Name)
        for _, container := range pod.Containers {
            if container.Memory != nil && container.Memory.WorkingSetBytes != nil {
                framework.Logf("--- summary Container: %s WorkingSetBytes: %d", container.Name, *container.Memory.WorkingSetBytes)
            }
        }
    }
    return hasPressure, nil
}
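Editor's note: a quick way to sanity-check the kube-reserved arithmetic in the comment above ("Reserved = Capacity - Allocatable - 250Mb") is to plug in concrete numbers. The sketch below is illustrative only; the 3786Mi capacity is an assumed example value, not something taken from the commit.

    package main

    import "fmt"

    func main() {
        const mi = int64(1024 * 1024)
        capacity := 3786 * mi             // hypothetical node memory capacity (not from the commit)
        hardEvictionThreshold := 250 * mi // kubelet default memory.available hard threshold
        targetAllocatable := 50 * mi      // what the test wants to leave for pods

        // Allocatable = Capacity - KubeReserved - hardEvictionThreshold, so
        // KubeReserved = Capacity - targetAllocatable - hardEvictionThreshold = Capacity - 300Mi.
        kubeReserved := capacity - targetAllocatable - hardEvictionThreshold

        fmt.Printf("kube-reserved: %dMi\n", kubeReserved/mi)                                  // 3486Mi
        fmt.Printf("allocatable:   %dMi\n", (capacity-kubeReserved-hardEvictionThreshold)/mi) // 50Mi
    }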
@@ -1,258 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
    "fmt"
    "strings"
    "time"

    "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/uuid"
    "k8s.io/kubernetes/test/e2e/framework"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
    clientset "k8s.io/client-go/kubernetes"
)

const (
    // podCheckInterval is the interval seconds between pod status checks.
    podCheckInterval = time.Second * 2

    // containerGCPeriod is the period of container garbage collect loop. It should be the same
    // with ContainerGCPeriod in kubelet.go. However we don't want to include kubelet package
    // directly which will introduce a lot more dependencies.
    containerGCPeriod = time.Minute * 1

    dummyFile = "dummy."
)

// TODO: Leverage dynamic Kubelet settings when it's implemented to only modify the kubelet eviction option in this test.
var _ = framework.KubeDescribe("Kubelet Eviction Manager [Serial] [Disruptive]", func() {
    f := framework.NewDefaultFramework("kubelet-eviction-manager")
    var podClient *framework.PodClient
    var c clientset.Interface

    BeforeEach(func() {
        podClient = f.PodClient()
        c = f.ClientSet
    })

    Describe("hard eviction test", func() {
        Context("pod using the most disk space gets evicted when the node disk usage is above the eviction hard threshold", func() {
            var busyPodName, idlePodName, verifyPodName string

            BeforeEach(func() {
                if !isImageSupported() {
                    framework.Skipf("test skipped because the image is not supported by the test")
                }
                if !evictionOptionIsSet() {
                    framework.Skipf("test skipped because eviction option is not set")
                }

                busyPodName = "to-evict" + string(uuid.NewUUID())
                idlePodName = "idle" + string(uuid.NewUUID())
                verifyPodName = "verify" + string(uuid.NewUUID())
                createIdlePod(idlePodName, podClient)
                podClient.Create(&v1.Pod{
                    ObjectMeta: metav1.ObjectMeta{
                        Name: busyPodName,
                    },
                    Spec: v1.PodSpec{
                        RestartPolicy: v1.RestartPolicyNever,
                        Containers: []v1.Container{
                            {
                                Image: busyboxImage,
                                Name:  busyPodName,
                                // Filling the disk
                                Command: []string{"sh", "-c",
                                    fmt.Sprintf("for NUM in `seq 1 1 100000`; do dd if=/dev/urandom of=%s.$NUM bs=50000000 count=10; sleep 0.5; done",
                                        dummyFile)},
                            },
                        },
                    },
                })
            })

            AfterEach(func() {
                if !isImageSupported() || !evictionOptionIsSet() { // Skip the after each
                    return
                }
                podClient.DeleteSync(busyPodName, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
                podClient.DeleteSync(idlePodName, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
                podClient.DeleteSync(verifyPodName, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)

                // Wait for 2 container gc loop to ensure that the containers are deleted. The containers
                // created in this test consume a lot of disk, we don't want them to trigger disk eviction
                // again after the test.
                time.Sleep(containerGCPeriod * 2)

                if framework.TestContext.PrepullImages {
                    // The disk eviction test may cause the prepulled images to be evicted,
                    // prepull those images again to ensure this test not affect following tests.
                    PrePullAllImages()
                }
            })

            It("should evict the pod using the most disk space [Slow]", func() {
                evictionOccurred := false
                nodeDiskPressureCondition := false
                podRescheduleable := false
                Eventually(func() error {
                    // Avoid the test using up all the disk space
                    err := checkDiskUsage(0.05)
                    if err != nil {
                        return err
                    }

                    // The pod should be evicted.
                    if !evictionOccurred {
                        podData, err := podClient.Get(busyPodName, metav1.GetOptions{})
                        if err != nil {
                            return err
                        }

                        err = verifyPodEviction(podData)
                        if err != nil {
                            return err
                        }

                        podData, err = podClient.Get(idlePodName, metav1.GetOptions{})
                        if err != nil {
                            return err
                        }

                        if podData.Status.Phase != v1.PodRunning {
                            err = verifyPodEviction(podData)
                            if err != nil {
                                return err
                            }
                        }
                        evictionOccurred = true
                        return fmt.Errorf("waiting for node disk pressure condition to be set")
                    }

                    // The node should have disk pressure condition after the pods are evicted.
                    if !nodeDiskPressureCondition {
                        if !nodeHasDiskPressure(f.ClientSet) {
                            return fmt.Errorf("expected disk pressure condition is not set")
                        }
                        nodeDiskPressureCondition = true
                        return fmt.Errorf("waiting for node disk pressure condition to be cleared")
                    }

                    // After eviction happens the pod is evicted so eventually the node disk pressure should be relieved.
                    if !podRescheduleable {
                        if nodeHasDiskPressure(f.ClientSet) {
                            return fmt.Errorf("expected disk pressure condition relief has not happened")
                        }
                        createIdlePod(verifyPodName, podClient)
                        podRescheduleable = true
                        return fmt.Errorf("waiting for the node to accept a new pod")
                    }

                    // The new pod should be able to be scheduled and run after the disk pressure is relieved.
                    podData, err := podClient.Get(verifyPodName, metav1.GetOptions{})
                    if err != nil {
                        return err
                    }
                    if podData.Status.Phase != v1.PodRunning {
                        return fmt.Errorf("waiting for the new pod to be running")
                    }

                    return nil
                }, time.Minute*15 /* based on n1-standard-1 machine type */, podCheckInterval).Should(BeNil())
            })
        })
    })
})

func createIdlePod(podName string, podClient *framework.PodClient) {
    podClient.Create(&v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Name: podName,
        },
        Spec: v1.PodSpec{
            RestartPolicy: v1.RestartPolicyNever,
            Containers: []v1.Container{
                {
                    Image: framework.GetPauseImageNameForHostArch(),
                    Name:  podName,
                },
            },
        },
    })
}

func verifyPodEviction(podData *v1.Pod) error {
    if podData.Status.Phase != v1.PodFailed {
        return fmt.Errorf("expected phase to be failed. got %+v", podData.Status.Phase)
    }
    if podData.Status.Reason != "Evicted" {
        return fmt.Errorf("expected failed reason to be evicted. got %+v", podData.Status.Reason)
    }
    return nil
}

func nodeHasDiskPressure(cs clientset.Interface) bool {
    nodeList := framework.GetReadySchedulableNodesOrDie(cs)
    for _, condition := range nodeList.Items[0].Status.Conditions {
        if condition.Type == v1.NodeDiskPressure {
            return condition.Status == v1.ConditionTrue
        }
    }
    return false
}

func evictionOptionIsSet() bool {
    return len(framework.TestContext.KubeletConfig.EvictionHard) > 0
}

// TODO(random-liu): Use OSImage in node status to do the check.
func isImageSupported() bool {
    // TODO: Only images with image fs is selected for testing for now. When the kubelet settings can be dynamically updated,
    // instead of skipping images the eviction thresholds should be adjusted based on the images.
    return strings.Contains(framework.TestContext.NodeName, "-gci-dev-")
}

// checkDiskUsage verifies that the available bytes on disk are above the limit.
func checkDiskUsage(limit float64) error {
    summary, err := getNodeSummary()
    if err != nil {
        return err
    }

    if nodeFs := summary.Node.Fs; nodeFs != nil {
        if nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
            if float64(*nodeFs.CapacityBytes)*limit > float64(*nodeFs.AvailableBytes) {
                return fmt.Errorf("available nodefs byte is less than %v%%", limit*float64(100))
            }
        }
    }

    if summary.Node.Runtime != nil {
        if imageFs := summary.Node.Runtime.ImageFs; imageFs != nil {
            if float64(*imageFs.CapacityBytes)*limit > float64(*imageFs.AvailableBytes) {
                return fmt.Errorf("available imagefs byte is less than %v%%", limit*float64(100))
            }
        }
    }

    return nil
}
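Editor's note: the checkDiskUsage guard above fails once available space drops below capacity times the limit fraction. A minimal standalone illustration of that comparison, using invented byte counts rather than real summary data:

    package main

    import "fmt"

    // belowLimit mirrors the comparison used by checkDiskUsage: the filesystem is
    // considered too full once availableBytes < capacityBytes*limit.
    func belowLimit(capacityBytes, availableBytes uint64, limit float64) bool {
        return float64(capacityBytes)*limit > float64(availableBytes)
    }

    func main() {
        capacity := uint64(100000000000) // hypothetical 100 GB nodefs
        available := uint64(4000000000)  // hypothetical 4 GB still free
        if belowLimit(capacity, available, 0.05) {
            fmt.Println("less than 5% of nodefs is available; stop the disk-filling pod")
        }
    }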
@@ -0,0 +1,601 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
    "fmt"
    "path/filepath"
    "time"

    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
    "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
    stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
    "k8s.io/kubernetes/pkg/kubelet/cm"
    kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
    "k8s.io/kubernetes/test/e2e/framework"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
)

// Eviction Policy is described here:
// https://github.com/kubernetes/community/blob/master/contributors/design-proposals/kubelet-eviction.md

const (
    postTestConditionMonitoringPeriod = 1 * time.Minute
    evictionPollInterval              = 2 * time.Second
    pressureDissapearTimeout          = 1 * time.Minute
    longPodDeletionTimeout            = 10 * time.Minute
    // pressure conditions often surface after evictions because the kubelet only updates
    // node conditions periodically.
    // we wait this period after evictions to make sure that we wait out this delay
    pressureDelay  = 20 * time.Second
    testContextFmt = "when we run containers that should cause %s"
    noPressure     = v1.NodeConditionType("NoPressure")
)

// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Node disk pressure is induced by consuming all inodes on the node.
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
    f := framework.NewDefaultFramework("inode-eviction-test")
    expectedNodeCondition := v1.NodeDiskPressure
    pressureTimeout := 15 * time.Minute
    inodesConsumed := uint64(200000)
    Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
            // Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
            summary := eventuallyGetSummary()
            inodesFree := *summary.Node.Fs.InodesFree
            if inodesFree <= inodesConsumed {
                framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
            }
            initialConfig.EvictionHard = fmt.Sprintf("nodefs.inodesFree<%d", inodesFree-inodesConsumed)
            initialConfig.EvictionMinimumReclaim = ""
        })
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod: inodeConsumingPod("container-inode-hog", nil),
            },
            {
                evictionPriority: 1,
                pod: inodeConsumingPod("volume-inode-hog", &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
            },
            {
                evictionPriority: 0,
                pod: innocentPod(),
            },
        })
    })
})

// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
    f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
    expectedNodeCondition := v1.NodeMemoryPressure
    pressureTimeout := 10 * time.Minute
    Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
            // Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
            kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
            // The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb
            // We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb
            kubeReserved.Sub(resource.MustParse("300Mi"))
            initialConfig.KubeReserved = kubeletconfig.ConfigurationMap(map[string]string{string(v1.ResourceMemory): kubeReserved.String()})
            initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
            initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
            initialConfig.CgroupsPerQOS = true
        })
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, logMemoryMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod: getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                pod: innocentPod(),
            },
        })
    })
})

// LocalStorageAllocatableEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Node disk pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
var _ = framework.KubeDescribe("LocalStorageAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
    f := framework.NewDefaultFramework("localstorageallocatable-eviction-test")
    pressureTimeout := 10 * time.Minute
    expectedNodeCondition := v1.NodeDiskPressure
    Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        // Set up --kube-reserved for scratch storage
        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
            diskConsumed := uint64(200000000) // At least 200 Mb for pods to consume
            summary := eventuallyGetSummary()
            availableBytes := *(summary.Node.Fs.AvailableBytes)
            initialConfig.KubeReserved = kubeletconfig.ConfigurationMap(map[string]string{string(v1.ResourceEphemeralStorage): fmt.Sprintf("%d", availableBytes-diskConsumed)})
            initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
            initialConfig.CgroupsPerQOS = true
            initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
            if initialConfig.FeatureGates != "" {
                initialConfig.FeatureGates += ","
            }
            initialConfig.FeatureGates += "LocalStorageCapacityIsolation=true"
            // set evictionHard to be very small, so that only the allocatable eviction threshold triggers
            initialConfig.EvictionHard = "nodefs.available<1"
            initialConfig.EvictionMinimumReclaim = ""
            framework.Logf("KubeReserved: %+v", initialConfig.KubeReserved)
        })
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod: diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                pod: innocentPod(),
            },
        })
    })
})

// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods
// Disk pressure is induced by running pods which consume disk space.
var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
    f := framework.NewDefaultFramework("localstorage-eviction-test")
    pressureTimeout := 10 * time.Minute
    expectedNodeCondition := v1.NodeDiskPressure
    Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
            diskConsumed := uint64(100000000) // At least 100 Mb for pods to consume
            summary := eventuallyGetSummary()
            availableBytes := *(summary.Node.Fs.AvailableBytes)
            initialConfig.EvictionHard = fmt.Sprintf("nodefs.available<%d", availableBytes-diskConsumed)
            initialConfig.EvictionMinimumReclaim = ""
        })
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod: diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                pod: innocentPod(),
            },
        })
    })
})

// LocalStorageSoftEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Disk pressure is induced by running pods which consume disk space, which exceed the soft eviction threshold.
// Note: This test's purpose is to test Soft Evictions. Local storage was chosen since it is the least costly to run.
var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
    f := framework.NewDefaultFramework("localstorage-eviction-test")
    pressureTimeout := 10 * time.Minute
    expectedNodeCondition := v1.NodeDiskPressure
    Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
            diskConsumed := uint64(100000000) // At least 100 Mb for pods to consume
            summary := eventuallyGetSummary()
            availableBytes := *(summary.Node.Fs.AvailableBytes)
            initialConfig.EvictionSoft = fmt.Sprintf("nodefs.available<%d", availableBytes-diskConsumed)
            initialConfig.EvictionSoftGracePeriod = "nodefs.available=1m"
            // Defer to the pod default grace period
            initialConfig.EvictionMaxPodGracePeriod = 30
            initialConfig.EvictionMinimumReclaim = ""
            // Ensure that pods are not evicted because of the eviction-hard threshold
            initialConfig.EvictionHard = ""
        })
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod: diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                pod: innocentPod(),
            },
        })
    })
})

// LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions
var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Flaky] [Feature:LocalStorageCapacityIsolation]", func() {
    f := framework.NewDefaultFramework("localstorage-eviction-test")
    evictionTestTimeout := 10 * time.Minute
    Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
        tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
            if initialConfig.FeatureGates != "" {
                initialConfig.FeatureGates += ","
            }
            initialConfig.FeatureGates += "LocalStorageCapacityIsolation=true"
            initialConfig.EvictionHard = ""
        })
        sizeLimit := resource.MustParse("100Mi")
        used := int64(200) // Consume 200 Mb
        containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}

        runEvictionTest(f, evictionTestTimeout, noPressure, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 1, // This pod should be evicted because of an emptyDir (default storage type) usage violation
                pod: diskConsumingPod("emptydir-disk-sizelimit", used, &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
                }, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 1, // This pod should be evicted because of a memory emptyDir usage violation
                pod: diskConsumingPod("emptydir-memory-sizelimit", used, &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sizeLimit},
                }, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 1, // This pod should cross the container limit by writing to its writable layer.
                pod: diskConsumingPod("container-disk-limit", used, nil, v1.ResourceRequirements{Limits: containerLimit}),
            },
            {
                evictionPriority: 1, // This pod should hit the container limit by writing to an emptydir
                pod: diskConsumingPod("container-emptydir-disk-limit", used, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}},
                    v1.ResourceRequirements{Limits: containerLimit}),
            },
            {
                evictionPriority: 0, // This pod should not be evicted because it uses less than its limit
                pod: diskConsumingPod("emptydir-disk-below-sizelimit", int64(50), &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
                }, v1.ResourceRequirements{}),
            },
        })
    })
})

// Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
type podEvictSpec struct {
    // P0 should never be evicted, P1 shouldn't evict before P2, etc.
    // If two are ranked at P1, either is permitted to fail before the other.
    // The test ends when all pods other than p0 have been evicted
    evictionPriority int
    pod              *v1.Pod
}

// runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
// It ensures that the desired expectedNodeCondition is actually triggered.
// It ensures that evictionPriority 0 pods are not evicted
// It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.)
// It ensures that all pods with non-zero evictionPriority are eventually evicted.
// runEvictionTest then cleans up the testing environment by deleting provided pods, and ensures that expectedNodeCondition no longer exists
func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, logFunc func(), testSpecs []podEvictSpec) {
    // Place the remainder of the test within a context so that the kubelet config is set before and after the test.
    Context("", func() {
        BeforeEach(func() {
            // Nodes do not immediately report local storage capacity
            // Sleep so that pods requesting local storage do not fail to schedule
            time.Sleep(30 * time.Second)
            By("setting up pods to be used by tests")
            for _, spec := range testSpecs {
                By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
                f.PodClient().CreateSync(spec.pod)
            }
        })

        It("should eventually evict all of the correct pods", func() {
            By(fmt.Sprintf("Waiting for node to have NodeCondition: %s", expectedNodeCondition))
            Eventually(func() error {
                logFunc()
                if expectedNodeCondition == noPressure || hasNodeCondition(f, expectedNodeCondition) {
                    return nil
                }
                return fmt.Errorf("NodeCondition: %s not encountered", expectedNodeCondition)
            }, pressureTimeout, evictionPollInterval).Should(BeNil())

            By("Waiting for evictions to occur")
            Eventually(func() error {
                if expectedNodeCondition != noPressure {
                    if hasNodeCondition(f, expectedNodeCondition) {
                        framework.Logf("Node has %s", expectedNodeCondition)
                    } else {
                        framework.Logf("Node does NOT have %s", expectedNodeCondition)
                    }
                }
                logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
                logFunc()
                return verifyEvictionOrdering(f, testSpecs)
            }, pressureTimeout, evictionPollInterval).Should(BeNil())

            // We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
            // This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
            // evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this delay
            By("making sure pressure from test has surfaced before continuing")
            time.Sleep(pressureDelay)

            By(fmt.Sprintf("Waiting for NodeCondition: %s to no longer exist on the node", expectedNodeCondition))
            Eventually(func() error {
                logFunc()
                logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
                if expectedNodeCondition != noPressure && hasNodeCondition(f, expectedNodeCondition) {
                    return fmt.Errorf("Conditions haven't returned to normal, node still has %s", expectedNodeCondition)
                }
                return nil
            }, pressureDissapearTimeout, evictionPollInterval).Should(BeNil())

            By("checking for stable, pressure-free condition without unexpected pod failures")
            Consistently(func() error {
                if expectedNodeCondition != noPressure && hasNodeCondition(f, expectedNodeCondition) {
                    return fmt.Errorf("%s disappeared and then reappeared", expectedNodeCondition)
                }
                logFunc()
                logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
                return verifyEvictionOrdering(f, testSpecs)
            }, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
        })

        AfterEach(func() {
            By("deleting pods")
            for _, spec := range testSpecs {
                By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
                f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
            }
            By("making sure we can start a new pod after the test")
            podName := "test-admit-pod"
            f.PodClient().CreateSync(&v1.Pod{
                ObjectMeta: metav1.ObjectMeta{
                    Name: podName,
                },
                Spec: v1.PodSpec{
                    RestartPolicy: v1.RestartPolicyNever,
                    Containers: []v1.Container{
                        {
                            Image: framework.GetPauseImageNameForHostArch(),
                            Name:  podName,
                        },
                    },
                },
            })

            if CurrentGinkgoTestDescription().Failed {
                if framework.TestContext.DumpLogsOnFailure {
                    logPodEvents(f)
                    logNodeEvents(f)
                }
            }
        })
    })
}

// verifyEvictionOrdering returns an error if all non-zero priority pods have not been evicted, nil otherwise
// This function panics (via Expect) if eviction ordering is violated, or if a priority-zero pod fails.
func verifyEvictionOrdering(f *framework.Framework, testSpecs []podEvictSpec) error {
    // Gather current information
    updatedPodList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(metav1.ListOptions{})
    if err != nil {
        return err
    }
    updatedPods := updatedPodList.Items
    for _, p := range updatedPods {
        framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
    }

    By("checking eviction ordering and ensuring important pods don't fail")
    done := true
    for _, priorityPodSpec := range testSpecs {
        var priorityPod v1.Pod
        for _, p := range updatedPods {
            if p.Name == priorityPodSpec.pod.Name {
                priorityPod = p
            }
        }
        Expect(priorityPod).NotTo(BeNil())

        // Check eviction ordering.
        // Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round,
        // but never alright for a priority 1 pod to fail while the priority 2 pod is still running
        for _, lowPriorityPodSpec := range testSpecs {
            var lowPriorityPod v1.Pod
            for _, p := range updatedPods {
                if p.Name == lowPriorityPodSpec.pod.Name {
                    lowPriorityPod = p
                }
            }
            Expect(lowPriorityPod).NotTo(BeNil())
            if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
                Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
                    fmt.Sprintf("priority %d pod: %s failed before priority %d pod: %s",
                        priorityPodSpec.evictionPriority, priorityPodSpec.pod.Name, lowPriorityPodSpec.evictionPriority, lowPriorityPodSpec.pod.Name))
            }
        }

        // EvictionPriority 0 pods should not fail
        if priorityPodSpec.evictionPriority == 0 {
            Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
                fmt.Sprintf("priority 0 pod: %s failed", priorityPod.Name))
        }

        // If a pod that is not evictionPriority 0 has not been evicted, we are not done
        if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
            done = false
        }
    }
    if done {
        return nil
    }
    return fmt.Errorf("pods that should be evicted are still running")
}

// Returns TRUE if the node has the node condition, FALSE otherwise
func hasNodeCondition(f *framework.Framework, expectedNodeCondition v1.NodeConditionType) bool {
    localNodeStatus := getLocalNode(f).Status
    _, actualNodeCondition := nodeutil.GetNodeCondition(&localNodeStatus, expectedNodeCondition)
    Expect(actualNodeCondition).NotTo(BeNil())
    return actualNodeCondition.Status == v1.ConditionTrue
}

func logInodeMetrics() {
    summary, err := getNodeSummary()
    if err != nil {
        framework.Logf("Error getting summary: %v", err)
        return
    }
    if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.Inodes != nil && summary.Node.Runtime.ImageFs.InodesFree != nil {
        framework.Logf("imageFsInfo.Inodes: %d, imageFsInfo.InodesFree: %d", *summary.Node.Runtime.ImageFs.Inodes, *summary.Node.Runtime.ImageFs.InodesFree)
    }
    if summary.Node.Fs != nil && summary.Node.Fs.Inodes != nil && summary.Node.Fs.InodesFree != nil {
        framework.Logf("rootFsInfo.Inodes: %d, rootFsInfo.InodesFree: %d", *summary.Node.Fs.Inodes, *summary.Node.Fs.InodesFree)
    }
    for _, pod := range summary.Pods {
        framework.Logf("Pod: %s", pod.PodRef.Name)
        for _, container := range pod.Containers {
            if container.Rootfs != nil && container.Rootfs.InodesUsed != nil {
                framework.Logf("--- summary Container: %s inodeUsage: %d", container.Name, *container.Rootfs.InodesUsed)
            }
        }
        for _, volume := range pod.VolumeStats {
            if volume.FsStats.InodesUsed != nil {
                framework.Logf("--- summary Volume: %s inodeUsage: %d", volume.Name, *volume.FsStats.InodesUsed)
            }
        }
    }
}

func logDiskMetrics() {
    summary, err := getNodeSummary()
    if err != nil {
        framework.Logf("Error getting summary: %v", err)
        return
    }
    if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.CapacityBytes != nil && summary.Node.Runtime.ImageFs.AvailableBytes != nil {
        framework.Logf("imageFsInfo.CapacityBytes: %d, imageFsInfo.AvailableBytes: %d", *summary.Node.Runtime.ImageFs.CapacityBytes, *summary.Node.Runtime.ImageFs.AvailableBytes)
    }
    if summary.Node.Fs != nil && summary.Node.Fs.CapacityBytes != nil && summary.Node.Fs.AvailableBytes != nil {
        framework.Logf("rootFsInfo.CapacityBytes: %d, rootFsInfo.AvailableBytes: %d", *summary.Node.Fs.CapacityBytes, *summary.Node.Fs.AvailableBytes)
    }
    for _, pod := range summary.Pods {
        framework.Logf("Pod: %s", pod.PodRef.Name)
        for _, container := range pod.Containers {
            if container.Rootfs != nil && container.Rootfs.UsedBytes != nil {
                framework.Logf("--- summary Container: %s UsedBytes: %d", container.Name, *container.Rootfs.UsedBytes)
            }
        }
        for _, volume := range pod.VolumeStats {
            // Guard on UsedBytes, since that is the field dereferenced below.
            if volume.FsStats.UsedBytes != nil {
                framework.Logf("--- summary Volume: %s UsedBytes: %d", volume.Name, *volume.FsStats.UsedBytes)
            }
        }
    }
}

func logMemoryMetrics() {
    summary, err := getNodeSummary()
    if err != nil {
        framework.Logf("Error getting summary: %v", err)
        return
    }
    if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
        framework.Logf("Node.Memory.WorkingSetBytes: %d, summary.Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
    }
    for _, pod := range summary.Pods {
        framework.Logf("Pod: %s", pod.PodRef.Name)
        for _, container := range pod.Containers {
            if container.Memory != nil && container.Memory.WorkingSetBytes != nil {
                framework.Logf("--- summary Container: %s WorkingSetBytes: %d", container.Name, *container.Memory.WorkingSetBytes)
            }
        }
    }
}

func eventuallyGetSummary() (s *stats.Summary) {
    Eventually(func() error {
        summary, err := getNodeSummary()
        if err != nil {
            return err
        }
        if summary == nil || summary.Node.Fs == nil || summary.Node.Fs.InodesFree == nil || summary.Node.Fs.AvailableBytes == nil {
            return fmt.Errorf("some part of data is nil")
        }
        s = summary
        return nil
    }, time.Minute, evictionPollInterval).Should(BeNil())
    return
}

// returns a pod that does not use any resources
func innocentPod() *v1.Pod {
    return &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "innocent-pod"},
        Spec: v1.PodSpec{
            RestartPolicy: v1.RestartPolicyNever,
            Containers: []v1.Container{
                {
                    Image: busyboxImage,
                    Name:  "innocent-container",
                    Command: []string{
                        "sh",
                        "-c",
                        "while true; do sleep 5; done",
                    },
                },
            },
        },
    }
}

const (
    volumeMountPath = "/test-mnt"
    volumeName      = "test-volume"
)

func inodeConsumingPod(name string, volumeSource *v1.VolumeSource) *v1.Pod {
    // Each iteration creates an empty file
    return podWithCommand(volumeSource, v1.ResourceRequirements{}, name, "i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;")
}

func diskConsumingPod(name string, diskConsumedMB int64, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
    // Each iteration writes 100 Mb to the file (bs=100 count=1000000), so the loop runs diskConsumedMB/100 times
    return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB/100)+" do dd if=/dev/urandom of=%s${i} bs=100 count=1000000; i=$(($i+1)); done; while true; do sleep 5; done")
}

// podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
// If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, name, command string) *v1.Pod {
    path := ""
    volumeMounts := []v1.VolumeMount{}
    volumes := []v1.Volume{}
    if volumeSource != nil {
        path = volumeMountPath
        volumeMounts = []v1.VolumeMount{{MountPath: volumeMountPath, Name: volumeName}}
        volumes = []v1.Volume{{Name: volumeName, VolumeSource: *volumeSource}}
    }
    return &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod", name)},
        Spec: v1.PodSpec{
            RestartPolicy: v1.RestartPolicyNever,
            Containers: []v1.Container{
                {
                    Image: busyboxImage,
                    Name:  fmt.Sprintf("%s-container", name),
                    Command: []string{
                        "sh",
                        "-c",
                        fmt.Sprintf(command, filepath.Join(path, "file")),
                    },
                    Resources:    resources,
                    VolumeMounts: volumeMounts,
                },
            },
            Volumes: volumes,
        },
    }
}
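Editor's note: for readers extending this file, a hypothetical describe block (not part of the commit) shows the shape of a new test built on runEvictionTest: pick the expected node condition, tweak the kubelet config, and hand over an ordered slice of podEvictSpec entries, just as the soft-eviction test above does. The thresholds below are assumed example values.

    // Hypothetical example only: a memory soft-eviction test reusing the helpers defined above.
    var _ = framework.KubeDescribe("ExampleMemorySoftEviction [Slow] [Serial] [Disruptive]", func() {
        f := framework.NewDefaultFramework("example-soft-eviction-test")
        expectedNodeCondition := v1.NodeMemoryPressure
        pressureTimeout := 10 * time.Minute
        Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
            tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
                initialConfig.EvictionSoft = "memory.available<1Gi"           // assumed threshold
                initialConfig.EvictionSoftGracePeriod = "memory.available=1m" // assumed grace period
                initialConfig.EvictionHard = ""                               // keep the hard threshold out of the way
            })
            runEvictionTest(f, pressureTimeout, expectedNodeCondition, logMemoryMetrics, []podEvictSpec{
                {evictionPriority: 1, pod: getMemhogPod("example-hog-pod", "example-hog", v1.ResourceRequirements{})},
                {evictionPriority: 0, pod: innocentPod()},
            })
        })
    })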
@ -1,371 +0,0 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e_node
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Eviction Policy is described here:
|
||||
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md
|
||||
|
||||
const (
|
||||
postTestConditionMonitoringPeriod = 2 * time.Minute
|
||||
evictionPollInterval = 2 * time.Second
|
||||
// pressure conditions often surface after evictions because of delay in propegation of metrics to pressure
|
||||
// we wait this period after evictions to make sure that we wait out this delay
|
||||
pressureDelay = 20 * time.Second
|
||||
)
|
||||
|
||||
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("inode-eviction-test")
|
||||
|
||||
volumeMountPath := "/test-empty-dir-mnt"
|
||||
podTestSpecs := []podTestSpec{
|
||||
{
|
||||
evictionPriority: 1, // This pod should be evicted before the normal memory usage pod
|
||||
pod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "container-inode-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "container-inode-hog-container",
|
||||
Command: getInodeConsumingCommand(""),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
evictionPriority: 1, // This pod should be evicted before the normal memory usage pod
|
||||
pod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "volume-inode-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "volume-inode-hog-container",
|
||||
Command: getInodeConsumingCommand(volumeMountPath),
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{MountPath: volumeMountPath, Name: "test-empty-dir"},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{Name: "test-empty-dir", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
evictionPriority: 0, // This pod should never be evicted
|
||||
pod: getInnocentPod(),
|
||||
},
|
||||
}
|
||||
evictionTestTimeout := 15 * time.Minute
|
||||
testCondition := "Disk Pressure due to Inodes"
|
||||
inodesConsumed := uint64(200000)
|
||||
|
||||
Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
|
||||
inodesFree := getInodesFree()
|
||||
if inodesFree <= inodesConsumed {
|
||||
framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
|
||||
}
|
||||
initialConfig.EvictionHard = fmt.Sprintf("nodefs.inodesFree<%d", getInodesFree()-inodesConsumed)
|
||||
initialConfig.EvictionMinimumReclaim = ""
|
||||
})
|
||||
// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
|
||||
Context("With kubeconfig updated", func() {
|
||||
runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
|
||||
type podTestSpec struct {
|
||||
// 0 should never be evicted, 1 shouldn't evict before 2, etc.
|
||||
// If two are ranked at 1, either is permitted to fail before the other.
|
||||
// The test ends when all other than the 0 have been evicted
|
||||
evictionPriority int
|
||||
pod *v1.Pod
|
||||
}
|
||||
|
||||
// runEvictionTest sets up a testing environment given the provided nodes, and checks a few things:
|
||||
// It ensures that the desired testCondition is actually triggered.
|
||||
// It ensures that evictionPriority 0 pods are not evicted
|
||||
// It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.)
|
||||
// It ensures that all lower evictionPriority pods are eventually evicted.
|
||||
// runEvictionTest then cleans up the testing environment by deleting provided nodes, and ensures that testCondition no longer exists
|
||||
func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec, evictionTestTimeout time.Duration,
|
||||
hasPressureCondition func(*framework.Framework, string) (bool, error)) {
|
||||
BeforeEach(func() {
|
||||
By("seting up pods to be used by tests")
|
||||
for _, spec := range podTestSpecs {
|
||||
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
|
||||
f.PodClient().CreateSync(spec.pod)
|
||||
}
|
||||
})
|
||||
|
||||
It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
|
||||
configEnabled, err := isKubeletConfigEnabled(f)
|
||||
framework.ExpectNoError(err)
|
||||
if !configEnabled {
|
||||
framework.Skipf("Dynamic kubelet config must be enabled for this test to run.")
|
||||
}
|
||||
Eventually(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hasPressure {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("Condition: %s not encountered", testCondition)
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
Eventually(func() error {
|
||||
// Gather current information
|
||||
updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
updatedPods := updatedPodList.Items
|
||||
for _, p := range updatedPods {
|
||||
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
|
||||
}
|
||||
logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
|
||||
_, err = hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
By("checking eviction ordering and ensuring important pods dont fail")
|
||||
done := true
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
var priorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == priorityPodSpec.pod.Name {
|
||||
priorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(priorityPod).NotTo(BeNil())
|
||||
|
||||
// Check eviction ordering.
|
||||
// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round,
|
||||
// but never alright for a priority 1 pod to fail while the priority 2 pod is still running
|
||||
for _, lowPriorityPodSpec := range podTestSpecs {
|
||||
var lowPriorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == lowPriorityPodSpec.pod.Name {
|
||||
lowPriorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(lowPriorityPod).NotTo(BeNil())
|
||||
if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
|
||||
}
|
||||
}
|
||||
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
|
||||
}
|
||||
|
||||
// If a pod that is not evictionPriority 0 has not been evicted, we are not done
|
||||
if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
|
||||
done = false
|
||||
}
|
||||
}
|
||||
if done {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
|
||||
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
|
||||
// evicts a pod, and when we observe the pressure by querrying the API server. Add a delay here to account for this delay
|
||||
By("making sure pressure from test has surfaced before continuing")
|
||||
time.Sleep(pressureDelay)
|
||||
|
||||
By("making sure conditions eventually return to normal")
|
||||
Eventually(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
|
||||
}
|
||||
return nil
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("making sure conditions do not return, and that pods that shouldnt fail dont fail")
|
||||
Consistently(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
// Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug)
|
||||
// Do not fail the test when this occurs, since this is expected to happen occasionally.
|
||||
framework.Logf("Failed to check pressure condition. Error: %v", err)
|
||||
return nil
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("%s dissappeared and then reappeared", testCondition)
|
||||
}
|
||||
// Gather current information
|
||||
updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
for _, p := range updatedPodList.Items {
|
||||
if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed {
|
||||
logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
|
||||
return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
By("deleting pods")
|
||||
for _, spec := range podTestSpecs {
|
||||
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
|
||||
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
|
||||
}
|
||||
|
||||
By("making sure we can start a new pod after the test")
|
||||
podName := "test-admit-pod"
|
||||
f.PodClient().CreateSync(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: framework.GetPauseImageNameForHostArch(),
|
||||
Name: podName,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if CurrentGinkgoTestDescription().Failed && framework.TestContext.DumpLogsOnFailure {
|
||||
logPodEvents(f)
|
||||
logNodeEvents(f)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Returns TRUE if the node has disk pressure due to inodes exists on the node, FALSE otherwise
|
||||
func hasInodePressure(f *framework.Framework, testCondition string) (bool, error) {
|
||||
localNodeStatus := getLocalNode(f).Status
|
||||
_, pressure := nodeutil.GetNodeCondition(&localNodeStatus, v1.NodeDiskPressure)
|
||||
Expect(pressure).NotTo(BeNil())
|
||||
hasPressure := pressure.Status == v1.ConditionTrue
|
||||
By(fmt.Sprintf("checking if pod has %s: %v", testCondition, hasPressure))
|
||||
|
||||
// Additional Logging relating to Inodes
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.Inodes != nil && summary.Node.Runtime.ImageFs.InodesFree != nil {
|
||||
framework.Logf("imageFsInfo.Inodes: %d, imageFsInfo.InodesFree: %d", *summary.Node.Runtime.ImageFs.Inodes, *summary.Node.Runtime.ImageFs.InodesFree)
|
||||
}
|
||||
if summary.Node.Fs != nil && summary.Node.Fs.Inodes != nil && summary.Node.Fs.InodesFree != nil {
|
||||
framework.Logf("rootFsInfo.Inodes: %d, rootFsInfo.InodesFree: %d", *summary.Node.Fs.Inodes, *summary.Node.Fs.InodesFree)
|
||||
}
|
||||
for _, pod := range summary.Pods {
|
||||
framework.Logf("Pod: %s", pod.PodRef.Name)
|
||||
for _, container := range pod.Containers {
|
||||
if container.Rootfs != nil && container.Rootfs.InodesUsed != nil {
|
||||
framework.Logf("--- summary Container: %s inodeUsage: %d", container.Name, *container.Rootfs.InodesUsed)
|
||||
}
|
||||
}
|
||||
for _, volume := range pod.VolumeStats {
|
||||
if volume.FsStats.InodesUsed != nil {
|
||||
framework.Logf("--- summary Volume: %s inodeUsage: %d", volume.Name, *volume.FsStats.InodesUsed)
|
||||
}
|
||||
}
|
||||
}
|
||||
return hasPressure, nil
|
||||
}
|
||||
|
||||
func getInodesFree() uint64 {
|
||||
var inodesFree uint64
|
||||
Eventually(func() error {
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if summary == nil || summary.Node.Fs == nil || summary.Node.Fs.InodesFree == nil {
|
||||
return fmt.Errorf("some part of data is nil")
|
||||
}
|
||||
inodesFree = *summary.Node.Fs.InodesFree
|
||||
return nil
|
||||
}, time.Minute, evictionPollInterval).Should(BeNil())
|
||||
return inodesFree
|
||||
}
|
||||
|
||||
// returns a pod that does not use any resources
|
||||
func getInnocentPod() *v1.Pod {
|
||||
return &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "innocent-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "innocent-container",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c", //make one large file
|
||||
"dd if=/dev/urandom of=largefile bs=5000000000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func getInodeConsumingCommand(path string) []string {
|
||||
return []string{
|
||||
"sh",
|
||||
"-c",
|
||||
fmt.Sprintf("i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;", filepath.Join(path, "smallfile")),
|
||||
}
|
||||
}
|
|
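For orientation, a minimal sketch (not part of this change) of how a helper like getInodeConsumingCommand could be wired into a pod for an inode-pressure test; it assumes the same e2e_node package, the busyboxImage constant used above, and an illustrative pod name:

	// inodeConsumingPod is a hypothetical helper: it runs the small-file-creating
	// command returned by getInodeConsumingCommand inside a busybox container so
	// that the pod steadily consumes inodes on the node.
	func inodeConsumingPod(name string) *v1.Pod {
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: name},
			Spec: v1.PodSpec{
				RestartPolicy: v1.RestartPolicyNever,
				Containers: []v1.Container{
					{
						Image:   busyboxImage,
						Name:    name,
						Command: getInodeConsumingCommand(""), // write the small files into the container rootfs
					},
				},
			},
		}
	}
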
@@ -4,8 +4,8 @@ GCE_ZONE=us-central1-f
GCE_PROJECT=k8s-jkns-ci-node-e2e
CLEANUP=true
GINKGO_FLAGS='--focus="\[Flaky\]"'
TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'
TEST_ARGS='--feature-gates=DynamicKubeletConfig=true,LocalStorageCapacityIsolation=true'
KUBELET_ARGS='--cgroups-per-qos=true --cgroup-root=/'
PARALLELISM=1
TIMEOUT=2h
TIMEOUT=3h

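The LocalStorageCapacityIsolation gate added to TEST_ARGS here is the same gate the LocalStorageCapacityIsolationEviction test further down enables in-process. As a rough sketch of that pattern (illustrative only; it mirrors the call used in that test rather than introducing new behaviour, and assumes the utilfeature and framework packages it imports):

	// Illustrative sketch: enable the gate for the current test process,
	// failing fast if the gate name is rejected.
	if err := utilfeature.DefaultFeatureGate.Set("LocalStorageCapacityIsolation=true"); err != nil {
		framework.Failf("Failed to enable feature gate for LocalStorageCapacityIsolation: %v", err)
	}
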
@@ -1,321 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
	"fmt"
	"time"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
	"k8s.io/kubernetes/test/e2e/framework"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

// Eviction Policy is described here:
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md

var _ = framework.KubeDescribe("LocalStorageAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
	f := framework.NewDefaultFramework("localstorageallocatable-eviction-test")
	evictionTestTimeout := 15 * time.Minute
	testCondition := "Evict pod due to local storage allocatable violation"
	conditionType := v1.NodeDiskPressure
	var podTestSpecs []podTestSpec
	//podTestSpecsS := make([]podTestSpec, 5)
	var diskReserve uint64
	Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {

		BeforeEach(func() {
			diskAvail, err := getDiskUsage()
			if err != nil {
				framework.ExpectNoError(err)
			}

			diskReserve = uint64(0.8 * diskAvail / 1000000) // Reserve 0.8 * disk Capacity for kube-reserved scratch storage
			maxDisk := 10000000                             // Set dd command to read and write up to 10MB at a time
			count := uint64(0.8 * diskAvail / float64(maxDisk))
			command := fmt.Sprintf("dd if=/dev/urandom of=dummy bs=%d count=%d; while true; do sleep 5; done", maxDisk, count)
			podTestSpecs = []podTestSpec{
				{
					evictionPriority: 1, // This pod should be evicted before the innocent pod
					pod: &v1.Pod{
						ObjectMeta: metav1.ObjectMeta{Name: "container-disk-hog-pod"},
						Spec: v1.PodSpec{
							RestartPolicy: v1.RestartPolicyNever,
							Containers: []v1.Container{
								{
									Image:   busyboxImage,
									Name:    "container-disk-hog-pod",
									Command: []string{"sh", "-c", command},
								},
							},
						},
					},
				},

				{
					evictionPriority: 0, // This pod should never be evicted
					pod: &v1.Pod{
						ObjectMeta: metav1.ObjectMeta{Name: "idle-pod"},
						Spec: v1.PodSpec{
							RestartPolicy: v1.RestartPolicyNever,
							Containers: []v1.Container{
								{
									Image: busyboxImage,
									Name:  "idle-pod",
									Command: []string{"sh", "-c",
										fmt.Sprintf("while true; do sleep 5; done")},
								},
							},
						},
					},
				},
			}
		})

		// Set up --kube-reserved for scratch storage
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			framework.Logf("Set up --kube-reserved for local storage reserved %dMi", diskReserve)
			initialConfig.KubeReserved = kubeletconfig.ConfigurationMap(map[string]string{"storage": fmt.Sprintf("%dMi", diskReserve)})

		})

		// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
		Context("With kubeconfig updated", func() {
			runLocalStorageEvictionTest(f, conditionType, testCondition, &podTestSpecs, evictionTestTimeout, hasDiskPressure)
		})

	})

})

// Returns TRUE if the node has disk pressure, FALSE otherwise
func hasDiskPressure(f *framework.Framework, conditionType v1.NodeConditionType, testCondition string) (bool, error) {
	localNodeStatus := getLocalNode(f).Status
	_, pressure := nodeutil.GetNodeCondition(&localNodeStatus, conditionType)
	Expect(pressure).NotTo(BeNil())
	hasPressure := pressure.Status == v1.ConditionTrue
	By(fmt.Sprintf("checking if pod has %s: %v", testCondition, hasPressure))

	// Additional Logging relating to disk
	summary, err := getNodeSummary()
	if err != nil {
		return false, err
	}
	if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.UsedBytes != nil {
		framework.Logf("imageFsInfo.UsedBytes: %d", *summary.Node.Runtime.ImageFs.UsedBytes)
	}
	if summary.Node.Fs != nil && summary.Node.Fs.UsedBytes != nil {
		framework.Logf("rootFsInfo.UsedBytes: %d", *summary.Node.Fs.UsedBytes)
	}
	for _, pod := range summary.Pods {
		framework.Logf("Pod: %s", pod.PodRef.Name)
		for _, container := range pod.Containers {
			if container.Rootfs != nil && container.Rootfs.UsedBytes != nil {
				framework.Logf("--- summary Container: %s UsedBytes: %d", container.Name, *container.Rootfs.UsedBytes)
			}
		}
		for _, volume := range pod.VolumeStats {
			if volume.FsStats.UsedBytes != nil {
				framework.Logf("--- summary Volume: %s UsedBytes: %d", volume.Name, *volume.FsStats.UsedBytes)
			}
		}
	}
	return hasPressure, nil
}

// Pass podTestSpecsP as a reference so that it can be set up in the first BeforeEach clause
func runLocalStorageEvictionTest(f *framework.Framework, conditionType v1.NodeConditionType, testCondition string, podTestSpecsP *[]podTestSpec, evictionTestTimeout time.Duration,
	hasPressureCondition func(*framework.Framework, v1.NodeConditionType, string) (bool, error)) {
	BeforeEach(func() {

		By("setting up pods to be used by tests")
		for _, spec := range *podTestSpecsP {
			By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
			f.PodClient().CreateSync(spec.pod)
		}
	})

	It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
		Expect(podTestSpecsP).NotTo(BeNil())
		podTestSpecs := *podTestSpecsP

		Eventually(func() error {
			hasPressure, err := hasPressureCondition(f, conditionType, testCondition)
			if err != nil {
				return err
			}
			if hasPressure {
				return nil
			}
			return fmt.Errorf("Condition: %s not encountered", testCondition)
		}, evictionTestTimeout, evictionPollInterval).Should(BeNil())

		Eventually(func() error {
			// Gather current information
			updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
			updatedPods := updatedPodList.Items
			for _, p := range updatedPods {
				framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
			}
			_, err = hasPressureCondition(f, conditionType, testCondition)
			if err != nil {
				return err
			}

			By("checking eviction ordering and ensuring important pods don't fail")
			done := true
			for _, priorityPodSpec := range podTestSpecs {
				var priorityPod v1.Pod
				for _, p := range updatedPods {
					if p.Name == priorityPodSpec.pod.Name {
						priorityPod = p
					}
				}
				Expect(priorityPod).NotTo(BeNil())

				// Check eviction ordering.
				// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round
				for _, lowPriorityPodSpec := range podTestSpecs {
					var lowPriorityPod v1.Pod
					for _, p := range updatedPods {
						if p.Name == lowPriorityPodSpec.pod.Name {
							lowPriorityPod = p
						}
					}
					Expect(lowPriorityPod).NotTo(BeNil())
					if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
						Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
							fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
					}
				}

				// EvictionPriority 0 pods should not fail
				if priorityPodSpec.evictionPriority == 0 {
					Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
						fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
				}

				// If a pod that is not evictionPriority 0 has not been evicted, we are not done
				if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
					done = false
				}
			}
			if done {
				return nil
			}
			return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
		}, evictionTestTimeout, evictionPollInterval).Should(BeNil())

		// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
		// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
		// evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this delay
		By("making sure pressure from test has surfaced before continuing")
		time.Sleep(pressureDelay)

		By("making sure conditions eventually return to normal")
		Eventually(func() error {
			hasPressure, err := hasPressureCondition(f, conditionType, testCondition)
			if err != nil {
				return err
			}
			if hasPressure {
				return fmt.Errorf("Conditions haven't returned to normal, we still have %s", testCondition)
			}
			return nil
		}, evictionTestTimeout, evictionPollInterval).Should(BeNil())

		By("making sure conditions do not return, and that pods that shouldn't fail don't fail")
		Consistently(func() error {
			hasPressure, err := hasPressureCondition(f, conditionType, testCondition)
			if err != nil {
				// Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug)
				// Do not fail the test when this occurs, since this is expected to happen occasionally.
				framework.Logf("Failed to check pressure condition. Error: %v", err)
				return nil
			}
			if hasPressure {
				return fmt.Errorf("%s disappeared and then reappeared", testCondition)
			}
			// Gather current information
			updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
			for _, priorityPodSpec := range podTestSpecs {
				// EvictionPriority 0 pods should not fail
				if priorityPodSpec.evictionPriority == 0 {
					for _, p := range updatedPodList.Items {
						if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed {
							return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name)
						}
					}
				}
			}
			return nil
		}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())

		By("making sure we can start a new pod after the test")
		podName := "test-admit-pod"
		f.PodClient().CreateSync(&v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: podName,
			},
			Spec: v1.PodSpec{
				RestartPolicy: v1.RestartPolicyNever,
				Containers: []v1.Container{
					{
						Image: framework.GetPauseImageNameForHostArch(),
						Name:  podName,
					},
				},
			},
		})
	})

	AfterEach(func() {
		By("deleting pods")
		for _, spec := range *podTestSpecsP {
			By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
			f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
		}

		if CurrentGinkgoTestDescription().Failed {
			if framework.TestContext.DumpLogsOnFailure {
				logPodEvents(f)
				logNodeEvents(f)
			}
			By("sleeping to allow for cleanup of test")
			time.Sleep(postTestConditionMonitoringPeriod)
		}
	})
}

func getDiskUsage() (float64, error) {
	summary, err := getNodeSummary()
	if err != nil {
		return 0, err
	}

	if nodeFs := summary.Node.Fs; nodeFs != nil {
		return float64(*nodeFs.AvailableBytes), nil
	}

	return 0, fmt.Errorf("failed to get nodefs available bytes")

}

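A quick note on the sizing arithmetic in the BeforeEach above (illustrative only, not code from this change): getDiskUsage reports the available nodefs bytes, the test reserves roughly 80% of that for kube-reserved scratch storage, and the value is divided by 1e6 before being formatted with an Mi suffix. A hypothetical helper that mirrors that calculation:

	// reserveForScratch mirrors the reservation math used above: ~80% of the
	// available nodefs bytes, scaled down by 1e6 (decimal MB) even though the
	// result is later formatted as "<n>Mi"; the ~5% unit discrepancy is small
	// next to the 80% margin.
	func reserveForScratch(availBytes float64) uint64 {
		return uint64(0.8 * availBytes / 1000000)
	}
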
@@ -1,466 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node

import (
	"fmt"
	"time"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
	"k8s.io/kubernetes/test/e2e/framework"
)

type podEvictSpec struct {
	evicted bool
	pod     v1.Pod
}

const (
	totalEvict = 7
)

// Eviction Policy is described here:
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md

var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Flaky] [Feature:LocalStorageCapacityIsolation]", func() {

	f := framework.NewDefaultFramework("localstorage-eviction-test")

	emptyDirVolumeName := "volume-emptydir-pod"
	gitRepoVolumeName := "volume-gitrepo-pod"
	configMapVolumeName := "volume-configmap-pod"
	downwardAPIVolumeName := "volume-downwardapi-pod"
	podTestSpecs := []podEvictSpec{
		{evicted: true, // This pod should be evicted because of emptyDir (default storage type) usage violation
			pod: v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "emptydir-hog-pod"},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: busyboxImage,
							Name:  "container-emptydir-hog-pod",
							Command: []string{
								"sh",
								"-c",
								"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
							},
							VolumeMounts: []v1.VolumeMount{
								{
									Name:      emptyDirVolumeName,
									MountPath: "/cache",
								},
							},
						},
					},
					Volumes: []v1.Volume{
						{
							Name: emptyDirVolumeName,
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									SizeLimit: resource.NewQuantity(int64(1000), resource.BinarySI),
								},
							},
						},
					},
				},
			},
		},

		{evicted: true, // This pod should be evicted because of emptyDir (memory type) usage violation
			pod: v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "emptydir-memory-pod"},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: busyboxImage,
							Name:  "container-emptydir-memory-pod",
							Command: []string{
								"sh",
								"-c",
								"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
							},
							VolumeMounts: []v1.VolumeMount{
								{
									Name:      emptyDirVolumeName,
									MountPath: "/cache",
								},
							},
						},
					},
					Volumes: []v1.Volume{
						{
							Name: emptyDirVolumeName,
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									Medium:    "Memory",
									SizeLimit: resource.NewQuantity(int64(10000), resource.BinarySI),
								},
							},
						},
					},
				},
			},
		},

		{evicted: false,
			pod: v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "container-emptydir-pod-critical"},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: busyboxImage,
							Name:  "container-emptydir-hog-pod",
							Command: []string{
								"sh",
								"-c",
								"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
							},
							VolumeMounts: []v1.VolumeMount{
								{
									Name:      emptyDirVolumeName,
									MountPath: "/cache",
								},
							},
						},
					},
					Volumes: []v1.Volume{
						{
							Name: emptyDirVolumeName,
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									SizeLimit: resource.NewQuantity(int64(100000), resource.BinarySI),
								},
							},
						},
					},
				},
			},
		},

		{evicted: true, // This pod should be evicted because of container ephemeral storage usage violation
			pod: v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "container-hog-pod"},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: busyboxImage,
							Name:  "container-hog-pod",
							Command: []string{
								"sh",
								"-c",
								"sleep 5; dd if=/dev/urandom of=target-file bs=50000 count=1; while true; do sleep 5; done",
							},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
										int64(40000),
										resource.BinarySI),
								},
							},
						},
					},
				},
			},
		},

		{evicted: true, // This pod should be evicted because of pod ephemeral storage usage violation
			pod: v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "emptydir-container-hog-pod"},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: "gcr.io/google_containers/busybox:1.24",
							Name:  "emptydir-container-hog-pod",
							Command: []string{
								"sh",
								"-c",
								"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
							},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
										int64(40000),
										resource.BinarySI),
								},
							},
							VolumeMounts: []v1.VolumeMount{
								{
									Name:      emptyDirVolumeName,
									MountPath: "/cache",
								},
							},
						},
					},
					Volumes: []v1.Volume{
						{
							Name: emptyDirVolumeName,
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									SizeLimit: resource.NewQuantity(int64(100000), resource.BinarySI),
								},
							},
						},
					},
				},
			},
		},

		{evicted: true, // This pod should be evicted because of pod ephemeral storage usage violation
			pod: v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "downward-api-container-hog-pod"},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: "gcr.io/google_containers/busybox:1.24",
							Name:  "downward-api-container-hog-pod",
							Command: []string{
								"sh",
								"-c",
								"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
							},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
										int64(40000),
										resource.BinarySI),
								},
							},
							VolumeMounts: []v1.VolumeMount{
								{
									Name:      downwardAPIVolumeName,
									MountPath: "/cache",
								},
							},
						},
					},
					Volumes: []v1.Volume{
						{
							Name: downwardAPIVolumeName,
							VolumeSource: v1.VolumeSource{
								DownwardAPI: &v1.DownwardAPIVolumeSource{},
							},
						},
					},
				},
			},
		},

		{evicted: true, // This pod should be evicted because of pod ephemeral storage usage violation
			pod: v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "configmap-container-hog-pod"},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: "gcr.io/google_containers/busybox:1.24",
							Name:  "configmap-container-hog-pod",
							Command: []string{
								"sh",
								"-c",
								"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
							},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
										int64(40000),
										resource.BinarySI),
								},
							},
							VolumeMounts: []v1.VolumeMount{
								{
									Name:      configMapVolumeName,
									MountPath: "/cache",
								},
							},
						},
					},
					Volumes: []v1.Volume{
						{
							Name: configMapVolumeName,
							VolumeSource: v1.VolumeSource{
								ConfigMap: &v1.ConfigMapVolumeSource{
									LocalObjectReference: v1.LocalObjectReference{
										Name: "my-cfgmap",
									},
								},
							},
						},
					},
				},
			},
		},

		{evicted: true, // This pod should be evicted because of pod ephemeral storage usage violation
			pod: v1.Pod{
				ObjectMeta: metav1.ObjectMeta{Name: "gitrepo-container-hog-pod"},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: "gcr.io/google_containers/busybox:1.24",
							Name:  "gitrepo-container-hog-pod",
							Command: []string{
								"sh",
								"-c",
								"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
							},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
										int64(40000),
										resource.BinarySI),
								},
							},
							VolumeMounts: []v1.VolumeMount{
								{
									Name:      gitRepoVolumeName,
									MountPath: "/cache",
								},
							},
						},
					},
					Volumes: []v1.Volume{
						{
							Name: gitRepoVolumeName,
							VolumeSource: v1.VolumeSource{
								GitRepo: &v1.GitRepoVolumeSource{
									Repository: "my-repo",
								},
							},
						},
					},
				},
			},
		},
	}

	evictionTestTimeout := 10 * time.Minute
	testCondition := "PodLocalEphemeralStorage/ContainerLocalEphemeralStorage usage limit violation"
	Context(fmt.Sprintf("EmptyDirEviction when we run containers that should cause %s", testCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			initialConfig.FeatureGates += ", LocalStorageCapacityIsolation=true"
		})
		err := utilfeature.DefaultFeatureGate.Set("LocalStorageCapacityIsolation=true")
		if err != nil {
			framework.Failf("Failed to enable feature gate for LocalStorageCapacityIsolation: %v", err)
			return
		}

		runLocalStorageIsolationEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure)
	})
})

// runLocalStorageIsolationEvictionTest sets up a testing environment given the provided pods, and checks a few things:
// pods that exceed their local storage limit are evicted
// pods that didn't exceed their local storage limit are not evicted
// runLocalStorageIsolationEvictionTest then cleans up the testing environment by deleting the provided pods.
func runLocalStorageIsolationEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podEvictSpec, evictionTestTimeout time.Duration,
	hasPressureCondition func(*framework.Framework, string) (bool, error)) {

	Context(fmt.Sprintf("EmptyDirEviction when we run containers that should cause %s", testCondition), func() {

		BeforeEach(func() {
			By("setting up pods to be used by tests")

			for _, spec := range podTestSpecs {
				By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
				f.PodClient().CreateSync(&spec.pod)
			}
		})

		It(fmt.Sprintf("Test should eventually see %s, and then evict the correct pods", testCondition), func() {
			evictNum := 0
			evictMap := make(map[string]string)
			Eventually(func() error {
				// Gather current information
				updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
				if err != nil {
					return fmt.Errorf("failed to get the list of pods: %v", err)
				}
				updatedPods := updatedPodList.Items

				for _, p := range updatedPods {
					framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
					for _, testPod := range podTestSpecs {
						if p.Name == testPod.pod.Name {
							if !testPod.evicted {
								Expect(p.Status.Phase).NotTo(Equal(v1.PodFailed),
									fmt.Sprintf("%s pod failed (and shouldn't have failed)", p.Name))
							} else {
								if _, ok := evictMap[p.Name]; !ok && p.Status.Phase == v1.PodFailed {
									evictNum++
									evictMap[p.Name] = p.Name
								}
							}
						}
					}

				}
				if evictNum == totalEvict {
					return nil
				}
				return fmt.Errorf("pods that caused %s have not been evicted", testCondition)
			}, evictionTestTimeout, evictionPollInterval).Should(BeNil())

			By("making sure we can start a new pod after the test")
			podName := "test-admit-pod"
			f.PodClient().CreateSync(&v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: podName,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: framework.GetPauseImageNameForHostArch(),
							Name:  podName,
						},
					},
				},
			})
		})

		AfterEach(func() {
			By("deleting pods")
			for _, spec := range podTestSpecs {
				By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
				f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
			}

			if CurrentGinkgoTestDescription().Failed {
				if framework.TestContext.DumpLogsOnFailure {
					logPodEvents(f)
					logNodeEvents(f)
				}
				By("sleeping to allow for cleanup of test")
				time.Sleep(postTestConditionMonitoringPeriod)
			}
		})
	})
}

@@ -100,17 +100,14 @@ func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(ini
	BeforeEach(func() {
		configEnabled, err := isKubeletConfigEnabled(f)
		framework.ExpectNoError(err)
		if configEnabled {
			oldCfg, err = getCurrentKubeletConfig()
			framework.ExpectNoError(err)
			newCfg := oldCfg.DeepCopy()
			updateFunction(newCfg)
			framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
		} else {
			framework.Logf("The Dynamic Kubelet Configuration feature is not enabled.\n" +
				"Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n" +
				"For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
		}
		Expect(configEnabled).To(BeTrue(), "The Dynamic Kubelet Configuration feature is not enabled.\n"+
			"Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n"+
			"For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
		oldCfg, err = getCurrentKubeletConfig()
		framework.ExpectNoError(err)
		newCfg := oldCfg.DeepCopy()
		updateFunction(newCfg)
		framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
	})
	AfterEach(func() {
		if oldCfg != nil {