Merge pull request #52373 from dashpole/eviction_cleanup
Automatic merge from submit-queue (batch tested with PRs 52960, 52373). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md

Refactor eviction tests

Fixes #52203

We have a bunch of eviction tests, each of which breaks independently and takes a large amount of time to fix. This refactors the tests to share the core eviction testing logic: each test only needs to set kubelet flags and specify which pods to run. I decided to omit the memory eviction tests because they work; best not to disturb them. A large portion of the code changes come from renaming inode_eviction_test.go -> eviction_test.go. This should probably wait until after https://github.com/kubernetes/kubernetes/pull/50392.

/assign @mtaufen @Random-Liu
commit
407bef47f8
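For orientation before the diff: after this refactor, an eviction test reduces to two pieces of shared machinery introduced in eviction_test.go below — tempSetCurrentKubeletConfig (to set the kubelet flags that should trigger pressure) and runEvictionTest (given a list of podEvictSpec entries). The following is only a sketch of that pattern using the helpers defined later in this diff; the describe name and the threshold value are illustrative placeholders, not part of the change.

// Sketch only: an eviction test written against the refactored helpers below.
var _ = framework.KubeDescribe("ExampleEviction [Slow] [Serial] [Disruptive]", func() {
	f := framework.NewDefaultFramework("example-eviction-test")
	expectedNodeCondition := v1.NodeDiskPressure
	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
		// 1. Set the kubelet flags that should cause the pressure condition.
		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
			initialConfig.EvictionHard = "nodefs.available<10%" // placeholder threshold
		})
		// 2. Declare the pods to run and the order in which they should be evicted.
		runEvictionTest(f, 10*time.Minute, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
			{evictionPriority: 1, pod: diskConsumingPod("example-disk-hog", 10000, nil, v1.ResourceRequirements{})},
			{evictionPriority: 0, pod: innocentPod()},
		})
	})
})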
@@ -76,23 +76,19 @@ go_library(
go_test(
    name = "go_default_test",
    srcs = [
        "allocatable_eviction_test.go",
        "apparmor_test.go",
        "cpu_manager_test.go",
        "critical_pod_test.go",
        "disk_eviction_test.go",
        "docker_test.go",
        "dockershim_checkpoint_test.go",
        "dynamic_kubelet_config_test.go",
        "e2e_node_suite_test.go",
        "eviction_test.go",
        "garbage_collector_test.go",
        "gke_environment_test.go",
        "image_id_test.go",
        "inode_eviction_test.go",
        "kubelet_test.go",
        "lifecycle_hook_test.go",
        "local_storage_allocatable_eviction_test.go",
        "local_storage_isolation_eviction_test.go",
        "log_path_test.go",
        "memory_eviction_test.go",
        "mirror_pod_test.go",
@@ -157,7 +153,6 @@ go_test(
        "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/yaml:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/watch:go_default_library",
        "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
    ] + select({
@@ -1,97 +0,0 @@ allocatable_eviction_test.go (deleted)
|
|||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e_node
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Eviction Policy is described here:
|
||||
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md
|
||||
|
||||
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
|
||||
|
||||
podTestSpecs := []podTestSpec{
|
||||
{
|
||||
evictionPriority: 1, // This pod should be evicted before the innocent pod
|
||||
pod: getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 0, // This pod should never be evicted
|
||||
pod: getInnocentPod(),
|
||||
},
|
||||
}
|
||||
evictionTestTimeout := 10 * time.Minute
|
||||
testCondition := "Memory Pressure"
|
||||
|
||||
Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
|
||||
kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
|
||||
// The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb
|
||||
// We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb
|
||||
kubeReserved.Sub(resource.MustParse("300Mi"))
|
||||
initialConfig.KubeReserved = kubeletconfig.ConfigurationMap(map[string]string{string(v1.ResourceMemory): kubeReserved.String()})
|
||||
initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
|
||||
initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
|
||||
initialConfig.CgroupsPerQOS = true
|
||||
})
|
||||
// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
|
||||
Context("With kubeconfig updated", func() {
|
||||
runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// Returns TRUE if the node has Memory Pressure, FALSE otherwise
|
||||
func hasMemoryPressure(f *framework.Framework, testCondition string) (bool, error) {
|
||||
localNodeStatus := getLocalNode(f).Status
|
||||
_, pressure := nodeutil.GetNodeCondition(&localNodeStatus, v1.NodeMemoryPressure)
|
||||
Expect(pressure).NotTo(BeNil())
|
||||
hasPressure := pressure.Status == v1.ConditionTrue
|
||||
By(fmt.Sprintf("checking if pod has %s: %v", testCondition, hasPressure))
|
||||
|
||||
// Additional Logging relating to Memory
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
|
||||
framework.Logf("Node.Memory.WorkingSetBytes: %d, summary.Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
|
||||
}
|
||||
for _, pod := range summary.Pods {
|
||||
framework.Logf("Pod: %s", pod.PodRef.Name)
|
||||
for _, container := range pod.Containers {
|
||||
if container.Memory != nil && container.Memory.WorkingSetBytes != nil {
|
||||
framework.Logf("--- summary Container: %s WorkingSetBytes: %d", container.Name, *container.Memory.WorkingSetBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
return hasPressure, nil
|
||||
}
|
|
@@ -1,258 +0,0 @@ disk_eviction_test.go (deleted)
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e_node
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/uuid"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
)
|
||||
|
||||
const (
|
||||
// podCheckInterval is the interval seconds between pod status checks.
|
||||
podCheckInterval = time.Second * 2
|
||||
|
||||
// containerGCPeriod is the period of container garbage collect loop. It should be the same
|
||||
// with ContainerGCPeriod in kubelet.go. However we don't want to include kubelet package
|
||||
// directly which will introduce a lot more dependencies.
|
||||
containerGCPeriod = time.Minute * 1
|
||||
|
||||
dummyFile = "dummy."
|
||||
)
|
||||
|
||||
// TODO: Leverage dynamic Kubelet settings when it's implemented to only modify the kubelet eviction option in this test.
|
||||
var _ = framework.KubeDescribe("Kubelet Eviction Manager [Serial] [Disruptive]", func() {
|
||||
f := framework.NewDefaultFramework("kubelet-eviction-manager")
|
||||
var podClient *framework.PodClient
|
||||
var c clientset.Interface
|
||||
|
||||
BeforeEach(func() {
|
||||
podClient = f.PodClient()
|
||||
c = f.ClientSet
|
||||
})
|
||||
|
||||
Describe("hard eviction test", func() {
|
||||
Context("pod using the most disk space gets evicted when the node disk usage is above the eviction hard threshold", func() {
|
||||
var busyPodName, idlePodName, verifyPodName string
|
||||
|
||||
BeforeEach(func() {
|
||||
if !isImageSupported() {
|
||||
framework.Skipf("test skipped because the image is not supported by the test")
|
||||
}
|
||||
if !evictionOptionIsSet() {
|
||||
framework.Skipf("test skipped because eviction option is not set")
|
||||
}
|
||||
|
||||
busyPodName = "to-evict" + string(uuid.NewUUID())
|
||||
idlePodName = "idle" + string(uuid.NewUUID())
|
||||
verifyPodName = "verify" + string(uuid.NewUUID())
|
||||
createIdlePod(idlePodName, podClient)
|
||||
podClient.Create(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: busyPodName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: busyPodName,
|
||||
// Filling the disk
|
||||
Command: []string{"sh", "-c",
|
||||
fmt.Sprintf("for NUM in `seq 1 1 100000`; do dd if=/dev/urandom of=%s.$NUM bs=50000000 count=10; sleep 0.5; done",
|
||||
dummyFile)},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
if !isImageSupported() || !evictionOptionIsSet() { // Skip the after each
|
||||
return
|
||||
}
|
||||
podClient.DeleteSync(busyPodName, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||
podClient.DeleteSync(idlePodName, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||
podClient.DeleteSync(verifyPodName, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||
|
||||
// Wait for 2 container gc loop to ensure that the containers are deleted. The containers
|
||||
// created in this test consume a lot of disk, we don't want them to trigger disk eviction
|
||||
// again after the test.
|
||||
time.Sleep(containerGCPeriod * 2)
|
||||
|
||||
if framework.TestContext.PrepullImages {
|
||||
// The disk eviction test may cause the prepulled images to be evicted,
|
||||
// prepull those images again to ensure this test not affect following tests.
|
||||
PrePullAllImages()
|
||||
}
|
||||
})
|
||||
|
||||
It("should evict the pod using the most disk space [Slow]", func() {
|
||||
evictionOccurred := false
|
||||
nodeDiskPressureCondition := false
|
||||
podRescheduleable := false
|
||||
Eventually(func() error {
|
||||
// Avoid the test using up all the disk space
|
||||
err := checkDiskUsage(0.05)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// The pod should be evicted.
|
||||
if !evictionOccurred {
|
||||
podData, err := podClient.Get(busyPodName, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = verifyPodEviction(podData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
podData, err = podClient.Get(idlePodName, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if podData.Status.Phase != v1.PodRunning {
|
||||
err = verifyPodEviction(podData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
evictionOccurred = true
|
||||
return fmt.Errorf("waiting for node disk pressure condition to be set")
|
||||
}
|
||||
|
||||
// The node should have disk pressure condition after the pods are evicted.
|
||||
if !nodeDiskPressureCondition {
|
||||
if !nodeHasDiskPressure(f.ClientSet) {
|
||||
return fmt.Errorf("expected disk pressure condition is not set")
|
||||
}
|
||||
nodeDiskPressureCondition = true
|
||||
return fmt.Errorf("waiting for node disk pressure condition to be cleared")
|
||||
}
|
||||
|
||||
// After eviction happens the pod is evicted so eventually the node disk pressure should be relieved.
|
||||
if !podRescheduleable {
|
||||
if nodeHasDiskPressure(f.ClientSet) {
|
||||
return fmt.Errorf("expected disk pressure condition relief has not happened")
|
||||
}
|
||||
createIdlePod(verifyPodName, podClient)
|
||||
podRescheduleable = true
|
||||
return fmt.Errorf("waiting for the node to accept a new pod")
|
||||
}
|
||||
|
||||
// The new pod should be able to be scheduled and run after the disk pressure is relieved.
|
||||
podData, err := podClient.Get(verifyPodName, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if podData.Status.Phase != v1.PodRunning {
|
||||
return fmt.Errorf("waiting for the new pod to be running")
|
||||
}
|
||||
|
||||
return nil
|
||||
}, time.Minute*15 /* based on n1-standard-1 machine type */, podCheckInterval).Should(BeNil())
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
func createIdlePod(podName string, podClient *framework.PodClient) {
|
||||
podClient.Create(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: framework.GetPauseImageNameForHostArch(),
|
||||
Name: podName,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func verifyPodEviction(podData *v1.Pod) error {
|
||||
if podData.Status.Phase != v1.PodFailed {
|
||||
return fmt.Errorf("expected phase to be failed. got %+v", podData.Status.Phase)
|
||||
}
|
||||
if podData.Status.Reason != "Evicted" {
|
||||
return fmt.Errorf("expected failed reason to be evicted. got %+v", podData.Status.Reason)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func nodeHasDiskPressure(cs clientset.Interface) bool {
|
||||
nodeList := framework.GetReadySchedulableNodesOrDie(cs)
|
||||
for _, condition := range nodeList.Items[0].Status.Conditions {
|
||||
if condition.Type == v1.NodeDiskPressure {
|
||||
return condition.Status == v1.ConditionTrue
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func evictionOptionIsSet() bool {
|
||||
return len(framework.TestContext.KubeletConfig.EvictionHard) > 0
|
||||
}
|
||||
|
||||
// TODO(random-liu): Use OSImage in node status to do the check.
|
||||
func isImageSupported() bool {
|
||||
// TODO: Only images with image fs is selected for testing for now. When the kubelet settings can be dynamically updated,
|
||||
// instead of skipping images the eviction thresholds should be adjusted based on the images.
|
||||
return strings.Contains(framework.TestContext.NodeName, "-gci-dev-")
|
||||
}
|
||||
|
||||
// checkDiskUsage verifies that the available bytes on disk are above the limit.
|
||||
func checkDiskUsage(limit float64) error {
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if nodeFs := summary.Node.Fs; nodeFs != nil {
|
||||
if nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
|
||||
if float64(*nodeFs.CapacityBytes)*limit > float64(*nodeFs.AvailableBytes) {
|
||||
return fmt.Errorf("available nodefs byte is less than %v%%", limit*float64(100))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if summary.Node.Runtime != nil {
|
||||
if imageFs := summary.Node.Runtime.ImageFs; imageFs != nil {
|
||||
if float64(*imageFs.CapacityBytes)*limit > float64(*imageFs.AvailableBytes) {
|
||||
return fmt.Errorf("available imagefs byte is less than %v%%", limit*float64(100))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@@ -0,0 +1,601 @@ eviction_test.go (new file)
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e_node
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Eviction Policy is described here:
|
||||
// https://github.com/kubernetes/community/blob/master/contributors/design-proposals/kubelet-eviction.md
|
||||
|
||||
const (
|
||||
postTestConditionMonitoringPeriod = 1 * time.Minute
|
||||
evictionPollInterval = 2 * time.Second
|
||||
pressureDissapearTimeout = 1 * time.Minute
|
||||
longPodDeletionTimeout = 10 * time.Minute
|
||||
// pressure conditions often surface after evictions because the kubelet only updates
|
||||
// node conditions periodically.
|
||||
// we wait this period after evictions to make sure that we wait out this delay
|
||||
pressureDelay = 20 * time.Second
|
||||
testContextFmt = "when we run containers that should cause %s"
|
||||
noPressure = v1.NodeConditionType("NoPressure")
|
||||
)
|
||||
|
||||
// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
|
||||
// Node disk pressure is induced by consuming all inodes on the node.
|
||||
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("inode-eviction-test")
|
||||
expectedNodeCondition := v1.NodeDiskPressure
|
||||
pressureTimeout := 15 * time.Minute
|
||||
inodesConsumed := uint64(200000)
|
||||
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
|
||||
summary := eventuallyGetSummary()
|
||||
inodesFree := *summary.Node.Fs.InodesFree
|
||||
if inodesFree <= inodesConsumed {
|
||||
framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
|
||||
}
|
||||
initialConfig.EvictionHard = fmt.Sprintf("nodefs.inodesFree<%d", inodesFree-inodesConsumed)
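// Illustrative numbers (not from this diff): if the node reports 2,000,000 free inodes,
// the threshold becomes "nodefs.inodesFree<1800000", so the inode-hog pods only need to
// consume roughly 200,000 inodes to cross it.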
|
||||
initialConfig.EvictionMinimumReclaim = ""
|
||||
})
|
||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: inodeConsumingPod("container-inode-hog", nil),
|
||||
},
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: inodeConsumingPod("volume-inode-hog", &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 0,
|
||||
pod: innocentPod(),
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
|
||||
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
|
||||
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
|
||||
expectedNodeCondition := v1.NodeMemoryPressure
|
||||
pressureTimeout := 10 * time.Minute
|
||||
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
|
||||
kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
|
||||
// The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb
|
||||
// We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb
|
||||
kubeReserved.Sub(resource.MustParse("300Mi"))
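// Worked example with illustrative numbers (not from this diff): on a node with 3800Mi
// memory capacity, kubeReserved becomes 3800Mi - 300Mi = 3500Mi, so
// Allocatable = 3800 - 3500 - 250 = 50Mi, and the memory-hog pod trips the allocatable
// threshold long before the node-level hard eviction threshold.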
|
||||
initialConfig.KubeReserved = kubeletconfig.ConfigurationMap(map[string]string{string(v1.ResourceMemory): kubeReserved.String()})
|
||||
initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
|
||||
initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
|
||||
initialConfig.CgroupsPerQOS = true
|
||||
})
|
||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logMemoryMetrics, []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 0,
|
||||
pod: innocentPod(),
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// LocalStorageAllocatableEviction tests that the node responds to node disk pressure by evicting only responsible pods.
|
||||
// Node disk pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
|
||||
var _ = framework.KubeDescribe("LocalStorageAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("localstorageallocatable-eviction-test")
|
||||
pressureTimeout := 10 * time.Minute
|
||||
expectedNodeCondition := v1.NodeDiskPressure
|
||||
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
|
||||
// Set up --kube-reserved for scratch storage
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
diskConsumed := uint64(200000000) // At least 200 Mb for pods to consume
|
||||
summary := eventuallyGetSummary()
|
||||
availableBytes := *(summary.Node.Fs.AvailableBytes)
|
||||
initialConfig.KubeReserved = kubeletconfig.ConfigurationMap(map[string]string{string(v1.ResourceEphemeralStorage): fmt.Sprintf("%d", availableBytes-diskConsumed)})
|
||||
initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
|
||||
initialConfig.CgroupsPerQOS = true
|
||||
initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
|
||||
if initialConfig.FeatureGates != "" {
|
||||
initialConfig.FeatureGates += ","
|
||||
}
|
||||
initialConfig.FeatureGates += "LocalStorageCapacityIsolation=true"
|
||||
// set evictionHard to be very small, so that only the allocatable eviction threshold triggers
|
||||
initialConfig.EvictionHard = "nodefs.available<1"
|
||||
initialConfig.EvictionMinimumReclaim = ""
|
||||
framework.Logf("KubeReserved: %+v", initialConfig.KubeReserved)
|
||||
})
|
||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 0,
|
||||
pod: innocentPod(),
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods
|
||||
// Disk pressure is induced by running pods which consume disk space.
|
||||
var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("localstorage-eviction-test")
|
||||
pressureTimeout := 10 * time.Minute
|
||||
expectedNodeCondition := v1.NodeDiskPressure
|
||||
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
diskConsumed := uint64(100000000) // At least 100 Mb for pods to consume
|
||||
summary := eventuallyGetSummary()
|
||||
availableBytes := *(summary.Node.Fs.AvailableBytes)
|
||||
initialConfig.EvictionHard = fmt.Sprintf("nodefs.available<%d", availableBytes-diskConsumed)
|
||||
initialConfig.EvictionMinimumReclaim = ""
|
||||
})
|
||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 0,
|
||||
pod: innocentPod(),
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods
|
||||
// Disk pressure is induced by running pods which consume disk space, which exceed the soft eviction threshold.
|
||||
// Note: This test's purpose is to test Soft Evictions. Local storage was chosen since it is the least costly to run.
|
||||
var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("localstorage-eviction-test")
|
||||
pressureTimeout := 10 * time.Minute
|
||||
expectedNodeCondition := v1.NodeDiskPressure
|
||||
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
diskConsumed := uint64(100000000) // At least 100 Mb for pods to consume
|
||||
summary := eventuallyGetSummary()
|
||||
availableBytes := *(summary.Node.Fs.AvailableBytes)
|
||||
initialConfig.EvictionSoft = fmt.Sprintf("nodefs.available<%d", availableBytes-diskConsumed)
|
||||
initialConfig.EvictionSoftGracePeriod = "nodefs.available=1m"
|
||||
// Defer to the pod default grace period
|
||||
initialConfig.EvictionMaxPodGracePeriod = 30
|
||||
initialConfig.EvictionMinimumReclaim = ""
|
||||
// Ensure that pods are not evicted because of the eviction-hard threshold
|
||||
initialConfig.EvictionHard = ""
|
||||
})
|
||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 0,
|
||||
pod: innocentPod(),
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions
|
||||
var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Flaky] [Feature:LocalStorageCapacityIsolation]", func() {
|
||||
f := framework.NewDefaultFramework("localstorage-eviction-test")
|
||||
evictionTestTimeout := 10 * time.Minute
|
||||
Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
if initialConfig.FeatureGates != "" {
|
||||
initialConfig.FeatureGates += ","
|
||||
}
|
||||
initialConfig.FeatureGates += "LocalStorageCapacityIsolation=true"
|
||||
initialConfig.EvictionHard = ""
|
||||
})
|
||||
sizeLimit := resource.MustParse("100Mi")
|
||||
used := int64(200) // Consume 200 Mb
|
||||
containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
|
||||
|
||||
runEvictionTest(f, evictionTestTimeout, noPressure, logDiskMetrics, []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 1, // This pod should be evicted because of an emptyDir (default storage type) usage violation
|
||||
pod: diskConsumingPod("emptydir-disk-sizelimit", used, &v1.VolumeSource{
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
|
||||
}, v1.ResourceRequirements{}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 1, // This pod should be evicted because of memory emptyDir usage violation
|
||||
pod: diskConsumingPod("emptydir-memory-sizelimit", used, &v1.VolumeSource{
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sizeLimit},
|
||||
}, v1.ResourceRequirements{}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 1, // This pod should cross the container limit by writing to its writable layer.
|
||||
pod: diskConsumingPod("container-disk-limit", used, nil, v1.ResourceRequirements{Limits: containerLimit}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 1, // This pod should hit the container limit by writing to an emptydir
|
||||
pod: diskConsumingPod("container-emptydir-disk-limit", used, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}},
|
||||
v1.ResourceRequirements{Limits: containerLimit}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 0, // This pod should not be evicted because it uses less than its limit
|
||||
pod: diskConsumingPod("emptydir-disk-below-sizelimit", int64(50), &v1.VolumeSource{
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
|
||||
}, v1.ResourceRequirements{}),
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
|
||||
type podEvictSpec struct {
|
||||
// P0 should never be evicted, P1 shouldn't evict before P2, etc.
|
||||
// If two are ranked at P1, either is permitted to fail before the other.
|
||||
// The test ends when all pods other than p0 have been evicted
|
||||
evictionPriority int
|
||||
pod *v1.Pod
|
||||
}
|
||||
|
||||
// runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
|
||||
// It ensures that the desired expectedNodeCondition is actually triggered.
|
||||
// It ensures that evictionPriority 0 pods are not evicted
|
||||
// It ensures that pods with a higher evictionPriority value are always evicted before pods with a lower value (priority 2 is evicted before priority 1, etc.)
|
||||
// It ensures that all pods with non-zero evictionPriority are eventually evicted.
|
||||
// runEvictionTest then cleans up the testing environment by deleting provided pods, and ensures that expectedNodeCondition no longer exists
|
||||
func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, logFunc func(), testSpecs []podEvictSpec) {
|
||||
// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
|
||||
Context("", func() {
|
||||
BeforeEach(func() {
|
||||
// Nodes do not immediately report local storage capacity
|
||||
// Sleep so that pods requesting local storage do not fail to schedule
|
||||
time.Sleep(30 * time.Second)
|
||||
By("seting up pods to be used by tests")
|
||||
for _, spec := range testSpecs {
|
||||
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
|
||||
f.PodClient().CreateSync(spec.pod)
|
||||
}
|
||||
})
|
||||
|
||||
It("should eventually evict all of the correct pods", func() {
|
||||
By(fmt.Sprintf("Waiting for node to have NodeCondition: %s", expectedNodeCondition))
|
||||
Eventually(func() error {
|
||||
logFunc()
|
||||
if expectedNodeCondition == noPressure || hasNodeCondition(f, expectedNodeCondition) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("NodeCondition: %s not encountered", expectedNodeCondition)
|
||||
}, pressureTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("Waiting for evictions to occur")
|
||||
Eventually(func() error {
|
||||
if expectedNodeCondition != noPressure {
|
||||
if hasNodeCondition(f, expectedNodeCondition) {
|
||||
framework.Logf("Node has %s", expectedNodeCondition)
|
||||
} else {
|
||||
framework.Logf("Node does NOT have %s", expectedNodeCondition)
|
||||
}
|
||||
}
|
||||
logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
|
||||
logFunc()
|
||||
return verifyEvictionOrdering(f, testSpecs)
|
||||
}, pressureTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
|
||||
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
|
||||
// evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this delay
|
||||
By("making sure pressure from test has surfaced before continuing")
|
||||
time.Sleep(pressureDelay)
|
||||
|
||||
By(fmt.Sprintf("Waiting for NodeCondition: %s to no longer exist on the node", expectedNodeCondition))
|
||||
Eventually(func() error {
|
||||
logFunc()
|
||||
logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
|
||||
if expectedNodeCondition != noPressure && hasNodeCondition(f, expectedNodeCondition) {
|
||||
return fmt.Errorf("Conditions havent returned to normal, node still has %s", expectedNodeCondition)
|
||||
}
|
||||
return nil
|
||||
}, pressureDissapearTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("checking for stable, pressure-free condition without unexpected pod failures")
|
||||
Consistently(func() error {
|
||||
if expectedNodeCondition != noPressure && hasNodeCondition(f, expectedNodeCondition) {
|
||||
return fmt.Errorf("%s dissappeared and then reappeared", expectedNodeCondition)
|
||||
}
|
||||
logFunc()
|
||||
logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
|
||||
return verifyEvictionOrdering(f, testSpecs)
|
||||
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
By("deleting pods")
|
||||
for _, spec := range testSpecs {
|
||||
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
|
||||
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
|
||||
}
|
||||
By("making sure we can start a new pod after the test")
|
||||
podName := "test-admit-pod"
|
||||
f.PodClient().CreateSync(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: framework.GetPauseImageNameForHostArch(),
|
||||
Name: podName,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if CurrentGinkgoTestDescription().Failed {
|
||||
if framework.TestContext.DumpLogsOnFailure {
|
||||
logPodEvents(f)
|
||||
logNodeEvents(f)
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// verifyEvictionOrdering returns an error if all non-zero priority pods have not been evicted, nil otherwise
|
||||
// This function panics (via Expect) if eviction ordering is violated, or if a priority-zero pod fails.
|
||||
func verifyEvictionOrdering(f *framework.Framework, testSpecs []podEvictSpec) error {
|
||||
// Gather current information
|
||||
updatedPodList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
updatedPods := updatedPodList.Items
|
||||
for _, p := range updatedPods {
|
||||
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
|
||||
}
|
||||
|
||||
By("checking eviction ordering and ensuring important pods dont fail")
|
||||
done := true
|
||||
for _, priorityPodSpec := range testSpecs {
|
||||
var priorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == priorityPodSpec.pod.Name {
|
||||
priorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(priorityPod).NotTo(BeNil())
|
||||
|
||||
// Check eviction ordering.
|
||||
// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round,
|
||||
// but never alright for a priority 1 pod to fail while the priority 2 pod is still running
|
||||
for _, lowPriorityPodSpec := range testSpecs {
|
||||
var lowPriorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == lowPriorityPodSpec.pod.Name {
|
||||
lowPriorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(lowPriorityPod).NotTo(BeNil())
|
||||
if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("priority %d pod: %s failed before priority %d pod: %s",
|
||||
priorityPodSpec.evictionPriority, priorityPodSpec.pod.Name, lowPriorityPodSpec.evictionPriority, lowPriorityPodSpec.pod.Name))
|
||||
}
|
||||
}
|
||||
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("priority 0 pod: %s failed", priorityPod.Name))
|
||||
}
|
||||
|
||||
// If a pod that is not evictionPriority 0 has not been evicted, we are not done
|
||||
if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
|
||||
done = false
|
||||
}
|
||||
}
|
||||
if done {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("pods that should be evicted are still running")
|
||||
}
|
||||
|
||||
// Returns TRUE if the node has the node condition, FALSE otherwise
|
||||
func hasNodeCondition(f *framework.Framework, expectedNodeCondition v1.NodeConditionType) bool {
|
||||
localNodeStatus := getLocalNode(f).Status
|
||||
_, actualNodeCondition := nodeutil.GetNodeCondition(&localNodeStatus, expectedNodeCondition)
|
||||
Expect(actualNodeCondition).NotTo(BeNil())
|
||||
return actualNodeCondition.Status == v1.ConditionTrue
|
||||
}
|
||||
|
||||
func logInodeMetrics() {
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
framework.Logf("Error getting summary: %v", err)
|
||||
return
|
||||
}
|
||||
if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.Inodes != nil && summary.Node.Runtime.ImageFs.InodesFree != nil {
|
||||
framework.Logf("imageFsInfo.Inodes: %d, imageFsInfo.InodesFree: %d", *summary.Node.Runtime.ImageFs.Inodes, *summary.Node.Runtime.ImageFs.InodesFree)
|
||||
}
|
||||
if summary.Node.Fs != nil && summary.Node.Fs.Inodes != nil && summary.Node.Fs.InodesFree != nil {
|
||||
framework.Logf("rootFsInfo.Inodes: %d, rootFsInfo.InodesFree: %d", *summary.Node.Fs.Inodes, *summary.Node.Fs.InodesFree)
|
||||
}
|
||||
for _, pod := range summary.Pods {
|
||||
framework.Logf("Pod: %s", pod.PodRef.Name)
|
||||
for _, container := range pod.Containers {
|
||||
if container.Rootfs != nil && container.Rootfs.InodesUsed != nil {
|
||||
framework.Logf("--- summary Container: %s inodeUsage: %d", container.Name, *container.Rootfs.InodesUsed)
|
||||
}
|
||||
}
|
||||
for _, volume := range pod.VolumeStats {
|
||||
if volume.FsStats.InodesUsed != nil {
|
||||
framework.Logf("--- summary Volume: %s inodeUsage: %d", volume.Name, *volume.FsStats.InodesUsed)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func logDiskMetrics() {
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
framework.Logf("Error getting summary: %v", err)
|
||||
return
|
||||
}
|
||||
if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.CapacityBytes != nil && summary.Node.Runtime.ImageFs.AvailableBytes != nil {
|
||||
framework.Logf("imageFsInfo.CapacityBytes: %d, imageFsInfo.AvailableBytes: %d", *summary.Node.Runtime.ImageFs.CapacityBytes, *summary.Node.Runtime.ImageFs.AvailableBytes)
|
||||
}
|
||||
if summary.Node.Fs != nil && summary.Node.Fs.CapacityBytes != nil && summary.Node.Fs.AvailableBytes != nil {
|
||||
framework.Logf("rootFsInfo.CapacityBytes: %d, rootFsInfo.AvailableBytes: %d", *summary.Node.Fs.CapacityBytes, *summary.Node.Fs.AvailableBytes)
|
||||
}
|
||||
for _, pod := range summary.Pods {
|
||||
framework.Logf("Pod: %s", pod.PodRef.Name)
|
||||
for _, container := range pod.Containers {
|
||||
if container.Rootfs != nil && container.Rootfs.UsedBytes != nil {
|
||||
framework.Logf("--- summary Container: %s UsedBytes: %d", container.Name, *container.Rootfs.UsedBytes)
|
||||
}
|
||||
}
|
||||
for _, volume := range pod.VolumeStats {
|
||||
if volume.FsStats.UsedBytes != nil { // check the field that is actually logged below to avoid a nil dereference
|
||||
framework.Logf("--- summary Volume: %s UsedBytes: %d", volume.Name, *volume.FsStats.UsedBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func logMemoryMetrics() {
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
framework.Logf("Error getting summary: %v", err)
|
||||
return
|
||||
}
|
||||
if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
|
||||
framework.Logf("Node.Memory.WorkingSetBytes: %d, summary.Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
|
||||
}
|
||||
for _, pod := range summary.Pods {
|
||||
framework.Logf("Pod: %s", pod.PodRef.Name)
|
||||
for _, container := range pod.Containers {
|
||||
if container.Memory != nil && container.Memory.WorkingSetBytes != nil {
|
||||
framework.Logf("--- summary Container: %s WorkingSetBytes: %d", container.Name, *container.Memory.WorkingSetBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func eventuallyGetSummary() (s *stats.Summary) {
|
||||
Eventually(func() error {
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if summary == nil || summary.Node.Fs == nil || summary.Node.Fs.InodesFree == nil || summary.Node.Fs.AvailableBytes == nil {
|
||||
return fmt.Errorf("some part of data is nil")
|
||||
}
|
||||
s = summary
|
||||
return nil
|
||||
}, time.Minute, evictionPollInterval).Should(BeNil())
|
||||
return
|
||||
}
|
||||
|
||||
// returns a pod that does not use any resources
|
||||
func innocentPod() *v1.Pod {
|
||||
return &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "innocent-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "innocent-container",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"while true; do sleep 5; done",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
volumeMountPath = "/test-mnt"
|
||||
volumeName = "test-volume"
|
||||
)
|
||||
|
||||
func inodeConsumingPod(name string, volumeSource *v1.VolumeSource) *v1.Pod {
|
||||
// Each iteration creates an empty file
|
||||
return podWithCommand(volumeSource, v1.ResourceRequirements{}, name, "i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;")
|
||||
}
|
||||
|
||||
func diskConsumingPod(name string, diskConsumedMB int64, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
|
||||
// Each iteration writes 100 MB to the file (dd bs=100 count=1000000), so diskConsumedMB/100 iterations consume roughly diskConsumedMB megabytes in total
|
||||
return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB/100)+" do dd if=/dev/urandom of=%s${i} bs=100 count=1000000; i=$(($i+1)); done; while true; do sleep 5; done")
|
||||
}
|
||||
|
||||
// podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
|
||||
// If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
|
||||
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, name, command string) *v1.Pod {
|
||||
path := ""
|
||||
volumeMounts := []v1.VolumeMount{}
|
||||
volumes := []v1.Volume{}
|
||||
if volumeSource != nil {
|
||||
path = volumeMountPath
|
||||
volumeMounts = []v1.VolumeMount{{MountPath: volumeMountPath, Name: volumeName}}
|
||||
volumes = []v1.Volume{{Name: volumeName, VolumeSource: *volumeSource}}
|
||||
}
|
||||
return &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod", name)},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: fmt.Sprintf("%s-container", name),
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
fmt.Sprintf(command, filepath.Join(path, "file")),
|
||||
},
|
||||
Resources: resources,
|
||||
VolumeMounts: volumeMounts,
|
||||
},
|
||||
},
|
||||
Volumes: volumes,
|
||||
},
|
||||
}
|
||||
}
|
|
@@ -1,371 +0,0 @@ inode_eviction_test.go (deleted)
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e_node
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Eviction Policy is described here:
|
||||
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md
|
||||
|
||||
const (
|
||||
postTestConditionMonitoringPeriod = 2 * time.Minute
|
||||
evictionPollInterval = 2 * time.Second
|
||||
// pressure conditions often surface after evictions because of a delay in propagation of metrics to pressure
|
||||
// we wait this period after evictions to make sure that we wait out this delay
|
||||
pressureDelay = 20 * time.Second
|
||||
)
|
||||
|
||||
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("inode-eviction-test")
|
||||
|
||||
volumeMountPath := "/test-empty-dir-mnt"
|
||||
podTestSpecs := []podTestSpec{
|
||||
{
|
||||
evictionPriority: 1, // This pod should be evicted before the normal memory usage pod
|
||||
pod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "container-inode-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "container-inode-hog-container",
|
||||
Command: getInodeConsumingCommand(""),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
evictionPriority: 1, // This pod should be evicted before the normal memory usage pod
|
||||
pod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "volume-inode-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "volume-inode-hog-container",
|
||||
Command: getInodeConsumingCommand(volumeMountPath),
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{MountPath: volumeMountPath, Name: "test-empty-dir"},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{Name: "test-empty-dir", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
evictionPriority: 0, // This pod should never be evicted
|
||||
pod: getInnocentPod(),
|
||||
},
|
||||
}
|
||||
evictionTestTimeout := 15 * time.Minute
|
||||
testCondition := "Disk Pressure due to Inodes"
|
||||
inodesConsumed := uint64(200000)
|
||||
|
||||
Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
|
||||
inodesFree := getInodesFree()
|
||||
if inodesFree <= inodesConsumed {
|
||||
framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
|
||||
}
|
||||
initialConfig.EvictionHard = fmt.Sprintf("nodefs.inodesFree<%d", getInodesFree()-inodesConsumed)
|
||||
initialConfig.EvictionMinimumReclaim = ""
|
||||
})
|
||||
// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
|
||||
Context("With kubeconfig updated", func() {
|
||||
runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
|
||||
type podTestSpec struct {
|
||||
// 0 should never be evicted, 1 shouldn't evict before 2, etc.
|
||||
// If two are ranked at 1, either is permitted to fail before the other.
|
||||
// The test ends when all other than the 0 have been evicted
|
||||
evictionPriority int
|
||||
pod *v1.Pod
|
||||
}
|
||||
|
||||
// runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
|
||||
// It ensures that the desired testCondition is actually triggered.
|
||||
// It ensures that evictionPriority 0 pods are not evicted
|
||||
// It ensures that pods with a higher evictionPriority value are always evicted before pods with a lower value (priority 2 is evicted before priority 1, etc.)
|
||||
// It ensures that all lower evictionPriority pods are eventually evicted.
|
||||
// runEvictionTest then cleans up the testing environment by deleting the provided pods, and ensures that testCondition no longer exists
|
||||
func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec, evictionTestTimeout time.Duration,
|
||||
hasPressureCondition func(*framework.Framework, string) (bool, error)) {
|
||||
BeforeEach(func() {
|
||||
By("seting up pods to be used by tests")
|
||||
for _, spec := range podTestSpecs {
|
||||
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
|
||||
f.PodClient().CreateSync(spec.pod)
|
||||
}
|
||||
})
|
||||
|
||||
It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
|
||||
configEnabled, err := isKubeletConfigEnabled(f)
|
||||
framework.ExpectNoError(err)
|
||||
if !configEnabled {
|
||||
framework.Skipf("Dynamic kubelet config must be enabled for this test to run.")
|
||||
}
|
||||
Eventually(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hasPressure {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("Condition: %s not encountered", testCondition)
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
Eventually(func() error {
|
||||
// Gather current information
|
||||
updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
updatedPods := updatedPodList.Items
|
||||
for _, p := range updatedPods {
|
||||
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
|
||||
}
|
||||
logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
|
||||
_, err = hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
By("checking eviction ordering and ensuring important pods dont fail")
|
||||
done := true
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
var priorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == priorityPodSpec.pod.Name {
|
||||
priorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(priorityPod).NotTo(BeNil())
|
||||
|
||||
// Check eviction ordering.
|
||||
// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round,
|
||||
// but never alright for a priority 1 pod to fail while the priority 2 pod is still running
|
||||
for _, lowPriorityPodSpec := range podTestSpecs {
|
||||
var lowPriorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == lowPriorityPodSpec.pod.Name {
|
||||
lowPriorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(lowPriorityPod).NotTo(BeNil())
|
||||
if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
|
||||
}
|
||||
}
|
||||
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
|
||||
}
|
||||
|
||||
// If a pod that is not evictionPriority 0 has not been evicted, we are not done
|
||||
if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
|
||||
done = false
|
||||
}
|
||||
}
|
||||
if done {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
|
||||
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
|
||||
// evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this delay
|
||||
By("making sure pressure from test has surfaced before continuing")
|
||||
time.Sleep(pressureDelay)
|
||||
|
||||
By("making sure conditions eventually return to normal")
|
||||
Eventually(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
|
||||
}
|
||||
return nil
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("making sure conditions do not return, and that pods that shouldnt fail dont fail")
|
||||
Consistently(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
// Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug)
|
||||
// Do not fail the test when this occurs, since this is expected to happen occasionally.
|
||||
framework.Logf("Failed to check pressure condition. Error: %v", err)
|
||||
return nil
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("%s dissappeared and then reappeared", testCondition)
|
||||
}
|
||||
// Gather current information
|
||||
updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
for _, p := range updatedPodList.Items {
|
||||
if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed {
|
||||
logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
|
||||
return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
By("deleting pods")
|
||||
for _, spec := range podTestSpecs {
|
||||
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
|
||||
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
|
||||
}
|
||||
|
||||
By("making sure we can start a new pod after the test")
|
||||
podName := "test-admit-pod"
|
||||
f.PodClient().CreateSync(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: framework.GetPauseImageNameForHostArch(),
|
||||
Name: podName,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if CurrentGinkgoTestDescription().Failed && framework.TestContext.DumpLogsOnFailure {
|
||||
logPodEvents(f)
|
||||
logNodeEvents(f)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Returns true if the node has disk pressure caused by inode exhaustion, false otherwise
|
||||
func hasInodePressure(f *framework.Framework, testCondition string) (bool, error) {
|
||||
localNodeStatus := getLocalNode(f).Status
|
||||
_, pressure := nodeutil.GetNodeCondition(&localNodeStatus, v1.NodeDiskPressure)
|
||||
Expect(pressure).NotTo(BeNil())
|
||||
hasPressure := pressure.Status == v1.ConditionTrue
|
||||
By(fmt.Sprintf("checking if pod has %s: %v", testCondition, hasPressure))
|
||||
|
||||
// Additional Logging relating to Inodes
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.Inodes != nil && summary.Node.Runtime.ImageFs.InodesFree != nil {
|
||||
framework.Logf("imageFsInfo.Inodes: %d, imageFsInfo.InodesFree: %d", *summary.Node.Runtime.ImageFs.Inodes, *summary.Node.Runtime.ImageFs.InodesFree)
|
||||
}
|
||||
if summary.Node.Fs != nil && summary.Node.Fs.Inodes != nil && summary.Node.Fs.InodesFree != nil {
|
||||
framework.Logf("rootFsInfo.Inodes: %d, rootFsInfo.InodesFree: %d", *summary.Node.Fs.Inodes, *summary.Node.Fs.InodesFree)
|
||||
}
|
||||
for _, pod := range summary.Pods {
|
||||
framework.Logf("Pod: %s", pod.PodRef.Name)
|
||||
for _, container := range pod.Containers {
|
||||
if container.Rootfs != nil && container.Rootfs.InodesUsed != nil {
|
||||
framework.Logf("--- summary Container: %s inodeUsage: %d", container.Name, *container.Rootfs.InodesUsed)
|
||||
}
|
||||
}
|
||||
for _, volume := range pod.VolumeStats {
|
||||
if volume.FsStats.InodesUsed != nil {
|
||||
framework.Logf("--- summary Volume: %s inodeUsage: %d", volume.Name, *volume.FsStats.InodesUsed)
|
||||
}
|
||||
}
|
||||
}
|
||||
return hasPressure, nil
|
||||
}
|
||||
|
||||
func getInodesFree() uint64 {
|
||||
var inodesFree uint64
|
||||
Eventually(func() error {
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if summary == nil || summary.Node.Fs == nil || summary.Node.Fs.InodesFree == nil {
|
||||
return fmt.Errorf("some part of data is nil")
|
||||
}
|
||||
inodesFree = *summary.Node.Fs.InodesFree
|
||||
return nil
|
||||
}, time.Minute, evictionPollInterval).Should(BeNil())
|
||||
return inodesFree
|
||||
}
|
||||
|
||||
// returns a pod that does not use any resources
|
||||
func getInnocentPod() *v1.Pod {
|
||||
return &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "innocent-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "innocent-container",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c", //make one large file
|
||||
"dd if=/dev/urandom of=largefile bs=5000000000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func getInodeConsumingCommand(path string) []string {
|
||||
return []string{
|
||||
"sh",
|
||||
"-c",
|
||||
fmt.Sprintf("i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;", filepath.Join(path, "smallfile")),
|
||||
}
|
||||
}
|
|
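For illustration only (not part of this diff): a minimal sketch of how getInodeConsumingCommand might be wired into a podTestSpec entry. The helper name, pod name, container name, and target path below are hypothetical.

// inodeConsumingPod is a hypothetical helper showing one way to plug
// getInodeConsumingCommand into an eviction test spec.
func inodeConsumingPod() podTestSpec {
	return podTestSpec{
		evictionPriority: 1, // expected to be evicted before any priority-0 pod
		pod: &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: "inode-hog-pod"},
			Spec: v1.PodSpec{
				RestartPolicy: v1.RestartPolicyNever,
				Containers: []v1.Container{
					{
						Image: busyboxImage,
						Name:  "inode-hog-container",
						// touch many small files in a loop to consume inodes
						Command: getInodeConsumingCommand("/test-dir"),
					},
				},
			},
		},
	}
}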
@ -4,8 +4,8 @@ GCE_ZONE=us-central1-f
|
|||
GCE_PROJECT=k8s-jkns-ci-node-e2e
|
||||
CLEANUP=true
|
||||
GINKGO_FLAGS='--focus="\[Flaky\]"'
|
||||
TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'
|
||||
TEST_ARGS='--feature-gates=DynamicKubeletConfig=true,LocalStorageCapacityIsolation=true'
|
||||
KUBELET_ARGS='--cgroups-per-qos=true --cgroup-root=/'
|
||||
PARALLELISM=1
|
||||
TIMEOUT=2h
|
||||
TIMEOUT=3h
|
||||
|
||||
|
|
|
@ -1,321 +0,0 @@
|
|||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e_node
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Eviction Policy is described here:
|
||||
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md
|
||||
|
||||
var _ = framework.KubeDescribe("LocalStorageAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
|
||||
f := framework.NewDefaultFramework("localstorageallocatable-eviction-test")
|
||||
evictionTestTimeout := 15 * time.Minute
|
||||
testCondition := "Evict pod due to local storage allocatable violation"
|
||||
conditionType := v1.NodeDiskPressure
|
||||
var podTestSpecs []podTestSpec
|
||||
//podTestSpecsS := make([]podTestSpec, 5)
|
||||
var diskReserve uint64
|
||||
Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
|
||||
|
||||
BeforeEach(func() {
|
||||
diskAvail, err := getDiskUsage()
|
||||
if err != nil {
|
||||
framework.ExpectNoError(err)
|
||||
}
|
||||
|
||||
diskReserve = uint64(0.8 * diskAvail / 1000000) // Reserve 0.8 * disk Capacity for kube-reserved scratch storage
|
||||
maxDisk := 10000000 // Set dd command to read and write up to 10MB at a time
|
||||
count := uint64(0.8 * diskAvail / float64(maxDisk))
|
||||
command := fmt.Sprintf("dd if=/dev/urandom of=dummy bs=%d count=%d; while true; do sleep 5; done", maxDisk, count)
|
||||
podTestSpecs = []podTestSpec{
|
||||
{
|
||||
evictionPriority: 1, // This pod should be evicted before the innocent pod
|
||||
pod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "container-disk-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "container-disk-hog-pod",
|
||||
Command: []string{"sh", "-c", command},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
evictionPriority: 0, // This pod should never be evicted
|
||||
pod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "idle-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "idle-pod",
|
||||
Command: []string{"sh", "-c",
|
||||
fmt.Sprintf("while true; do sleep 5; done")},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
})
|
||||
|
||||
// Set up --kube-reserved for scratch storage
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
framework.Logf("Set up --kube-reserved for local storage reserved %dMi", diskReserve)
|
||||
initialConfig.KubeReserved = kubeletconfig.ConfigurationMap(map[string]string{"storage": fmt.Sprintf("%dMi", diskReserve)})
|
||||
|
||||
})
|
||||
|
||||
// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
|
||||
Context("With kubeconfig updated", func() {
|
||||
runLocalStorageEvictionTest(f, conditionType, testCondition, &podTestSpecs, evictionTestTimeout, hasDiskPressure)
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
// Returns TRUE if the node has disk pressure, FALSE otherwise
|
||||
func hasDiskPressure(f *framework.Framework, conditionType v1.NodeConditionType, testCondition string) (bool, error) {
|
||||
localNodeStatus := getLocalNode(f).Status
|
||||
_, pressure := nodeutil.GetNodeCondition(&localNodeStatus, conditionType)
|
||||
Expect(pressure).NotTo(BeNil())
|
||||
hasPressure := pressure.Status == v1.ConditionTrue
|
||||
By(fmt.Sprintf("checking if pod has %s: %v", testCondition, hasPressure))
|
||||
|
||||
// Additional Logging relating to disk
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.UsedBytes != nil {
|
||||
framework.Logf("imageFsInfo.UsedBytes: %d", *summary.Node.Runtime.ImageFs.UsedBytes)
|
||||
}
|
||||
if summary.Node.Fs != nil && summary.Node.Fs.UsedBytes != nil {
|
||||
framework.Logf("rootFsInfo.UsedBytes: %d", *summary.Node.Fs.UsedBytes)
|
||||
}
|
||||
for _, pod := range summary.Pods {
|
||||
framework.Logf("Pod: %s", pod.PodRef.Name)
|
||||
for _, container := range pod.Containers {
|
||||
if container.Rootfs != nil && container.Rootfs.UsedBytes != nil {
|
||||
framework.Logf("--- summary Container: %s UsedBytes: %d", container.Name, *container.Rootfs.UsedBytes)
|
||||
}
|
||||
}
|
||||
for _, volume := range pod.VolumeStats {
|
||||
if volume.FsStats.UsedBytes != nil {
|
||||
framework.Logf("--- summary Volume: %s UsedBytes: %d", volume.Name, *volume.FsStats.UsedBytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
return hasPressure, nil
|
||||
}
|
||||
|
||||
// Pass podTestSpecsP as a reference so that it can be set up in the first BeforeEach clause
|
||||
func runLocalStorageEvictionTest(f *framework.Framework, conditionType v1.NodeConditionType, testCondition string, podTestSpecsP *[]podTestSpec, evictionTestTimeout time.Duration,
|
||||
hasPressureCondition func(*framework.Framework, v1.NodeConditionType, string) (bool, error)) {
|
||||
BeforeEach(func() {
|
||||
|
||||
By("seting up pods to be used by tests")
|
||||
for _, spec := range *podTestSpecsP {
|
||||
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
|
||||
f.PodClient().CreateSync(spec.pod)
|
||||
}
|
||||
})
|
||||
|
||||
It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
|
||||
Expect(podTestSpecsP).NotTo(BeNil())
|
||||
podTestSpecs := *podTestSpecsP
|
||||
|
||||
Eventually(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, conditionType, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hasPressure {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("Condition: %s not encountered", testCondition)
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
Eventually(func() error {
|
||||
// Gather current information
|
||||
updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
updatedPods := updatedPodList.Items
|
||||
for _, p := range updatedPods {
|
||||
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
|
||||
}
|
||||
_, err = hasPressureCondition(f, conditionType, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
By("checking eviction ordering and ensuring important pods dont fail")
|
||||
done := true
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
var priorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == priorityPodSpec.pod.Name {
|
||||
priorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(priorityPod).NotTo(BeNil())
|
||||
|
||||
// Check eviction ordering.
|
||||
// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round
|
||||
for _, lowPriorityPodSpec := range podTestSpecs {
|
||||
var lowPriorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == lowPriorityPodSpec.pod.Name {
|
||||
lowPriorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(lowPriorityPod).NotTo(BeNil())
|
||||
if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
|
||||
}
|
||||
}
|
||||
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
|
||||
}
|
||||
|
||||
// If a pod that is not evictionPriority 0 has not been evicted, we are not done
|
||||
if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
|
||||
done = false
|
||||
}
|
||||
}
|
||||
if done {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
|
||||
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
|
||||
// evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this.
|
||||
By("making sure pressure from test has surfaced before continuing")
|
||||
time.Sleep(pressureDelay)
|
||||
|
||||
By("making sure conditions eventually return to normal")
|
||||
Eventually(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, conditionType, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
|
||||
}
|
||||
return nil
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("making sure conditions do not return, and that pods that shouldnt fail dont fail")
|
||||
Consistently(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, conditionType, testCondition)
|
||||
if err != nil {
|
||||
// Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug)
|
||||
// Do not fail the test when this occurs, since this is expected to happen occasionally.
|
||||
framework.Logf("Failed to check pressure condition. Error: %v", err)
|
||||
return nil
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("%s dissappeared and then reappeared", testCondition)
|
||||
}
|
||||
// Gather current information
|
||||
updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
for _, p := range updatedPodList.Items {
|
||||
if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed {
|
||||
return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("making sure we can start a new pod after the test")
|
||||
podName := "test-admit-pod"
|
||||
f.PodClient().CreateSync(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: framework.GetPauseImageNameForHostArch(),
|
||||
Name: podName,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
By("deleting pods")
|
||||
for _, spec := range *podTestSpecsP {
|
||||
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
|
||||
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||
}
|
||||
|
||||
if CurrentGinkgoTestDescription().Failed {
|
||||
if framework.TestContext.DumpLogsOnFailure {
|
||||
logPodEvents(f)
|
||||
logNodeEvents(f)
|
||||
}
|
||||
By("sleeping to allow for cleanup of test")
|
||||
time.Sleep(postTestConditionMonitoringPeriod)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func getDiskUsage() (float64, error) {
|
||||
summary, err := getNodeSummary()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if nodeFs := summary.Node.Fs; nodeFs != nil {
|
||||
return float64(*nodeFs.AvailableBytes), nil
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("fail to get nodefs available bytes")
|
||||
|
||||
}
|
|
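For illustration only (not part of this diff): the sizing arithmetic from the BeforeEach above, pulled into a small hypothetical helper with a worked example. With roughly 100 GB of nodefs available, it reserves about 80000 Mi for kube-reserved storage and sizes the dd loop at about 8000 blocks of 10 MB.

// diskHogParameters is a hypothetical helper that mirrors the arithmetic above:
// reserve ~80% of the available bytes (expressed in Mi, as passed to --kube-reserved)
// and size the dd loop so the hog pod writes roughly the same amount.
func diskHogParameters(diskAvailBytes float64) (reserveMi uint64, ddCount uint64) {
	const maxDisk = 10000000 // dd reads/writes up to 10MB at a time
	reserveMi = uint64(0.8 * diskAvailBytes / 1000000)
	ddCount = uint64(0.8 * diskAvailBytes / float64(maxDisk))
	return reserveMi, ddCount
}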
@ -1,466 +0,0 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package e2e_node
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
)
|
||||
|
||||
type podEvictSpec struct {
|
||||
evicted bool
|
||||
pod v1.Pod
|
||||
}
|
||||
|
||||
const (
|
||||
totalEvict = 7
|
||||
)
|
||||
|
||||
// Eviction Policy is described here:
|
||||
// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md
|
||||
|
||||
var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Serial] [Disruptive] [Flaky] [Feature:LocalStorageCapacityIsolation]", func() {
|
||||
|
||||
f := framework.NewDefaultFramework("localstorage-eviction-test")
|
||||
|
||||
emptyDirVolumeName := "volume-emptydir-pod"
|
||||
gitRepoVolumeName := "volume-gitrepo-pod"
|
||||
configMapVolumeName := "volume-configmap-pod"
|
||||
downwardAPIVolumeName := "volume-downwardapi-pod"
|
||||
podTestSpecs := []podEvictSpec{
|
||||
{evicted: true, // This pod should be evicted because of an emptyDir (default storage type) usage violation
|
||||
pod: v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "emptydir-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "container-emptydir-hog-pod",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{
|
||||
Name: emptyDirVolumeName,
|
||||
MountPath: "/cache",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{
|
||||
Name: emptyDirVolumeName,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(1000), resource.BinarySI),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{evicted: true, // This pod should be evicted because of an emptyDir (memory type) usage violation
|
||||
pod: v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "emptydir-memory-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "container-emptydir-memory-pod",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{
|
||||
Name: emptyDirVolumeName,
|
||||
MountPath: "/cache",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{
|
||||
Name: emptyDirVolumeName,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
Medium: "Memory",
|
||||
SizeLimit: resource.NewQuantity(int64(10000), resource.BinarySI),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{evicted: false,
|
||||
pod: v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "container-emptydir-pod-critical"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "container-emptydir-hog-pod",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{
|
||||
Name: emptyDirVolumeName,
|
||||
MountPath: "/cache",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{
|
||||
Name: emptyDirVolumeName,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(100000), resource.BinarySI),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{evicted: true, // This pod should be evicted because of a container ephemeral storage usage violation
|
||||
pod: v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "container-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: busyboxImage,
|
||||
Name: "container-hog-pod",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 5; dd if=/dev/urandom of=target-file bs=50000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
Resources: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
|
||||
int64(40000),
|
||||
resource.BinarySI),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{evicted: true, // This pod should be evicted because of a pod ephemeral storage usage violation
|
||||
pod: v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "emptydir-container-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: "gcr.io/google_containers/busybox:1.24",
|
||||
Name: "emptydir-container-hog-pod",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
Resources: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
|
||||
int64(40000),
|
||||
resource.BinarySI),
|
||||
},
|
||||
},
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{
|
||||
Name: emptyDirVolumeName,
|
||||
MountPath: "/cache",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{
|
||||
Name: emptyDirVolumeName,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(100000), resource.BinarySI),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{evicted: true, // This pod should be evicted because of a pod ephemeral storage usage violation
|
||||
pod: v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "downward-api-container-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: "gcr.io/google_containers/busybox:1.24",
|
||||
Name: "downward-api-container-hog-pod",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
Resources: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
|
||||
int64(40000),
|
||||
resource.BinarySI),
|
||||
},
|
||||
},
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{
|
||||
Name: downwardAPIVolumeName,
|
||||
MountPath: "/cache",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{
|
||||
Name: downwardAPIVolumeName,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
DownwardAPI: &v1.DownwardAPIVolumeSource{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{evicted: true, // This pod should be evicted because of a pod ephemeral storage usage violation
|
||||
pod: v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "configmap-container-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: "gcr.io/google_containers/busybox:1.24",
|
||||
Name: "configmap-container-hog-pod",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
Resources: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
|
||||
int64(40000),
|
||||
resource.BinarySI),
|
||||
},
|
||||
},
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{
|
||||
Name: configMapVolumeName,
|
||||
MountPath: "/cache",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{
|
||||
Name: configMapVolumeName,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
ConfigMap: &v1.ConfigMapVolumeSource{
|
||||
LocalObjectReference: v1.LocalObjectReference{
|
||||
Name: "my-cfgmap",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{evicted: true, // This pod should be evicted because of a pod ephemeral storage usage violation
|
||||
pod: v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "gitrepo-container-hog-pod"},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: "gcr.io/google_containers/busybox:1.24",
|
||||
Name: "gitrepo-container-hog-pod",
|
||||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 5; dd if=/dev/urandom of=target-file of=/cache/target-file bs=50000 count=1; while true; do sleep 5; done",
|
||||
},
|
||||
Resources: v1.ResourceRequirements{
|
||||
Limits: v1.ResourceList{
|
||||
v1.ResourceEphemeralStorage: *resource.NewMilliQuantity(
|
||||
int64(40000),
|
||||
resource.BinarySI),
|
||||
},
|
||||
},
|
||||
VolumeMounts: []v1.VolumeMount{
|
||||
{
|
||||
Name: gitRepoVolumeName,
|
||||
MountPath: "/cache",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Volumes: []v1.Volume{
|
||||
{
|
||||
Name: gitRepoVolumeName,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
GitRepo: &v1.GitRepoVolumeSource{
|
||||
Repository: "my-repo",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
evictionTestTimeout := 10 * time.Minute
|
||||
testCondition := "PodLocalEphemeralStorage/ContainerLocalEphemeralStorage usage limit violation"
|
||||
Context(fmt.Sprintf("EmptyDirEviction when we run containers that should cause %s", testCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
initialConfig.FeatureGates += ", LocalStorageCapacityIsolation=true"
|
||||
})
|
||||
err := utilfeature.DefaultFeatureGate.Set("LocalStorageCapacityIsolation=true")
|
||||
if err != nil {
|
||||
framework.Failf("Failed to enable feature gate for LocalStorageCapacityIsolation: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
runLocalStorageIsolationEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure)
|
||||
})
|
||||
})
|
||||
|
||||
// runLocalStorageIsolationEvictionTest sets up a testing environment given the provided pod specs, and checks a few things:
|
||||
// pods that exceed their local storage limit are evicted
|
||||
// pods that didn't exceed their local storage limit are not evicted
|
||||
// runLocalStorageIsolationEvictionTest then cleans up the testing environment by deleting the provided pods.
|
||||
func runLocalStorageIsolationEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podEvictSpec, evictionTestTimeout time.Duration,
|
||||
hasPressureCondition func(*framework.Framework, string) (bool, error)) {
|
||||
|
||||
Context(fmt.Sprintf("EmptyDirEviction when we run containers that should cause %s", testCondition), func() {
|
||||
|
||||
BeforeEach(func() {
|
||||
By("seting up pods to be used by tests")
|
||||
|
||||
for _, spec := range podTestSpecs {
|
||||
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
|
||||
f.PodClient().CreateSync(&spec.pod)
|
||||
}
|
||||
})
|
||||
|
||||
It(fmt.Sprintf("Test should eventually see %s, and then evict the correct pods", testCondition), func() {
|
||||
evictNum := 0
|
||||
evictMap := make(map[string]string)
|
||||
Eventually(func() error {
|
||||
// Gather current information
|
||||
updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get the list of pod: %v", err)
|
||||
}
|
||||
updatedPods := updatedPodList.Items
|
||||
|
||||
for _, p := range updatedPods {
|
||||
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
|
||||
for _, testPod := range podTestSpecs {
|
||||
if p.Name == testPod.pod.Name {
|
||||
if !testPod.evicted {
|
||||
Expect(p.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed (and shouldn't have failed)", p.Name))
|
||||
} else {
|
||||
if _, ok := evictMap[p.Name]; !ok && p.Status.Phase == v1.PodFailed {
|
||||
evictNum++
|
||||
evictMap[p.Name] = p.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if evictNum == totalEvict {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("pods that caused %s have not been evicted", testCondition)
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("making sure we can start a new pod after the test")
|
||||
podName := "test-admit-pod"
|
||||
f.PodClient().CreateSync(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: framework.GetPauseImageNameForHostArch(),
|
||||
Name: podName,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
By("deleting pods")
|
||||
for _, spec := range podTestSpecs {
|
||||
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
|
||||
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||
}
|
||||
|
||||
if CurrentGinkgoTestDescription().Failed {
|
||||
if framework.TestContext.DumpLogsOnFailure {
|
||||
logPodEvents(f)
|
||||
logNodeEvents(f)
|
||||
}
|
||||
By("sleeping to allow for cleanup of test")
|
||||
time.Sleep(postTestConditionMonitoringPeriod)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
|
@ -100,17 +100,14 @@ func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(ini
|
|||
BeforeEach(func() {
|
||||
configEnabled, err := isKubeletConfigEnabled(f)
|
||||
framework.ExpectNoError(err)
|
||||
if configEnabled {
|
||||
Expect(configEnabled).To(BeTrue(), "The Dynamic Kubelet Configuration feature is not enabled.\n"+
|
||||
"Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n"+
|
||||
"For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
|
||||
oldCfg, err = getCurrentKubeletConfig()
|
||||
framework.ExpectNoError(err)
|
||||
newCfg := oldCfg.DeepCopy()
|
||||
updateFunction(newCfg)
|
||||
framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
|
||||
} else {
|
||||
framework.Logf("The Dynamic Kubelet Configuration feature is not enabled.\n" +
|
||||
"Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n" +
|
||||
"For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
|
||||
}
|
||||
})
|
||||
AfterEach(func() {
|
||||
if oldCfg != nil {
|
||||
|
|