diff --git a/pkg/controller/testutil/test_utils.go b/pkg/controller/testutil/test_utils.go
index 770df607c4..0211919102 100644
--- a/pkg/controller/testutil/test_utils.go
+++ b/pkg/controller/testutil/test_utils.go
@@ -365,6 +365,11 @@ func (f *FakeRecorder) Eventf(obj runtime.Object, eventtype, reason, messageFmt
 func (f *FakeRecorder) PastEventf(obj runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
 }
 
+// AnnotatedEventf emits a fake formatted event to the fake recorder
+func (f *FakeRecorder) AnnotatedEventf(obj runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
+	f.Eventf(obj, eventtype, reason, messageFmt, args...)
+}
+
 func (f *FakeRecorder) generateEvent(obj runtime.Object, timestamp metav1.Time, eventtype, reason, message string) {
 	f.Lock()
 	defer f.Unlock()
diff --git a/pkg/kubelet/container/helpers.go b/pkg/kubelet/container/helpers.go
index 180a3e6df2..399fa959f4 100644
--- a/pkg/kubelet/container/helpers.go
+++ b/pkg/kubelet/container/helpers.go
@@ -193,6 +193,13 @@ func (irecorder *innerEventRecorder) PastEventf(object runtime.Object, timestamp
 	}
 }
 
+func (irecorder *innerEventRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
+	if ref, ok := irecorder.shouldRecordEvent(object); ok {
+		irecorder.recorder.AnnotatedEventf(ref, annotations, eventtype, reason, messageFmt, args...)
+	}
+
+}
+
 // Pod must not be nil.
 func IsHostNetworkPod(pod *v1.Pod) bool {
 	return pod.Spec.HostNetwork
diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go
index b601fe4763..cadc6afaa6 100644
--- a/pkg/kubelet/eviction/eviction_manager.go
+++ b/pkg/kubelet/eviction/eviction_manager.go
@@ -429,7 +429,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 			if !isHardEvictionThreshold(thresholdToReclaim) {
 				gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
 			}
-			if m.evictPod(pod, gracePeriodOverride, evictionMessage(resourceToReclaim, pod, statsFunc)) {
+			message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)
+			if m.evictPod(pod, gracePeriodOverride, message, annotations) {
 				return []*v1.Pod{pod}
 			}
 		}
@@ -534,7 +535,7 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
 			used := podVolumeUsed[pod.Spec.Volumes[i].Name]
 			if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
 				// the emptyDir usage exceeds the size limit, evict the pod
-				return m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessage, pod.Spec.Volumes[i].Name, size.String()))
+				return m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessage, pod.Spec.Volumes[i].Name, size.String()), nil)
 			}
 		}
 	}
@@ -566,7 +567,7 @@ func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStat
 	podEphemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
 	if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
 		// the total usage of pod exceeds the total size limit of containers, evict the pod
-		return m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessage, podEphemeralStorageLimit.String()))
+		return m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessage, podEphemeralStorageLimit.String()), nil)
 	}
 	return false
 }
@@ -588,7 +589,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
 		if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
 			if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
-				return m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessage, containerStat.Name, ephemeralStorageThreshold.String()))
+				return m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessage, containerStat.Name, ephemeralStorageThreshold.String()), nil)
 
 			}
 		}
@@ -596,7 +597,7 @@
 	return false
 }
 
-func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string) bool {
+func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string) bool {
 	// If the pod is marked as critical and static, and support for critical pod annotations is enabled,
 	// do not evict such pods. Static pods are not re-admitted after evictions.
 	// https://github.com/kubernetes/kubernetes/issues/40573 has more details.
@@ -611,7 +612,7 @@
 		Reason:  Reason,
 	}
 	// record that we are evicting the pod
-	m.recorder.Eventf(pod, v1.EventTypeWarning, Reason, evictMsg)
+	m.recorder.AnnotatedEventf(pod, annotations, v1.EventTypeWarning, Reason, evictMsg)
 	// this is a blocking call and should only return when the pod and its containers are killed.
 	err := m.killPodFunc(pod, status, &gracePeriodOverride)
 	if err != nil {
diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go
index fd862ea1ce..1c309360a3 100644
--- a/pkg/kubelet/eviction/helpers.go
+++ b/pkg/kubelet/eviction/helpers.go
@@ -55,6 +55,12 @@ const (
 	// this prevents constantly updating the memcg notifier if synchronize
 	// is run frequently.
 	notifierRefreshInterval = 10 * time.Second
+	// OffendingContainersKey is the key in eviction event annotations for the list of container names which exceeded their requests
+	OffendingContainersKey = "offending_containers"
+	// OffendingContainersUsageKey is the key in eviction event annotations for the list of usages of the containers which exceeded their requests
+	OffendingContainersUsageKey = "offending_containers_usage"
+	// StarvedResourceKey is the key for the starved resource in eviction event annotations
+	StarvedResourceKey = "starved_resource"
 )
 
 var (
@@ -1053,12 +1059,15 @@ func buildSignalToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, wit
 	return signalToReclaimFunc
 }
 
-// evictionMessage constructs a useful message about why an eviction occurred
-func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats statsFunc) string {
-	message := fmt.Sprintf(message, resourceToReclaim)
+// evictionMessage constructs a useful message about why an eviction occurred, and annotations to provide metadata about the eviction
+func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats statsFunc) (string, map[string]string) {
+	annotations := make(map[string]string)
+	message := fmt.Sprintf(message, resourceToReclaim)
+	containers := []string{}
+	containerUsage := []string{}
 	podStats, ok := stats(pod)
 	if !ok {
-		return message
+		return message, annotations
 	}
 	for _, containerStats := range podStats.Containers {
 		for _, container := range pod.Spec.Containers {
@@ -1077,11 +1086,16 @@
 			}
 			if usage != nil && usage.Cmp(requests) > 0 {
 				message += fmt.Sprintf(containerMessage, container.Name, usage.String(), requests.String())
+				containers = append(containers, container.Name)
+				containerUsage = append(containerUsage, usage.String())
 			}
 		}
 	}
-	return message
+	annotations[OffendingContainersKey] = strings.Join(containers, ",")
+	annotations[OffendingContainersUsageKey] = strings.Join(containerUsage, ",")
+	annotations[StarvedResourceKey] = string(resourceToReclaim)
+	return message, annotations
 }
 
 // thresholdStopCh is a ThresholdStopCh which can only be closed after notifierRefreshInterval time has passed
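For reference, a minimal illustrative sketch (not part of the patch) of the annotations map that evictionMessage ends up attaching to a memory eviction event. The container name "app" and the quantities are hypothetical; the kubelet fills the real values from pod stats at eviction time.

```go
package main

import "fmt"

func main() {
	// Hypothetical contents of the eviction event annotations for a single offending container.
	annotations := map[string]string{
		"starved_resource":           "memory", // StarvedResourceKey: the resource the node ran out of
		"offending_containers":       "app",    // OffendingContainersKey: comma-separated container names over their requests
		"offending_containers_usage": "1300Mi", // OffendingContainersUsageKey: usage quantity per offending container, same order
	}
	fmt.Println(annotations)
}
```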
diff --git a/staging/src/k8s.io/client-go/tools/record/event.go b/staging/src/k8s.io/client-go/tools/record/event.go
index cc665d74e6..168dfa80c5 100644
--- a/staging/src/k8s.io/client-go/tools/record/event.go
+++ b/staging/src/k8s.io/client-go/tools/record/event.go
@@ -72,6 +72,9 @@ type EventRecorder interface {
 	// PastEventf is just like Eventf, but with an option to specify the event's 'timestamp' field.
 	PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{})
+
+	// AnnotatedEventf is just like Eventf, but with annotations attached
+	AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{})
 }
 
 // EventBroadcaster knows how to receive events and send them to any EventSink, watcher, or log.
@@ -250,7 +253,7 @@ type recorderImpl struct {
 	clock clock.Clock
 }
 
-func (recorder *recorderImpl) generateEvent(object runtime.Object, timestamp metav1.Time, eventtype, reason, message string) {
+func (recorder *recorderImpl) generateEvent(object runtime.Object, annotations map[string]string, timestamp metav1.Time, eventtype, reason, message string) {
 	ref, err := ref.GetReference(recorder.scheme, object)
 	if err != nil {
 		glog.Errorf("Could not construct reference to: '%#v' due to: '%v'. Will not report event: '%v' '%v' '%v'", object, err, eventtype, reason, message)
@@ -262,7 +265,7 @@
 		return
 	}
 
-	event := recorder.makeEvent(ref, eventtype, reason, message)
+	event := recorder.makeEvent(ref, annotations, eventtype, reason, message)
 	event.Source = recorder.source
 
 	go func() {
@@ -281,7 +284,7 @@ func validateEventType(eventtype string) bool {
 }
 
 func (recorder *recorderImpl) Event(object runtime.Object, eventtype, reason, message string) {
-	recorder.generateEvent(object, metav1.Now(), eventtype, reason, message)
+	recorder.generateEvent(object, nil, metav1.Now(), eventtype, reason, message)
 }
 
 func (recorder *recorderImpl) Eventf(object runtime.Object, eventtype, reason, messageFmt string, args ...interface{}) {
@@ -289,10 +292,14 @@ func (recorder *recorderImpl) Eventf(object runtime.Object, eventtype, reason, m
 }
 
 func (recorder *recorderImpl) PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
-	recorder.generateEvent(object, timestamp, eventtype, reason, fmt.Sprintf(messageFmt, args...))
+	recorder.generateEvent(object, nil, timestamp, eventtype, reason, fmt.Sprintf(messageFmt, args...))
 }
 
-func (recorder *recorderImpl) makeEvent(ref *v1.ObjectReference, eventtype, reason, message string) *v1.Event {
+func (recorder *recorderImpl) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
+	recorder.generateEvent(object, annotations, metav1.Now(), eventtype, reason, fmt.Sprintf(messageFmt, args...))
+}
+
+func (recorder *recorderImpl) makeEvent(ref *v1.ObjectReference, annotations map[string]string, eventtype, reason, message string) *v1.Event {
 	t := metav1.Time{Time: recorder.clock.Now()}
 	namespace := ref.Namespace
 	if namespace == "" {
@@ -300,8 +307,9 @@ func (recorder *recorderImpl) makeEvent(ref *v1.ObjectReference, eventtype, reas
 	}
 	return &v1.Event{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      fmt.Sprintf("%v.%x", ref.Name, t.UnixNano()),
-			Namespace: namespace,
+			Name:        fmt.Sprintf("%v.%x", ref.Name, t.UnixNano()),
+			Namespace:   namespace,
+			Annotations: annotations,
 		},
 		InvolvedObject: *ref,
 		Reason:         reason,
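A minimal usage sketch of the new interface method (not part of the patch). The pod object and message text are arbitrary examples, and the fake recorder from the hunk below is used only so the snippet is self-contained; a real caller would obtain a recorder from an EventBroadcaster via NewRecorder.

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/record"
)

func main() {
	// FakeRecorder implements EventRecorder, including the new AnnotatedEventf.
	recorder := record.NewFakeRecorder(10)
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "example-pod", Namespace: "default"}}

	// Same shape as Eventf, plus an annotations map that the real recorder copies onto the Event object.
	recorder.AnnotatedEventf(pod, map[string]string{"starved_resource": "memory"},
		v1.EventTypeWarning, "Evicted", "pod was evicted due to %s pressure", "memory")

	fmt.Println(<-recorder.Events) // the fake recorder only records the formatted message
}
```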
diff --git a/staging/src/k8s.io/client-go/tools/record/fake.go b/staging/src/k8s.io/client-go/tools/record/fake.go
index c0e8eedbb7..6e031daaff 100644
--- a/staging/src/k8s.io/client-go/tools/record/fake.go
+++ b/staging/src/k8s.io/client-go/tools/record/fake.go
@@ -45,6 +45,10 @@ func (f *FakeRecorder) Eventf(object runtime.Object, eventtype, reason, messageF
 func (f *FakeRecorder) PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
 }
 
+func (f *FakeRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
+	f.Eventf(object, eventtype, reason, messageFmt, args...)
+}
+
 // NewFakeRecorder creates new fake event recorder with event channel with
 // buffer of given size.
 func NewFakeRecorder(bufferSize int) *FakeRecorder {
diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD
index 9113fa745d..859fa688bc 100644
--- a/test/e2e_node/BUILD
+++ b/test/e2e_node/BUILD
@@ -152,6 +152,7 @@ go_test(
        "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+       "//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go
index 21962c62f3..3e3a55d792 100644
--- a/test/e2e_node/eviction_test.go
+++ b/test/e2e_node/eviction_test.go
@@ -20,11 +20,13 @@ import (
 	"fmt"
 	"path/filepath"
 	"strconv"
+	"strings"
 	"time"
 
 	"k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/fields"
 	nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
 	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
@@ -50,11 +52,13 @@ const (
 	// pressure conditions often surface after evictions because the kubelet only updates
 	// node conditions periodically.
 	// we wait this period after evictions to make sure that we wait out this delay
-	pressureDelay  = 20 * time.Second
-	testContextFmt = "when we run containers that should cause %s"
-	noPressure     = v1.NodeConditionType("NoPressure")
-	lotsOfDisk     = 10240      // 10 Gb in Mb
-	lotsOfFiles    = 1000000000 // 1 billion
+	pressureDelay     = 20 * time.Second
+	testContextFmt    = "when we run containers that should cause %s"
+	noPressure        = v1.NodeConditionType("NoPressure")
+	lotsOfDisk        = 10240      // 10 Gb in Mb
+	lotsOfFiles       = 1000000000 // 1 billion
+	resourceInodes    = v1.ResourceName("inodes")
+	noStarvedResource = v1.ResourceName("none")
 )
 
 // InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
@@ -62,6 +66,7 @@ const (
 var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("inode-eviction-test")
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := resourceInodes
 	pressureTimeout := 15 * time.Minute
 	inodesConsumed := uint64(200000)
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
@@ -75,7 +80,7 @@
 			initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				pod:              inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
@@ -98,6 +103,7 @@ var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]",
 	f := framework.NewDefaultFramework("image-gc-eviction-test")
 	pressureTimeout := 10 * time.Minute
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := resourceInodes
 	inodesConsumed := uint64(100000)
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -112,7 +118,7 @@
 		})
 		// Consume enough inodes to induce disk pressure,
 		// but expect that image garbage collection can reduce it enough to avoid an eviction
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 0,
 				pod:              inodeConsumingPod("container-inode", 110000, nil),
@@ -126,6 +132,7 @@
 var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
 	expectedNodeCondition := v1.NodeMemoryPressure
+	expectedStarvedResource := v1.ResourceMemory
 	pressureTimeout := 10 * time.Minute
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -140,7 +147,7 @@
 			initialConfig.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableEnforcementKey}
 			initialConfig.CgroupsPerQOS = true
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logMemoryMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				pod:              getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
@@ -159,6 +166,7 @@
 	f := framework.NewDefaultFramework("localstorage-eviction-test")
 	pressureTimeout := 10 * time.Minute
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := v1.ResourceEphemeralStorage
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
 			diskConsumed := resource.MustParse("100Mi")
@@ -167,7 +175,7 @@
 			initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
@@ -187,6 +195,7 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
 	f := framework.NewDefaultFramework("localstorage-eviction-test")
 	pressureTimeout := 10 * time.Minute
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := v1.ResourceEphemeralStorage
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
 			diskConsumed := resource.MustParse("100Mi")
@@ -204,7 +213,7 @@
 			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
 			initialConfig.EvictionHard = map[string]string{"memory.available": "0%"}
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
@@ -232,7 +241,7 @@
 		useUnderLimit := 99 /* Mb */
 		containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
 
-		runEvictionTest(f, evictionTestTimeout, noPressure, logDiskMetrics, []podEvictSpec{
+		runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1, // This pod should be evicted because emptyDir (default storage type) usage violation
 				pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
@@ -274,6 +283,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
 	expectedNodeCondition := v1.NodeMemoryPressure
+	expectedStarvedResource := v1.ResourceMemory
 	pressureTimeout := 10 * time.Minute
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -310,7 +320,7 @@
 		}
 		systemPriority := int32(2147483647)
 		specs[1].pod.Spec.Priority = &systemPriority
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logMemoryMetrics, specs)
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, specs)
 	})
 })
@@ -320,6 +330,7 @@ var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [
 var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive]", func() {
framework.NewDefaultFramework("priority-disk-eviction-ordering-test") expectedNodeCondition := v1.NodeDiskPressure + expectedStarvedResource := v1.ResourceEphemeralStorage pressureTimeout := 10 * time.Minute Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() { tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { @@ -358,7 +369,7 @@ var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Ser } systemPriority := int32(2147483647) specs[1].pod.Spec.Priority = &systemPriority - runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, specs) + runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, specs) }) }) @@ -377,7 +388,7 @@ type podEvictSpec struct { // It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.) // It ensures that all pods with non-zero evictionPriority are eventually evicted. // runEvictionTest then cleans up the testing environment by deleting provided pods, and ensures that expectedNodeCondition no longer exists -func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, logFunc func(), testSpecs []podEvictSpec) { +func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, expectedStarvedResource v1.ResourceName, logFunc func(), testSpecs []podEvictSpec) { // Place the remainder of the test within a context so that the kubelet config is set before and after the test. Context("", func() { BeforeEach(func() { @@ -442,6 +453,9 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey) return verifyEvictionOrdering(f, testSpecs) }, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil()) + + By("checking for correctly formatted eviction events") + verifyEvictionEvents(f, testSpecs, expectedStarvedResource) }) AfterEach(func() { @@ -549,6 +563,60 @@ func verifyEvictionOrdering(f *framework.Framework, testSpecs []podEvictSpec) er return fmt.Errorf("pods that should be evicted are still running") } +func verifyEvictionEvents(f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) { + for _, spec := range testSpecs { + pod := spec.pod + if spec.evictionPriority != 0 { + selector := fields.Set{ + "involvedObject.kind": "Pod", + "involvedObject.name": pod.Name, + "involvedObject.namespace": f.Namespace.Name, + "reason": eviction.Reason, + }.AsSelector().String() + podEvictEvents, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(metav1.ListOptions{FieldSelector: selector}) + Expect(err).To(BeNil(), "Unexpected error getting events during eviction test: %v", err) + Expect(len(podEvictEvents.Items)).To(Equal(1), "Expected to find 1 eviction event for pod %s, got %d", pod.Name, len(podEvictEvents.Items)) + event := podEvictEvents.Items[0] + + if expectedStarvedResource != noStarvedResource { + // Check the eviction.StarvedResourceKey + starved, found := event.Annotations[eviction.StarvedResourceKey] + Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found", + pod.Name, expectedStarvedResource) + starvedResource := v1.ResourceName(starved) + Expect(starvedResource).To(Equal(expectedStarvedResource), "Expected to the starved_resource 
+					pod.Name, expectedStarvedResource, starvedResource)
+
+				// We only check these keys for memory, because ephemeral storage evictions may be due to volume usage, in which case these values are not present
+				if expectedStarvedResource == v1.ResourceMemory {
+					// Check the eviction.OffendingContainersKey
+					offendersString, found := event.Annotations[eviction.OffendingContainersKey]
+					Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
+						pod.Name)
+					offendingContainers := strings.Split(offendersString, ",")
+					Expect(len(offendingContainers)).To(Equal(1), "Expected to find the name of the offending container in the %s annotation, but no container was found",
+						eviction.OffendingContainersKey)
+					Expect(offendingContainers[0]).To(Equal(pod.Spec.Containers[0].Name), "Expected to find the offending container %s in the %s annotation, but found %s instead",
+						pod.Spec.Containers[0].Name, eviction.OffendingContainersKey, offendingContainers[0])
+
+					// Check the eviction.OffendingContainersUsageKey
+					offendingUsageString, found := event.Annotations[eviction.OffendingContainersUsageKey]
+					Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
+						pod.Name)
+					offendingContainersUsage := strings.Split(offendingUsageString, ",")
+					Expect(len(offendingContainersUsage)).To(Equal(1), "Expected to find the offending container's usage in the %s annotation, but found %+v",
+						eviction.OffendingContainersUsageKey, offendingContainersUsage)
+					usageQuantity, err := resource.ParseQuantity(offendingContainersUsage[0])
+					Expect(err).To(BeNil(), "Expected to be able to parse pod %s's %s annotation as a quantity, but got err: %v", pod.Name, eviction.OffendingContainersUsageKey, err)
+					request := pod.Spec.Containers[0].Resources.Requests[starvedResource]
+					Expect(usageQuantity.Cmp(request)).To(Equal(1), "Expected usage of offending container: %s in pod %s to exceed its request %s",
+						usageQuantity.String(), pod.Name, request.String())
+				}
+			}
+		}
+	}
+}
+
 // Returns TRUE if the node has the node condition, FALSE otherwise
 func hasNodeCondition(f *framework.Framework, expectedNodeCondition v1.NodeConditionType) bool {
 	localNodeStatus := getLocalNode(f).Status
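For completeness, a sketch of how a consumer outside this test suite could read the new annotations back from the API. This is not part of the patch; the typed clientset is assumed to be constructed by the caller and the helper name is hypothetical.

```go
package evictionevents

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/client-go/kubernetes"
)

// PrintEvictionAnnotations lists Pod eviction events in a namespace and prints the
// annotation metadata added by this patch. Missing keys print as empty strings.
func PrintEvictionAnnotations(client kubernetes.Interface, namespace string) error {
	selector := fields.Set{
		"involvedObject.kind": "Pod",
		"reason":              "Evicted",
	}.AsSelector().String()
	events, err := client.CoreV1().Events(namespace).List(metav1.ListOptions{FieldSelector: selector})
	if err != nil {
		return err
	}
	for _, ev := range events.Items {
		fmt.Printf("%s: starved_resource=%q offending_containers=%q offending_containers_usage=%q\n",
			ev.InvolvedObject.Name,
			ev.Annotations["starved_resource"],
			ev.Annotations["offending_containers"],
			ev.Annotations["offending_containers_usage"])
	}
	return nil
}
```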