mirror of https://github.com/k3s-io/k3s

add metadata to kubelet eviction event annotations

parent 8f4674d267
commit fd1f19fc42
@@ -365,6 +365,11 @@ func (f *FakeRecorder) Eventf(obj runtime.Object, eventtype, reason, messageFmt
 func (f *FakeRecorder) PastEventf(obj runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
 }

+// AnnotatedEventf emits a fake formatted event to the fake recorder
+func (f *FakeRecorder) AnnotatedEventf(obj runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
+	f.Eventf(obj, eventtype, reason, messageFmt, args)
+}
+
 func (f *FakeRecorder) generateEvent(obj runtime.Object, timestamp metav1.Time, eventtype, reason, message string) {
 	f.Lock()
 	defer f.Unlock()
@@ -193,6 +193,13 @@ func (irecorder *innerEventRecorder) PastEventf(object runtime.Object, timestamp
 	}
 }

+func (irecorder *innerEventRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
+	if ref, ok := irecorder.shouldRecordEvent(object); ok {
+		irecorder.recorder.AnnotatedEventf(ref, annotations, eventtype, reason, messageFmt, args...)
+	}
+
+}
+
 // Pod must not be nil.
 func IsHostNetworkPod(pod *v1.Pod) bool {
 	return pod.Spec.HostNetwork
@@ -429,7 +429,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 		if !isHardEvictionThreshold(thresholdToReclaim) {
 			gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
 		}
-		if m.evictPod(pod, gracePeriodOverride, evictionMessage(resourceToReclaim, pod, statsFunc)) {
+		message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)
+		if m.evictPod(pod, gracePeriodOverride, message, annotations) {
 			return []*v1.Pod{pod}
 		}
 	}
@@ -534,7 +535,7 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
 			used := podVolumeUsed[pod.Spec.Volumes[i].Name]
 			if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
 				// the emptyDir usage exceeds the size limit, evict the pod
-				return m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessage, pod.Spec.Volumes[i].Name, size.String()))
+				return m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessage, pod.Spec.Volumes[i].Name, size.String()), nil)
 			}
 		}
 	}
@@ -566,7 +567,7 @@ func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStat
 	podEphemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
 	if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
 		// the total usage of pod exceeds the total size limit of containers, evict the pod
-		return m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessage, podEphemeralStorageLimit.String()))
+		return m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessage, podEphemeralStorageLimit.String()), nil)
 	}
 	return false
 }
@@ -588,7 +589,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P

 		if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
 			if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
-				return m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessage, containerStat.Name, ephemeralStorageThreshold.String()))
+				return m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessage, containerStat.Name, ephemeralStorageThreshold.String()), nil)

 			}
 		}
@@ -596,7 +597,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
 	return false
 }

-func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string) bool {
+func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string) bool {
 	// If the pod is marked as critical and static, and support for critical pod annotations is enabled,
 	// do not evict such pods. Static pods are not re-admitted after evictions.
 	// https://github.com/kubernetes/kubernetes/issues/40573 has more details.
@@ -611,7 +612,7 @@ func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg
 		Reason: Reason,
 	}
 	// record that we are evicting the pod
-	m.recorder.Eventf(pod, v1.EventTypeWarning, Reason, evictMsg)
+	m.recorder.AnnotatedEventf(pod, annotations, v1.EventTypeWarning, Reason, evictMsg)
 	// this is a blocking call and should only return when the pod and its containers are killed.
 	err := m.killPodFunc(pod, status, &gracePeriodOverride)
 	if err != nil {
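Taken together, the eviction-manager hunks above mean evictionMessage now returns both a human-readable message and a metadata map, and evictPod forwards that map to the recorder. A minimal runnable sketch of the new flow, using a hypothetical stand-in for the recorder type (only the shape of the calls comes from the diff; the values are illustrative):

package main

import "fmt"

// recorder is a hypothetical stand-in for the kubelet's EventRecorder.
type recorder struct{}

// AnnotatedEventf mirrors the call evictPod now makes: the same warning
// event as before, plus a map of structured annotations.
func (recorder) AnnotatedEventf(annotations map[string]string, eventtype, reason, msg string) {
	fmt.Printf("%s %s: %s %v\n", eventtype, reason, msg, annotations)
}

func main() {
	// evictionMessage now returns two values instead of one...
	message := "The node was low on resource: memory."
	annotations := map[string]string{
		"starved_resource":           "memory",
		"offending_containers":       "memory-hog",
		"offending_containers_usage": "150Mi",
	}
	// ...and evictPod threads both through to the recorder.
	recorder{}.AnnotatedEventf(annotations, "Warning", "Evicted", message)
}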
@@ -55,6 +55,12 @@ const (
 	// this prevents constantly updating the memcg notifier if synchronize
 	// is run frequently.
 	notifierRefreshInterval = 10 * time.Second
+	// OffendingContainersKey is the key in eviction event annotations for the list of container names which exceeded their requests
+	OffendingContainersKey = "offending_containers"
+	// OffendingContainersUsageKey is the key in eviction event annotations for the list of usage of containers which exceeded their requests
+	OffendingContainersUsageKey = "offending_containers_usage"
+	// StarvedResourceKey is the key for the starved resource in eviction event annotations
+	StarvedResourceKey = "starved_resource"
 )

 var (
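Because the annotation values are flat, comma-joined strings, a consumer watching eviction events splits them back apart. A small self-contained sketch of reading these keys off an event's annotation map (the key constants are copied from the diff; the sample values are hypothetical):

package main

import (
	"fmt"
	"strings"
)

// Annotation keys as defined in the hunk above.
const (
	OffendingContainersKey      = "offending_containers"
	OffendingContainersUsageKey = "offending_containers_usage"
	StarvedResourceKey          = "starved_resource"
)

func main() {
	// Hypothetical annotations from an eviction event.
	annotations := map[string]string{
		OffendingContainersKey:      "app,sidecar",
		OffendingContainersUsageKey: "120Mi,40Mi",
		StarvedResourceKey:          "memory",
	}
	// The two lists are parallel: names[i] corresponds to usage[i].
	names := strings.Split(annotations[OffendingContainersKey], ",")
	usage := strings.Split(annotations[OffendingContainersUsageKey], ",")
	for i, name := range names {
		fmt.Printf("%s used %s of starved resource %s\n",
			name, usage[i], annotations[StarvedResourceKey])
	}
}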
@@ -1053,12 +1059,15 @@ func buildSignalToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, wit
 	return signalToReclaimFunc
 }

-// evictionMessage constructs a useful message about why an eviction occurred
-func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats statsFunc) string {
-	message := fmt.Sprintf(message, resourceToReclaim)
+// evictionMessage constructs a useful message about why an eviction occurred, and annotations to provide metadata about the eviction
+func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats statsFunc) (message string, annotations map[string]string) {
+	annotations = make(map[string]string)
+	message = fmt.Sprintf(message, resourceToReclaim)
+	containers := []string{}
+	containerUsage := []string{}
 	podStats, ok := stats(pod)
 	if !ok {
-		return message
+		return
 	}
 	for _, containerStats := range podStats.Containers {
 		for _, container := range pod.Spec.Containers {
@@ -1077,11 +1086,16 @@ func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats stats
 			}
 			if usage != nil && usage.Cmp(requests) > 0 {
 				message += fmt.Sprintf(containerMessage, container.Name, usage.String(), requests.String())
+				containers = append(containers, container.Name)
+				containerUsage = append(containerUsage, usage.String())
 			}
 		}
 	}
-	return message
+	annotations[OffendingContainersKey] = strings.Join(containers, ",")
+	annotations[OffendingContainersUsageKey] = strings.Join(containerUsage, ",")
+	annotations[StarvedResourceKey] = string(resourceToReclaim)
+	return
 }

 // thresholdStopCh is a ThresholdStopCh which can only be closed after notifierRefreshInterval time has passed
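The refactor above leans on Go's named return values: message and annotations are assigned as the function proceeds, so the early-exit path when pod stats are unavailable can use a bare return. A compact illustration of the pattern, with the stats lookup reduced to a boolean (not the real function):

package main

import "fmt"

// describe mimics the named-return shape of evictionMessage: the bare
// `return` hands back whatever message/annotations hold at that point.
func describe(statsOK bool) (message string, annotations map[string]string) {
	annotations = make(map[string]string)
	message = "The node was low on resource: memory."
	if !statsOK {
		return // early exit: annotations stays empty but non-nil
	}
	annotations["starved_resource"] = "memory"
	return
}

func main() {
	msg, ann := describe(true)
	fmt.Println(msg, ann)
}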
@@ -72,6 +72,9 @@ type EventRecorder interface {

 	// PastEventf is just like Eventf, but with an option to specify the event's 'timestamp' field.
 	PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{})
+
+	// AnnotatedEventf is just like eventf, but with annotations attached
+	AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{})
 }

 // EventBroadcaster knows how to receive events and send them to any EventSink, watcher, or log.
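With the interface extended, every EventRecorder implementation gains the new method. A usage sketch against client-go's fake recorder, assuming a client-go version that includes this change (the pod name and annotation values are illustrative):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/record"
)

func main() {
	// NewFakeRecorder gives an in-memory EventRecorder whose events land on
	// a channel, which makes it easy to see what AnnotatedEventf emits.
	recorder := record.NewFakeRecorder(10)
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "memory-hog", Namespace: "default"}}
	recorder.AnnotatedEventf(pod,
		map[string]string{"starved_resource": "memory"}, // annotations introduced by this change
		v1.EventTypeWarning, "Evicted",
		"The node was low on resource: %s.", "memory")
	fmt.Println(<-recorder.Events)
}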
@@ -250,7 +253,7 @@ type recorderImpl struct {
 	clock clock.Clock
 }

-func (recorder *recorderImpl) generateEvent(object runtime.Object, timestamp metav1.Time, eventtype, reason, message string) {
+func (recorder *recorderImpl) generateEvent(object runtime.Object, annotations map[string]string, timestamp metav1.Time, eventtype, reason, message string) {
 	ref, err := ref.GetReference(recorder.scheme, object)
 	if err != nil {
 		glog.Errorf("Could not construct reference to: '%#v' due to: '%v'. Will not report event: '%v' '%v' '%v'", object, err, eventtype, reason, message)
@@ -262,7 +265,7 @@ func (recorder *recorderImpl) generateEvent(object runtime.Object, timestamp met
 		return
 	}

-	event := recorder.makeEvent(ref, eventtype, reason, message)
+	event := recorder.makeEvent(ref, annotations, eventtype, reason, message)
 	event.Source = recorder.source

 	go func() {
@@ -281,7 +284,7 @@ func validateEventType(eventtype string) bool {
 }

 func (recorder *recorderImpl) Event(object runtime.Object, eventtype, reason, message string) {
-	recorder.generateEvent(object, metav1.Now(), eventtype, reason, message)
+	recorder.generateEvent(object, nil, metav1.Now(), eventtype, reason, message)
 }

 func (recorder *recorderImpl) Eventf(object runtime.Object, eventtype, reason, messageFmt string, args ...interface{}) {
@@ -289,10 +292,14 @@ func (recorder *recorderImpl) Eventf(object runtime.Object, eventtype, reason, m
 }

 func (recorder *recorderImpl) PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
-	recorder.generateEvent(object, timestamp, eventtype, reason, fmt.Sprintf(messageFmt, args...))
+	recorder.generateEvent(object, nil, timestamp, eventtype, reason, fmt.Sprintf(messageFmt, args...))
 }

-func (recorder *recorderImpl) makeEvent(ref *v1.ObjectReference, eventtype, reason, message string) *v1.Event {
+func (recorder *recorderImpl) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
+	recorder.generateEvent(object, annotations, metav1.Now(), eventtype, reason, fmt.Sprintf(messageFmt, args...))
+}
+
+func (recorder *recorderImpl) makeEvent(ref *v1.ObjectReference, annotations map[string]string, eventtype, reason, message string) *v1.Event {
 	t := metav1.Time{Time: recorder.clock.Now()}
 	namespace := ref.Namespace
 	if namespace == "" {
@@ -300,8 +307,9 @@ func (recorder *recorderImpl) makeEvent(ref *v1.ObjectReference, eventtype, reas
 	}
 	return &v1.Event{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      fmt.Sprintf("%v.%x", ref.Name, t.UnixNano()),
-			Namespace: namespace,
+			Name:        fmt.Sprintf("%v.%x", ref.Name, t.UnixNano()),
+			Namespace:   namespace,
+			Annotations: annotations,
 		},
 		InvolvedObject: *ref,
 		Reason:         reason,
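The net effect in makeEvent is that the annotations land on the Event's ObjectMeta like annotations on any other API object. A sketch of the shape of the resulting object (field values are illustrative; the real name suffix is the creation timestamp in hex, per the fmt.Sprintf above):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Roughly what makeEvent now returns for an eviction.
	event := &v1.Event{
		ObjectMeta: metav1.ObjectMeta{
			Name:        "memory-hog.15f2a1b3c4d5e6f7", // hypothetical name.timestamp
			Namespace:   "default",
			Annotations: map[string]string{"starved_resource": "memory"},
		},
		Reason:  "Evicted",
		Type:    v1.EventTypeWarning,
		Message: "The node was low on resource: memory.",
	}
	fmt.Printf("%s: %s %v\n", event.Name, event.Message, event.Annotations)
}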
@@ -45,6 +45,10 @@ func (f *FakeRecorder) Eventf(object runtime.Object, eventtype, reason, messageF
 func (f *FakeRecorder) PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
 }

+func (f *FakeRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
+	f.Eventf(object, eventtype, reason, messageFmt, args)
+}
+
 // NewFakeRecorder creates new fake event recorder with event channel with
 // buffer of given size.
 func NewFakeRecorder(bufferSize int) *FakeRecorder {
@@ -152,6 +152,7 @@ go_test(
         "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
         "//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
@@ -20,11 +20,13 @@ import (
 	"fmt"
 	"path/filepath"
 	"strconv"
+	"strings"
 	"time"

 	"k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/fields"
 	nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
 	"k8s.io/kubernetes/pkg/features"
 	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
@@ -50,11 +52,13 @@ const (
 	// pressure conditions often surface after evictions because the kubelet only updates
 	// node conditions periodically.
 	// we wait this period after evictions to make sure that we wait out this delay
-	pressureDelay  = 20 * time.Second
-	testContextFmt = "when we run containers that should cause %s"
-	noPressure     = v1.NodeConditionType("NoPressure")
-	lotsOfDisk     = 10240      // 10 Gb in Mb
-	lotsOfFiles    = 1000000000 // 1 billion
+	pressureDelay     = 20 * time.Second
+	testContextFmt    = "when we run containers that should cause %s"
+	noPressure        = v1.NodeConditionType("NoPressure")
+	lotsOfDisk        = 10240      // 10 Gb in Mb
+	lotsOfFiles       = 1000000000 // 1 billion
+	resourceInodes    = v1.ResourceName("inodes")
+	noStarvedResource = v1.ResourceName("none")
 )

 // InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
@@ -62,6 +66,7 @@
 var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("inode-eviction-test")
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := resourceInodes
 	pressureTimeout := 15 * time.Minute
 	inodesConsumed := uint64(200000)
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
@@ -75,7 +80,7 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
 			initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				pod:              inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
@@ -98,6 +103,7 @@ var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]",
 	f := framework.NewDefaultFramework("image-gc-eviction-test")
 	pressureTimeout := 10 * time.Minute
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := resourceInodes
 	inodesConsumed := uint64(100000)
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -112,7 +118,7 @@ var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]",
 		})
 		// Consume enough inodes to induce disk pressure,
 		// but expect that image garbage collection can reduce it enough to avoid an eviction
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 0,
 				pod:              inodeConsumingPod("container-inode", 110000, nil),
@@ -126,6 +132,7 @@ var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]",
 var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
 	expectedNodeCondition := v1.NodeMemoryPressure
+	expectedStarvedResource := v1.ResourceMemory
 	pressureTimeout := 10 * time.Minute
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -140,7 +147,7 @@ var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disru
 			initialConfig.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableEnforcementKey}
 			initialConfig.CgroupsPerQOS = true
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logMemoryMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				pod:              getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
@@ -159,6 +166,7 @@ var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive
 	f := framework.NewDefaultFramework("localstorage-eviction-test")
 	pressureTimeout := 10 * time.Minute
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := v1.ResourceEphemeralStorage
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
 			diskConsumed := resource.MustParse("100Mi")
@@ -167,7 +175,7 @@ var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive
 			initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
@@ -187,6 +195,7 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
 	f := framework.NewDefaultFramework("localstorage-eviction-test")
 	pressureTimeout := 10 * time.Minute
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := v1.ResourceEphemeralStorage
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
 			diskConsumed := resource.MustParse("100Mi")
@@ -204,7 +213,7 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
 			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
 			initialConfig.EvictionHard = map[string]string{"memory.available": "0%"}
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
 				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
@@ -232,7 +241,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 	useUnderLimit := 99 /* Mb */
 	containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}

-	runEvictionTest(f, evictionTestTimeout, noPressure, logDiskMetrics, []podEvictSpec{
+	runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
 		{
 			evictionPriority: 1, // This pod should be evicted because emptyDir (default storage type) usage violation
 			pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
@@ -274,6 +283,7 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
 	expectedNodeCondition := v1.NodeMemoryPressure
+	expectedStarvedResource := v1.ResourceMemory
 	pressureTimeout := 10 * time.Minute
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -310,7 +320,7 @@ var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [
 		}
 		systemPriority := int32(2147483647)
 		specs[1].pod.Spec.Priority = &systemPriority
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logMemoryMetrics, specs)
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, specs)
 	})
 })

@@ -320,6 +330,7 @@ var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [
 var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive]", func() {
 	f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
 	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := v1.ResourceEphemeralStorage
 	pressureTimeout := 10 * time.Minute
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -358,7 +369,7 @@ var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Ser
 		}
 		systemPriority := int32(2147483647)
 		specs[1].pod.Spec.Priority = &systemPriority
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, specs)
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, specs)
 	})
 })

@@ -377,7 +388,7 @@ type podEvictSpec struct {
 // It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.)
 // It ensures that all pods with non-zero evictionPriority are eventually evicted.
 // runEvictionTest then cleans up the testing environment by deleting provided pods, and ensures that expectedNodeCondition no longer exists
-func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, logFunc func(), testSpecs []podEvictSpec) {
+func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, expectedStarvedResource v1.ResourceName, logFunc func(), testSpecs []podEvictSpec) {
 	// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
 	Context("", func() {
 		BeforeEach(func() {
@@ -442,6 +453,9 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 				logKubeletMetrics(kubeletmetrics.EvictionStatsAgeKey)
 				return verifyEvictionOrdering(f, testSpecs)
 			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
+
+			By("checking for correctly formatted eviction events")
+			verifyEvictionEvents(f, testSpecs, expectedStarvedResource)
 		})

 		AfterEach(func() {
@@ -549,6 +563,60 @@ func verifyEvictionOrdering(f *framework.Framework, testSpecs []podEvictSpec) er
 	return fmt.Errorf("pods that should be evicted are still running")
 }

+func verifyEvictionEvents(f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
+	for _, spec := range testSpecs {
+		pod := spec.pod
+		if spec.evictionPriority != 0 {
+			selector := fields.Set{
+				"involvedObject.kind":      "Pod",
+				"involvedObject.name":      pod.Name,
+				"involvedObject.namespace": f.Namespace.Name,
+				"reason":                   eviction.Reason,
+			}.AsSelector().String()
+			podEvictEvents, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(metav1.ListOptions{FieldSelector: selector})
+			Expect(err).To(BeNil(), "Unexpected error getting events during eviction test: %v", err)
+			Expect(len(podEvictEvents.Items)).To(Equal(1), "Expected to find 1 eviction event for pod %s, got %d", pod.Name, len(podEvictEvents.Items))
+			event := podEvictEvents.Items[0]
+
+			if expectedStarvedResource != noStarvedResource {
+				// Check the eviction.StarvedResourceKey
+				starved, found := event.Annotations[eviction.StarvedResourceKey]
+				Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
+					pod.Name, expectedStarvedResource)
+				starvedResource := v1.ResourceName(starved)
+				Expect(starvedResource).To(Equal(expectedStarvedResource), "Expected the starved_resource annotation on pod %s to contain %s, but got %s instead",
+					pod.Name, expectedStarvedResource, starvedResource)
+
+				// We only check these keys for memory, because ephemeral storage evictions may be due to volume usage, in which case these values are not present
+				if expectedStarvedResource == v1.ResourceMemory {
+					// Check the eviction.OffendingContainersKey
+					offendersString, found := event.Annotations[eviction.OffendingContainersKey]
+					Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
+						pod.Name)
+					offendingContainers := strings.Split(offendersString, ",")
+					Expect(len(offendingContainers)).To(Equal(1), "Expected to find the offending container's usage in the %s annotation, but no container was found",
+						eviction.OffendingContainersKey)
+					Expect(offendingContainers[0]).To(Equal(pod.Spec.Containers[0].Name), "Expected to find the offending container: %s's usage in the %s annotation, but found %s instead",
+						pod.Spec.Containers[0].Name, eviction.OffendingContainersKey, offendingContainers[0])
+
+					// Check the eviction.OffendingContainersUsageKey
+					offendingUsageString, found := event.Annotations[eviction.OffendingContainersUsageKey]
+					Expect(found).To(BeTrue(), "Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
+						pod.Name)
+					offendingContainersUsage := strings.Split(offendingUsageString, ",")
+					Expect(len(offendingContainersUsage)).To(Equal(1), "Expected to find the offending container's usage in the %s annotation, but found %+v",
+						eviction.OffendingContainersUsageKey, offendingContainersUsage)
+					usageQuantity, err := resource.ParseQuantity(offendingContainersUsage[0])
+					Expect(err).To(BeNil(), "Expected to be able to parse pod %s's %s annotation as a quantity, but got err: %v", pod.Name, eviction.OffendingContainersUsageKey, err)
+					request := pod.Spec.Containers[0].Resources.Requests[starvedResource]
+					Expect(usageQuantity.Cmp(request)).To(Equal(1), "Expected usage of offending container: %s in pod %s to exceed its request %s",
+						usageQuantity.String(), pod.Name, request.String())
+				}
+			}
+		}
+	}
+}
+
 // Returns TRUE if the node has the node condition, FALSE otherwise
 func hasNodeCondition(f *framework.Framework, expectedNodeCondition v1.NodeConditionType) bool {
 	localNodeStatus := getLocalNode(f).Status
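The selector built in verifyEvictionEvents is an ordinary apimachinery field selector keyed on the involved object; the same construction works in any client that wants to look up eviction events for a pod. A standalone sketch (the object names are placeholders):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/fields"
)

func main() {
	// Build the same field selector the test uses, then pass the string to
	// an events List call as ListOptions.FieldSelector.
	selector := fields.Set{
		"involvedObject.kind":      "Pod",
		"involvedObject.name":      "memory-hog-pod",
		"involvedObject.namespace": "default",
		"reason":                   "Evicted",
	}.AsSelector().String()
	fmt.Println(selector)
}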