mirror of https://github.com/k3s-io/k3s
Merge pull request #59841 from dashpole/metrics_after_reclaim
Automatic merge from submit-queue (batch tested with PRs 59683, 59964, 59841, 59936, 59686). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Reevaluate eviction thresholds after reclaim functions **What this PR does / why we need it**: When the node comes under `DiskPressure` due to inodes or disk space, the eviction manager runs garbage collection functions to clean up dead containers and unused images. Currently, we use the strategy of trying to measure the disk space and inodes freed by garbage collection. However, as #46789 and #56573 point out, there are gaps in the implementation that can cause extra evictions even when they are not required. Furthermore, for nodes which frequently cycle through images, it results in a large number of evictions, as running out of inodes always causes an eviction. This PR changes this strategy to call the garbage collection functions and ignore the results. Then, it triggers another collection of node-level metrics, and sees if the node is still under `DiskPressure`. This way, we can simply observe the decrease in disk or inode usage, rather than trying to measure how much is freed. **Which issue(s) this PR fixes**: Fixes #46789 Fixes #56573 Related PR #56575 **Special notes for your reviewer**: This will look cleaner after #57802 removes arguments from [makeSignalObservations](https://github.com/kubernetes/kubernetes/pull/57802/files#diff-9e5246d8c78d50ce4ba440f98663f3e9R719). **Release note**: ```release-note NONE ``` /sig node /kind bug /priority important-soon cc @kubernetes/sig-node-pr-reviews
pull/6/head
commit
270ed995f4
|
@ -19,6 +19,8 @@ package container
|
|||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
// Specifies a policy for garbage collecting containers.
|
||||
|
@ -80,5 +82,6 @@ func (cgc *realContainerGC) GarbageCollect() error {
|
|||
}
|
||||
|
||||
func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
|
||||
glog.Infof("attempting to delete unused containers")
|
||||
return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
|
||||
}
|
||||
|
|
|
@ -349,7 +349,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
|
|||
m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
|
||||
|
||||
// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
|
||||
if m.reclaimNodeLevelResources(resourceToReclaim, observations) {
|
||||
if m.reclaimNodeLevelResources(resourceToReclaim, capacityProvider, activePods) {
|
||||
glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
|
||||
return nil
|
||||
}
|
||||
|
@ -437,26 +437,31 @@ func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods
|
|||
}
|
||||
|
||||
// reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required.
|
||||
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, observations signalObservations) bool {
|
||||
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, capacityProvider CapacityProvider, pods []*v1.Pod) bool {
|
||||
nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
|
||||
for _, nodeReclaimFunc := range nodeReclaimFuncs {
|
||||
// attempt to reclaim the pressured resource.
|
||||
reclaimed, err := nodeReclaimFunc()
|
||||
if err != nil {
|
||||
if err := nodeReclaimFunc(); err != nil {
|
||||
glog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err)
|
||||
}
|
||||
// update our local observations based on the amount reported to have been reclaimed.
|
||||
// note: this is optimistic, other things could have been still consuming the pressured resource in the interim.
|
||||
for _, signal := range resourceClaimToSignal[resourceToReclaim] {
|
||||
value, ok := observations[signal]
|
||||
if !ok {
|
||||
glog.Errorf("eviction manager: unable to find value associated with signal %v", signal)
|
||||
continue
|
||||
}
|
||||
value.available.Add(*reclaimed)
|
||||
|
||||
}
|
||||
if len(nodeReclaimFuncs) > 0 {
|
||||
summary, err := m.summaryProvider.Get(true)
|
||||
if err != nil {
|
||||
glog.Errorf("eviction manager: failed to get get summary stats after resource reclaim: %v", err)
|
||||
return false
|
||||
}
|
||||
// evaluate all current thresholds to see if with adjusted observations, we think we have met min reclaim goals
|
||||
if len(thresholdsMet(m.thresholdsMet, observations, true)) == 0 {
|
||||
|
||||
// make observations and get a function to derive pod usage stats relative to those observations.
|
||||
observations, _ := makeSignalObservations(summary, capacityProvider, pods)
|
||||
debugLogObservations("observations after resource reclaim", observations)
|
||||
|
||||
// determine the set of thresholds met independent of grace period
|
||||
thresholds := thresholdsMet(m.config.Thresholds, observations, false)
|
||||
debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
|
||||
|
||||
if len(thresholds) == 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -88,21 +88,28 @@ func (m *mockCapacityProvider) GetNodeAllocatableReservation() v1.ResourceList {
|
|||
|
||||
// mockDiskGC is used to simulate invoking image and container garbage collection.
|
||||
type mockDiskGC struct {
|
||||
err error
|
||||
imageBytesFreed int64
|
||||
imageGCInvoked bool
|
||||
containerGCInvoked bool
|
||||
err error
|
||||
imageGCInvoked bool
|
||||
containerGCInvoked bool
|
||||
fakeSummaryProvider *fakeSummaryProvider
|
||||
summaryAfterGC *statsapi.Summary
|
||||
}
|
||||
|
||||
// DeleteUnusedImages returns the mocked values.
|
||||
func (m *mockDiskGC) DeleteUnusedImages() (int64, error) {
|
||||
func (m *mockDiskGC) DeleteUnusedImages() error {
|
||||
m.imageGCInvoked = true
|
||||
return m.imageBytesFreed, m.err
|
||||
if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
|
||||
m.fakeSummaryProvider.result = m.summaryAfterGC
|
||||
}
|
||||
return m.err
|
||||
}
|
||||
|
||||
// DeleteAllUnusedContainers returns the mocked value
|
||||
func (m *mockDiskGC) DeleteAllUnusedContainers() error {
|
||||
m.containerGCInvoked = true
|
||||
if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
|
||||
m.fakeSummaryProvider.result = m.summaryAfterGC
|
||||
}
|
||||
return m.err
|
||||
}
|
||||
|
||||
|
@ -211,7 +218,7 @@ func TestMemoryPressure(t *testing.T) {
|
|||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
|
||||
imageGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
|
||||
diskGC := &mockDiskGC{err: nil}
|
||||
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
||||
|
||||
config := Config{
|
||||
|
@ -239,7 +246,8 @@ func TestMemoryPressure(t *testing.T) {
|
|||
manager := &managerImpl{
|
||||
clock: fakeClock,
|
||||
killPodFunc: podKiller.killPodNow,
|
||||
imageGC: imageGC,
|
||||
imageGC: diskGC,
|
||||
containerGC: diskGC,
|
||||
config: config,
|
||||
recorder: &record.FakeRecorder{},
|
||||
summaryProvider: summaryProvider,
|
||||
|
@ -432,7 +440,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
|
|||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
|
||||
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
|
||||
diskGC := &mockDiskGC{err: nil}
|
||||
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
||||
|
||||
config := Config{
|
||||
|
@ -631,7 +639,7 @@ func TestMinReclaim(t *testing.T) {
|
|||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
|
||||
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
|
||||
diskGC := &mockDiskGC{err: nil}
|
||||
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
||||
|
||||
config := Config{
|
||||
|
@ -774,8 +782,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
|
|||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
|
||||
imageGcFree := resource.MustParse("700Mi")
|
||||
diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
|
||||
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
||||
|
||||
config := Config{
|
||||
|
@ -795,6 +801,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
|
|||
},
|
||||
}
|
||||
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)}
|
||||
diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil}
|
||||
manager := &managerImpl{
|
||||
clock: fakeClock,
|
||||
killPodFunc: podKiller.killPodNow,
|
||||
|
@ -819,6 +826,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
|
|||
// induce hard threshold
|
||||
fakeClock.Step(1 * time.Minute)
|
||||
summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
|
||||
// make GC successfully return disk usage to previous levels
|
||||
diskGC.summaryAfterGC = summaryStatsMaker("16Gi", "200Gi", podStats)
|
||||
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
|
||||
|
||||
// we should have disk pressure
|
||||
|
@ -842,7 +851,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
|
|||
|
||||
// remove disk pressure
|
||||
fakeClock.Step(20 * time.Minute)
|
||||
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
|
||||
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
|
||||
|
||||
// we should not have disk pressure
|
||||
|
@ -853,6 +861,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
|
|||
// induce disk pressure!
|
||||
fakeClock.Step(1 * time.Minute)
|
||||
summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)
|
||||
// Don't reclaim any disk
|
||||
diskGC.summaryAfterGC = summaryStatsMaker("400Mi", "200Gi", podStats)
|
||||
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
|
||||
|
||||
// we should have disk pressure
|
||||
|
@ -972,7 +982,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
|
|||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
|
||||
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
|
||||
diskGC := &mockDiskGC{err: nil}
|
||||
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
||||
|
||||
config := Config{
|
||||
|
@ -1175,7 +1185,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
|
|||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
|
||||
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
|
||||
diskGC := &mockDiskGC{err: nil}
|
||||
nodeRef := &v1.ObjectReference{
|
||||
Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
|
||||
}
|
||||
|
@ -1308,7 +1318,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
|
|||
podKiller := &mockPodKiller{}
|
||||
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
|
||||
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
|
||||
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
|
||||
diskGC := &mockDiskGC{err: nil}
|
||||
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
|
||||
|
||||
config := Config{
|
||||
|
|
|
@ -1071,38 +1071,15 @@ func buildResourceToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, w
|
|||
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{}
|
||||
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{}
|
||||
// with an imagefs, imagefs pressure should delete unused images
|
||||
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
|
||||
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
|
||||
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
|
||||
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
|
||||
} else {
|
||||
// without an imagefs, nodefs pressure should delete logs, and unused images
|
||||
// since imagefs and nodefs share a common device, they share common reclaim functions
|
||||
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
|
||||
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
|
||||
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
|
||||
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
|
||||
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
|
||||
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
|
||||
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
|
||||
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
|
||||
}
|
||||
return resourceToReclaimFunc
|
||||
}
|
||||
|
||||
// deleteTerminatedContainers will delete terminated containers to free up disk pressure.
|
||||
func deleteTerminatedContainers(containerGC ContainerGC) nodeReclaimFunc {
|
||||
return func() (*resource.Quantity, error) {
|
||||
glog.Infof("eviction manager: attempting to delete unused containers")
|
||||
err := containerGC.DeleteAllUnusedContainers()
|
||||
// Calculating bytes freed is not yet supported.
|
||||
return resource.NewQuantity(int64(0), resource.BinarySI), err
|
||||
}
|
||||
}
|
||||
|
||||
// deleteImages will delete unused images to free up disk pressure.
|
||||
func deleteImages(imageGC ImageGC, reportBytesFreed bool) nodeReclaimFunc {
|
||||
return func() (*resource.Quantity, error) {
|
||||
glog.Infof("eviction manager: attempting to delete unused images")
|
||||
bytesFreed, err := imageGC.DeleteUnusedImages()
|
||||
reclaimed := int64(0)
|
||||
if reportBytesFreed {
|
||||
reclaimed = bytesFreed
|
||||
}
|
||||
return resource.NewQuantity(reclaimed, resource.BinarySI), err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -81,15 +81,13 @@ type CapacityProvider interface {
|
|||
|
||||
// ImageGC is responsible for performing garbage collection of unused images.
|
||||
type ImageGC interface {
|
||||
// DeleteUnusedImages deletes unused images and returns the number of bytes freed, and an error.
|
||||
// This returns the bytes freed even if an error is returned.
|
||||
DeleteUnusedImages() (int64, error)
|
||||
// DeleteUnusedImages deletes unused images.
|
||||
DeleteUnusedImages() error
|
||||
}
|
||||
|
||||
// ContainerGC is responsible for performing garbage collection of unused containers.
|
||||
type ContainerGC interface {
|
||||
// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
|
||||
// It returns an error if it is unsuccessful.
|
||||
DeleteAllUnusedContainers() error
|
||||
}
|
||||
|
||||
|
@ -134,9 +132,7 @@ type thresholdsObservedAt map[evictionapi.Threshold]time.Time
|
|||
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time
|
||||
|
||||
// nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods.
|
||||
// Returns the quantity of resources reclaimed and an error, if applicable.
|
||||
// nodeReclaimFunc return the resources reclaimed even if an error occurs.
|
||||
type nodeReclaimFunc func() (*resource.Quantity, error)
|
||||
type nodeReclaimFunc func() error
|
||||
|
||||
// nodeReclaimFuncs is an ordered list of nodeReclaimFunc
|
||||
type nodeReclaimFuncs []nodeReclaimFunc
|
||||
|
|
|
@ -56,8 +56,8 @@ type ImageGCManager interface {
|
|||
|
||||
GetImageList() ([]container.Image, error)
|
||||
|
||||
// Delete all unused images and returns the number of bytes freed. The number of bytes freed is always returned.
|
||||
DeleteUnusedImages() (int64, error)
|
||||
// Delete all unused images.
|
||||
DeleteUnusedImages() error
|
||||
}
|
||||
|
||||
// A policy for garbage collecting images. Policy defines an allowed band in
|
||||
|
@ -308,8 +308,10 @@ func (im *realImageGCManager) GarbageCollect() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (im *realImageGCManager) DeleteUnusedImages() (int64, error) {
|
||||
return im.freeSpace(math.MaxInt64, time.Now())
|
||||
func (im *realImageGCManager) DeleteUnusedImages() error {
|
||||
glog.Infof("attempting to delete unused images")
|
||||
_, err := im.freeSpace(math.MaxInt64, time.Now())
|
||||
return err
|
||||
}
|
||||
|
||||
// Tries to free bytesToFree worth of images on the disk.
|
||||
|
|
|
@ -187,10 +187,10 @@ func TestDeleteUnusedImagesExemptSandboxImage(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
spaceFreed, err := manager.DeleteUnusedImages()
|
||||
err := manager.DeleteUnusedImages()
|
||||
assert := assert.New(t)
|
||||
assert.Len(fakeRuntime.ImageList, 1)
|
||||
require.NoError(t, err)
|
||||
assert.EqualValues(0, spaceFreed)
|
||||
}
|
||||
|
||||
func TestDetectImagesContainerStopped(t *testing.T) {
|
||||
|
@ -291,10 +291,9 @@ func TestDeleteUnusedImagesRemoveAllUnusedImages(t *testing.T) {
|
|||
}},
|
||||
}
|
||||
|
||||
spaceFreed, err := manager.DeleteUnusedImages()
|
||||
err := manager.DeleteUnusedImages()
|
||||
assert := assert.New(t)
|
||||
require.NoError(t, err)
|
||||
assert.EqualValues(3072, spaceFreed)
|
||||
assert.Len(fakeRuntime.ImageList, 1)
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,8 @@ const (
|
|||
pressureDelay = 20 * time.Second
|
||||
testContextFmt = "when we run containers that should cause %s"
|
||||
noPressure = v1.NodeConditionType("NoPressure")
|
||||
lotsOfDisk = 10240 // 10 Gb in Mb
|
||||
lotsOfDisk = 10240 // 10 Gb in Mb
|
||||
lotsOfFiles = 1000000000 // 1 billion
|
||||
)
|
||||
|
||||
// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
|
||||
|
@ -76,11 +77,11 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
|
|||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: inodeConsumingPod("container-inode-hog", nil),
|
||||
pod: inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
|
||||
},
|
||||
{
|
||||
evictionPriority: 1,
|
||||
pod: inodeConsumingPod("volume-inode-hog", &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
|
||||
pod: inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
|
||||
},
|
||||
{
|
||||
evictionPriority: 0,
|
||||
|
@ -90,6 +91,35 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
|
|||
})
|
||||
})
|
||||
|
||||
// ImageGCNoEviction tests that the node does not evict pods when inodes are consumed by images
|
||||
// Disk pressure is induced by pulling large images
|
||||
var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]", func() {
|
||||
f := framework.NewDefaultFramework("image-gc-eviction-test")
|
||||
pressureTimeout := 10 * time.Minute
|
||||
expectedNodeCondition := v1.NodeDiskPressure
|
||||
inodesConsumed := uint64(100000)
|
||||
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
|
||||
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||
// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
|
||||
summary := eventuallyGetSummary()
|
||||
inodesFree := *summary.Node.Fs.InodesFree
|
||||
if inodesFree <= inodesConsumed {
|
||||
framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
|
||||
}
|
||||
initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
|
||||
initialConfig.EvictionMinimumReclaim = map[string]string{}
|
||||
})
|
||||
// Consume enough inodes to induce disk pressure,
|
||||
// but expect that image garbage collection can reduce it enough to avoid an eviction
|
||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
|
||||
{
|
||||
evictionPriority: 0,
|
||||
pod: inodeConsumingPod("container-inode", 110000, nil),
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
|
||||
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
|
||||
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
|
||||
|
@ -630,19 +660,19 @@ const (
|
|||
volumeName = "test-volume"
|
||||
)
|
||||
|
||||
func inodeConsumingPod(name string, volumeSource *v1.VolumeSource) *v1.Pod {
|
||||
func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
|
||||
// Each iteration creates an empty file
|
||||
return podWithCommand(volumeSource, v1.ResourceRequirements{}, name, "i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;")
|
||||
return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, "touch %s${i}.txt; sleep 0.001")
|
||||
}
|
||||
|
||||
func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
|
||||
// Each iteration writes 1 Mb, so do diskConsumedMB iterations.
|
||||
return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB)+" do dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null ; i=$(($i+1)); done; while true; do sleep 5; done")
|
||||
return podWithCommand(volumeSource, resources, diskConsumedMB, name, "dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null")
|
||||
}
|
||||
|
||||
// podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
|
||||
// If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
|
||||
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, name, command string) *v1.Pod {
|
||||
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
|
||||
path := ""
|
||||
volumeMounts := []v1.VolumeMount{}
|
||||
volumes := []v1.Volume{}
|
||||
|
@ -662,7 +692,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
|
|||
Command: []string{
|
||||
"sh",
|
||||
"-c",
|
||||
fmt.Sprintf(command, filepath.Join(path, "file")),
|
||||
fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done", iterations, fmt.Sprintf(command, filepath.Join(path, "file"))),
|
||||
},
|
||||
Resources: resources,
|
||||
VolumeMounts: volumeMounts,
|
||||
|
|
Loading…
Reference in New Issue