mirror of https://github.com/k3s-io/k3s
reevaluate eviction thresholds after reclaim functions
parent 11ecad2629
commit e0830d0b71
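This commit reworks how the kubelet's eviction manager verifies that node-level reclaim (container and image garbage collection) actually relieved resource pressure: rather than trusting a bytes-freed estimate returned by each reclaim function, it takes a fresh stats summary after the reclaim functions run and re-evaluates its eviction thresholds against the new observations. The sketch below condenses the type change that drives the rest of the diff; the package clause is assumed for illustration.

    package eviction

    // Before this commit a reclaim function reported an estimate of what it freed:
    //
    //	type nodeReclaimFunc func() (*resource.Quantity, error)
    //
    // After this commit it only reports success or failure; the eviction manager
    // re-measures free capacity itself via the summary provider and re-runs its
    // threshold checks on the fresh observations.
    type nodeReclaimFunc func() error

    // nodeReclaimFuncs is an ordered list of nodeReclaimFunc.
    type nodeReclaimFuncs []nodeReclaimFunc

The same shape change propagates to ImageGC.DeleteUnusedImages and ImageGCManager.DeleteUnusedImages, which now return only an error, and to the e2e suite, which gains an ImageGCNoEviction test covering the "reclaim succeeded, no eviction needed" path.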
@@ -19,6 +19,8 @@ package container
 import (
 	"fmt"
 	"time"
+
+	"github.com/golang/glog"
 )
 
 // Specified a policy for garbage collecting containers.
@@ -80,5 +82,6 @@ func (cgc *realContainerGC) GarbageCollect() error {
 }
 
 func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
+	glog.Infof("attempting to delete unused containers")
 	return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
 }
@@ -341,7 +341,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 	m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
 
 	// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
-	if m.reclaimNodeLevelResources(resourceToReclaim, observations) {
+	if m.reclaimNodeLevelResources(resourceToReclaim, capacityProvider, activePods) {
 		glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
 		return nil
 	}
@@ -429,26 +429,31 @@ func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods
 }
 
 // reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required.
-func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, observations signalObservations) bool {
+func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, capacityProvider CapacityProvider, pods []*v1.Pod) bool {
 	nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
 	for _, nodeReclaimFunc := range nodeReclaimFuncs {
 		// attempt to reclaim the pressured resource.
-		reclaimed, err := nodeReclaimFunc()
-		if err != nil {
+		if err := nodeReclaimFunc(); err != nil {
 			glog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err)
 		}
-		// update our local observations based on the amount reported to have been reclaimed.
-		// note: this is optimistic, other things could have been still consuming the pressured resource in the interim.
-		for _, signal := range resourceClaimToSignal[resourceToReclaim] {
-			value, ok := observations[signal]
-			if !ok {
-				glog.Errorf("eviction manager: unable to find value associated with signal %v", signal)
-				continue
-			}
-			value.available.Add(*reclaimed)
-		}
-		// evaluate all current thresholds to see if with adjusted observations, we think we have met min reclaim goals
-		if len(thresholdsMet(m.thresholdsMet, observations, true)) == 0 {
+
+	}
+	if len(nodeReclaimFuncs) > 0 {
+		summary, err := m.summaryProvider.Get(true)
+		if err != nil {
+			glog.Errorf("eviction manager: failed to get get summary stats after resource reclaim: %v", err)
+			return false
+		}
+
+		// make observations and get a function to derive pod usage stats relative to those observations.
+		observations, _ := makeSignalObservations(summary, capacityProvider, pods)
+		debugLogObservations("observations after resource reclaim", observations)
+
+		// determine the set of thresholds met independent of grace period
+		thresholds := thresholdsMet(m.config.Thresholds, observations, false)
+		debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
+
+		if len(thresholds) == 0 {
 			return true
 		}
 	}
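The hunk above replaces the old optimistic bookkeeping, where the reported bytes freed were added back into cached observations, with a re-measurement: run every reclaim function, fetch a new summary, rebuild the observations, and skip pod eviction only if no thresholds are met any more. Below is a minimal, runnable sketch of that control flow; observe and thresholdsStillMet are hypothetical stand-ins for the summary provider and thresholdsMet, not kubelet APIs.

    package main

    import "fmt"

    // reclaimFunc mirrors the new nodeReclaimFunc shape: no freed-bytes estimate.
    type reclaimFunc func() error

    // reclaimThenReevaluate is a simplified stand-in for reclaimNodeLevelResources.
    // It returns true when, after reclaim has run, a fresh observation shows no
    // thresholds are met any more.
    func reclaimThenReevaluate(funcs []reclaimFunc, observe func() (string, error), thresholdsStillMet func(string) bool) bool {
    	for _, f := range funcs {
    		if err := f(); err != nil {
    			// errors are logged and tolerated, as in the real manager
    			fmt.Println("reclaim error (non-fatal):", err)
    		}
    	}
    	if len(funcs) == 0 {
    		return false
    	}
    	obs, err := observe() // fresh measurement instead of a reported quantity
    	if err != nil {
    		return false // cannot verify relief; fall through to pod eviction
    	}
    	return !thresholdsStillMet(obs)
    }

    func main() {
    	freed := false
    	gc := func() error { freed = true; return nil }
    	relieved := reclaimThenReevaluate(
    		[]reclaimFunc{gc},
    		func() (string, error) { return "fresh summary", nil },
    		func(string) bool { return !freed }, // pressure is gone once gc has run
    	)
    	fmt.Println("pressure relieved without evicting pods:", relieved)
    }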
@@ -88,21 +88,28 @@ func (m *mockCapacityProvider) GetNodeAllocatableReservation() v1.ResourceList {
 
 // mockDiskGC is used to simulate invoking image and container garbage collection.
 type mockDiskGC struct {
-	err                error
-	imageBytesFreed    int64
-	imageGCInvoked     bool
-	containerGCInvoked bool
+	err                 error
+	imageGCInvoked      bool
+	containerGCInvoked  bool
+	fakeSummaryProvider *fakeSummaryProvider
+	summaryAfterGC      *statsapi.Summary
 }
 
 // DeleteUnusedImages returns the mocked values.
-func (m *mockDiskGC) DeleteUnusedImages() (int64, error) {
+func (m *mockDiskGC) DeleteUnusedImages() error {
 	m.imageGCInvoked = true
-	return m.imageBytesFreed, m.err
+	if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
+		m.fakeSummaryProvider.result = m.summaryAfterGC
+	}
+	return m.err
 }
 
 // DeleteAllUnusedContainers returns the mocked value
 func (m *mockDiskGC) DeleteAllUnusedContainers() error {
 	m.containerGCInvoked = true
+	if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
+		m.fakeSummaryProvider.result = m.summaryAfterGC
+	}
 	return m.err
 }
 
@@ -211,7 +218,7 @@ func TestMemoryPressure(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
-	imageGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -239,7 +246,8 @@ func TestMemoryPressure(t *testing.T) {
 	manager := &managerImpl{
 		clock:           fakeClock,
 		killPodFunc:     podKiller.killPodNow,
-		imageGC:         imageGC,
+		imageGC:         diskGC,
+		containerGC:     diskGC,
 		config:          config,
 		recorder:        &record.FakeRecorder{},
 		summaryProvider: summaryProvider,
@@ -432,7 +440,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
-	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -631,7 +639,7 @@ func TestMinReclaim(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
-	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -774,8 +782,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
-	imageGcFree := resource.MustParse("700Mi")
-	diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -795,6 +801,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
 		},
 	}
 	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)}
+	diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil}
 	manager := &managerImpl{
 		clock:           fakeClock,
 		killPodFunc:     podKiller.killPodNow,
@@ -819,6 +826,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	// induce hard threshold
 	fakeClock.Step(1 * time.Minute)
 	summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
+	// make GC successfully return disk usage to previous levels
+	diskGC.summaryAfterGC = summaryStatsMaker("16Gi", "200Gi", podStats)
 	manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
 
 	// we should have disk pressure
@@ -842,7 +851,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
 
 	// remove disk pressure
 	fakeClock.Step(20 * time.Minute)
-	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
 	manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
 
 	// we should not have disk pressure
@@ -853,6 +861,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
 	// induce disk pressure!
 	fakeClock.Step(1 * time.Minute)
 	summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)
+	// Dont reclaim any disk
+	diskGC.summaryAfterGC = summaryStatsMaker("400Mi", "200Gi", podStats)
 	manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
 
 	// we should have disk pressure
@@ -972,7 +982,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
-	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -1175,7 +1185,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
-	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{
 		Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
 	}
@@ -1308,7 +1318,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
 	podKiller := &mockPodKiller{}
 	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
 	capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
-	diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
+	diskGC := &mockDiskGC{err: nil}
 	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
 
 	config := Config{
@@ -1059,38 +1059,15 @@ func buildResourceToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, w
 		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{}
 		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{}
 		// with an imagefs, imagefs pressure should delete unused images
-		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
-		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
+		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
+		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
 	} else {
 		// without an imagefs, nodefs pressure should delete logs, and unused images
 		// since imagefs and nodefs share a common device, they share common reclaim functions
-		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
-		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
-		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
-		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
+		resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
+		resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
+		resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
+		resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
 	}
 	return resourceToReclaimFunc
 }
-
-// deleteTerminatedContainers will delete terminated containers to free up disk pressure.
-func deleteTerminatedContainers(containerGC ContainerGC) nodeReclaimFunc {
-	return func() (*resource.Quantity, error) {
-		glog.Infof("eviction manager: attempting to delete unused containers")
-		err := containerGC.DeleteAllUnusedContainers()
-		// Calculating bytes freed is not yet supported.
-		return resource.NewQuantity(int64(0), resource.BinarySI), err
-	}
-}
-
-// deleteImages will delete unused images to free up disk pressure.
-func deleteImages(imageGC ImageGC, reportBytesFreed bool) nodeReclaimFunc {
-	return func() (*resource.Quantity, error) {
-		glog.Infof("eviction manager: attempting to delete unused images")
-		bytesFreed, err := imageGC.DeleteUnusedImages()
-		reclaimed := int64(0)
-		if reportBytesFreed {
-			reclaimed = bytesFreed
-		}
-		return resource.NewQuantity(reclaimed, resource.BinarySI), err
-	}
-}
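Because nodeReclaimFunc is now simply func() error, buildResourceToNodeReclaimFuncs can drop the deleteTerminatedContainers and deleteImages wrappers and store the interface methods as Go method values. A small self-contained illustration of that mechanic, with made-up types standing in for the kubelet's ContainerGC and ImageGC:

    package main

    import "fmt"

    type reclaimFunc func() error
    type reclaimFuncs []reclaimFunc

    type imageGC struct{}

    func (imageGC) DeleteUnusedImages() error { fmt.Println("image GC"); return nil }

    type containerGC struct{}

    func (containerGC) DeleteAllUnusedContainers() error { fmt.Println("container GC"); return nil }

    func main() {
    	var cg containerGC
    	var ig imageGC
    	// Method values bound to cg and ig already have type func() error, so they
    	// can sit in the slice directly, mirroring
    	// nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}.
    	funcs := reclaimFuncs{cg.DeleteAllUnusedContainers, ig.DeleteUnusedImages}
    	for _, f := range funcs {
    		_ = f()
    	}
    }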
@@ -78,15 +78,13 @@ type CapacityProvider interface {
 
 // ImageGC is responsible for performing garbage collection of unused images.
 type ImageGC interface {
-	// DeleteUnusedImages deletes unused images and returns the number of bytes freed, and an error.
-	// This returns the bytes freed even if an error is returned.
-	DeleteUnusedImages() (int64, error)
+	// DeleteUnusedImages deletes unused images.
+	DeleteUnusedImages() error
 }
 
 // ContainerGC is responsible for performing garbage collection of unused containers.
 type ContainerGC interface {
 	// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
-	// It returns an error if it is unsuccessful.
 	DeleteAllUnusedContainers() error
 }
 
@@ -131,9 +129,7 @@ type thresholdsObservedAt map[evictionapi.Threshold]time.Time
 type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time
 
 // nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods.
-// Returns the quantity of resources reclaimed and an error, if applicable.
-// nodeReclaimFunc return the resources reclaimed even if an error occurs.
-type nodeReclaimFunc func() (*resource.Quantity, error)
+type nodeReclaimFunc func() error
 
 // nodeReclaimFuncs is an ordered list of nodeReclaimFunc
 type nodeReclaimFuncs []nodeReclaimFunc
@@ -56,8 +56,8 @@ type ImageGCManager interface {
 
 	GetImageList() ([]container.Image, error)
 
-	// Delete all unused images and returns the number of bytes freed. The number of bytes freed is always returned.
-	DeleteUnusedImages() (int64, error)
+	// Delete all unused images.
+	DeleteUnusedImages() error
 }
 
 // A policy for garbage collecting images. Policy defines an allowed band in
@@ -308,8 +308,10 @@ func (im *realImageGCManager) GarbageCollect() error {
 	return nil
 }
 
-func (im *realImageGCManager) DeleteUnusedImages() (int64, error) {
-	return im.freeSpace(math.MaxInt64, time.Now())
+func (im *realImageGCManager) DeleteUnusedImages() error {
+	glog.Infof("attempting to delete unused images")
+	_, err := im.freeSpace(math.MaxInt64, time.Now())
+	return err
 }
 
 // Tries to free bytesToFree worth of images on the disk.
@@ -187,10 +187,10 @@ func TestDeleteUnusedImagesExemptSandboxImage(t *testing.T) {
 		},
 	}
 
-	spaceFreed, err := manager.DeleteUnusedImages()
+	err := manager.DeleteUnusedImages()
 	assert := assert.New(t)
+	assert.Len(fakeRuntime.ImageList, 1)
 	require.NoError(t, err)
-	assert.EqualValues(0, spaceFreed)
 }
 
 func TestDetectImagesContainerStopped(t *testing.T) {
@@ -291,10 +291,9 @@ func TestDeleteUnusedImagesRemoveAllUnusedImages(t *testing.T) {
 		}},
 	}
 
-	spaceFreed, err := manager.DeleteUnusedImages()
+	err := manager.DeleteUnusedImages()
 	assert := assert.New(t)
 	require.NoError(t, err)
-	assert.EqualValues(3072, spaceFreed)
 	assert.Len(fakeRuntime.ImageList, 1)
 }
 
@@ -52,7 +52,8 @@ const (
 	pressureDelay  = 20 * time.Second
 	testContextFmt = "when we run containers that should cause %s"
 	noPressure     = v1.NodeConditionType("NoPressure")
 	lotsOfDisk     = 10240 // 10 Gb in Mb
+	lotsOfFiles    = 1000000000 // 1 billion
 )
 
 // InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
|
||||||
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
|
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
|
||||||
{
|
{
|
||||||
evictionPriority: 1,
|
evictionPriority: 1,
|
||||||
pod: inodeConsumingPod("container-inode-hog", nil),
|
pod: inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
evictionPriority: 1,
|
evictionPriority: 1,
|
||||||
pod: inodeConsumingPod("volume-inode-hog", &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
|
pod: inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
evictionPriority: 0,
|
evictionPriority: 0,
|
||||||
|
@ -90,6 +91,35 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// ImageGCNoEviction tests that the node does not evict pods when inodes are consumed by images
|
||||||
|
// Disk pressure is induced by pulling large images
|
||||||
|
var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]", func() {
|
||||||
|
f := framework.NewDefaultFramework("image-gc-eviction-test")
|
||||||
|
pressureTimeout := 10 * time.Minute
|
||||||
|
expectedNodeCondition := v1.NodeDiskPressure
|
||||||
|
inodesConsumed := uint64(100000)
|
||||||
|
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
|
||||||
|
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||||
|
// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
|
||||||
|
summary := eventuallyGetSummary()
|
||||||
|
inodesFree := *summary.Node.Fs.InodesFree
|
||||||
|
if inodesFree <= inodesConsumed {
|
||||||
|
framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
|
||||||
|
}
|
||||||
|
initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
|
||||||
|
initialConfig.EvictionMinimumReclaim = map[string]string{}
|
||||||
|
})
|
||||||
|
// Consume enough inodes to induce disk pressure,
|
||||||
|
// but expect that image garbage collection can reduce it enough to avoid an eviction
|
||||||
|
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
|
||||||
|
{
|
||||||
|
evictionPriority: 0,
|
||||||
|
pod: inodeConsumingPod("container-inode", 110000, nil),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
|
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
|
||||||
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
|
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
|
||||||
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
|
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
|
||||||
|
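In the new ImageGCNoEviction test above, the hard eviction threshold is derived from the node's live inode count: with inodesConsumed = 100000 the threshold becomes inodesFree - 100000, and the test pod then creates 110000 files, so the threshold is crossed unless image garbage collection frees inodes first. A tiny worked example of that configuration arithmetic; the inodesFree figure is invented.

    package main

    import "fmt"

    func main() {
    	inodesFree := uint64(3200000) // hypothetical value read from the node summary
    	inodesConsumed := uint64(100000)
    	// Mirrors the test's EvictionHard computation.
    	evictionHard := map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
    	fmt.Println(evictionHard["nodefs.inodesFree"]) // 3100000
    }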
@@ -630,19 +660,19 @@ const (
 	volumeName = "test-volume"
 )
 
-func inodeConsumingPod(name string, volumeSource *v1.VolumeSource) *v1.Pod {
+func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
 	// Each iteration creates an empty file
-	return podWithCommand(volumeSource, v1.ResourceRequirements{}, name, "i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;")
+	return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, "touch %s${i}.txt; sleep 0.001")
 }
 
 func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
 	// Each iteration writes 1 Mb, so do diskConsumedMB iterations.
-	return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB)+" do dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null ; i=$(($i+1)); done; while true; do sleep 5; done")
+	return podWithCommand(volumeSource, resources, diskConsumedMB, name, "dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null")
 }
 
 // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
 // If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
-func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, name, command string) *v1.Pod {
+func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
 	path := ""
 	volumeMounts := []v1.VolumeMount{}
 	volumes := []v1.Volume{}
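podWithCommand now owns the iteration loop: callers pass a per-iteration command plus an iteration count, and the surrounding while loop and keep-alive sleep are composed in one place. The snippet below reconstructs the shell command this produces for an inode-consuming pod; the iteration count is shrunk to 3 so the output stays readable.

    package main

    import (
    	"fmt"
    	"path/filepath"
    )

    func main() {
    	// Per-iteration command as passed by inodeConsumingPod, with the file path
    	// substituted the same way podWithCommand does it.
    	iterations := 3
    	command := "touch %s${i}.txt; sleep 0.001"
    	path := "" // no volume mounted, so files land in the working directory
    	full := fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done",
    		iterations, fmt.Sprintf(command, filepath.Join(path, "file")))
    	fmt.Println(full)
    	// i=0; while [ $i -lt 3 ]; do touch file${i}.txt; sleep 0.001; i=$(($i+1)); done; while true; do sleep 5; done
    }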
@@ -662,7 +692,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
 					Command: []string{
 						"sh",
 						"-c",
-						fmt.Sprintf(command, filepath.Join(path, "file")),
+						fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done", iterations, fmt.Sprintf(command, filepath.Join(path, "file"))),
 					},
 					Resources:    resources,
 					VolumeMounts: volumeMounts,