reevaluate eviction thresholds after reclaim functions

pull/6/head
David Ashpole 2018-02-16 08:35:24 -08:00
parent 11ecad2629
commit e0830d0b71
8 changed files with 105 additions and 83 deletions

@@ -19,6 +19,8 @@ package container
import (
"fmt"
"time"
"github.com/golang/glog"
)
// Specifies a policy for garbage collecting containers.
@@ -80,5 +82,6 @@ func (cgc *realContainerGC) GarbageCollect() error {
}
func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
glog.Infof("attempting to delete unused containers")
return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
}
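For context, the sketch below (not part of this commit) spells out what the call above does. The ContainerRuntime/SourcesReadyProvider shapes and the evictNonDeletedPods parameter name are assumptions inferred from these lines, not copied from the kubelet source.

package gcsketch

// Minimal stand-ins for the kubelet types referenced above; names are assumed.
type ContainerGCPolicy struct{}

type containerRuntime interface {
	// The final flag is what DeleteAllUnusedContainers sets to true: also remove
	// containers belonging to pods that are terminated but not yet deleted.
	GarbageCollect(policy ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error
}

type sourcesReadyProvider interface{ AllReady() bool }

type realContainerGC struct {
	runtime              containerRuntime
	policy               ContainerGCPolicy
	sourcesReadyProvider sourcesReadyProvider
}

// Eviction-driven cleanup: same policy and readiness check as periodic GC, but
// with the final flag set to true so that containers of terminated-but-not-deleted
// pods are removed as well.
func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
	return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
}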

@@ -341,7 +341,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
if m.reclaimNodeLevelResources(resourceToReclaim, observations) {
if m.reclaimNodeLevelResources(resourceToReclaim, capacityProvider, activePods) {
glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
return nil
}
@@ -429,26 +429,31 @@ func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods
}
// reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required.
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, observations signalObservations) bool {
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, capacityProvider CapacityProvider, pods []*v1.Pod) bool {
nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
for _, nodeReclaimFunc := range nodeReclaimFuncs {
// attempt to reclaim the pressured resource.
reclaimed, err := nodeReclaimFunc()
if err != nil {
if err := nodeReclaimFunc(); err != nil {
glog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err)
}
// update our local observations based on the amount reported to have been reclaimed.
// note: this is optimistic, other things could have been still consuming the pressured resource in the interim.
for _, signal := range resourceClaimToSignal[resourceToReclaim] {
value, ok := observations[signal]
if !ok {
glog.Errorf("eviction manager: unable to find value associated with signal %v", signal)
continue
}
value.available.Add(*reclaimed)
}
if len(nodeReclaimFuncs) > 0 {
summary, err := m.summaryProvider.Get(true)
if err != nil {
glog.Errorf("eviction manager: failed to get get summary stats after resource reclaim: %v", err)
return false
}
// evaluate all current thresholds to see if with adjusted observations, we think we have met min reclaim goals
if len(thresholdsMet(m.thresholdsMet, observations, true)) == 0 {
// make observations and get a function to derive pod usage stats relative to those observations.
observations, _ := makeSignalObservations(summary, capacityProvider, pods)
debugLogObservations("observations after resource reclaim", observations)
// determine the set of thresholds met independent of grace period
thresholds := thresholdsMet(m.config.Thresholds, observations, false)
debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
if len(thresholds) == 0 {
return true
}
}
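Read as a whole, the new flow is: run every reclaim function, then take a fresh measurement and re-check thresholds, rather than optimistically subtracting a reported bytes-freed value from stale observations. Below is a compressed, self-contained model of that control flow (not the kubelet's actual code); observe and minAvailable stand in for summaryProvider.Get/makeSignalObservations and thresholdsMet, which are not reproduced here.

package evictionsketch

import "log"

// reclaimThenReevaluate models the post-change reclaimNodeLevelResources:
// reclaim functions return only an error, and success is judged from a fresh
// observation, not from whatever the reclaim functions claim to have freed.
func reclaimThenReevaluate(reclaimFuncs []func() error, observe func() (availableBytes int64, err error), minAvailable int64) bool {
	for _, reclaim := range reclaimFuncs {
		// A failing reclaim function is logged but does not stop the others.
		if err := reclaim(); err != nil {
			log.Printf("eviction manager: unexpected error when attempting to reduce pressure: %v", err)
		}
	}
	if len(reclaimFuncs) == 0 {
		return false // nothing could be reclaimed at node level; pod eviction may be needed
	}
	available, err := observe()
	if err != nil {
		log.Printf("eviction manager: failed to get summary stats after resource reclaim: %v", err)
		return false
	}
	// Other consumers may have used the space in the meantime, which is exactly
	// why the manager re-observes instead of trusting a bytes-freed figure.
	return available >= minAvailable
}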

@@ -88,21 +88,28 @@ func (m *mockCapacityProvider) GetNodeAllocatableReservation() v1.ResourceList {
// mockDiskGC is used to simulate invoking image and container garbage collection.
type mockDiskGC struct {
err error
imageBytesFreed int64
imageGCInvoked bool
containerGCInvoked bool
err error
imageGCInvoked bool
containerGCInvoked bool
fakeSummaryProvider *fakeSummaryProvider
summaryAfterGC *statsapi.Summary
}
// DeleteUnusedImages returns the mocked values.
func (m *mockDiskGC) DeleteUnusedImages() (int64, error) {
func (m *mockDiskGC) DeleteUnusedImages() error {
m.imageGCInvoked = true
return m.imageBytesFreed, m.err
if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
m.fakeSummaryProvider.result = m.summaryAfterGC
}
return m.err
}
// DeleteAllUnusedContainers returns the mocked value
func (m *mockDiskGC) DeleteAllUnusedContainers() error {
m.containerGCInvoked = true
if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
m.fakeSummaryProvider.result = m.summaryAfterGC
}
return m.err
}
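Because the manager now re-reads summary stats after running reclaim functions, a test double has to change what the summary provider reports at the moment GC is invoked, which is what the fakeSummaryProvider/summaryAfterGC wiring above does. A self-contained model of that pattern follows; the type names here are illustrative, not the kubelet's.

package evictionsketch

// fakeStats stands in for statsapi.Summary in this sketch.
type fakeStats struct{ availableBytes int64 }

// fakeStatsProvider is what a manager-under-test would read observations from.
type fakeStatsProvider struct{ result fakeStats }

// fakeDiskGC simulates image/container GC: when invoked, it swaps the stats the
// provider will report, so the manager's post-reclaim re-evaluation sees the
// staged "after GC" state.
type fakeDiskGC struct {
	provider     *fakeStatsProvider
	statsAfterGC *fakeStats
	invoked      bool
	err          error
}

func (g *fakeDiskGC) DeleteUnusedImages() error {
	g.invoked = true
	if g.statsAfterGC != nil && g.provider != nil {
		g.provider.result = *g.statsAfterGC
	}
	return g.err
}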
@@ -211,7 +218,7 @@ func TestMemoryPressure(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
imageGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@@ -239,7 +246,8 @@ func TestMemoryPressure(t *testing.T) {
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: imageGC,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
@@ -432,7 +440,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@@ -631,7 +639,7 @@ func TestMinReclaim(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@@ -774,8 +782,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
imageGcFree := resource.MustParse("700Mi")
diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@@ -795,6 +801,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)}
diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
@@ -819,6 +826,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
// induce hard threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
// make GC successfully return disk usage to previous levels
diskGC.summaryAfterGC = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should have disk pressure
@@ -842,7 +851,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
// remove disk pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should not have disk pressure
@@ -853,6 +861,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
// induce disk pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)
// Don't reclaim any disk
diskGC.summaryAfterGC = summaryStatsMaker("400Mi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should have disk pressure
@@ -972,7 +982,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@@ -1175,7 +1185,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{
Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
}
@@ -1308,7 +1318,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{

@@ -1059,38 +1059,15 @@ func buildResourceToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, w
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{}
// with an imagefs, imagefs pressure should delete unused images
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
} else {
// without an imagefs, nodefs pressure should delete logs, and unused images
// since imagefs and nodefs share a common device, they share common reclaim functions
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
}
return resourceToReclaimFunc
}
// deleteTerminatedContainers will delete terminated containers to free up disk pressure.
func deleteTerminatedContainers(containerGC ContainerGC) nodeReclaimFunc {
return func() (*resource.Quantity, error) {
glog.Infof("eviction manager: attempting to delete unused containers")
err := containerGC.DeleteAllUnusedContainers()
// Calculating bytes freed is not yet supported.
return resource.NewQuantity(int64(0), resource.BinarySI), err
}
}
// deleteImages will delete unused images to free up disk pressure.
func deleteImages(imageGC ImageGC, reportBytesFreed bool) nodeReclaimFunc {
return func() (*resource.Quantity, error) {
glog.Infof("eviction manager: attempting to delete unused images")
bytesFreed, err := imageGC.DeleteUnusedImages()
reclaimed := int64(0)
if reportBytesFreed {
reclaimed = bytesFreed
}
return resource.NewQuantity(reclaimed, resource.BinarySI), err
}
}

@@ -78,15 +78,13 @@ type CapacityProvider interface {
// ImageGC is responsible for performing garbage collection of unused images.
type ImageGC interface {
// DeleteUnusedImages deletes unused images and returns the number of bytes freed, and an error.
// This returns the bytes freed even if an error is returned.
DeleteUnusedImages() (int64, error)
// DeleteUnusedImages deletes unused images.
DeleteUnusedImages() error
}
// ContainerGC is responsible for performing garbage collection of unused containers.
type ContainerGC interface {
// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
// It returns an error if it is unsuccessful.
DeleteAllUnusedContainers() error
}
@@ -131,9 +129,7 @@ type thresholdsObservedAt map[evictionapi.Threshold]time.Time
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time
// nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods.
// Returns the quantity of resources reclaimed and an error, if applicable.
// nodeReclaimFunc return the resources reclaimed even if an error occurs.
type nodeReclaimFunc func() (*resource.Quantity, error)
type nodeReclaimFunc func() error
// nodeReclaimFuncs is an ordered list of nodeReclaimFunc
type nodeReclaimFuncs []nodeReclaimFunc
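Taken together with the helpers.go hunk above, this is why the deleteTerminatedContainers/deleteImages closures could be deleted: both GC interfaces now have the same shape as nodeReclaimFunc, so their method values can go into the reclaim list directly. A minimal self-contained illustration:

package evictionsketch

// nodeReclaimFunc after this change: attempt reclamation, report only an error.
type nodeReclaimFunc func() error
type nodeReclaimFuncs []nodeReclaimFunc

type ImageGC interface{ DeleteUnusedImages() error }
type ContainerGC interface{ DeleteAllUnusedContainers() error }

// buildFuncs mirrors buildResourceToNodeReclaimFuncs: method values of the GC
// interfaces satisfy nodeReclaimFunc without any wrapper closures.
func buildFuncs(containerGC ContainerGC, imageGC ImageGC) nodeReclaimFuncs {
	return nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
}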

@@ -56,8 +56,8 @@ type ImageGCManager interface {
GetImageList() ([]container.Image, error)
// Delete all unused images and returns the number of bytes freed. The number of bytes freed is always returned.
DeleteUnusedImages() (int64, error)
// Delete all unused images.
DeleteUnusedImages() error
}
// A policy for garbage collecting images. Policy defines an allowed band in
@@ -308,8 +308,10 @@ func (im *realImageGCManager) GarbageCollect() error {
return nil
}
func (im *realImageGCManager) DeleteUnusedImages() (int64, error) {
return im.freeSpace(math.MaxInt64, time.Now())
func (im *realImageGCManager) DeleteUnusedImages() error {
glog.Infof("attempting to delete unused images")
_, err := im.freeSpace(math.MaxInt64, time.Now())
return err
}
// Tries to free bytesToFree worth of images on the disk.

@@ -187,10 +187,10 @@ func TestDeleteUnusedImagesExemptSandboxImage(t *testing.T) {
},
}
spaceFreed, err := manager.DeleteUnusedImages()
err := manager.DeleteUnusedImages()
assert := assert.New(t)
assert.Len(fakeRuntime.ImageList, 1)
require.NoError(t, err)
assert.EqualValues(0, spaceFreed)
}
func TestDetectImagesContainerStopped(t *testing.T) {
@@ -291,10 +291,9 @@ func TestDeleteUnusedImagesRemoveAllUnusedImages(t *testing.T) {
}},
}
spaceFreed, err := manager.DeleteUnusedImages()
err := manager.DeleteUnusedImages()
assert := assert.New(t)
require.NoError(t, err)
assert.EqualValues(3072, spaceFreed)
assert.Len(fakeRuntime.ImageList, 1)
}

@@ -52,7 +52,8 @@ const (
pressureDelay = 20 * time.Second
testContextFmt = "when we run containers that should cause %s"
noPressure = v1.NodeConditionType("NoPressure")
lotsOfDisk = 10240 // 10 Gb in Mb
lotsOfDisk = 10240 // 10 GB in MB
lotsOfFiles = 1000000000 // 1 billion
)
// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
@@ -76,11 +77,11 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logInodeMetrics, []podEvictSpec{
{
evictionPriority: 1,
pod: inodeConsumingPod("container-inode-hog", nil),
pod: inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
},
{
evictionPriority: 1,
pod: inodeConsumingPod("volume-inode-hog", &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
pod: inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
},
{
evictionPriority: 0,
@@ -90,6 +91,35 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", fun
})
})
// ImageGCNoEviction tests that the node does not evict pods when inodes are consumed by images
// Disk pressure is induced by pulling large images
var _ = framework.KubeDescribe("ImageGCNoEviction [Slow] [Serial] [Disruptive]", func() {
f := framework.NewDefaultFramework("image-gc-eviction-test")
pressureTimeout := 10 * time.Minute
expectedNodeCondition := v1.NodeDiskPressure
inodesConsumed := uint64(100000)
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
// Set the eviction threshold to inodesFree - inodesConsumed, so that consuming inodesConsumed inodes induces disk pressure.
summary := eventuallyGetSummary()
inodesFree := *summary.Node.Fs.InodesFree
if inodesFree <= inodesConsumed {
framework.Skipf("Too few inodes free on the host for the InodeEviction test to run")
}
initialConfig.EvictionHard = map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", inodesFree-inodesConsumed)}
initialConfig.EvictionMinimumReclaim = map[string]string{}
})
// Consume enough inodes to induce disk pressure,
// but expect that image garbage collection can reduce it enough to avoid an eviction
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
{
evictionPriority: 0,
pod: inodeConsumingPod("container-inode", 110000, nil),
},
})
})
})
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive]", func() {
@@ -630,19 +660,19 @@ const (
volumeName = "test-volume"
)
func inodeConsumingPod(name string, volumeSource *v1.VolumeSource) *v1.Pod {
func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
// Each iteration creates an empty file
return podWithCommand(volumeSource, v1.ResourceRequirements{}, name, "i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;")
return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, "touch %s${i}.txt; sleep 0.001")
}
func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
// Each iteration writes 1 MB, so do diskConsumedMB iterations.
return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB)+" do dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null ; i=$(($i+1)); done; while true; do sleep 5; done")
return podWithCommand(volumeSource, resources, diskConsumedMB, name, "dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null")
}
// podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
// If a volumeSource is provided, then the volumeMountPath to the volume is inserted into the provided command.
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, name, command string) *v1.Pod {
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
path := ""
volumeMounts := []v1.VolumeMount{}
volumes := []v1.Volume{}
@@ -662,7 +692,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
Command: []string{
"sh",
"-c",
fmt.Sprintf(command, filepath.Join(path, "file")),
fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done", iterations, fmt.Sprintf(command, filepath.Join(path, "file"))),
},
Resources: resources,
VolumeMounts: volumeMounts,
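For reference, here is a small standalone reproduction (not part of this commit) of how the refactored helpers assemble the container command: podWithCommand now owns the iteration loop, and each caller supplies only the per-iteration body and the iteration count. The /test-mnt path and the counts passed in main are illustrative.

package main

import (
	"fmt"
	"path/filepath"
)

// buildCommand mirrors the string assembly in podWithCommand: the caller's
// per-iteration body gets the file path substituted in, then the shared loop
// and trailing sleep are wrapped around it.
func buildCommand(iterations int, command, mountPath string) string {
	body := fmt.Sprintf(command, filepath.Join(mountPath, "file"))
	return fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$(($i+1)); done; while true; do sleep 5; done", iterations, body)
}

func main() {
	// inodeConsumingPod: each iteration creates an empty file.
	fmt.Println(buildCommand(1000000000, "touch %s${i}.txt; sleep 0.001", "/test-mnt"))
	// diskConsumingPod: each iteration writes 1 MB.
	fmt.Println(buildCommand(100, "dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null", "/test-mnt"))
}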