remove disk allocatable evictions

pull/6/head
David Ashpole 2017-11-08 13:47:12 -08:00
parent 2d972c19bf
commit 527611ee41
5 changed files with 11 additions and 403 deletions


@@ -233,7 +233,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
activePods := podFunc()
// make observations and get a function to derive pod usage stats relative to those observations.
observations, statsFunc, err := makeSignalObservations(m.summaryProvider, capacityProvider, activePods, *m.dedicatedImageFs)
observations, statsFunc, err := makeSignalObservations(m.summaryProvider, capacityProvider, activePods)
if err != nil {
glog.Errorf("eviction manager: unexpected err: %v", err)
return nil


@@ -1436,297 +1436,3 @@ func TestAllocatableMemoryPressure(t *testing.T) {
}
}
}
// TestAllocatableNodeFsPressure
func TestAllocatableNodeFsPressure(t *testing.T) {
utilfeature.DefaultFeatureGate.Set("LocalStorageCapacityIsolation=True")
enablePodPriority(true)
podMaker := makePodWithDiskStats
summaryStatsMaker := makeDiskStats
podsToMake := []podToMake{
{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "900Mi"},
{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "50Mi"},
{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "2Gi"), rootFsUsed: "1750Mi"},
{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "400Mi"},
{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "100Mi"},
}
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
pods = append(pods, pod)
podStats[pod] = podStat
}
podToEvict := pods[0]
activePodsFunc := func() []*v1.Pod {
return pods
}
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(newEphemeralStorageResourceList("6Gi", "1000m", "10Gi"), newEphemeralStorageResourceList("1Gi", "1000m", "10Gi"))
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalAllocatableNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Ki"),
},
},
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("3Gi", "6Gi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
}
// create a best effort and a burstable pod to test admission
bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newEphemeralStorageResourceList("", "", ""), newEphemeralStorageResourceList("", "", ""), "0Gi", "", "")
burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newEphemeralStorageResourceList("1Gi", "", ""), newEphemeralStorageResourceList("1Gi", "", ""), "1Gi", "", "")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
t.Fatalf("Manager should not report disk pressure")
}
// try to admit our pods (they should succeed)
expected := []bool{true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
}
// induce disk pressure!
fakeClock.Step(1 * time.Minute)
pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), "2000Mi", "", "")
podStats[pod] = podStat
pods = append(pods, pod)
summaryProvider.result = summaryStatsMaker("6Gi", "6Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure")
}
// check the right pod was killed
if podKiller.pod != podToEvict {
t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, podToEvict.Name)
}
observedGracePeriod := *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
}
// reset state
podKiller.pod = nil
podKiller.gracePeriodOverride = nil
// try to admit our pods (they should fail)
expected = []bool{false, false}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
}
// reduce disk pressure
fakeClock.Step(1 * time.Minute)
pods[5] = pods[len(pods)-1]
pods = pods[:len(pods)-1]
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure")
}
// try to admit our pods (they should fail)
expected = []bool{false, false}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
}
// move the clock past transition period to ensure that we stop reporting pressure
fakeClock.Step(5 * time.Minute)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
t.Fatalf("Manager should not report disk pressure")
}
// no pod should have been killed
if podKiller.pod != nil {
t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
}
// all pods should admit now
expected = []bool{true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
}
}
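Both tests above lean on the manager's transition-period behavior: once a threshold has been observed, the node condition keeps being reported until PressureTransitionPeriod (5 minutes in these configs) has elapsed since the last observation. A toy model of that rule, runnable on its own (a simplified sketch, not the manager's actual bookkeeping):

package main

import (
	"fmt"
	"time"
)

// pressureModel is a toy model of the transition period: a condition keeps
// being reported until it has been absent for the whole period.
type pressureModel struct {
	transitionPeriod time.Duration
	lastObservedAt   time.Time
}

func (p *pressureModel) observe(underPressure bool, now time.Time) {
	if underPressure {
		p.lastObservedAt = now
	}
}

func (p *pressureModel) reported(now time.Time) bool {
	return now.Sub(p.lastObservedAt) < p.transitionPeriod
}

func main() {
	start := time.Now()
	m := &pressureModel{transitionPeriod: 5 * time.Minute}
	m.observe(true, start)
	fmt.Println(m.reported(start))                      // true: threshold observed now
	m.observe(false, start.Add(1*time.Minute))          // signal clears a minute later
	fmt.Println(m.reported(start.Add(1 * time.Minute))) // still true: inside transition period
	fmt.Println(m.reported(start.Add(6 * time.Minute))) // false: period has elapsed
}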
func TestNodeReclaimForAllocatableFuncs(t *testing.T) {
utilfeature.DefaultFeatureGate.Set("LocalStorageCapacityIsolation=True")
enablePodPriority(true)
podMaker := makePodWithDiskStats
summaryStatsMaker := makeDiskStats
podsToMake := []podToMake{
{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "900Mi"},
{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "50Mi"},
{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "2Gi"), rootFsUsed: "1750Mi"},
{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "400Mi"},
{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "100Mi"},
}
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
pods = append(pods, pod)
podStats[pod] = podStat
}
podToEvict := pods[0]
activePodsFunc := func() []*v1.Pod {
return pods
}
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(newEphemeralStorageResourceList("6Gi", "1000m", "10Gi"), newEphemeralStorageResourceList("1Gi", "1000m", "10Gi"))
imageGcFree := resource.MustParse("800Mi")
diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalAllocatableNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("10Mi"),
},
},
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("6Gi", "6Gi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
}
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
t.Errorf("Manager should not report disk pressure")
}
// induce hard threshold
fakeClock.Step(1 * time.Minute)
pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), "2000Mi", "", "")
podStats[pod] = podStat
pods = append(pods, pod)
summaryProvider.result = summaryStatsMaker("6Gi", "6Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure since soft threshold was met")
}
// verify image and container gc were invoked
if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
t.Fatalf("Manager should have invoked image and container gc")
}
// verify a pod was killed: image gc alone cannot relieve allocatable pressure
if podKiller.pod == nil {
t.Fatalf("Manager should have killed a pod, but did not")
}
// check the right pod was killed
if podKiller.pod != podToEvict {
t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, podToEvict.Name)
}
observedGracePeriod := *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
}
// reset state
diskGC.imageGCInvoked = false
diskGC.containerGCInvoked = false
podKiller.pod = nil
podKiller.gracePeriodOverride = nil
// reduce disk pressure
fakeClock.Step(1 * time.Minute)
pods[5] = pods[len(pods)-1]
pods = pods[:len(pods)-1]
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure")
}
// move the clock past transition period to ensure that we stop reporting pressure
fakeClock.Step(5 * time.Minute)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
t.Fatalf("Manager should not report disk pressure")
}
// no pod should have been killed
if podKiller.pod != nil {
t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
}
// no image gc should have occurred
if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
t.Errorf("Manager chose to perform image gc when it was not neeed")
}
// no pod should have been killed
if podKiller.pod != nil {
t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
}
}
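TestNodeReclaimForAllocatableFuncs encodes the manager's relief order: run node-level reclaim functions (image GC, then container GC) first, and fall back to evicting a pod if the threshold is still met. For allocatable signals it always is still met, because GC frees node-level space without reducing pod-charged usage. A schematic sketch of that control flow (function names here are placeholders, not the manager's API):

package main

import "fmt"

// reliefLoop sketches the order of operations the test asserts: node-level
// reclaim functions run first, evicting a pod is the fallback.
func reliefLoop(thresholdMet func() bool, reclaimFuncs []func(), evictOnePod func()) {
	for _, reclaim := range reclaimFuncs {
		reclaim() // e.g. image GC, then container GC
		if !thresholdMet() {
			return // reclaim was sufficient; nothing to evict
		}
	}
	evictOnePod() // reclaim was not sufficient: evict the highest-ranked pod
}

func main() {
	gcInvoked, podKilled := false, false
	reliefLoop(
		func() bool { return true }, // allocatable threshold stays met despite GC
		[]func(){func() { gcInvoked = true }},
		func() { podKilled = true },
	)
	fmt.Println(gcInvoked, podKilled) // true true, matching the test's assertions
}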


@@ -73,13 +73,11 @@ func init() {
signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalAllocatableNodeFsAvailable] = v1.NodeDiskPressure
// map signals to resources (and vice-versa)
signalToResource = map[evictionapi.Signal]v1.ResourceName{}
signalToResource[evictionapi.SignalMemoryAvailable] = v1.ResourceMemory
signalToResource[evictionapi.SignalAllocatableMemoryAvailable] = v1.ResourceMemory
signalToResource[evictionapi.SignalAllocatableNodeFsAvailable] = resourceNodeFs
signalToResource[evictionapi.SignalImageFsAvailable] = resourceImageFs
signalToResource[evictionapi.SignalImageFsInodesFree] = resourceImageFsInodes
signalToResource[evictionapi.SignalNodeFsAvailable] = resourceNodeFs
@@ -212,16 +210,6 @@ func getAllocatableThreshold(allocatableConfig []string) []evictionapi.Threshold
Quantity: resource.NewQuantity(int64(0), resource.BinarySI),
},
},
{
Signal: evictionapi.SignalAllocatableNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(int64(0), resource.BinarySI),
},
MinReclaim: &evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(int64(0), resource.BinarySI),
},
},
}
}
}
@@ -704,7 +692,7 @@ func (a byEvictionPriority) Less(i, j int) bool {
}
// makeSignalObservations derives observations using the specified summary provider.
func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvider CapacityProvider, pods []*v1.Pod, withImageFs bool) (signalObservations, statsFunc, error) {
func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvider CapacityProvider, pods []*v1.Pod) (signalObservations, statsFunc, error) {
summary, err := summaryProvider.Get()
if err != nil {
return nil, nil, err
@@ -756,11 +744,11 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
}
}
nodeCapacity := capacityProvider.GetCapacity()
allocatableReservation := capacityProvider.GetNodeAllocatableReservation()
memoryAllocatableCapacity, memoryAllocatableAvailable, exist := getResourceAllocatable(nodeCapacity, allocatableReservation, v1.ResourceMemory)
if exist {
if memoryAllocatableCapacity, ok := capacityProvider.GetCapacity()[v1.ResourceMemory]; ok {
memoryAllocatableAvailable := memoryAllocatableCapacity.Copy()
if reserved, exists := capacityProvider.GetNodeAllocatableReservation()[v1.ResourceMemory]; exists {
memoryAllocatableAvailable.Sub(reserved)
}
for _, pod := range summary.Pods {
mu, err := podMemoryUsage(pod)
if err == nil {
@@ -769,55 +757,15 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
}
result[evictionapi.SignalAllocatableMemoryAvailable] = signalObservation{
available: memoryAllocatableAvailable,
capacity: memoryAllocatableCapacity,
}
}
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
ephemeralStorageCapacity, ephemeralStorageAllocatable, exist := getResourceAllocatable(nodeCapacity, allocatableReservation, v1.ResourceEphemeralStorage)
if exist {
for _, pod := range pods {
podStat, ok := statsFunc(pod)
if !ok {
continue
}
fsStatsSet := []fsStatsType{}
if withImageFs {
fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
} else {
fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
}
usage, err := podDiskUsage(podStat, pod, fsStatsSet)
if err != nil {
glog.Warningf("eviction manager: error getting pod disk usage %v", err)
continue
}
ephemeralStorageAllocatable.Sub(usage[resourceDisk])
}
result[evictionapi.SignalAllocatableNodeFsAvailable] = signalObservation{
available: ephemeralStorageAllocatable,
capacity: ephemeralStorageCapacity,
}
capacity: &memoryAllocatableCapacity,
}
} else {
glog.Errorf("Could not find capacity information for resource %v", v1.ResourceMemory)
}
return result, statsFunc, nil
}
func getResourceAllocatable(capacity v1.ResourceList, reservation v1.ResourceList, resourceName v1.ResourceName) (*resource.Quantity, *resource.Quantity, bool) {
if capacity, ok := capacity[resourceName]; ok {
allocate := capacity.Copy()
if reserved, exists := reservation[resourceName]; exists {
allocate.Sub(reserved)
}
return capacity.Copy(), allocate, true
}
glog.Errorf("Could not find capacity information for resource %v", resourceName)
return nil, nil, false
}
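The removed getResourceAllocatable helper and the surviving inlined memory path compute the same quantity: available = capacity - allocatable reservation, with each pod's observed usage then subtracted. A standalone sketch of that arithmetic using the resource.Quantity API (the helper and values below are illustrative, not kubelet code):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// allocatableAvailable mirrors the removed helper's arithmetic plus the
// per-pod usage subtraction: available = capacity - reserved - sum(usage).
func allocatableAvailable(capacity, reserved resource.Quantity, podUsage []resource.Quantity) *resource.Quantity {
	available := capacity.Copy() // start from node capacity
	available.Sub(reserved)      // subtract the kube/system reservation
	for _, u := range podUsage {
		available.Sub(u) // charge each pod's observed usage
	}
	return available
}

func main() {
	capacity := resource.MustParse("6Gi")
	reserved := resource.MustParse("1Gi")
	usage := []resource.Quantity{resource.MustParse("900Mi"), resource.MustParse("1750Mi")}
	// 6Gi - 1Gi - 900Mi - 1750Mi = 2470Mi left before the threshold check.
	fmt.Println(allocatableAvailable(capacity, reserved, usage).String())
}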
// thresholdsMet returns the set of thresholds that were met independent of grace period
func thresholdsMet(thresholds []evictionapi.Threshold, observations signalObservations, enforceMinReclaim bool) []evictionapi.Threshold {
results := []evictionapi.Threshold{}


@@ -78,16 +78,6 @@ func TestParseThresholdConfig(t *testing.T) {
Quantity: quantityMustParse("0"),
},
},
{
Signal: evictionapi.SignalAllocatableNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
},
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
},
},
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
@@ -793,7 +783,7 @@ func TestMakeSignalObservations(t *testing.T) {
if res.CmpInt64(int64(allocatableMemoryCapacity)) != 0 {
t.Errorf("Expected Threshold %v to be equal to value %v", res.Value(), allocatableMemoryCapacity)
}
actualObservations, statsFunc, err := makeSignalObservations(provider, capacityProvider, pods, false)
actualObservations, statsFunc, err := makeSignalObservations(provider, capacityProvider, pods)
if err != nil {
t.Errorf("Unexpected err: %v", err)
}


@@ -119,42 +119,6 @@ var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disru
})
})
// LocalStorageAllocatableEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Node disk pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
var _ = framework.KubeDescribe("LocalStorageAllocatableEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {
f := framework.NewDefaultFramework("localstorageallocatable-eviction-test")
pressureTimeout := 10 * time.Minute
expectedNodeCondition := v1.NodeDiskPressure
Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
// Set up --kube-reserved for scratch storage
tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
diskConsumed := uint64(200000000) // at least 200MB for pods to consume
summary := eventuallyGetSummary()
availableBytes := *(summary.Node.Fs.AvailableBytes)
initialConfig.KubeReserved = map[string]string{
string(v1.ResourceEphemeralStorage): fmt.Sprintf("%d", availableBytes-diskConsumed),
}
initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
initialConfig.CgroupsPerQOS = true
initialConfig.FeatureGates[string(features.LocalStorageCapacityIsolation)] = true
// set evictionHard to be very small, so that only the allocatable eviction threshold triggers
initialConfig.EvictionHard = map[string]string{"nodefs.available": "1"}
initialConfig.EvictionMinimumReclaim = map[string]string{}
framework.Logf("KubeReserved: %+v", initialConfig.KubeReserved)
})
runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
{
evictionPriority: 1,
pod: diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
},
{
evictionPriority: 0,
pod: innocentPod(),
},
})
})
})
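The removed e2e setup forces allocatable (rather than node-level) disk pressure by reserving everything except a small headroom: kube-reserved ephemeral storage is sized as the currently available bytes minus the ~200MB the pods are expected to consume. A standalone sketch of that sizing rule (availableBytes is made up here; the real value comes from the node summary API):

package main

import "fmt"

func main() {
	// Values are illustrative; the removed test read availableBytes from
	// the node's summary API (summary.Node.Fs.AvailableBytes).
	diskConsumed := uint64(200000000)     // ~200MB of headroom left for pods
	availableBytes := uint64(10000000000) // hypothetical current free bytes
	kubeReserved := availableBytes - diskConsumed
	// Reserving all but ~200MB means a pod writing more than that breaches
	// the allocatable threshold long before the node fs itself fills up.
	fmt.Printf("kube-reserved ephemeral-storage: %d bytes\n", kubeReserved)
}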
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods
// Disk pressure is induced by running pods which consume disk space.
var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive] [Flaky]", func() {