collect metrics on the /kubepods cgroup on-demand

pull/6/head
David Ashpole 2018-02-16 14:53:44 -08:00
parent 1a6a01ee79
commit 960856f4e8
17 changed files with 198 additions and 155 deletions
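In short: the commit adds a "pods" system container, backed by the pod cgroup root (/kubepods under the cgroupfs driver), to the kubelet's stats Summary, and switches the eviction manager's allocatableMemory.available signal to read from that container instead of computing it via a CapacityProvider. A minimal, hedged sketch of how a Summary consumer could pick out the new container (the helper name and import alias are illustrative, not part of this commit):

import (
	// stats API package path as of this era of the tree; adjust if vendored differently
	statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)

// podsCgroupMemory returns the memory stats of the "pods" system container
// introduced by this commit, or nil if the summary does not include it.
func podsCgroupMemory(summary *statsapi.Summary) *statsapi.MemoryStats {
	for _, c := range summary.Node.SystemContainers {
		if c.Name == statsapi.SystemContainerPods { // "pods"
			return c.Memory
		}
	}
	return nil
}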

View File

@ -87,6 +87,8 @@ const (
SystemContainerRuntime = "runtime"
// SystemContainerMisc is the container name for the system container tracking non-kubernetes processes.
SystemContainerMisc = "misc"
// SystemContainerPods is the container name for the system container tracking user pods.
SystemContainerPods = "pods"
)
// PodStats holds pod-level unprocessed sample stats.

View File

@ -91,6 +91,9 @@ type ContainerManager interface {
UpdatePluginResources(*schedulercache.NodeInfo, *lifecycle.PodAdmitAttributes) error
InternalContainerLifecycle() InternalContainerLifecycle
// GetPodCgroupRoot returns the cgroup which contains all pods.
GetPodCgroupRoot() string
}
type NodeConfig struct {

View File

@ -500,6 +500,11 @@ func (cm *containerManagerImpl) GetNodeConfig() NodeConfig {
return cm.NodeConfig
}
// GetPodCgroupRoot returns the literal cgroupfs value for the cgroup containing all pods.
func (cm *containerManagerImpl) GetPodCgroupRoot() string {
return cm.cgroupManager.Name(CgroupName(cm.cgroupRoot))
}
func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems {
return cm.subsystems
}
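GetPodCgroupRoot is what lets the summary provider (further down in this commit) address the pods cgroup by its literal cgroupfs name. A hedged usage sketch, assuming a ContainerManager named cm and a StatsProvider named provider (names chosen for illustration):

// Resolve the pods cgroup (e.g. "/kubepods" with the cgroupfs driver, as in the
// updated TestSummaryProvider) and force an on-demand stats update for it.
podCgroup := cm.GetPodCgroupRoot()
s, netStats, err := provider.GetCgroupStats(podCgroup, true /* updateStats */)
if err != nil {
	glog.Errorf("failed to get stats for pods cgroup %q: %v", podCgroup, err)
}
_, _ = s, netStats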

View File

@ -90,6 +90,10 @@ func (cm *containerManagerStub) InternalContainerLifecycle() InternalContainerLi
return &internalContainerLifecycleImpl{cpumanager.NewFakeManager()}
}
func (cm *containerManagerStub) GetPodCgroupRoot() string {
return ""
}
func NewStubContainerManager() ContainerManager {
return &containerManagerStub{}
}

View File

@ -149,11 +149,11 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
}
// Start starts the control loop to observe and respond to low compute resources.
func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, capacityProvider CapacityProvider, monitoringInterval time.Duration) {
func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration) {
// start the eviction manager monitoring
go func() {
for {
if evictedPods := m.synchronize(diskInfoProvider, podFunc, capacityProvider); evictedPods != nil {
if evictedPods := m.synchronize(diskInfoProvider, podFunc); evictedPods != nil {
glog.Infof("eviction manager: pods %s evicted, waiting for pod to be cleaned up", format.Pods(evictedPods))
m.waitForPodsCleanup(podCleanedUpFunc, evictedPods)
} else {
@ -219,7 +219,7 @@ func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observatio
// synchronize is the main control loop that enforces eviction thresholds.
// Returns the pod that was killed, or nil if no pod was killed.
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, capacityProvider CapacityProvider) []*v1.Pod {
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
// if we have nothing to do, just return
thresholds := m.config.Thresholds
if len(thresholds) == 0 {
@ -248,7 +248,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
}
// make observations and get a function to derive pod usage stats relative to those observations.
observations, statsFunc := makeSignalObservations(summary, capacityProvider, activePods)
observations, statsFunc := makeSignalObservations(summary)
debugLogObservations("observations", observations)
// attempt to create a threshold notifier to improve eviction response time
@ -259,7 +259,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {
glog.Infof("soft memory eviction threshold crossed at %s", desc)
// TODO wait grace period for soft memory limit
m.synchronize(diskInfoProvider, podFunc, capacityProvider)
m.synchronize(diskInfoProvider, podFunc)
})
if err != nil {
glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err)
@ -267,7 +267,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// start hard memory notification
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, true, func(desc string) {
glog.Infof("hard memory eviction threshold crossed at %s", desc)
m.synchronize(diskInfoProvider, podFunc, capacityProvider)
m.synchronize(diskInfoProvider, podFunc)
})
if err != nil {
glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err)
@ -349,7 +349,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
if m.reclaimNodeLevelResources(resourceToReclaim, capacityProvider, activePods) {
if m.reclaimNodeLevelResources(resourceToReclaim) {
glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
return nil
}
@ -437,7 +437,7 @@ func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods
}
// reclaimNodeLevelResources attempts to reclaim node-level resources. Returns true if thresholds were satisfied and no pod eviction is required.
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, capacityProvider CapacityProvider, pods []*v1.Pod) bool {
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName) bool {
nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
for _, nodeReclaimFunc := range nodeReclaimFuncs {
// attempt to reclaim the pressured resource.
@ -454,7 +454,7 @@ func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceNam
}
// make observations and get a function to derive pod usage stats relative to those observations.
observations, _ := makeSignalObservations(summary, capacityProvider, pods)
observations, _ := makeSignalObservations(summary)
debugLogObservations("observations after resource reclaim", observations)
// determine the set of thresholds met independent of grace period

View File

@ -65,27 +65,6 @@ func (m *mockDiskInfoProvider) HasDedicatedImageFs() (bool, error) {
return m.dedicatedImageFs, nil
}
func newMockCapacityProvider(capacity, reservation v1.ResourceList) *mockCapacityProvider {
return &mockCapacityProvider{
capacity: capacity,
reservation: reservation,
}
}
type mockCapacityProvider struct {
capacity v1.ResourceList
reservation v1.ResourceList
}
func (m *mockCapacityProvider) GetCapacity() v1.ResourceList {
return m.capacity
}
func (m *mockCapacityProvider) GetNodeAllocatableReservation() v1.ResourceList {
return m.reservation
}
// mockDiskGC is used to simulate invoking image and container garbage collection.
type mockDiskGC struct {
err error
@ -139,6 +118,15 @@ func makeMemoryStats(nodeAvailableBytes string, podStats map[*v1.Pod]statsapi.Po
AvailableBytes: &availableBytes,
WorkingSetBytes: &WorkingSetBytes,
},
SystemContainers: []statsapi.ContainerStats{
{
Name: statsapi.SystemContainerPods,
Memory: &statsapi.MemoryStats{
AvailableBytes: &availableBytes,
WorkingSetBytes: &WorkingSetBytes,
},
},
},
},
Pods: []statsapi.PodStats{},
}
@ -217,7 +205,6 @@ func TestMemoryPressure(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
@ -261,7 +248,7 @@ func TestMemoryPressure(t *testing.T) {
burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -279,7 +266,7 @@ func TestMemoryPressure(t *testing.T) {
// induce soft threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -294,7 +281,7 @@ func TestMemoryPressure(t *testing.T) {
// step forward in time past the grace period
fakeClock.Step(3 * time.Minute)
summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -319,7 +306,7 @@ func TestMemoryPressure(t *testing.T) {
// remove memory pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -329,7 +316,7 @@ func TestMemoryPressure(t *testing.T) {
// induce memory pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -357,7 +344,7 @@ func TestMemoryPressure(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure (because transition period not yet met)
if !manager.IsUnderMemoryPressure() {
@ -381,7 +368,7 @@ func TestMemoryPressure(t *testing.T) {
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure (because transition period met)
if manager.IsUnderMemoryPressure() {
@ -439,7 +426,6 @@ func TestDiskPressureNodeFs(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
@ -482,7 +468,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -497,7 +483,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
// induce soft threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -512,7 +498,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
// step forward in time past the grace period
fakeClock.Step(3 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -537,7 +523,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
// remove disk pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -547,7 +533,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
// induce disk pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("500Mi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -572,7 +558,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
@ -593,7 +579,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
@ -638,7 +624,6 @@ func TestMinReclaim(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
@ -673,7 +658,7 @@ func TestMinReclaim(t *testing.T) {
}
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -683,7 +668,7 @@ func TestMinReclaim(t *testing.T) {
// induce memory pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -703,7 +688,7 @@ func TestMinReclaim(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure (because transition period not yet met)
if !manager.IsUnderMemoryPressure() {
@ -723,7 +708,7 @@ func TestMinReclaim(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure (because transition period not yet met)
if !manager.IsUnderMemoryPressure() {
@ -739,7 +724,7 @@ func TestMinReclaim(t *testing.T) {
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure (because transition period met)
if manager.IsUnderMemoryPressure() {
@ -781,7 +766,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -816,7 +800,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
}
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -828,7 +812,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
// make GC successfully return disk usage to previous levels
diskGC.summaryAfterGC = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -851,7 +835,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
// remove disk pressure
fakeClock.Step(20 * time.Minute)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -863,7 +848,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)
// Don't reclaim any disk
diskGC.summaryAfterGC = summaryStatsMaker("400Mi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -890,7 +875,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
diskGC.imageGCInvoked = false // reset state
diskGC.containerGCInvoked = false // reset state
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
@ -913,7 +898,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
diskGC.imageGCInvoked = false // reset state
diskGC.containerGCInvoked = false // reset state
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
@ -981,7 +966,6 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
@ -1024,7 +1008,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0", "0", "0")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -1039,7 +1023,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
// induce soft threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -1054,7 +1038,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
// step forward in time past the grace period
fakeClock.Step(3 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -1079,7 +1063,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
// remove inode pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -1089,7 +1073,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
// induce inode pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("0.5Mi", "4Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -1114,7 +1098,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
@ -1135,7 +1119,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
@ -1184,7 +1168,6 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{
Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
@ -1230,7 +1213,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
// induce soft threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -1245,7 +1228,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
// step forward in time past the grace period
fakeClock.Step(3 * time.Minute)
summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -1263,7 +1246,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
// remove memory pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -1276,7 +1259,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
// induce memory pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -1294,7 +1277,6 @@ func TestAllocatableMemoryPressure(t *testing.T) {
utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.PodPriority): true})
podMaker := makePodWithMemoryStats
summaryStatsMaker := makeMemoryStats
constantCapacity := "4Gi"
podsToMake := []podToMake{
{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
@ -1317,7 +1299,6 @@ func TestAllocatableMemoryPressure(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
@ -1329,12 +1310,12 @@ func TestAllocatableMemoryPressure(t *testing.T) {
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Ki"),
Quantity: quantityMustParse("1Gi"),
},
},
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(constantCapacity, podStats)}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("4Gi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
@ -1353,7 +1334,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -1372,8 +1353,8 @@ func TestAllocatableMemoryPressure(t *testing.T) {
fakeClock.Step(1 * time.Minute)
pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi", ""), newResourceList("100m", "1Gi", ""), "1Gi")
podStats[pod] = podStat
summaryProvider.result = summaryStatsMaker(constantCapacity, podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
summaryProvider.result = summaryStatsMaker("500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -1407,9 +1388,9 @@ func TestAllocatableMemoryPressure(t *testing.T) {
delete(podStats, pod)
}
}
summaryProvider.result = summaryStatsMaker(constantCapacity, podStats)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure (because transition period not yet met)
if !manager.IsUnderMemoryPressure() {
@ -1431,9 +1412,9 @@ func TestAllocatableMemoryPressure(t *testing.T) {
// move the clock past transition period to ensure that we stop reporting pressure
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker(constantCapacity, podStats)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure (because transition period met)
if manager.IsUnderMemoryPressure() {

View File

@ -100,9 +100,6 @@ func validSignal(signal evictionapi.Signal) bool {
// ParseThresholdConfig parses the flags for thresholds.
func ParseThresholdConfig(allocatableConfig []string, evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim map[string]string) ([]evictionapi.Threshold, error) {
results := []evictionapi.Threshold{}
allocatableThresholds := getAllocatableThreshold(allocatableConfig)
results = append(results, allocatableThresholds...)
hardThresholds, err := parseThresholdStatements(evictionHard)
if err != nil {
return nil, err
@ -137,9 +134,31 @@ func ParseThresholdConfig(allocatableConfig []string, evictionHard, evictionSoft
}
}
}
for _, key := range allocatableConfig {
if key == kubetypes.NodeAllocatableEnforcementKey {
results = addAllocatableThresholds(results)
break
}
}
return results, nil
}
func addAllocatableThresholds(thresholds []evictionapi.Threshold) []evictionapi.Threshold {
additionalThresholds := []evictionapi.Threshold{}
for _, threshold := range thresholds {
if threshold.Signal == evictionapi.SignalMemoryAvailable && isHardEvictionThreshold(threshold) {
// Copy the SignalMemoryAvailable to SignalAllocatableMemoryAvailable
additionalThresholds = append(additionalThresholds, evictionapi.Threshold{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: threshold.Operator,
Value: threshold.Value,
MinReclaim: threshold.MinReclaim,
})
}
}
return append(thresholds, additionalThresholds...)
}
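The effect of addAllocatableThresholds is that the allocatable threshold now mirrors the configured hard memory.available threshold rather than being hard-coded to zero (compare the removed getAllocatableThreshold below and the 150Mi expectation in the updated TestParseThresholdConfig). A hedged illustration with example values:

// Illustrative only: enforcing node allocatable for pods plus a hard
// memory.available<150Mi threshold yields a mirrored allocatable threshold.
thresholds, err := ParseThresholdConfig(
	[]string{kubetypes.NodeAllocatableEnforcementKey}, // i.e. "pods"
	map[string]string{"memory.available": "150Mi"},    // --eviction-hard
	nil, nil, nil, // no soft thresholds, grace periods, or minimum reclaims
)
// Expect thresholds to contain both memory.available<150Mi and an
// allocatableMemory.available<150Mi copy with the same operator, value, and MinReclaim.
_, _ = thresholds, err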
// parseThresholdStatements parses the input statements into a list of Threshold objects.
func parseThresholdStatements(statements map[string]string) ([]evictionapi.Threshold, error) {
if len(statements) == 0 {
@ -204,27 +223,6 @@ func parseThresholdStatement(signal evictionapi.Signal, val string) (*evictionap
}, nil
}
// getAllocatableThreshold returns the thresholds applicable for the allocatable configuration
func getAllocatableThreshold(allocatableConfig []string) []evictionapi.Threshold {
for _, key := range allocatableConfig {
if key == kubetypes.NodeAllocatableEnforcementKey {
return []evictionapi.Threshold{
{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(int64(0), resource.BinarySI),
},
MinReclaim: &evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(int64(0), resource.BinarySI),
},
},
}
}
}
return []evictionapi.Threshold{}
}
// parsePercentage parses a string representing a percentage value
func parsePercentage(input string) (float32, error) {
value, err := strconv.ParseFloat(strings.TrimRight(input, "%"), 32)
@ -724,7 +722,7 @@ func (a byEvictionPriority) Less(i, j int) bool {
}
// makeSignalObservations derives observations from the specified summary.
func makeSignalObservations(summary *statsapi.Summary, capacityProvider CapacityProvider, pods []*v1.Pod) (signalObservations, statsFunc) {
func makeSignalObservations(summary *statsapi.Summary) (signalObservations, statsFunc) {
// build the function to work against for pod stats
statsFunc := cachedStatsFunc(summary.Pods)
// build an evaluation context for current eviction signals
@ -737,6 +735,17 @@ func makeSignalObservations(summary *statsapi.Summary, capacityProvider Capacity
time: memory.Time,
}
}
if allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods); err != nil {
glog.Errorf("eviction manager: failed to construct signal: %q error: %v", evictionapi.SignalAllocatableMemoryAvailable, err)
} else {
if memory := allocatableContainer.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil {
result[evictionapi.SignalAllocatableMemoryAvailable] = signalObservation{
available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
time: memory.Time,
}
}
}
if nodeFs := summary.Node.Fs; nodeFs != nil {
if nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
result[evictionapi.SignalNodeFsAvailable] = signalObservation{
@ -771,7 +780,6 @@ func makeSignalObservations(summary *statsapi.Summary, capacityProvider Capacity
}
}
}
if rlimit := summary.Node.Rlimit; rlimit != nil {
if rlimit.NumOfRunningProcesses != nil && rlimit.MaxPID != nil {
available := int64(*rlimit.MaxPID) - int64(*rlimit.NumOfRunningProcesses)
@ -782,29 +790,18 @@ func makeSignalObservations(summary *statsapi.Summary, capacityProvider Capacity
}
}
}
if memoryAllocatableCapacity, ok := capacityProvider.GetCapacity()[v1.ResourceMemory]; ok {
memoryAllocatableAvailable := memoryAllocatableCapacity.Copy()
if reserved, exists := capacityProvider.GetNodeAllocatableReservation()[v1.ResourceMemory]; exists {
memoryAllocatableAvailable.Sub(reserved)
}
for _, pod := range summary.Pods {
mu, err := podMemoryUsage(pod)
if err == nil {
memoryAllocatableAvailable.Sub(mu[v1.ResourceMemory])
}
}
result[evictionapi.SignalAllocatableMemoryAvailable] = signalObservation{
available: memoryAllocatableAvailable,
capacity: &memoryAllocatableCapacity,
}
} else {
glog.Errorf("Could not find capacity information for resource %v", v1.ResourceMemory)
}
return result, statsFunc
}
func getSysContainer(sysContainers []statsapi.ContainerStats, name string) (*statsapi.ContainerStats, error) {
for _, cont := range sysContainers {
if cont.Name == name {
return &cont, nil
}
}
return nil, fmt.Errorf("system container %q not found in metrics", name)
}
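Because the summary only reports available and working-set bytes for the pods cgroup, the signal's capacity is reconstructed as their sum, which corresponds to the memory limit applied to the pods cgroup by Node Allocatable enforcement. A small hedged restatement with made-up numbers:

// Illustrative values: a 4Gi pods cgroup limit with 1Gi of working set in use.
var availableBytes, workingSetBytes uint64 = 3 << 30, 1 << 30
available := resource.NewQuantity(int64(availableBytes), resource.BinarySI)                // 3Gi
capacity := resource.NewQuantity(int64(availableBytes+workingSetBytes), resource.BinarySI) // 4Gi
_, _ = available, capacity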
// thresholdsMet returns the set of thresholds that were met independent of grace period
func thresholdsMet(thresholds []evictionapi.Threshold, observations signalObservations, enforceMinReclaim bool) []evictionapi.Threshold {
results := []evictionapi.Threshold{}

View File

@ -72,7 +72,7 @@ func TestParseThresholdConfig(t *testing.T) {
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
Quantity: quantityMustParse("150Mi"),
},
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
@ -1004,6 +1004,15 @@ func TestMakeSignalObservations(t *testing.T) {
InodesFree: &nodeFsInodesFree,
Inodes: &nodeFsInodes,
},
SystemContainers: []statsapi.ContainerStats{
{
Name: statsapi.SystemContainerPods,
Memory: &statsapi.MemoryStats{
AvailableBytes: &nodeAvailableBytes,
WorkingSetBytes: &nodeWorkingSetBytes,
},
},
},
},
Pods: []statsapi.PodStats{},
}
@ -1017,20 +1026,19 @@ func TestMakeSignalObservations(t *testing.T) {
fakeStats.Pods = append(fakeStats.Pods, newPodStats(pod, containerWorkingSetBytes))
}
res := quantityMustParse("5Gi")
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("5Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("0Gi")})
// Allocatable thresholds are always 100%. Verify that Threshold == Capacity.
if res.CmpInt64(int64(allocatableMemoryCapacity)) != 0 {
t.Errorf("Expected Threshold %v to be equal to value %v", res.Value(), allocatableMemoryCapacity)
}
actualObservations, statsFunc := makeSignalObservations(fakeStats, capacityProvider, pods)
actualObservations, statsFunc := makeSignalObservations(fakeStats)
allocatableMemQuantity, found := actualObservations[evictionapi.SignalAllocatableMemoryAvailable]
if !found {
t.Errorf("Expected allocatable memory observation, but didnt find one")
}
if allocatableMemQuantity.available.Value() != 2*containerWorkingSetBytes {
t.Errorf("Expected %v, actual: %v", containerWorkingSetBytes, allocatableMemQuantity.available.Value())
if expectedBytes := int64(nodeAvailableBytes); allocatableMemQuantity.available.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, allocatableMemQuantity.available.Value())
}
if expectedBytes := int64(allocatableMemoryCapacity); allocatableMemQuantity.capacity.Value() != expectedBytes {
if expectedBytes := int64(nodeWorkingSetBytes + nodeAvailableBytes); allocatableMemQuantity.capacity.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, allocatableMemQuantity.capacity.Value())
}
memQuantity, found := actualObservations[evictionapi.SignalMemoryAvailable]

View File

@ -53,7 +53,7 @@ type Config struct {
// Manager evaluates when an eviction threshold for node stability has been met on the node.
type Manager interface {
// Start starts the control loop to monitor eviction thresholds at specified interval.
Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, capacityProvider CapacityProvider, monitoringInterval time.Duration)
Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration)
// IsUnderMemoryPressure returns true if the node is under memory pressure.
IsUnderMemoryPressure() bool
@ -71,14 +71,6 @@ type DiskInfoProvider interface {
HasDedicatedImageFs() (bool, error)
}
// CapacityProvider is responsible for providing the resource capacity and reservation information
type CapacityProvider interface {
// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
GetCapacity() v1.ResourceList
// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
GetNodeAllocatableReservation() v1.ResourceList
}
// ImageGC is responsible for performing garbage collection of unused images.
type ImageGC interface {
// DeleteUnusedImages deletes unused images.

View File

@ -1318,7 +1318,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
glog.Fatalf("Failed to start cAdvisor %v", err)
}
// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
kl.evictionManager.Start(kl.StatsProvider, kl.GetActivePods, kl.podResourcesAreReclaimed, kl.containerManager, evictionMonitoringPeriod)
kl.evictionManager.Start(kl.StatsProvider, kl.GetActivePods, kl.podResourcesAreReclaimed, evictionMonitoringPeriod)
// trigger on-demand stats collection once so that we have capacity information for ephemeral storage.
// ignore any errors, since if stats collection is not successful, the container manager will fail to start below.

View File

@ -211,6 +211,11 @@ func (kl *Kubelet) GetNodeConfig() cm.NodeConfig {
return kl.containerManager.GetNodeConfig()
}
// GetPodCgroupRoot returns the literal cgroupfs value for the cgroup containing all pods
func (kl *Kubelet) GetPodCgroupRoot() string {
return kl.containerManager.GetPodCgroupRoot()
}
// GetHostIP returns host IP or nil in case of error.
func (kl *Kubelet) GetHostIP() (net.IP, error) {
node, err := kl.GetNode()

View File

@ -168,6 +168,7 @@ func (fk *fakeKubelet) StreamingConnectionIdleTimeout() time.Duration {
// Unused functions
func (_ *fakeKubelet) GetNode() (*v1.Node, error) { return nil, nil }
func (_ *fakeKubelet) GetNodeConfig() cm.NodeConfig { return cm.NodeConfig{} }
func (_ *fakeKubelet) GetPodCgroupRoot() string { return "" }
func (fk *fakeKubelet) ListVolumesForPod(podUID types.UID) (map[string]volume.Volume, bool) {
return map[string]volume.Volume{}, true

View File

@ -81,6 +81,9 @@ type StatsProvider interface {
// RlimitStats returns the rlimit stats of system.
RlimitStats() (*statsapi.RlimitStats, error)
// GetPodCgroupRoot returns the literal cgroupfs value for the cgroup containing all pods
GetPodCgroupRoot() string
}
type handler struct {

View File

@ -83,19 +83,23 @@ func (sp *summaryProviderImpl) Get(updateStats bool) (*statsapi.Summary, error)
Rlimit: rlimit,
}
systemContainers := map[string]string{
statsapi.SystemContainerKubelet: nodeConfig.KubeletCgroupsName,
statsapi.SystemContainerRuntime: nodeConfig.RuntimeCgroupsName,
statsapi.SystemContainerMisc: nodeConfig.SystemCgroupsName,
systemContainers := map[string]struct {
name string
forceStatsUpdate bool
}{
statsapi.SystemContainerKubelet: {nodeConfig.KubeletCgroupsName, false},
statsapi.SystemContainerRuntime: {nodeConfig.RuntimeCgroupsName, false},
statsapi.SystemContainerMisc: {nodeConfig.SystemCgroupsName, false},
statsapi.SystemContainerPods: {sp.provider.GetPodCgroupRoot(), updateStats},
}
for sys, name := range systemContainers {
for sys, cont := range systemContainers {
// skip if cgroup name is undefined (not all system containers are required)
if name == "" {
if cont.name == "" {
continue
}
s, _, err := sp.provider.GetCgroupStats(name, false)
s, _, err := sp.provider.GetCgroupStats(cont.name, cont.forceStatsUpdate)
if err != nil {
glog.Errorf("Failed to get system container stats for %q: %v", name, err)
glog.Errorf("Failed to get system container stats for %q: %v", cont.name, err)
continue
}
// System containers don't have a filesystem associated with them.
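The forceStatsUpdate flag is what makes the /kubepods collection on-demand: the pods entry passes through the caller's updateStats, so Get(true) triggers a fresh cgroup stats update for the pods cgroup while the kubelet, runtime, and misc containers keep using cached values. A hedged sketch of the resulting calls, using the cgroup names from the updated TestSummaryProvider (statsProvider is an assumed StatsProvider):

// With updateStats=true, the loop above issues roughly:
//   provider.GetCgroupStats("/kubelet", false)  // cached
//   provider.GetCgroupStats("/runtime", false)  // cached
//   provider.GetCgroupStats("/misc", false)     // cached
//   provider.GetCgroupStats("/kubepods", true)  // forced, on-demand update
summary, err := NewSummaryProvider(statsProvider).Get(true)
_, _ = summary, err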

View File

@ -49,6 +49,7 @@ func TestSummaryProvider(t *testing.T) {
SystemCgroupsName: "/misc",
KubeletCgroupsName: "/kubelet",
}
cgroupRoot = "/kubepods"
cgroupStatsMap = map[string]struct {
cs *statsapi.ContainerStats
ns *statsapi.NetworkStats
@ -57,6 +58,7 @@ func TestSummaryProvider(t *testing.T) {
"/runtime": {cs: getContainerStats(), ns: getNetworkStats()},
"/misc": {cs: getContainerStats(), ns: getNetworkStats()},
"/kubelet": {cs: getContainerStats(), ns: getNetworkStats()},
"/pods": {cs: getContainerStats(), ns: getNetworkStats()},
}
rlimitStats = getRlimitStats()
)
@ -67,6 +69,7 @@ func TestSummaryProvider(t *testing.T) {
mockStatsProvider.
On("GetNode").Return(node, nil).
On("GetNodeConfig").Return(nodeConfig).
On("GetPodCgroupRoot").Return(cgroupRoot).
On("ListPodStats").Return(podStats, nil).
On("ImageFsStats").Return(imageFsStats, nil).
On("RootFsStats").Return(rootFsStats, nil).
@ -74,7 +77,8 @@ func TestSummaryProvider(t *testing.T) {
On("GetCgroupStats", "/", true).Return(cgroupStatsMap["/"].cs, cgroupStatsMap["/"].ns, nil).
On("GetCgroupStats", "/runtime", false).Return(cgroupStatsMap["/runtime"].cs, cgroupStatsMap["/runtime"].ns, nil).
On("GetCgroupStats", "/misc", false).Return(cgroupStatsMap["/misc"].cs, cgroupStatsMap["/misc"].ns, nil).
On("GetCgroupStats", "/kubelet", false).Return(cgroupStatsMap["/kubelet"].cs, cgroupStatsMap["/kubelet"].ns, nil)
On("GetCgroupStats", "/kubelet", false).Return(cgroupStatsMap["/kubelet"].cs, cgroupStatsMap["/kubelet"].ns, nil).
On("GetCgroupStats", "/kubepods", true).Return(cgroupStatsMap["/pods"].cs, cgroupStatsMap["/pods"].ns, nil)
provider := NewSummaryProvider(mockStatsProvider)
summary, err := provider.Get(true)
@ -88,7 +92,7 @@ func TestSummaryProvider(t *testing.T) {
assert.Equal(summary.Node.Fs, rootFsStats)
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ImageFs: imageFsStats})
assert.Equal(len(summary.Node.SystemContainers), 3)
assert.Equal(len(summary.Node.SystemContainers), 4)
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "kubelet",
StartTime: cgroupStatsMap["/kubelet"].cs.StartTime,
@ -113,6 +117,14 @@ func TestSummaryProvider(t *testing.T) {
Accelerators: cgroupStatsMap["/runtime"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/runtime"].cs.UserDefinedMetrics,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "pods",
StartTime: cgroupStatsMap["/pods"].cs.StartTime,
CPU: cgroupStatsMap["/pods"].cs.CPU,
Memory: cgroupStatsMap["/pods"].cs.Memory,
Accelerators: cgroupStatsMap["/pods"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/pods"].cs.UserDefinedMetrics,
})
assert.Equal(summary.Pods, podStats)
}

View File

@ -125,6 +125,20 @@ func (_m *StatsProvider) GetNodeConfig() cm.NodeConfig {
return r0
}
// GetPodCgroupRoot provides a mock function with given fields:
func (_m *StatsProvider) GetPodCgroupRoot() string {
ret := _m.Called()
var r0 string
if rf, ok := ret.Get(0).(func() string); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(string)
}
return r0
}
// GetPodByName provides a mock function with given fields: namespace, name
func (_m *StatsProvider) GetPodByName(namespace string, name string) (*corev1.Pod, bool) {
ret := _m.Called(namespace, name)

View File

@ -107,9 +107,21 @@ var _ = framework.KubeDescribe("Summary API", func() {
"UserDefinedMetrics": BeEmpty(),
})
}
podsContExpectations := sysContExpectations().(*gstruct.FieldsMatcher)
podsContExpectations.Fields["Memory"] = ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
// Pods are limited by Node Allocatable
"AvailableBytes": bounded(1*framework.Kb, memoryLimit),
"UsageBytes": bounded(10*framework.Kb, 20*framework.Mb),
"WorkingSetBytes": bounded(10*framework.Kb, 20*framework.Mb),
"RSSBytes": bounded(1*framework.Kb, 20*framework.Mb),
"PageFaults": bounded(0, 1000000),
"MajorPageFaults": bounded(0, 10),
})
systemContainers := gstruct.Elements{
"kubelet": sysContExpectations(),
"runtime": sysContExpectations(),
"pods": podsContExpectations,
}
// The Kubelet only manages the 'misc' system container if the host is not running systemd.
if !systemdutil.IsRunningSystemd() {