mirror of https://github.com/k3s-io/k3s
Eviction manager observes and acts on disk pressure
parent c669778333
commit c3324b88a0
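This change teaches the eviction manager to observe disk usage signals (nodefs.available and imagefs.available) alongside memory.available, to report a new DiskPressure node condition, and to rank and evict pods by their disk consumption across container writable layers, container logs, and disk-backed local volumes. The ranking functions are chosen at startup depending on whether the container runtime has a dedicated image filesystem, and the kubelet publishes the condition through its node status sync loop. Thresholds use the existing eviction statement syntax, e.g. nodefs.available<1Gi.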
@@ -2069,6 +2069,8 @@ const (
    NodeOutOfDisk NodeConditionType = "OutOfDisk"
    // NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory.
    NodeMemoryPressure NodeConditionType = "MemoryPressure"
    // NodeDiskPressure means the kubelet is under pressure due to insufficient available disk.
    NodeDiskPressure NodeConditionType = "DiskPressure"
    // NodeNetworkUnavailable means that network for the node is not correctly configured.
    NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable"
)

@@ -2470,6 +2470,8 @@ const (
    NodeOutOfDisk NodeConditionType = "OutOfDisk"
    // NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory.
    NodeMemoryPressure NodeConditionType = "MemoryPressure"
    // NodeDiskPressure means the kubelet is under pressure due to insufficient available disk.
    NodeDiskPressure NodeConditionType = "DiskPressure"
    // NodeNetworkUnavailable means that network for the node is not correctly configured.
    NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable"
)
@@ -54,6 +54,8 @@ type managerImpl struct {
    summaryProvider stats.SummaryProvider
    // records when a threshold was first observed
    thresholdsFirstObservedAt thresholdsObservedAt
    // resourceToRankFunc maps a resource to ranking function for that resource.
    resourceToRankFunc map[api.ResourceName]rankFunc
}

// ensure it implements the required interface
@@ -87,12 +89,17 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
    if len(m.nodeConditions) == 0 {
        return lifecycle.PodAdmitResult{Admit: true}
    }
    notBestEffort := qos.BestEffort != qos.GetPodQOS(attrs.Pod)
    if notBestEffort {
        return lifecycle.PodAdmitResult{Admit: true}

    // the node has memory pressure, admit if not best-effort
    if hasNodeCondition(m.nodeConditions, api.NodeMemoryPressure) {
        notBestEffort := qos.BestEffort != qos.GetPodQOS(attrs.Pod)
        if notBestEffort {
            return lifecycle.PodAdmitResult{Admit: true}
        }
    }

    // reject pods when under memory pressure (if pod is best effort), or if under disk pressure.
    glog.Warningf("Failed to admit pod %v - node has conditions: %v", format.Pod(attrs.Pod), m.nodeConditions)
    // we reject all best effort pods until we are stable.
    return lifecycle.PodAdmitResult{
        Admit:  false,
        Reason: reason,
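In effect: with no pressure conditions every pod is admitted; with only MemoryPressure, Burstable and Guaranteed pods are admitted while BestEffort pods are rejected; with DiskPressure, all pods are rejected until the condition clears.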
@@ -102,6 +109,14 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd

// Start starts the control loop to observe and respond to low compute resources.
func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, monitoringInterval time.Duration) error {
    // build the ranking functions now that we know if the imagefs is dedicated or not.
    hasDedicatedImageFs, err := diskInfoProvider.HasDedicatedImageFs()
    if err != nil {
        return err
    }
    m.resourceToRankFunc = buildResourceToRankFunc(hasDedicatedImageFs)

    // start the eviction manager monitoring
    go wait.Until(func() { m.synchronize(podFunc) }, monitoringInterval, wait.NeverStop)
    return nil
}
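The ranking functions are fixed once at startup, since whether the runtime has a dedicated imagefs cannot change while the kubelet runs, and the monitor loop then re-evaluates eviction signals every monitoringInterval. A minimal caller-side wiring sketch (the surrounding names here are assumed for illustration, not part of this commit):

    // hypothetical kubelet-side wiring of the eviction manager's Start API
    if err := evictionManager.Start(diskInfoProvider, getActivePods, 10*time.Second); err != nil {
        glog.Errorf("eviction manager: failed to start: %v", err)
    }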
@@ -113,6 +128,13 @@ func (m *managerImpl) IsUnderMemoryPressure() bool {
    return hasNodeCondition(m.nodeConditions, api.NodeMemoryPressure)
}

// IsUnderDiskPressure returns true if the node is under disk pressure.
func (m *managerImpl) IsUnderDiskPressure() bool {
    m.RLock()
    defer m.RUnlock()
    return hasNodeCondition(m.nodeConditions, api.NodeDiskPressure)
}

// synchronize is the main control loop that enforces eviction thresholds.
func (m *managerImpl) synchronize(podFunc ActivePodsFunc) {
    // if we have nothing to do, just return
@@ -175,7 +197,7 @@ func (m *managerImpl) synchronize(podFunc ActivePodsFunc) {
    m.recorder.Eventf(m.nodeRef, api.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)

    // rank the pods for eviction
    rank, ok := resourceToRankFunc[resourceToReclaim]
    rank, ok := m.resourceToRankFunc[resourceToReclaim]
    if !ok {
        glog.Errorf("eviction manager: no ranking function for resource %s", resourceToReclaim)
        return
@@ -48,8 +48,8 @@ func (m *mockPodKiller) killPodNow(pod *api.Pod, status api.PodStatus, gracePeri
func TestMemoryPressure(t *testing.T) {
    podMaker := func(name string, requests api.ResourceList, limits api.ResourceList, memoryWorkingSet string) (*api.Pod, statsapi.PodStats) {
        pod := newPod(name, []api.Container{
            newContainer(name, requests, api.ResourceList{}),
        })
            newContainer(name, requests, limits),
        }, nil)
        podStats := newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet))
        return pod, podStats
    }
@@ -124,6 +124,7 @@ func TestMemoryPressure(t *testing.T) {
        nodeRef:                      nodeRef,
        nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
        thresholdsFirstObservedAt:    thresholdsObservedAt{},
        resourceToRankFunc:           buildResourceToRankFunc(false),
    }

    // create a best effort pod to test admission
@@ -271,3 +272,235 @@ func TestMemoryPressure(t *testing.T) {
        }
    }
}

// parseQuantity parses the specified value (if provided), otherwise it returns a zero-valued quantity
func parseQuantity(value string) resource.Quantity {
    if len(value) == 0 {
        return resource.MustParse("0")
    }
    return resource.MustParse(value)
}

func TestDiskPressureNodeFs(t *testing.T) {
    podMaker := func(name string, requests api.ResourceList, limits api.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string) (*api.Pod, statsapi.PodStats) {
        pod := newPod(name, []api.Container{
            newContainer(name, requests, limits),
        }, nil)
        podStats := newPodDiskStats(pod, parseQuantity(rootFsUsed), parseQuantity(logsUsed), parseQuantity(perLocalVolumeUsed))
        return pod, podStats
    }
    summaryStatsMaker := func(rootFsAvailableBytes, imageFsAvailableBytes string, podStats map[*api.Pod]statsapi.PodStats) *statsapi.Summary {
        rootFsVal := resource.MustParse(rootFsAvailableBytes)
        rootFsBytes := uint64(rootFsVal.Value())
        imageFsVal := resource.MustParse(imageFsAvailableBytes)
        imageFsBytes := uint64(imageFsVal.Value())
        result := &statsapi.Summary{
            Node: statsapi.NodeStats{
                Fs: &statsapi.FsStats{
                    AvailableBytes: &rootFsBytes,
                },
                Runtime: &statsapi.RuntimeStats{
                    ImageFs: &statsapi.FsStats{
                        AvailableBytes: &imageFsBytes,
                    },
                },
            },
            Pods: []statsapi.PodStats{},
        }
        for _, podStat := range podStats {
            result.Pods = append(result.Pods, podStat)
        }
        return result
    }
    podsToMake := []struct {
        name               string
        requests           api.ResourceList
        limits             api.ResourceList
        rootFsUsed         string
        logsFsUsed         string
        perLocalVolumeUsed string
    }{
        {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "500Mi"},
        {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "300Mi"},
        {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "800Mi"},
        {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "300Mi"},
        {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "800Mi"},
        {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "200Mi"},
    }
    pods := []*api.Pod{}
    podStats := map[*api.Pod]statsapi.PodStats{}
    for _, podToMake := range podsToMake {
        pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
        pods = append(pods, pod)
        podStats[pod] = podStat
    }
    activePodsFunc := func() []*api.Pod {
        return pods
    }

    fakeClock := util.NewFakeClock(time.Now())
    podKiller := &mockPodKiller{}
    nodeRef := &api.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

    config := Config{
        MaxPodGracePeriodSeconds: 5,
        PressureTransitionPeriod: time.Minute * 5,
        Thresholds: []Threshold{
            {
                Signal:   SignalNodeFsAvailable,
                Operator: OpLessThan,
                Value:    quantityMustParse("1Gi"),
            },
            {
                Signal:      SignalNodeFsAvailable,
                Operator:    OpLessThan,
                Value:       quantityMustParse("2Gi"),
                GracePeriod: time.Minute * 2,
            },
        },
    }
    summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)}
    manager := &managerImpl{
        clock:                        fakeClock,
        killPodFunc:                  podKiller.killPodNow,
        config:                       config,
        recorder:                     &record.FakeRecorder{},
        summaryProvider:              summaryProvider,
        nodeRef:                      nodeRef,
        nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
        thresholdsFirstObservedAt:    thresholdsObservedAt{},
        resourceToRankFunc:           buildResourceToRankFunc(false),
    }

    // create a best effort pod to test admission
    podToAdmit, _ := podMaker("pod-to-admit", newResourceList("", ""), newResourceList("", ""), "0Gi", "0Gi", "0Gi")

    // synchronize
    manager.synchronize(activePodsFunc)

    // we should not have disk pressure
    if manager.IsUnderDiskPressure() {
        t.Errorf("Manager should not report disk pressure")
    }

    // try to admit our pod (should succeed)
    if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
        t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
    }

    // induce soft threshold
    fakeClock.Step(1 * time.Minute)
    summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats)
    manager.synchronize(activePodsFunc)

    // we should have disk pressure
    if !manager.IsUnderDiskPressure() {
        t.Errorf("Manager should report disk pressure since soft threshold was met")
    }

    // verify no pod was yet killed because there has not yet been enough time passed.
    if podKiller.pod != nil {
        t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod)
    }

    // step forward in time past the grace period
    fakeClock.Step(3 * time.Minute)
    summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats)
    manager.synchronize(activePodsFunc)

    // we should have disk pressure
    if !manager.IsUnderDiskPressure() {
        t.Errorf("Manager should report disk pressure since soft threshold was met")
    }

    // verify the right pod was killed with the right grace period.
    if podKiller.pod != pods[0] {
        t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0])
    }
    if podKiller.gracePeriodOverride == nil {
        t.Errorf("Manager chose to kill pod but should have had a grace period override.")
    }
    observedGracePeriod := *podKiller.gracePeriodOverride
    if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
        t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
    }
    // reset state
    podKiller.pod = nil
    podKiller.gracePeriodOverride = nil

    // remove disk pressure
    fakeClock.Step(20 * time.Minute)
    summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
    manager.synchronize(activePodsFunc)

    // we should not have disk pressure
    if manager.IsUnderDiskPressure() {
        t.Errorf("Manager should not report disk pressure")
    }

    // induce disk pressure!
    fakeClock.Step(1 * time.Minute)
    summaryProvider.result = summaryStatsMaker("500Mi", "200Gi", podStats)
    manager.synchronize(activePodsFunc)

    // we should have disk pressure
    if !manager.IsUnderDiskPressure() {
        t.Errorf("Manager should report disk pressure")
    }

    // check the right pod was killed
    if podKiller.pod != pods[0] {
        t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0])
    }
    observedGracePeriod = *podKiller.gracePeriodOverride
    if observedGracePeriod != int64(0) {
        t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
    }

    // try to admit our pod (should fail)
    if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
        t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
    }

    // reduce disk pressure
    fakeClock.Step(1 * time.Minute)
    summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
    podKiller.pod = nil // reset state
    manager.synchronize(activePodsFunc)

    // we should have disk pressure (because transition period not yet met)
    if !manager.IsUnderDiskPressure() {
        t.Errorf("Manager should report disk pressure")
    }

    // no pod should have been killed
    if podKiller.pod != nil {
        t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod)
    }

    // try to admit our pod (should fail)
    if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
        t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
    }

    // move the clock past transition period to ensure that we stop reporting pressure
    fakeClock.Step(5 * time.Minute)
    summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
    podKiller.pod = nil // reset state
    manager.synchronize(activePodsFunc)

    // we should not have disk pressure (because transition period met)
    if manager.IsUnderDiskPressure() {
        t.Errorf("Manager should not report disk pressure")
    }

    // no pod should have been killed
    if podKiller.pod != nil {
        t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod)
    }

    // try to admit our pod (should succeed)
    if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
        t.Errorf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
    }
}
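The test walks the full lifecycle: no pressure at first; a soft threshold that is observed but not acted on until its two-minute grace period elapses, after which the victim is killed with MaxPodGracePeriodSeconds as the override; a hard threshold that evicts immediately with a zero grace period; and finally recovery, where the DiskPressure condition and the admission rejection persist until the five-minute PressureTransitionPeriod has passed.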
@@ -41,21 +41,24 @@ const (
    message = "The node was low on compute resources."
    // disk, in bytes. internal to this module, used to account for local disk usage.
    resourceDisk api.ResourceName = "disk"
    // imagefs, in bytes. internal to this module, used to account for local image filesystem usage.
    resourceImageFs api.ResourceName = "imagefs"
    // nodefs, in bytes. internal to this module, used to account for local node root filesystem usage.
    resourceNodeFs api.ResourceName = "nodefs"
)

// resourceToRankFunc maps a resource to ranking function for that resource.
var resourceToRankFunc = map[api.ResourceName]rankFunc{
    api.ResourceMemory: rankMemoryPressure,
}

// signalToNodeCondition maps a signal to the node condition to report if threshold is met.
var signalToNodeCondition = map[Signal]api.NodeConditionType{
    SignalMemoryAvailable:  api.NodeMemoryPressure,
    SignalImageFsAvailable: api.NodeDiskPressure,
    SignalNodeFsAvailable:  api.NodeDiskPressure,
}

// signalToResource maps a Signal to its associated Resource.
var signalToResource = map[Signal]api.ResourceName{
    SignalMemoryAvailable:  api.ResourceMemory,
    SignalImageFsAvailable: resourceImageFs,
    SignalNodeFsAvailable:  resourceNodeFs,
}

// validSignal returns true if the signal is supported.
@@ -160,7 +163,6 @@ func parseThresholdStatement(statement string) (Threshold, error) {
    if quantity.Sign() < 0 {
        return Threshold{}, fmt.Errorf("eviction threshold %v cannot be negative: %s", signal, &quantity)
    }

    return Threshold{
        Signal:   signal,
        Operator: operator,
@@ -252,14 +254,52 @@ func memoryUsage(memStats *statsapi.MemoryStats) *resource.Quantity {
    return resource.NewQuantity(usage, resource.BinarySI)
}

// podUsage aggregates usage of compute resources.
// it supports the following memory and disk.
func podUsage(podStats statsapi.PodStats) (api.ResourceList, error) {
// localVolumeNames returns the set of volumes for the pod that are local
func localVolumeNames(pod *api.Pod) []string {
    result := []string{}
    for _, volume := range pod.Spec.Volumes {
        if volume.HostPath != nil ||
            (volume.EmptyDir != nil && volume.EmptyDir.Medium != api.StorageMediumMemory) ||
            volume.ConfigMap != nil ||
            volume.GitRepo != nil {
            result = append(result, volume.Name)
        }
    }
    return result
}

// podDiskUsage aggregates pod disk usage for the specified stats to measure.
func podDiskUsage(podStats statsapi.PodStats, pod *api.Pod, statsToMeasure []fsStats) (api.ResourceList, error) {
    disk := resource.Quantity{Format: resource.BinarySI}
    for _, container := range podStats.Containers {
        if hasFsStats(statsToMeasure, fsStatsRoot) {
            disk.Add(*diskUsage(container.Rootfs))
        }
        if hasFsStats(statsToMeasure, fsStatsLogs) {
            disk.Add(*diskUsage(container.Logs))
        }
    }
    if hasFsStats(statsToMeasure, fsStatsLocalVolumeSource) {
        volumeNames := localVolumeNames(pod)
        for _, volumeName := range volumeNames {
            for _, volumeStats := range podStats.VolumeStats {
                if volumeStats.Name == volumeName {
                    disk.Add(*diskUsage(&volumeStats.FsStats))
                }
            }
        }
    }
    return api.ResourceList{
        resourceDisk: disk,
    }, nil
}

// podMemoryUsage aggregates pod memory usage.
func podMemoryUsage(podStats statsapi.PodStats) (api.ResourceList, error) {
    disk := resource.Quantity{Format: resource.BinarySI}
    memory := resource.Quantity{Format: resource.BinarySI}
    for _, container := range podStats.Containers {
        // disk usage (if known)
        // TODO: need to handle volumes
        for _, fsStats := range []*statsapi.FsStats{container.Rootfs, container.Logs} {
            disk.Add(*diskUsage(fsStats))
        }
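localVolumeNames counts only disk-backed pod volumes: hostPath, emptyDir not backed by memory, configMap, and gitRepo; a memory-medium emptyDir is deliberately excluded. An illustrative sketch of podDiskUsage using this commit's own test helpers (the pod and the numbers are hypothetical):

    pod := newPod("example", []api.Container{
        newContainer("example", newResourceList("", ""), newResourceList("", "")),
    }, []api.Volume{
        newVolume("scratch", api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}}),
    })
    // 200Mi writable layer, 100Mi logs, 50Mi in the local volume
    stats := newPodDiskStats(pod, resource.MustParse("200Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi"))
    usage, _ := podDiskUsage(stats, pod, []fsStats{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource})
    total := usage[resourceDisk] // 350Mi when all three stats are measured
    // measuring only []fsStats{fsStatsLogs, fsStatsLocalVolumeSource} would yield 150Mi instead
    _ = total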
@@ -384,12 +424,12 @@ func memory(stats statsFunc) cmpFunc {
        return 1
    }
    // if we cant get usage for p1 measured, we want p2 first
    p1Usage, err := podUsage(p1Stats)
    p1Usage, err := podMemoryUsage(p1Stats)
    if err != nil {
        return -1
    }
    // if we cant get usage for p2 measured, we want p1 first
    p2Usage, err := podUsage(p2Stats)
    p2Usage, err := podMemoryUsage(p2Stats)
    if err != nil {
        return 1
    }
@@ -411,7 +451,7 @@ func memory(stats statsFunc) cmpFunc {
}

// disk compares pods by largest consumer of disk relative to request.
func disk(stats statsFunc) cmpFunc {
func disk(stats statsFunc, fsStatsToMeasure []fsStats) cmpFunc {
    return func(p1, p2 *api.Pod) int {
        p1Stats, found := stats(p1)
        // if we have no usage stats for p1, we want p2 first
@@ -424,20 +464,20 @@ func disk(stats statsFunc) cmpFunc {
        return 1
    }
    // if we cant get usage for p1 measured, we want p2 first
    p1Usage, err := podUsage(p1Stats)
    p1Usage, err := podDiskUsage(p1Stats, p1, fsStatsToMeasure)
    if err != nil {
        return -1
    }
    // if we cant get usage for p2 measured, we want p1 first
    p2Usage, err := podUsage(p2Stats)
    p2Usage, err := podDiskUsage(p2Stats, p2, fsStatsToMeasure)
    if err != nil {
        return 1
    }

    // disk is best effort, so we don't measure relative to a request.
    // TODO: add disk as a guaranteed resource
    p1Disk := p1Usage[api.ResourceStorage]
    p2Disk := p2Usage[api.ResourceStorage]
    p1Disk := p1Usage[resourceDisk]
    p2Disk := p2Usage[resourceDisk]
    // if p2 is using more than p1, we want p2 first
    return p2Disk.Cmp(p1Disk)
}
@@ -448,9 +488,11 @@ func rankMemoryPressure(pods []*api.Pod, stats statsFunc) {
    orderedBy(qosComparator, memory(stats)).Sort(pods)
}

// rankDiskPressure orders the input pods for eviction in response to disk pressure.
func rankDiskPressure(pods []*api.Pod, stats statsFunc) {
    orderedBy(qosComparator, disk(stats)).Sort(pods)
// rankDiskPressureFunc returns a rankFunc that measures the specified fs stats.
func rankDiskPressureFunc(fsStatsToMeasure []fsStats) rankFunc {
    return func(pods []*api.Pod, stats statsFunc) {
        orderedBy(qosComparator, disk(stats, fsStatsToMeasure)).Sort(pods)
    }
}

// byEvictionPriority implements sort.Interface for []api.ResourceName.
@@ -474,7 +516,18 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
    statsFunc := cachedStatsFunc(summary.Pods)
    // build an evaluation context for current eviction signals
    result := signalObservations{}
    result[SignalMemoryAvailable] = resource.NewQuantity(int64(*summary.Node.Memory.AvailableBytes), resource.BinarySI)

    if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil {
        result[SignalMemoryAvailable] = resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI)
    }
    if nodeFs := summary.Node.Fs; nodeFs != nil && nodeFs.AvailableBytes != nil {
        result[SignalNodeFsAvailable] = resource.NewQuantity(int64(*nodeFs.AvailableBytes), resource.BinarySI)
    }
    if summary.Node.Runtime != nil {
        if imageFs := summary.Node.Runtime.ImageFs; imageFs != nil && imageFs.AvailableBytes != nil {
            result[SignalImageFsAvailable] = resource.NewQuantity(int64(*imageFs.AvailableBytes), resource.BinarySI)
        }
    }
    return result, statsFunc, nil
}
@@ -569,6 +622,16 @@ func nodeConditionsObservedSince(observedAt nodeConditionsObservedAt, period tim
    return results
}

// hasFsStats returns true if the fsStat is in the input list
func hasFsStats(inputs []fsStats, item fsStats) bool {
    for _, input := range inputs {
        if input == item {
            return true
        }
    }
    return false
}

// hasNodeCondition returns true if the node condition is in the input list
func hasNodeCondition(inputs []api.NodeConditionType, item api.NodeConditionType) bool {
    for _, input := range inputs {
@@ -612,3 +675,21 @@ func isSoftEviction(thresholds []Threshold, starvedResource api.ResourceName) bo
    }
    return true
}

// buildResourceToRankFunc returns ranking functions associated with resources
func buildResourceToRankFunc(withImageFs bool) map[api.ResourceName]rankFunc {
    resourceToRankFunc := map[api.ResourceName]rankFunc{
        api.ResourceMemory: rankMemoryPressure,
    }
    // usage of an imagefs is optional
    if withImageFs {
        // with an imagefs, nodefs pod rank func for eviction only includes logs and local volumes
        resourceToRankFunc[resourceNodeFs] = rankDiskPressureFunc([]fsStats{fsStatsLogs, fsStatsLocalVolumeSource})
        // with an imagefs, imagefs pod rank func for eviction only includes rootfs
        resourceToRankFunc[resourceImageFs] = rankDiskPressureFunc([]fsStats{fsStatsRoot})
    } else {
        // without an imagefs, nodefs pod rank func for eviction looks at all fs stats
        resourceToRankFunc[resourceNodeFs] = rankDiskPressureFunc([]fsStats{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource})
    }
    return resourceToRankFunc
}
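The split matters because a dedicated imagefs holds the container writable layers: under nodefs pressure only logs and local volumes are attributable to nodefs, so pods are ranked on those alone, while imagefs pressure ranks on rootfs usage. Without a dedicated imagefs everything lives on nodefs, so all three stats are charged there.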
@@ -75,6 +75,41 @@ func TestParseThresholdConfig(t *testing.T) {
        },
    },
},
"disk flag values": {
    evictionHard:            "imagefs.available<150Mi,nodefs.available<100Mi",
    evictionSoft:            "imagefs.available<300Mi,nodefs.available<200Mi",
    evictionSoftGracePeriod: "imagefs.available=30s,nodefs.available=30s",
    evictionMinReclaim:      "imagefs.available=2Gi,nodefs.available=1Gi",
    expectErr:               false,
    expectThresholds: []Threshold{
        {
            Signal:     SignalImageFsAvailable,
            Operator:   OpLessThan,
            Value:      quantityMustParse("150Mi"),
            MinReclaim: quantityMustParse("2Gi"),
        },
        {
            Signal:     SignalNodeFsAvailable,
            Operator:   OpLessThan,
            Value:      quantityMustParse("100Mi"),
            MinReclaim: quantityMustParse("1Gi"),
        },
        {
            Signal:      SignalImageFsAvailable,
            Operator:    OpLessThan,
            Value:       quantityMustParse("300Mi"),
            GracePeriod: gracePeriod,
            MinReclaim:  quantityMustParse("2Gi"),
        },
        {
            Signal:      SignalNodeFsAvailable,
            Operator:    OpLessThan,
            Value:       quantityMustParse("200Mi"),
            GracePeriod: gracePeriod,
            MinReclaim:  quantityMustParse("1Gi"),
        },
    },
},
"invalid-signal": {
    evictionHard: "mem.available<150Mi",
    evictionSoft: "",
@@ -127,6 +162,7 @@ func TestParseThresholdConfig(t *testing.T) {
    evictionHard:            "",
    evictionSoft:            "memory.available<150Mi",
    evictionSoftGracePeriod: "memory.available=-30s",
    evictionMinReclaim:      "",
    expectErr:               true,
    expectThresholds:        []Threshold{},
},
@@ -199,13 +235,13 @@ func thresholdEqual(a Threshold, b Threshold) bool {
func TestOrderedByQoS(t *testing.T) {
    bestEffort := newPod("best-effort", []api.Container{
        newContainer("best-effort", newResourceList("", ""), newResourceList("", "")),
    })
    }, nil)
    burstable := newPod("burstable", []api.Container{
        newContainer("burstable", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi")),
    })
    }, nil)
    guaranteed := newPod("guaranteed", []api.Container{
        newContainer("guaranteed", newResourceList("200m", "200Mi"), newResourceList("200m", "200Mi")),
    })
    }, nil)

    pods := []*api.Pod{guaranteed, burstable, bestEffort}
    orderedBy(qosComparator).Sort(pods)
@@ -218,26 +254,158 @@ func TestOrderedByQoS(t *testing.T) {
    }
}

// TestOrderedByDisk ensures we order pods by greediest disk consumer
func TestOrderedByDisk(t *testing.T) {
    pod1 := newPod("best-effort-high", []api.Container{
        newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod2 := newPod("best-effort-low", []api.Container{
        newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod3 := newPod("burstable-high", []api.Container{
        newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod4 := newPod("burstable-low", []api.Container{
        newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod5 := newPod("guaranteed-high", []api.Container{
        newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod6 := newPod("guaranteed-low", []api.Container{
        newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    stats := map[*api.Pod]statsapi.PodStats{
        pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")),  // 200Mi
        pod2: newPodDiskStats(pod2, resource.MustParse("100Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 300Mi
        pod3: newPodDiskStats(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 400Mi
        pod4: newPodDiskStats(pod4, resource.MustParse("300Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 450Mi
        pod5: newPodDiskStats(pod5, resource.MustParse("400Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 550Mi
        pod6: newPodDiskStats(pod6, resource.MustParse("500Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 650Mi
    }
    statsFn := func(pod *api.Pod) (statsapi.PodStats, bool) {
        result, found := stats[pod]
        return result, found
    }
    pods := []*api.Pod{pod1, pod2, pod3, pod4, pod5, pod6}
    orderedBy(disk(statsFn, []fsStats{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource})).Sort(pods)
    expected := []*api.Pod{pod6, pod5, pod4, pod3, pod2, pod1}
    for i := range expected {
        if pods[i] != expected[i] {
            t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name)
        }
    }
}

// TestOrderedByQoSDisk ensures we order pods by qos and then greediest disk consumer
func TestOrderedByQoSDisk(t *testing.T) {
    pod1 := newPod("best-effort-high", []api.Container{
        newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod2 := newPod("best-effort-low", []api.Container{
        newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod3 := newPod("burstable-high", []api.Container{
        newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod4 := newPod("burstable-low", []api.Container{
        newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod5 := newPod("guaranteed-high", []api.Container{
        newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    pod6 := newPod("guaranteed-low", []api.Container{
        newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")),
    }, []api.Volume{
        newVolume("local-volume", api.VolumeSource{
            EmptyDir: &api.EmptyDirVolumeSource{},
        }),
    })
    stats := map[*api.Pod]statsapi.PodStats{
        pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")),  // 200Mi
        pod2: newPodDiskStats(pod2, resource.MustParse("100Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 300Mi
        pod3: newPodDiskStats(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 400Mi
        pod4: newPodDiskStats(pod4, resource.MustParse("300Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 450Mi
        pod5: newPodDiskStats(pod5, resource.MustParse("400Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 550Mi
        pod6: newPodDiskStats(pod6, resource.MustParse("500Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 650Mi
    }
    statsFn := func(pod *api.Pod) (statsapi.PodStats, bool) {
        result, found := stats[pod]
        return result, found
    }
    pods := []*api.Pod{pod1, pod2, pod3, pod4, pod5, pod6}
    orderedBy(qosComparator, disk(statsFn, []fsStats{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource})).Sort(pods)
    expected := []*api.Pod{pod2, pod1, pod4, pod3, pod6, pod5}
    for i := range expected {
        if pods[i] != expected[i] {
            t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name)
        }
    }
}

// TestOrderedByMemory ensures we order pods by greediest memory consumer relative to request.
func TestOrderedByMemory(t *testing.T) {
    pod1 := newPod("best-effort-high", []api.Container{
        newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")),
    })
    }, nil)
    pod2 := newPod("best-effort-low", []api.Container{
        newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")),
    })
    }, nil)
    pod3 := newPod("burstable-high", []api.Container{
        newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")),
    })
    }, nil)
    pod4 := newPod("burstable-low", []api.Container{
        newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")),
    })
    }, nil)
    pod5 := newPod("guaranteed-high", []api.Container{
        newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")),
    })
    }, nil)
    pod6 := newPod("guaranteed-low", []api.Container{
        newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")),
    })
    }, nil)
    stats := map[*api.Pod]statsapi.PodStats{
        pod1: newPodMemoryStats(pod1, resource.MustParse("500Mi")), // 500 relative to request
        pod2: newPodMemoryStats(pod2, resource.MustParse("300Mi")), // 300 relative to request
@@ -264,22 +432,22 @@ func TestOrderedByMemory(t *testing.T) {
func TestOrderedByQoSMemory(t *testing.T) {
    pod1 := newPod("best-effort-high", []api.Container{
        newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")),
    })
    }, nil)
    pod2 := newPod("best-effort-low", []api.Container{
        newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")),
    })
    }, nil)
    pod3 := newPod("burstable-high", []api.Container{
        newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")),
    })
    }, nil)
    pod4 := newPod("burstable-low", []api.Container{
        newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")),
    })
    }, nil)
    pod5 := newPod("guaranteed-high", []api.Container{
        newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")),
    })
    }, nil)
    pod6 := newPod("guaranteed-low", []api.Container{
        newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")),
    })
    }, nil)
    stats := map[*api.Pod]statsapi.PodStats{
        pod1: newPodMemoryStats(pod1, resource.MustParse("500Mi")), // 500 relative to request
        pod2: newPodMemoryStats(pod2, resource.MustParse("50Mi")),  // 50 relative to request
@@ -346,11 +514,21 @@ func TestMakeSignalObservations(t *testing.T) {
    return pod
}
nodeAvailableBytes := uint64(1024 * 1024 * 1024)
imageFsAvailableBytes := uint64(1024 * 1024)
nodeFsAvailableBytes := uint64(1024)
fakeStats := &statsapi.Summary{
    Node: statsapi.NodeStats{
        Memory: &statsapi.MemoryStats{
            AvailableBytes: &nodeAvailableBytes,
        },
        Runtime: &statsapi.RuntimeStats{
            ImageFs: &statsapi.FsStats{
                AvailableBytes: &imageFsAvailableBytes,
            },
        },
        Fs: &statsapi.FsStats{
            AvailableBytes: &nodeFsAvailableBytes,
        },
    },
    Pods: []statsapi.PodStats{},
}
@@ -370,12 +548,26 @@ func TestMakeSignalObservations(t *testing.T) {
    if err != nil {
        t.Errorf("Unexpected err: %v", err)
    }
    quantity, found := actualObservations[SignalMemoryAvailable]
    memQuantity, found := actualObservations[SignalMemoryAvailable]
    if !found {
        t.Errorf("Expected available memory observation: %v", err)
    }
    if expectedBytes := int64(nodeAvailableBytes); quantity.Value() != expectedBytes {
        t.Errorf("Expected %v, actual: %v", expectedBytes, quantity.Value())
    if expectedBytes := int64(nodeAvailableBytes); memQuantity.Value() != expectedBytes {
        t.Errorf("Expected %v, actual: %v", expectedBytes, memQuantity.Value())
    }
    nodeFsQuantity, found := actualObservations[SignalNodeFsAvailable]
    if !found {
        t.Errorf("Expected available nodefs observation: %v", err)
    }
    if expectedBytes := int64(nodeFsAvailableBytes); nodeFsQuantity.Value() != expectedBytes {
        t.Errorf("Expected %v, actual: %v", expectedBytes, nodeFsQuantity.Value())
    }
    imageFsQuantity, found := actualObservations[SignalImageFsAvailable]
    if !found {
        t.Errorf("Expected available imagefs observation: %v", err)
    }
    if expectedBytes := int64(imageFsAvailableBytes); imageFsQuantity.Value() != expectedBytes {
        t.Errorf("Expected %v, actual: %v", expectedBytes, imageFsQuantity.Value())
    }
    for _, pod := range pods {
        podStats, found := statsFunc(pod)
@@ -670,6 +862,18 @@ func TestReclaimResources(t *testing.T) {
        },
        result: []api.ResourceName{api.ResourceMemory},
    },
    "imagefs.available": {
        inputs: []Threshold{
            {Signal: SignalImageFsAvailable},
        },
        result: []api.ResourceName{resourceImageFs},
    },
    "nodefs.available": {
        inputs: []Threshold{
            {Signal: SignalNodeFsAvailable},
        },
        result: []api.ResourceName{resourceNodeFs},
    },
}
for testName, testCase := range testCases {
    actual := reclaimResources(testCase.inputs)
@@ -681,6 +885,40 @@ func TestReclaimResources(t *testing.T) {
    }
}

// newPodDiskStats returns stats with specified usage amounts.
func newPodDiskStats(pod *api.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resource.Quantity) statsapi.PodStats {
    result := statsapi.PodStats{
        PodRef: statsapi.PodReference{
            Name: pod.Name, Namespace: pod.Namespace, UID: string(pod.UID),
        },
    }

    rootFsUsedBytes := uint64(rootFsUsed.Value())
    logsUsedBytes := uint64(logsUsed.Value())
    for range pod.Spec.Containers {
        result.Containers = append(result.Containers, statsapi.ContainerStats{
            Rootfs: &statsapi.FsStats{
                UsedBytes: &rootFsUsedBytes,
            },
            Logs: &statsapi.FsStats{
                UsedBytes: &logsUsedBytes,
            },
        })
    }

    perLocalVolumeUsedBytes := uint64(perLocalVolumeUsed.Value())
    for _, volumeName := range localVolumeNames(pod) {
        result.VolumeStats = append(result.VolumeStats, statsapi.VolumeStats{
            Name: volumeName,
            FsStats: statsapi.FsStats{
                UsedBytes: &perLocalVolumeUsedBytes,
            },
        })
    }

    return result
}

func newPodMemoryStats(pod *api.Pod, workingSet resource.Quantity) statsapi.PodStats {
    result := statsapi.PodStats{
        PodRef: statsapi.PodReference{
@@ -723,13 +961,21 @@ func newContainer(name string, requests api.ResourceList, limits api.ResourceLis
    }
}

func newPod(name string, containers []api.Container) *api.Pod {
func newVolume(name string, volumeSource api.VolumeSource) api.Volume {
    return api.Volume{
        Name:         name,
        VolumeSource: volumeSource,
    }
}

func newPod(name string, containers []api.Container, volumes []api.Volume) *api.Pod {
    return &api.Pod{
        ObjectMeta: api.ObjectMeta{
            Name: name,
        },
        Spec: api.PodSpec{
            Containers: containers,
            Volumes:    volumes,
        },
    }
}
@@ -30,6 +30,22 @@ type Signal string
const (
    // SignalMemoryAvailable is memory available (i.e. capacity - workingSet), in bytes.
    SignalMemoryAvailable Signal = "memory.available"
    // SignalNodeFsAvailable is amount of storage available on filesystem that kubelet uses for volumes, daemon logs, etc.
    SignalNodeFsAvailable Signal = "nodefs.available"
    // SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing images and container writable layers.
    SignalImageFsAvailable Signal = "imagefs.available"
)

// fsStats defines the types of filesystem stats to collect.
type fsStats string

const (
    // fsStatsLocalVolumeSource identifies stats for pod local volume sources.
    fsStatsLocalVolumeSource fsStats = "localVolumeSource"
    // fsStatsLogs identifies stats for pod logs.
    fsStatsLogs fsStats = "logs"
    // fsStatsRoot identifies stats for pod container writable layers.
    fsStatsRoot fsStats = "root"
)

// ThresholdOperator is the operator used to express a Threshold.
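Thresholds against the two new signals use the same statement syntax as memory.available, e.g. nodefs.available<1Gi or imagefs.available<150Mi, as exercised in TestParseThresholdConfig above.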
@@ -71,6 +87,9 @@ type Manager interface {

    // IsUnderMemoryPressure returns true if the node is under memory pressure.
    IsUnderMemoryPressure() bool

    // IsUnderDiskPressure returns true if the node is under disk pressure.
    IsUnderDiskPressure() bool
}

// DiskInfoProvider is responsible for informing the manager how disk is configured.
@@ -587,6 +587,64 @@ func (kl *Kubelet) setNodeMemoryPressureCondition(node *api.Node) {
    }
}

// setNodeDiskPressureCondition for the node.
// TODO: this needs to move somewhere centralized...
func (kl *Kubelet) setNodeDiskPressureCondition(node *api.Node) {
    currentTime := unversioned.NewTime(kl.clock.Now())
    var condition *api.NodeCondition

    // Check if NodeDiskPressure condition already exists and if it does, just pick it up for update.
    for i := range node.Status.Conditions {
        if node.Status.Conditions[i].Type == api.NodeDiskPressure {
            condition = &node.Status.Conditions[i]
        }
    }

    newCondition := false
    // If the NodeDiskPressure condition doesn't exist, create one
    if condition == nil {
        condition = &api.NodeCondition{
            Type:   api.NodeDiskPressure,
            Status: api.ConditionUnknown,
        }
        // cannot be appended to node.Status.Conditions here because it gets
        // copied to the slice. So if we append to the slice here none of the
        // updates we make below are reflected in the slice.
        newCondition = true
    }

    // Update the heartbeat time
    condition.LastHeartbeatTime = currentTime

    // Note: The conditions below take care of the case when a new NodeDiskPressure condition is
    // created as well as the case when the condition already exists. When a new condition
    // is created its status is set to api.ConditionUnknown which matches either
    // condition.Status != api.ConditionTrue or
    // condition.Status != api.ConditionFalse in the conditions below depending on whether
    // the kubelet is under disk pressure or not.
    if kl.evictionManager.IsUnderDiskPressure() {
        if condition.Status != api.ConditionTrue {
            condition.Status = api.ConditionTrue
            condition.Reason = "KubeletHasDiskPressure"
            condition.Message = "kubelet has disk pressure"
            condition.LastTransitionTime = currentTime
            kl.recordNodeStatusEvent(api.EventTypeNormal, "NodeHasDiskPressure")
        }
    } else {
        if condition.Status != api.ConditionFalse {
            condition.Status = api.ConditionFalse
            condition.Reason = "KubeletHasNoDiskPressure"
            condition.Message = "kubelet has no disk pressure"
            condition.LastTransitionTime = currentTime
            kl.recordNodeStatusEvent(api.EventTypeNormal, "NodeHasNoDiskPressure")
        }
    }

    if newCondition {
        node.Status.Conditions = append(node.Status.Conditions, *condition)
    }
}

// Set OOD condition for the node.
func (kl *Kubelet) setNodeOODCondition(node *api.Node) {
    currentTime := unversioned.NewTime(kl.clock.Now())
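The condition's heartbeat time advances on every status sync, while the transition time and a NodeHasDiskPressure or NodeHasNoDiskPressure event are recorded only when the status actually flips, so watchers can distinguish steady-state reporting from real transitions.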
@@ -700,6 +758,7 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*api.Node) error {
    withoutError(kl.setNodeStatusInfo),
    withoutError(kl.setNodeOODCondition),
    withoutError(kl.setNodeMemoryPressureCondition),
    withoutError(kl.setNodeDiskPressureCondition),
    withoutError(kl.setNodeReadyCondition),
    withoutError(kl.setNodeVolumesInUseStatus),
    withoutError(kl.recordNodeSchedulableEvent),
@@ -133,6 +133,14 @@ func TestUpdateNewNodeStatus(t *testing.T) {
    LastHeartbeatTime:  unversioned.Time{},
    LastTransitionTime: unversioned.Time{},
},
{
    Type:               api.NodeDiskPressure,
    Status:             api.ConditionFalse,
    Reason:             "KubeletHasNoDiskPressure",
    Message:            fmt.Sprintf("kubelet has no disk pressure"),
    LastHeartbeatTime:  unversioned.Time{},
    LastTransitionTime: unversioned.Time{},
},
{
    Type:   api.NodeReady,
    Status: api.ConditionTrue,
@@ -316,6 +324,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
    LastHeartbeatTime:  unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
    LastTransitionTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
{
    Type:               api.NodeDiskPressure,
    Status:             api.ConditionFalse,
    Reason:             "KubeletHasSufficientDisk",
    Message:            fmt.Sprintf("kubelet has sufficient disk space available"),
    LastHeartbeatTime:  unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
    LastTransitionTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
{
    Type:   api.NodeReady,
    Status: api.ConditionTrue,
@@ -380,6 +396,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
    LastHeartbeatTime:  unversioned.Time{},
    LastTransitionTime: unversioned.Time{},
},
{
    Type:               api.NodeDiskPressure,
    Status:             api.ConditionFalse,
    Reason:             "KubeletHasSufficientDisk",
    Message:            fmt.Sprintf("kubelet has sufficient disk space available"),
    LastHeartbeatTime:  unversioned.Time{},
    LastTransitionTime: unversioned.Time{},
},
{
    Type:   api.NodeReady,
    Status: api.ConditionTrue,
@@ -489,7 +513,6 @@ func TestUpdateExistingNodeOutOfDiskStatusWithTransitionFrequency(t *testing.T)
    LastTransitionTime: unversioned.NewTime(clock.Now()),
},
{

    Type:   api.NodeOutOfDisk,
    Status: api.ConditionTrue,
    Reason: "KubeletOutOfDisk",
@@ -509,8 +532,13 @@ func TestUpdateExistingNodeOutOfDiskStatusWithTransitionFrequency(t *testing.T)
    NumCores:       2,
    MemoryCapacity: 1024,
}
fsInfo := cadvisorapiv2.FsInfo{
    Device: "123",
}
mockCadvisor.On("Start").Return(nil)
mockCadvisor.On("MachineInfo").Return(machineInfo, nil)
mockCadvisor.On("ImagesFsInfo").Return(fsInfo, nil)
mockCadvisor.On("RootFsInfo").Return(fsInfo, nil)
versionInfo := &cadvisorapi.VersionInfo{
    KernelVersion:      "3.16.0-0.bpo.4-amd64",
    ContainerOsVersion: "Debian GNU/Linux 7 (wheezy)",
@@ -671,6 +699,14 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
        LastHeartbeatTime:  unversioned.Time{},
        LastTransitionTime: unversioned.Time{},
    },
    {
        Type:               api.NodeDiskPressure,
        Status:             api.ConditionFalse,
        Reason:             "KubeletHasNoDiskPressure",
        Message:            fmt.Sprintf("kubelet has no disk pressure"),
        LastHeartbeatTime:  unversioned.Time{},
        LastTransitionTime: unversioned.Time{},
    },
    {}, //placeholder
},
NodeInfo: api.NodeSystemInfo{