mirror of https://github.com/k3s-io/k3s
Split NodeDiskPressure into NodeInodePressure and NodeDiskPressure
parent 3933ddbc9a
commit fed3f37eef
@@ -2224,6 +2224,8 @@ const (
     NodeDiskPressure NodeConditionType = "DiskPressure"
     // NodeNetworkUnavailable means that network for the node is not correctly configured.
     NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable"
+    // NodeInodePressure means the kubelet is under pressure due to insufficient available inodes.
+    NodeInodePressure NodeConditionType = "InodePressure"
 )
 
 type NodeCondition struct {
@@ -136,6 +136,13 @@ func (m *managerImpl) IsUnderDiskPressure() bool {
     return hasNodeCondition(m.nodeConditions, api.NodeDiskPressure)
 }
 
+// IsUnderInodePressure returns true if the node is under inode pressure.
+func (m *managerImpl) IsUnderInodePressure() bool {
+    m.RLock()
+    defer m.RUnlock()
+    return hasNodeCondition(m.nodeConditions, api.NodeInodePressure)
+}
+
 // synchronize is the main control loop that enforces eviction thresholds.
 func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) {
     // if we have nothing to do, just return
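The hunk above adds an IsUnderInodePressure accessor alongside the existing IsUnderDiskPressure: take the manager's read lock, then scan the cached node conditions. Below is a rough standalone sketch of that pattern using simplified stand-in types, not the real eviction manager or api package.

package main

import (
	"fmt"
	"sync"
)

// NodeConditionType mirrors the string-typed condition names added above.
type NodeConditionType string

const (
	NodeDiskPressure  NodeConditionType = "DiskPressure"
	NodeInodePressure NodeConditionType = "InodePressure"
)

// manager caches the conditions observed by the last eviction pass.
type manager struct {
	sync.RWMutex
	nodeConditions []NodeConditionType
}

// hasNodeCondition reports whether want is present in the cached slice.
func hasNodeCondition(inputs []NodeConditionType, want NodeConditionType) bool {
	for _, c := range inputs {
		if c == want {
			return true
		}
	}
	return false
}

// IsUnderInodePressure takes the read lock, as the accessor in the diff does,
// because the synchronize loop may rewrite nodeConditions concurrently.
func (m *manager) IsUnderInodePressure() bool {
	m.RLock()
	defer m.RUnlock()
	return hasNodeCondition(m.nodeConditions, NodeInodePressure)
}

func main() {
	m := &manager{nodeConditions: []NodeConditionType{NodeInodePressure}}
	fmt.Println(m.IsUnderInodePressure()) // true
}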
@@ -1014,7 +1014,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) {
     manager.synchronize(diskInfoProvider, activePodsFunc)
 
     // we should not have disk pressure
-    if manager.IsUnderDiskPressure() {
+    if manager.IsUnderInodePressure() {
         t.Errorf("Manager should not report disk pressure")
     }
 
@@ -1029,7 +1029,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) {
     manager.synchronize(diskInfoProvider, activePodsFunc)
 
     // we should have disk pressure
-    if !manager.IsUnderDiskPressure() {
+    if !manager.IsUnderInodePressure() {
         t.Errorf("Manager should report disk pressure since soft threshold was met")
     }
 
@@ -1044,7 +1044,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) {
     manager.synchronize(diskInfoProvider, activePodsFunc)
 
     // we should have disk pressure
-    if !manager.IsUnderDiskPressure() {
+    if !manager.IsUnderInodePressure() {
         t.Errorf("Manager should report disk pressure since soft threshold was met")
     }
 
@@ -1069,7 +1069,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) {
     manager.synchronize(diskInfoProvider, activePodsFunc)
 
     // we should not have disk pressure
-    if manager.IsUnderDiskPressure() {
+    if manager.IsUnderInodePressure() {
         t.Errorf("Manager should not report disk pressure")
     }
 
@@ -1079,7 +1079,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) {
     manager.synchronize(diskInfoProvider, activePodsFunc)
 
     // we should have disk pressure
-    if !manager.IsUnderDiskPressure() {
+    if !manager.IsUnderInodePressure() {
         t.Errorf("Manager should report disk pressure")
     }
 
@@ -1104,7 +1104,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) {
     manager.synchronize(diskInfoProvider, activePodsFunc)
 
     // we should have disk pressure (because transition period not yet met)
-    if !manager.IsUnderDiskPressure() {
+    if !manager.IsUnderInodePressure() {
         t.Errorf("Manager should report disk pressure")
     }
 
@@ -1125,7 +1125,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) {
     manager.synchronize(diskInfoProvider, activePodsFunc)
 
     // we should not have disk pressure (because transition period met)
-    if manager.IsUnderDiskPressure() {
+    if manager.IsUnderInodePressure() {
         t.Errorf("Manager should not report disk pressure")
     }
 
@@ -68,8 +68,8 @@ func init() {
     signalToNodeCondition[SignalMemoryAvailable] = api.NodeMemoryPressure
     signalToNodeCondition[SignalImageFsAvailable] = api.NodeDiskPressure
     signalToNodeCondition[SignalNodeFsAvailable] = api.NodeDiskPressure
-    signalToNodeCondition[SignalImageFsInodesFree] = api.NodeDiskPressure
-    signalToNodeCondition[SignalNodeFsInodesFree] = api.NodeDiskPressure
+    signalToNodeCondition[SignalImageFsInodesFree] = api.NodeInodePressure
+    signalToNodeCondition[SignalNodeFsInodesFree] = api.NodeInodePressure
 
     // map signals to resources (and vice-versa)
     signalToResource = map[Signal]api.ResourceName{}
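The init() hunk above reroutes the two inode-related eviction signals to the new InodePressure condition, while the byte-availability signals keep mapping to DiskPressure. As a standalone sketch of how such a signal-to-condition table can be folded into a set of node conditions; the Signal string values and the helper below are illustrative stand-ins, not the real eviction package.

package main

import "fmt"

type Signal string
type NodeConditionType string

const (
	SignalNodeFsAvailable   Signal = "nodefs.available"
	SignalNodeFsInodesFree  Signal = "nodefs.inodesFree"
	SignalImageFsAvailable  Signal = "imagefs.available"
	SignalImageFsInodesFree Signal = "imagefs.inodesFree"

	NodeDiskPressure  NodeConditionType = "DiskPressure"
	NodeInodePressure NodeConditionType = "InodePressure"
)

// signalToNodeCondition mirrors the table populated in the init() hunk:
// byte-availability signals imply DiskPressure, inode signals imply InodePressure.
var signalToNodeCondition = map[Signal]NodeConditionType{
	SignalNodeFsAvailable:   NodeDiskPressure,
	SignalImageFsAvailable:  NodeDiskPressure,
	SignalNodeFsInodesFree:  NodeInodePressure,
	SignalImageFsInodesFree: NodeInodePressure,
}

// nodeConditions returns the distinct conditions implied by the crossed signals.
func nodeConditions(crossed []Signal) []NodeConditionType {
	seen := map[NodeConditionType]bool{}
	out := []NodeConditionType{}
	for _, s := range crossed {
		if c, ok := signalToNodeCondition[s]; ok && !seen[c] {
			seen[c] = true
			out = append(out, c)
		}
	}
	return out
}

func main() {
	fmt.Println(nodeConditions([]Signal{SignalNodeFsInodesFree})) // [InodePressure]
}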
@@ -104,6 +104,9 @@ type Manager interface {
 
     // IsUnderDiskPressure returns true if the node is under disk pressure.
     IsUnderDiskPressure() bool
+
+    // IsUnderInodePressure returns true if the node is under inode pressure.
+    IsUnderInodePressure() bool
 }
 
 // DiskInfoProvider is responsible for informing the manager how disk is configured.
@@ -742,6 +742,65 @@ func (kl *Kubelet) setNodeDiskPressureCondition(node *api.Node) {
     }
 }
+
+// setNodeInodePressureCondition for the node.
+// TODO: this needs to move somewhere centralized...
+func (kl *Kubelet) setNodeInodePressureCondition(node *api.Node) {
+    currentTime := unversioned.NewTime(kl.clock.Now())
+    var condition *api.NodeCondition
+
+    // Check if NodeInodePressure condition already exists and if it does, just pick it up for update.
+    for i := range node.Status.Conditions {
+        if node.Status.Conditions[i].Type == api.NodeInodePressure {
+            condition = &node.Status.Conditions[i]
+        }
+    }
+
+    newCondition := false
+    // If the NodeInodePressure condition doesn't exist, create one
+    if condition == nil {
+        condition = &api.NodeCondition{
+            Type:   api.NodeInodePressure,
+            Status: api.ConditionUnknown,
+        }
+        // cannot be appended to node.Status.Conditions here because it gets
+        // copied to the slice. So if we append to the slice here none of the
+        // updates we make below are reflected in the slice.
+        newCondition = true
+    }
+
+    // Update the heartbeat time
+    condition.LastHeartbeatTime = currentTime
+
+    // Note: The conditions below take care of the case when a new NodeInodePressure condition is
+    // created as well as the case when the condition already exists. When a new condition
+    // is created its status is set to api.ConditionUnknown, which matches either
+    // condition.Status != api.ConditionTrue or
+    // condition.Status != api.ConditionFalse in the conditions below, depending on whether
+    // the kubelet is under inode pressure or not.
+    if kl.evictionManager.IsUnderInodePressure() {
+        if condition.Status != api.ConditionTrue {
+            condition.Status = api.ConditionTrue
+            condition.Reason = "KubeletHasInodePressure"
+            condition.Message = "kubelet has inode pressure"
+            condition.LastTransitionTime = currentTime
+            kl.recordNodeStatusEvent(api.EventTypeNormal, "NodeHasInodePressure")
+        }
+    } else {
+        if condition.Status != api.ConditionFalse {
+            condition.Status = api.ConditionFalse
+            condition.Reason = "KubeletHasNoInodePressure"
+            condition.Message = "kubelet has no inode pressure"
+            condition.LastTransitionTime = currentTime
+            kl.recordNodeStatusEvent(api.EventTypeNormal, "NodeHasNoInodePressure")
+        }
+    }
+
+    if newCondition {
+        node.Status.Conditions = append(node.Status.Conditions, *condition)
+    }
+
+}
 
 // Set OODcondition for the node.
 func (kl *Kubelet) setNodeOODCondition(node *api.Node) {
     currentTime := unversioned.NewTime(kl.clock.Now())
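setNodeInodePressureCondition above follows the same shape as the existing memory and disk pressure setters: the heartbeat is refreshed on every status sync, but Status, Reason, Message and LastTransitionTime only change when the eviction manager's answer actually flips. A minimal standalone sketch of that transition rule, with simplified types and without the event recording or the append-on-new-condition bookkeeping:

package main

import (
	"fmt"
	"time"
)

type ConditionStatus string

const (
	ConditionTrue    ConditionStatus = "True"
	ConditionFalse   ConditionStatus = "False"
	ConditionUnknown ConditionStatus = "Unknown"
)

// NodeCondition is a simplified stand-in for the api.NodeCondition used above.
type NodeCondition struct {
	Type               string
	Status             ConditionStatus
	Reason             string
	Message            string
	LastHeartbeatTime  time.Time
	LastTransitionTime time.Time
}

// updateInodePressureCondition applies the transition rules from the hunk above:
// refresh the heartbeat unconditionally, but only rewrite Reason/Message and
// LastTransitionTime when the observed status actually changes.
func updateInodePressureCondition(c *NodeCondition, underPressure bool, now time.Time) {
	c.LastHeartbeatTime = now
	if underPressure {
		if c.Status != ConditionTrue {
			c.Status = ConditionTrue
			c.Reason = "KubeletHasInodePressure"
			c.Message = "kubelet has inode pressure"
			c.LastTransitionTime = now
		}
		return
	}
	if c.Status != ConditionFalse {
		c.Status = ConditionFalse
		c.Reason = "KubeletHasNoInodePressure"
		c.Message = "kubelet has no inode pressure"
		c.LastTransitionTime = now
	}
}

func main() {
	c := &NodeCondition{Type: "InodePressure", Status: ConditionUnknown}
	updateInodePressureCondition(c, true, time.Now())
	fmt.Println(c.Status, c.Reason) // True KubeletHasInodePressure
}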
@@ -856,6 +915,7 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*api.Node) error {
         withoutError(kl.setNodeOODCondition),
         withoutError(kl.setNodeMemoryPressureCondition),
         withoutError(kl.setNodeDiskPressureCondition),
+        withoutError(kl.setNodeInodePressureCondition),
         withoutError(kl.setNodeReadyCondition),
         withoutError(kl.setNodeVolumesInUseStatus),
         withoutError(kl.recordNodeSchedulableEvent),
@@ -149,6 +149,14 @@ func TestUpdateNewNodeStatus(t *testing.T) {
                 LastHeartbeatTime:  unversioned.Time{},
                 LastTransitionTime: unversioned.Time{},
             },
+            {
+                Type:               api.NodeInodePressure,
+                Status:             api.ConditionFalse,
+                Reason:             "KubeletHasNoInodePressure",
+                Message:            fmt.Sprintf("kubelet has no inode pressure"),
+                LastHeartbeatTime:  unversioned.Time{},
+                LastTransitionTime: unversioned.Time{},
+            },
             {
                 Type:   api.NodeReady,
                 Status: api.ConditionTrue,
@@ -340,6 +348,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
                 LastHeartbeatTime:  unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
                 LastTransitionTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
             },
+            {
+                Type:               api.NodeInodePressure,
+                Status:             api.ConditionFalse,
+                Reason:             "KubeletHasSufficientInode",
+                Message:            fmt.Sprintf("kubelet has sufficient inodes available"),
+                LastHeartbeatTime:  unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+                LastTransitionTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+            },
             {
                 Type:   api.NodeReady,
                 Status: api.ConditionTrue,
@@ -412,6 +428,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
                 LastHeartbeatTime:  unversioned.Time{},
                 LastTransitionTime: unversioned.Time{},
             },
+            {
+                Type:               api.NodeInodePressure,
+                Status:             api.ConditionFalse,
+                Reason:             "KubeletHasSufficientInode",
+                Message:            fmt.Sprintf("kubelet has sufficient inodes available"),
+                LastHeartbeatTime:  unversioned.Time{},
+                LastTransitionTime: unversioned.Time{},
+            },
             {
                 Type:   api.NodeReady,
                 Status: api.ConditionTrue,
@@ -716,6 +740,14 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
                 LastHeartbeatTime:  unversioned.Time{},
                 LastTransitionTime: unversioned.Time{},
             },
+            {
+                Type:               api.NodeInodePressure,
+                Status:             api.ConditionFalse,
+                Reason:             "KubeletHasNoInodePressure",
+                Message:            fmt.Sprintf("kubelet has no inode pressure"),
+                LastHeartbeatTime:  unversioned.Time{},
+                LastTransitionTime: unversioned.Time{},
+            },
             {}, //placeholder
         },
         NodeInfo: api.NodeSystemInfo{
@@ -37,6 +37,7 @@ var (
     ErrMaxVolumeCountExceeded = newPredicateFailureError("MaxVolumeCount")
     ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure")
     ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure")
+    ErrNodeUnderInodePressure = newPredicateFailureError("NodeUnderInodePressure")
     // ErrFakePredicate is used for test only. The fake predicates returning false also returns error
     // as ErrFakePredicate.
     ErrFakePredicate = newPredicateFailureError("FakePredicateError")
@@ -1168,3 +1168,21 @@ func CheckNodeDiskPressurePredicate(pod *api.Pod, meta interface{}, nodeInfo *sc
 
     return true, nil, nil
 }
+
+// CheckNodeInodePressurePredicate checks if a pod can be scheduled on a node
+// reporting inode pressure condition.
+func CheckNodeInodePressurePredicate(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
+    node := nodeInfo.Node()
+    if node == nil {
+        return false, nil, fmt.Errorf("node not found")
+    }
+
+    // is the node under pressure?
+    for _, cond := range node.Status.Conditions {
+        if cond.Type == api.NodeInodePressure && cond.Status == api.ConditionTrue {
+            return false, []algorithm.PredicateFailureReason{ErrNodeUnderInodePressure}, nil
+        }
+    }
+
+    return true, nil, nil
+}
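CheckNodeInodePressurePredicate above mirrors the existing disk-pressure predicate: a nil node is a hard error, a True InodePressure condition yields the ErrNodeUnderInodePressure failure reason, and anything else fits. Below is a standalone sketch of that shape using simplified stand-in types instead of the scheduler's api and algorithm packages.

package main

import (
	"errors"
	"fmt"
)

// Simplified stand-ins for the node types inspected by the predicate above.
type NodeCondition struct {
	Type   string
	Status string
}

type Node struct {
	Conditions []NodeCondition
}

var ErrNodeUnderInodePressure = errors.New("NodeUnderInodePressure")

// checkNodeInodePressure mirrors the predicate's contract: whether the pod fits,
// the failure reasons when it does not, and an error for hard failures.
func checkNodeInodePressure(node *Node) (bool, []error, error) {
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	for _, cond := range node.Conditions {
		if cond.Type == "InodePressure" && cond.Status == "True" {
			return false, []error{ErrNodeUnderInodePressure}, nil
		}
	}
	return true, nil, nil
}

func main() {
	pressured := &Node{Conditions: []NodeCondition{{Type: "InodePressure", Status: "True"}}}
	fits, reasons, _ := checkNodeInodePressure(pressured)
	fmt.Println(fits, reasons) // false [NodeUnderInodePressure]
}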
@@ -3028,3 +3028,75 @@ func TestPodSchedulesOnNodeWithDiskPressureCondition(t *testing.T) {
         }
     }
 }
+
+func TestPodSchedulesOnNodeWithInodePressureCondition(t *testing.T) {
+    pod := &api.Pod{
+        Spec: api.PodSpec{
+            Containers: []api.Container{
+                {
+                    Name:            "container",
+                    Image:           "image",
+                    ImagePullPolicy: "Always",
+                },
+            },
+        },
+    }
+
+    // specify a node with no inode pressure condition on
+    noPressureNode := &api.Node{
+        Status: api.NodeStatus{
+            Conditions: []api.NodeCondition{
+                {
+                    Type:   "Ready",
+                    Status: "True",
+                },
+            },
+        },
+    }
+
+    // specify a node with pressure condition on
+    pressureNode := &api.Node{
+        Status: api.NodeStatus{
+            Conditions: []api.NodeCondition{
+                {
+                    Type:   "InodePressure",
+                    Status: "True",
+                },
+            },
+        },
+    }
+
+    tests := []struct {
+        pod      *api.Pod
+        nodeInfo *schedulercache.NodeInfo
+        fits     bool
+        name     string
+    }{
+        {
+            pod:      pod,
+            nodeInfo: makeEmptyNodeInfo(noPressureNode),
+            fits:     true,
+            name:     "pod schedulable on node without pressure condition on",
+        },
+        {
+            pod:      pod,
+            nodeInfo: makeEmptyNodeInfo(pressureNode),
+            fits:     false,
+            name:     "pod not schedulable on node with pressure condition on",
+        },
+    }
+    expectedFailureReasons := []algorithm.PredicateFailureReason{ErrNodeUnderInodePressure}
+
+    for _, test := range tests {
+        fits, reasons, err := CheckNodeInodePressurePredicate(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
+        if err != nil {
+            t.Errorf("%s: unexpected error: %v", test.name, err)
+        }
+        if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
+            t.Errorf("%s: unexpected failure reasons: %v, want: %v", test.name, reasons, expectedFailureReasons)
+        }
+        if fits != test.fits {
+            t.Errorf("%s: expected %v got %v", test.name, test.fits, fits)
+        }
+    }
+}
@@ -306,6 +306,76 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
             },
         },
     },
+
+    // Do not change this JSON after the corresponding release has been tagged.
+    // A failure indicates backwards compatibility with the specified release was broken.
+    "1.5": {
+        JSON: `{
+            "kind": "Policy",
+            "apiVersion": "v1",
+            "predicates": [
+                {"name": "MatchNodeSelector"},
+                {"name": "PodFitsResources"},
+                {"name": "PodFitsHostPorts"},
+                {"name": "HostName"},
+                {"name": "NoDiskConflict"},
+                {"name": "NoVolumeZoneConflict"},
+                {"name": "PodToleratesNodeTaints"},
+                {"name": "CheckNodeMemoryPressure"},
+                {"name": "CheckNodeDiskPressure"},
+                {"name": "CheckNodeInodePressure"},
+                {"name": "MaxEBSVolumeCount"},
+                {"name": "MaxGCEPDVolumeCount"},
+                {"name": "MatchInterPodAffinity"},
+                {"name": "GeneralPredicates"},
+                {"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
+                {"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
+            ],"priorities": [
+                {"name": "EqualPriority", "weight": 2},
+                {"name": "ImageLocalityPriority", "weight": 2},
+                {"name": "LeastRequestedPriority", "weight": 2},
+                {"name": "BalancedResourceAllocation", "weight": 2},
+                {"name": "SelectorSpreadPriority", "weight": 2},
+                {"name": "NodePreferAvoidPodsPriority", "weight": 2},
+                {"name": "NodeAffinityPriority", "weight": 2},
+                {"name": "TaintTolerationPriority", "weight": 2},
+                {"name": "InterPodAffinityPriority", "weight": 2},
+                {"name": "MostRequestedPriority", "weight": 2}
+            ]
+        }`,
+        ExpectedPolicy: schedulerapi.Policy{
+            Predicates: []schedulerapi.PredicatePolicy{
+                {Name: "MatchNodeSelector"},
+                {Name: "PodFitsResources"},
+                {Name: "PodFitsHostPorts"},
+                {Name: "HostName"},
+                {Name: "NoDiskConflict"},
+                {Name: "NoVolumeZoneConflict"},
+                {Name: "PodToleratesNodeTaints"},
+                {Name: "CheckNodeMemoryPressure"},
+                {Name: "CheckNodeDiskPressure"},
+                {Name: "CheckNodeInodePressure"},
+                {Name: "MaxEBSVolumeCount"},
+                {Name: "MaxGCEPDVolumeCount"},
+                {Name: "MatchInterPodAffinity"},
+                {Name: "GeneralPredicates"},
+                {Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
+                {Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
+            },
+            Priorities: []schedulerapi.PriorityPolicy{
+                {Name: "EqualPriority", Weight: 2},
+                {Name: "ImageLocalityPriority", Weight: 2},
+                {Name: "LeastRequestedPriority", Weight: 2},
+                {Name: "BalancedResourceAllocation", Weight: 2},
+                {Name: "SelectorSpreadPriority", Weight: 2},
+                {Name: "NodePreferAvoidPodsPriority", Weight: 2},
+                {Name: "NodeAffinityPriority", Weight: 2},
+                {Name: "TaintTolerationPriority", Weight: 2},
+                {Name: "InterPodAffinityPriority", Weight: 2},
+                {Name: "MostRequestedPriority", Weight: 2},
+            },
+        },
+    },
 }
 
 registeredPredicates := sets.NewString(factory.ListRegisteredFitPredicates()...)
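The compatibility fixture above pins the predicate names (now including "CheckNodeInodePressure") that a v1 scheduler Policy document may reference. As a rough standalone illustration of the mechanism the test exercises, the sketch below decodes such a policy with encoding/json into simplified structs; the Policy and PredicatePolicy types here are stand-ins, not the real schedulerapi package.

package main

import (
	"encoding/json"
	"fmt"
)

// Simplified stand-ins for schedulerapi.Policy and friends.
type PredicatePolicy struct {
	Name string `json:"name"`
}

type PriorityPolicy struct {
	Name   string `json:"name"`
	Weight int    `json:"weight"`
}

type Policy struct {
	Kind       string            `json:"kind"`
	APIVersion string            `json:"apiVersion"`
	Predicates []PredicatePolicy `json:"predicates"`
	Priorities []PriorityPolicy  `json:"priorities"`
}

func main() {
	doc := `{
		"kind": "Policy",
		"apiVersion": "v1",
		"predicates": [{"name": "CheckNodeDiskPressure"}, {"name": "CheckNodeInodePressure"}],
		"priorities": [{"name": "LeastRequestedPriority", "weight": 2}]
	}`

	var p Policy
	if err := json.Unmarshal([]byte(doc), &p); err != nil {
		panic(err)
	}
	// Print the predicate names the policy refers to; a compatibility test would
	// assert that each of these is registered with the scheduler.
	for _, pred := range p.Predicates {
		fmt.Println(pred.Name)
	}
}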
@@ -158,6 +158,9 @@ func defaultPredicates() sets.String {
         // Fit is determined by node disk pressure condition.
         factory.RegisterFitPredicate("CheckNodeDiskPressure", predicates.CheckNodeDiskPressurePredicate),
 
+        // Fit is determined by node inode pressure condition.
+        factory.RegisterFitPredicate("CheckNodeInodePressure", predicates.CheckNodeInodePressurePredicate),
+
         // Fit is determined by inter-pod affinity.
         factory.RegisterFitPredicateFactory(
             "MatchInterPodAffinity",
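The defaults hunk above registers the new predicate under the name "CheckNodeInodePressure" so that scheduler policies can reference it by string, as the compatibility fixture earlier does. A toy sketch of that name-to-function registration pattern; the registry, signature, and helper below are simplified stand-ins for the scheduler factory, not its real API.

package main

import "fmt"

// FitPredicate is a simplified stand-in for the scheduler's predicate signature.
type FitPredicate func(nodeConditions map[string]string) (fits bool, reason string)

// registry is a toy version of the factory registration used above:
// default predicates are registered by name and later looked up by the policy loader.
var registry = map[string]FitPredicate{}

// registerFitPredicate stores the predicate and, like the real factory, returns the name.
func registerFitPredicate(name string, p FitPredicate) string {
	registry[name] = p
	return name
}

func main() {
	registerFitPredicate("CheckNodeInodePressure", func(conds map[string]string) (bool, string) {
		if conds["InodePressure"] == "True" {
			return false, "NodeUnderInodePressure"
		}
		return true, ""
	})

	fits, reason := registry["CheckNodeInodePressure"](map[string]string{"InodePressure": "True"})
	fmt.Println(fits, reason) // false NodeUnderInodePressure
}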