Clean up logic related to OutOfDisk

- clean up OutOfDisk (OOD) logic in the scheduler and node controller
- update comments and test cases
pull/564/head
Wei Huang 2018-12-13 00:31:46 -08:00
parent f4487a06fe
commit 8f87e71e0c
9 changed files with 15 additions and 174 deletions
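
For orientation before the per-file hunks: after this cleanup the node lifecycle controller taints nodes only for the remaining conditions (NetworkUnavailable, MemoryPressure, DiskPressure, PIDPressure). The block below is a minimal, self-contained sketch of that mapping, not the controller's actual code: plain strings stand in for the v1 and schedulerapi types, the helper name taintsFor is invented for illustration, and the taint key strings follow the node.kubernetes.io/... pattern visible in the diff.

package main

import "fmt"

// Simplified stand-in for the controller's condition-to-taint map after the
// NodeOutOfDisk entry is removed. Keys are node condition types; values are
// the taint key applied while the condition reports True.
var conditionToTaintKey = map[string]string{
	"NetworkUnavailable": "node.kubernetes.io/network-unavailable",
	"MemoryPressure":     "node.kubernetes.io/memory-pressure",
	"DiskPressure":       "node.kubernetes.io/disk-pressure",
	"PIDPressure":        "node.kubernetes.io/pid-pressure",
}

// taintsFor returns the NoSchedule taint keys a node should carry, given the
// conditions currently reporting True. Unknown condition types are ignored.
func taintsFor(trueConditions []string) []string {
	var taints []string
	for _, cond := range trueConditions {
		if key, ok := conditionToTaintKey[cond]; ok {
			taints = append(taints, key)
		}
	}
	return taints
}

func main() {
	// "OutOfDisk" no longer maps to any taint after this change.
	fmt.Println(taintsFor([]string{"MemoryPressure", "OutOfDisk"}))
}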

View File

@ -95,9 +95,6 @@ var (
v1.NodeMemoryPressure: {
v1.ConditionTrue: schedulerapi.TaintNodeMemoryPressure,
},
v1.NodeOutOfDisk: {
v1.ConditionTrue: schedulerapi.TaintNodeOutOfDisk,
},
v1.NodeDiskPressure: {
v1.ConditionTrue: schedulerapi.TaintNodeDiskPressure,
},
@ -114,7 +111,6 @@ var (
schedulerapi.TaintNodeUnreachable: v1.NodeReady,
schedulerapi.TaintNodeNetworkUnavailable: v1.NodeNetworkUnavailable,
schedulerapi.TaintNodeMemoryPressure: v1.NodeMemoryPressure,
schedulerapi.TaintNodeOutOfDisk: v1.NodeOutOfDisk,
schedulerapi.TaintNodeDiskPressure: v1.NodeDiskPressure,
schedulerapi.TaintNodePIDPressure: v1.NodePIDPressure,
}
@ -229,7 +225,7 @@ type Controller struct {
useTaintBasedEvictions bool
// if set to true, NodeController will taint Nodes based on its condition for 'NetworkUnavailable',
// 'MemoryPressure', 'OutOfDisk' and 'DiskPressure'.
// 'MemoryPressure', 'PIDPressure' and 'DiskPressure'.
taintNodeByCondition bool
nodeUpdateQueue workqueue.Interface
@ -921,7 +917,6 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
// remaining node conditions should also be set to Unknown
remainingNodeConditionTypes := []v1.NodeConditionType{
v1.NodeOutOfDisk,
v1.NodeMemoryPressure,
v1.NodeDiskPressure,
v1.NodePIDPressure,

View File
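
The controller change above also trims the list of conditions forced to Unknown when a kubelet stops posting status; the test hunks below drop the matching NodeOutOfDisk expectations. Below is a rough sketch of that fallback under simplified types (condition and markUnknown are illustrative stand-ins, not the controller's real types or signatures).

package main

import (
	"fmt"
	"time"
)

// condition is a simplified stand-in for v1.NodeCondition.
type condition struct {
	Type, Status, Reason, Message string
	LastTransitionTime            time.Time
}

// Conditions still tracked after NodeOutOfDisk is dropped from the list.
var remainingNodeConditionTypes = []string{
	"MemoryPressure",
	"DiskPressure",
	"PIDPressure",
}

// markUnknown mirrors the idea of the controller's fallback: when the kubelet
// stops posting status, each tracked condition is set to Unknown.
func markUnknown(now time.Time) []condition {
	out := make([]condition, 0, len(remainingNodeConditionTypes))
	for _, t := range remainingNodeConditionTypes {
		out = append(out, condition{
			Type:               t,
			Status:             "Unknown",
			Reason:             "NodeStatusUnknown",
			Message:            "Kubelet stopped posting node status.",
			LastTransitionTime: now,
		})
	}
	return out
}

func main() {
	for _, c := range markUnknown(time.Now()) {
		fmt.Printf("%s=%s (%s)\n", c.Type, c.Status, c.Reason)
	}
}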

@ -1423,14 +1423,6 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
@ -1498,13 +1490,6 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionFalse,
// Node status hasn't been updated for 1hr.
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@ -1526,13 +1511,6 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionFalse,
// Node status hasn't been updated for 1hr.
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@ -1555,14 +1533,6 @@ func TestMonitorNodeHealthUpdateStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionUnknown,
Reason: "NodeStatusUnknown",
Message: "Kubelet stopped posting node status.",
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Time{Time: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
@ -1770,14 +1740,6 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastHeartbeatTime: nodeCreationTime,
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: nodeCreationTime,
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
@ -1872,14 +1834,6 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastHeartbeatTime: nodeCreationTime,
LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)},
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: "Kubelet never posted node status.",
LastHeartbeatTime: nodeCreationTime,
LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)},
},
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
@ -1930,7 +1884,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Type: v1.NodeDiskPressure,
Status: v1.ConditionFalse,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
@ -1958,7 +1912,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Type: v1.NodeDiskPressure,
Status: v1.ConditionFalse,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
@ -1985,7 +1939,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Type: v1.NodeDiskPressure,
Status: v1.ConditionFalse,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
@ -2020,7 +1974,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Type: v1.NodeDiskPressure,
Status: v1.ConditionFalse,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
@ -2048,7 +2002,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Type: v1.NodeDiskPressure,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Time{Time: fakeNow.Add(time.Hour)},
LastTransitionTime: fakeNow,
@ -2075,7 +2029,7 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Type: v1.NodeDiskPressure,
Status: v1.ConditionFalse,
LastHeartbeatTime: metav1.Time{Time: fakeNow.Add(time.Hour)},
LastTransitionTime: fakeNow,
@ -2109,12 +2063,6 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionFalse,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@ -2137,12 +2085,6 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionFalse,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"),
@ -2166,14 +2108,6 @@ func TestMonitorNodeHealthUpdateNodeAndPodStatusWithLease(t *testing.T) {
LastHeartbeatTime: fakeNow,
LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)},
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionUnknown,
Reason: "NodeStatusUnknown",
Message: "Kubelet stopped posting node status.",
LastHeartbeatTime: fakeNow,
LastTransitionTime: metav1.Time{Time: fakeNow.Add(time.Hour)},
},
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionUnknown,
@ -2759,10 +2693,6 @@ func TestTaintsNodeByCondition(t *testing.T) {
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()
outOfDiskTaint := &v1.Taint{
Key: schedulerapi.TaintNodeOutOfDisk,
Effect: v1.TaintEffectNoSchedule,
}
networkUnavailableTaint := &v1.Taint{
Key: schedulerapi.TaintNodeNetworkUnavailable,
Effect: v1.TaintEffectNoSchedule,
@ -2812,7 +2742,7 @@ func TestTaintsNodeByCondition(t *testing.T) {
ExpectedTaints: []*v1.Taint{networkUnavailableTaint},
},
{
Name: "NetworkUnavailable and OutOfDisk are true",
Name: "NetworkUnavailable is true",
Node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
@ -2836,48 +2766,6 @@ func TestTaintsNodeByCondition(t *testing.T) {
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
ExpectedTaints: []*v1.Taint{networkUnavailableTaint, outOfDiskTaint},
},
{
Name: "NetworkUnavailable is true, OutOfDisk is unknown",
Node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region1",
kubeletapis.LabelZoneFailureDomain: "zone1",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
{
Type: v1.NodeNetworkUnavailable,
Status: v1.ConditionTrue,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
{
Type: v1.NodeOutOfDisk,
Status: v1.ConditionUnknown,
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},

View File

@ -128,7 +128,7 @@ const (
// beta: v1.12
//
// Taint nodes based on their condition status for 'NetworkUnavailable',
// 'MemoryPressure', 'OutOfDisk' and 'DiskPressure'.
// 'MemoryPressure', 'PIDPressure' and 'DiskPressure'.
TaintNodesByCondition utilfeature.Feature = "TaintNodesByCondition"
// owner: @jsafrane

View File

@ -63,8 +63,6 @@ var (
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure", "node(s) had disk pressure")
// ErrNodeUnderPIDPressure is used for NodeUnderPIDPressure predicate error.
ErrNodeUnderPIDPressure = newPredicateFailureError("NodeUnderPIDPressure", "node(s) had pid pressure")
// ErrNodeOutOfDisk is used for NodeOutOfDisk predicate error.
ErrNodeOutOfDisk = newPredicateFailureError("NodeOutOfDisk", "node(s) were out of disk space")
// ErrNodeNotReady is used for NodeNotReady predicate error.
ErrNodeNotReady = newPredicateFailureError("NodeNotReady", "node(s) were not ready")
// ErrNodeNetworkUnavailable is used for NodeNetworkUnavailable predicate error.

View File

@ -1563,7 +1563,7 @@ func CheckNodePIDPressurePredicate(pod *v1.Pod, meta algorithm.PredicateMetadata
return true, nil, nil
}
// CheckNodeConditionPredicate checks if a pod can be scheduled on a node reporting out of disk,
// CheckNodeConditionPredicate checks if a pod can be scheduled on a node reporting
// network unavailable and not ready condition. Only node conditions are accounted in this predicate.
func CheckNodeConditionPredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
reasons := []algorithm.PredicateFailureReason{}
@ -1576,12 +1576,9 @@ func CheckNodeConditionPredicate(pod *v1.Pod, meta algorithm.PredicateMetadata,
for _, cond := range node.Status.Conditions {
// We consider the node for scheduling only when its:
// - NodeReady condition status is ConditionTrue,
// - NodeOutOfDisk condition status is ConditionFalse,
// - NodeNetworkUnavailable condition status is ConditionFalse.
if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
reasons = append(reasons, ErrNodeNotReady)
} else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
reasons = append(reasons, ErrNodeOutOfDisk)
} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
reasons = append(reasons, ErrNodeNetworkUnavailable)
}
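
With the OutOfDisk branch gone, the visible part of CheckNodeConditionPredicate rejects a node only for a not-Ready or NetworkUnavailable condition (the unschedulable check sits elsewhere in the function). A minimal sketch of the remaining loop, assuming simplified stand-in types rather than the scheduler's real signatures:

package main

import "fmt"

// NodeCondition is a simplified stand-in for v1.NodeCondition.
type NodeCondition struct {
	Type   string // e.g. "Ready", "NetworkUnavailable"
	Status string // "True", "False", "Unknown"
}

// checkNodeConditions mirrors the loop left after the OutOfDisk branch is
// removed: a node is rejected when Ready is not True or NetworkUnavailable
// is not False.
func checkNodeConditions(conds []NodeCondition) (bool, []string) {
	var reasons []string
	for _, cond := range conds {
		if cond.Type == "Ready" && cond.Status != "True" {
			reasons = append(reasons, "NodeNotReady")
		} else if cond.Type == "NetworkUnavailable" && cond.Status != "False" {
			reasons = append(reasons, "NodeNetworkUnavailable")
		}
	}
	return len(reasons) == 0, reasons
}

func main() {
	ok, why := checkNodeConditions([]NodeCondition{
		{Type: "Ready", Status: "True"},
		{Type: "NetworkUnavailable", Status: "False"},
	})
	fmt.Println(ok, why)
}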

View File

@ -4521,47 +4521,17 @@ func TestNodeConditionPredicate(t *testing.T) {
schedulable: false,
},
{
name: "node3 ignored - node out of disk",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node3"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},
{
name: "node4 considered",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node4"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: true,
},
{
name: "node5 ignored - node out of disk",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node5"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},
{
name: "node6 considered",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node6"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: true,
},
{
name: "node7 ignored - node out of disk, node not Ready",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node7"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},
{
name: "node8 ignored - node not Ready",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node8"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: false,
},
{
name: "node9 ignored - node unschedulable",
name: "node3 ignored - node unschedulable",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node9"}, Spec: v1.NodeSpec{Unschedulable: true}},
schedulable: false,
},
{
name: "node10 considered",
name: "node4 considered",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node10"}, Spec: v1.NodeSpec{Unschedulable: false}},
schedulable: true,
},
{
name: "node11 considered",
name: "node5 considered",
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node11"}},
schedulable: true,
},

View File

@ -45,11 +45,6 @@ const (
// and removed when node becomes schedulable.
TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"
// TaintNodeOutOfDisk will be added when node becomes out of disk
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeOutOfDisk = "node.kubernetes.io/out-of-disk"
// TaintNodeMemoryPressure will be added when node has memory pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough memory.

View File

@ -1061,7 +1061,6 @@ func nodesWherePreemptionMightHelp(nodes []*v1.Node, failedPredicatesMap FailedP
predicates.ErrNodeUnderDiskPressure,
predicates.ErrNodeUnderPIDPressure,
predicates.ErrNodeUnderMemoryPressure,
predicates.ErrNodeOutOfDisk,
predicates.ErrNodeUnschedulable,
predicates.ErrNodeUnknownCondition,
predicates.ErrVolumeZoneConflict,

View File
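
The test hunks below reflect that ErrNodeOutOfDisk is no longer among the failure reasons preemption cannot resolve, so a node whose failure list ends up empty (machine4) now counts as a preemption candidate. A rough sketch of that filtering idea, with an illustrative subset of reasons and an invented helper name rather than the scheduler's full list or real signature:

package main

import "fmt"

// unresolvable is a trimmed, illustrative subset of the failure reasons the
// scheduler treats as not fixable by preempting pods; after this commit
// "NodeOutOfDisk" no longer exists, so it cannot appear here.
var unresolvable = map[string]bool{
	"NodeSelectorNotMatch":    true,
	"NodeUnderDiskPressure":   true,
	"NodeUnderPIDPressure":    true,
	"NodeUnderMemoryPressure": true,
	"NodeUnschedulable":       true,
}

// preemptionMightHelp returns true unless any failure reason for the node is
// unresolvable, mirroring the behaviour exercised by the updated test.
func preemptionMightHelp(reasons []string) bool {
	for _, r := range reasons {
		if unresolvable[r] {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(preemptionMightHelp(nil))                              // true: nothing left that preemption cannot fix
	fmt.Println(preemptionMightHelp([]string{"NodeUnderDiskPressure"})) // false: unresolvable reason present
}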

@ -1212,7 +1212,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
{
name: "Mix of failed predicates works fine",
failedPredMap: FailedPredicateMap{
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeSelectorNotMatch, algorithmpredicates.ErrNodeOutOfDisk, algorithmpredicates.NewInsufficientResourceError(v1.ResourceMemory, 1000, 500, 300)},
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeSelectorNotMatch, algorithmpredicates.ErrNodeUnderDiskPressure, algorithmpredicates.NewInsufficientResourceError(v1.ResourceMemory, 1000, 500, 300)},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodNotMatchHostName, algorithmpredicates.ErrDiskConflict},
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.NewInsufficientResourceError(v1.ResourceMemory, 1000, 600, 400)},
"machine4": []algorithm.PredicateFailureReason{},
@ -1225,9 +1225,8 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderDiskPressure},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderPIDPressure},
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderMemoryPressure},
"machine4": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeOutOfDisk},
},
expected: map[string]bool{},
expected: map[string]bool{"machine4": true},
},
{
name: "Node condition errors and ErrNodeUnknownCondition should be considered unresolvable",