Merge pull request #50362 from k82cn/k8s_50360

Automatic merge from submit-queue

Moved the node condition filter into a predicate.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #50360 

**Release note**:
```release-note
A new predicate, named 'CheckNodeCondition', was added to replace the node condition filter. 'NetworkUnavailable', 'OutOfDisk' and 'NotReady' may be reported as reasons when a pod fails to schedule.
```
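For context, here is a minimal standalone sketch of the filtering the new predicate performs, written against the 1.8-era `k8s.io/api` types (the OutOfDisk condition has since been deprecated). It mirrors the logic of `CheckNodeConditionPredicate` in the diff below, but operates directly on a `v1.Node` and collects plain strings instead of the scheduler's typed failure reasons:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodeConditionReasons returns why a node would be rejected for scheduling:
// NotReady, OutOfDisk, NetworkUnavailable, or being marked unschedulable.
func nodeConditionReasons(node *v1.Node) []string {
	var reasons []string
	for _, cond := range node.Status.Conditions {
		switch {
		case cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue:
			reasons = append(reasons, "NodeNotReady")
		case cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse:
			reasons = append(reasons, "NodeOutOfDisk")
		case cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse:
			reasons = append(reasons, "NodeNetworkUnavailable")
		}
	}
	if node.Spec.Unschedulable {
		reasons = append(reasons, "NodeUnschedulable")
	}
	return reasons
}

func main() {
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "node-a"},
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}},
		},
	}
	fmt.Println(nodeConditionReasons(node)) // [NodeNotReady]
}
```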
Kubernetes Submit Queue 2017-08-12 15:10:31 -07:00 committed by GitHub
commit 2820b45caa
9 changed files with 217 additions and 84 deletions

@@ -38,6 +38,10 @@ var (
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure")
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure")
ErrNodeOutOfDisk = newPredicateFailureError("NodeOutOfDisk")
ErrNodeNotReady = newPredicateFailureError("NodeNotReady")
ErrNodeNetworkUnavailable = newPredicateFailureError("NodeNetworkUnavailable")
ErrNodeUnschedulable = newPredicateFailureError("NodeUnschedulable")
ErrNodeUnknownCondition = newPredicateFailureError("NodeUnknownCondition")
ErrVolumeNodeConflict = newPredicateFailureError("NoVolumeNodeConflict")
// ErrFakePredicate is used for test only. The fake predicates returning false also returns error
// as ErrFakePredicate.

@@ -1301,6 +1301,37 @@ func CheckNodeDiskPressurePredicate(pod *v1.Pod, meta interface{}, nodeInfo *sch
return true, nil, nil
}
// CheckNodeConditionPredicate checks whether a pod can be scheduled on a node reporting the
// out-of-disk, network-unavailable, or not-ready condition. Only node conditions are accounted for in this predicate.
func CheckNodeConditionPredicate(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	reasons := []algorithm.PredicateFailureReason{}

	if nodeInfo == nil || nodeInfo.Node() == nil {
		return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil
	}

	node := nodeInfo.Node()
	for _, cond := range node.Status.Conditions {
		// We consider the node for scheduling only when its:
		// - NodeReady condition status is ConditionTrue,
		// - NodeOutOfDisk condition status is ConditionFalse,
		// - NodeNetworkUnavailable condition status is ConditionFalse.
		if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
			reasons = append(reasons, ErrNodeNotReady)
		} else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
			reasons = append(reasons, ErrNodeOutOfDisk)
		} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
			reasons = append(reasons, ErrNodeNetworkUnavailable)
		}
	}

	if node.Spec.Unschedulable {
		reasons = append(reasons, ErrNodeUnschedulable)
	}

	return len(reasons) == 0, reasons, nil
}
type VolumeNodeChecker struct {
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo

@@ -3442,6 +3442,78 @@ func TestPodSchedulesOnNodeWithDiskPressureCondition(t *testing.T) {
}
}
func TestNodeConditionPredicate(t *testing.T) {
tests := []struct {
node *v1.Node
schedulable bool
}{
// node1 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node1"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}}}},
schedulable: true,
},
// node2 ignored - node not Ready
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node2"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}}},
schedulable: false,
},
// node3 ignored - node out of disk
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node3"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},
// node4 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node4"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: true,
},
// node5 ignored - node out of disk
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node5"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},
// node6 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node6"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: true,
},
// node7 ignored - node out of disk, node not Ready
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node7"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
schedulable: false,
},
// node8 ignored - node not Ready
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node8"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
schedulable: false,
},
// node9 ignored - node unschedulable
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node9"}, Spec: v1.NodeSpec{Unschedulable: true}},
schedulable: false,
},
// node10 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node10"}, Spec: v1.NodeSpec{Unschedulable: false}},
schedulable: true,
},
// node11 considered
{
node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node11"}},
schedulable: true,
},
}
for _, test := range tests {
nodeInfo := makeEmptyNodeInfo(test.node)
if fit, reasons, err := CheckNodeConditionPredicate(nil, nil, nodeInfo); fit != test.schedulable {
t.Errorf("%s: expected: %t, got %t; %+v, %v",
test.node.Name, test.schedulable, fit, reasons, err)
}
}
}
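The test above relies on `makeEmptyNodeInfo`, an existing helper in this file that the hunk does not show. A minimal equivalent, assuming the 1.8-era `schedulercache.NewNodeInfo` and `NodeInfo.SetNode` (the actual helper may differ):

```go
// makeEmptyNodeInfo wraps a node in a NodeInfo that carries no pods, so only
// the node's own conditions and spec influence the predicate under test.
func makeEmptyNodeInfo(node *v1.Node) *schedulercache.NodeInfo {
	nodeInfo := schedulercache.NewNodeInfo() // no pods scheduled on the node
	nodeInfo.SetNode(node)                   // attach the node under test
	return nodeInfo
}
```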
func createPodWithVolume(pod, pv, pvc string) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: pod, Namespace: "default"},

@@ -387,6 +387,79 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.8": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "CheckNodeCondition"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}},
{"name": "NoVolumeNodeConflict"}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "CheckNodeCondition"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
{Name: "NoVolumeNodeConflict"},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
},
},
}
registeredPredicates := sets.NewString(factory.ListRegisteredFitPredicates()...)

@@ -175,6 +175,10 @@ func defaultPredicates() sets.String {
// Fit is determined by node disk pressure condition.
factory.RegisterFitPredicate("CheckNodeDiskPressure", predicates.CheckNodeDiskPressurePredicate),
// Fit is determined by node conditions: not ready, network unavailable and out of disk.
factory.RegisterFitPredicate("CheckNodeCondition", predicates.CheckNodeConditionPredicate),
factory.RegisterMandatoryFitPredicate("CheckNodeCondition", predicates.CheckNodeConditionPredicate),
// Fit is determined by volume zone requirements.
factory.RegisterFitPredicateFactory(
"NoVolumeNodeConflict",

@@ -705,7 +705,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
SchedulerCache: f.schedulerCache,
Ecache: f.equivalencePodCache,
// The scheduler only needs to consider schedulable nodes.
NodeLister: &nodePredicateLister{f.nodeLister},
NodeLister: &nodeLister{f.nodeLister},
Algorithm: algo,
Binder: f.getBinder(extenders),
PodConditionUpdater: &podConditionUpdater{f.client},
@@ -720,12 +720,12 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
}, nil
}
type nodePredicateLister struct {
type nodeLister struct {
corelisters.NodeLister
}
func (n *nodePredicateLister) List() ([]*v1.Node, error) {
return n.ListWithPredicate(getNodeConditionPredicate())
func (n *nodeLister) List() ([]*v1.Node, error) {
return n.NodeLister.List(labels.Everything())
}
func (f *ConfigFactory) GetPriorityFunctionConfigs(priorityKeys sets.String) ([]algorithm.PriorityConfig, error) {
@@ -770,11 +770,10 @@ func (f *ConfigFactory) getPluginArgs() (*PluginFactoryArgs, error) {
ControllerLister: f.controllerLister,
ReplicaSetLister: f.replicaSetLister,
StatefulSetLister: f.statefulSetLister,
// All fit predicates only need to consider schedulable nodes.
NodeLister: &nodePredicateLister{f.nodeLister},
NodeInfo: &predicates.CachedNodeInfo{NodeLister: f.nodeLister},
PVInfo: &predicates.CachedPersistentVolumeInfo{PersistentVolumeLister: f.pVLister},
PVCInfo: &predicates.CachedPersistentVolumeClaimInfo{PersistentVolumeClaimLister: f.pVCLister},
NodeLister: &nodeLister{f.nodeLister},
NodeInfo: &predicates.CachedNodeInfo{NodeLister: f.nodeLister},
PVInfo: &predicates.CachedPersistentVolumeInfo{PersistentVolumeLister: f.pVLister},
PVCInfo: &predicates.CachedPersistentVolumeClaimInfo{PersistentVolumeClaimLister: f.pVCLister},
HardPodAffinitySymmetricWeight: f.hardPodAffinitySymmetricWeight,
}, nil
}
@@ -793,34 +792,6 @@ func (f *ConfigFactory) ResponsibleForPod(pod *v1.Pod) bool {
return f.schedulerName == pod.Spec.SchedulerName
}
func getNodeConditionPredicate() corelisters.NodeConditionPredicate {
return func(node *v1.Node) bool {
for i := range node.Status.Conditions {
cond := &node.Status.Conditions[i]
// We consider the node for scheduling only when its:
// - NodeReady condition status is ConditionTrue,
// - NodeOutOfDisk condition status is ConditionFalse,
// - NodeNetworkUnavailable condition status is ConditionFalse.
if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
} else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
glog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
}
}
// Ignore nodes that are marked unschedulable
if node.Spec.Unschedulable {
glog.V(4).Infof("Ignoring node %v since it is unschedulable", node.Name)
return false
}
return true
}
}
// unassignedNonTerminatedPod selects pods that are unassigned and non-terminal.
func unassignedNonTerminatedPod(pod *v1.Pod) bool {
if len(pod.Spec.NodeName) != 0 {

@@ -525,46 +525,3 @@ func TestInvalidFactoryArgs(t *testing.T) {
}
}
func TestNodeConditionPredicate(t *testing.T) {
nodeFunc := getNodeConditionPredicate()
nodeList := &v1.NodeList{
Items: []v1.Node{
// node1 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node1"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}}}},
// node2 ignored - node not Ready
{ObjectMeta: metav1.ObjectMeta{Name: "node2"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}}},
// node3 ignored - node out of disk
{ObjectMeta: metav1.ObjectMeta{Name: "node3"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node4 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node4"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
// node5 ignored - node out of disk
{ObjectMeta: metav1.ObjectMeta{Name: "node5"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node6 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node6"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
// node7 ignored - node out of disk, node not Ready
{ObjectMeta: metav1.ObjectMeta{Name: "node7"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node8 ignored - node not Ready
{ObjectMeta: metav1.ObjectMeta{Name: "node8"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
// node9 ignored - node unschedulable
{ObjectMeta: metav1.ObjectMeta{Name: "node9"}, Spec: v1.NodeSpec{Unschedulable: true}},
// node10 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node10"}, Spec: v1.NodeSpec{Unschedulable: false}},
// node11 considered
{ObjectMeta: metav1.ObjectMeta{Name: "node11"}},
},
}
nodeNames := []string{}
for _, node := range nodeList.Items {
if nodeFunc(&node) {
nodeNames = append(nodeNames, node.Name)
}
}
expectedNodes := []string{"node1", "node4", "node6", "node10", "node11"}
if !reflect.DeepEqual(expectedNodes, nodeNames) {
t.Errorf("expected: %v, got %v", expectedNodes, nodeNames)
}
}

@@ -72,9 +72,10 @@ var (
schedulerFactoryMutex sync.Mutex
// maps that hold registered algorithm types
fitPredicateMap = make(map[string]FitPredicateFactory)
priorityFunctionMap = make(map[string]PriorityConfigFactory)
algorithmProviderMap = make(map[string]AlgorithmProviderConfig)
fitPredicateMap = make(map[string]FitPredicateFactory)
mandatoryFitPredicateMap = make(map[string]FitPredicateFactory)
priorityFunctionMap = make(map[string]PriorityConfigFactory)
algorithmProviderMap = make(map[string]AlgorithmProviderConfig)
// Registered metadata producers
priorityMetadataProducer MetadataProducerFactory
@@ -99,6 +100,17 @@ func RegisterFitPredicate(name string, predicate algorithm.FitPredicate) string
return RegisterFitPredicateFactory(name, func(PluginFactoryArgs) algorithm.FitPredicate { return predicate })
}
// RegisterMandatoryFitPredicate registers a fit predicate with the algorithm registry. The
// predicate is also used by the kubelet and DaemonSets, so it is always included in the
// scheduler's configuration. Returns the name with which the predicate was registered.
func RegisterMandatoryFitPredicate(name string, predicate algorithm.FitPredicate) string {
	schedulerFactoryMutex.Lock()
	defer schedulerFactoryMutex.Unlock()
	validateAlgorithmNameOrDie(name)
	mandatoryFitPredicateMap[name] = func(PluginFactoryArgs) algorithm.FitPredicate { return predicate }
	return name
}
// RegisterFitPredicateFactory registers a fit predicate factory with the
// algorithm registry. Returns the name with which the predicate was registered.
func RegisterFitPredicateFactory(name string, predicateFactory FitPredicateFactory) string {
@@ -308,6 +320,14 @@ func getFitPredicateFunctions(names sets.String, args PluginFactoryArgs) (map[st
}
predicates[name] = factory(args)
}
	// Always include required fit predicates.
	for name, factory := range mandatoryFitPredicateMap {
		if _, found := predicates[name]; !found {
			predicates[name] = factory(args)
		}
	}

	return predicates, nil
}
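Taken together with the defaults.go hunk above, the new check is wired in twice: once as an ordinary fit predicate, selectable by name in a Policy or algorithm provider, and once as a mandatory predicate that `getFitPredicateFunctions` folds back into every predicate set. A condensed sketch of that registration, with the 1.8-era package paths assumed:

```go
package main

import (
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
	"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
)

func main() {
	// Selectable by name ("CheckNodeCondition") in a scheduler Policy.
	factory.RegisterFitPredicate("CheckNodeCondition", predicates.CheckNodeConditionPredicate)

	// Merged into every predicate set by getFitPredicateFunctions, so the
	// node-condition check cannot be configured away.
	factory.RegisterMandatoryFitPredicate("CheckNodeCondition", predicates.CheckNodeConditionPredicate)
}
```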

@@ -139,7 +139,8 @@ func TestSchedulerCreationFromConfigMap(t *testing.T) {
// Verify that the config is applied correctly.
schedPredicates := sched.Config().Algorithm.Predicates()
schedPrioritizers := sched.Config().Algorithm.Prioritizers()
if len(schedPredicates) != 2 || len(schedPrioritizers) != 2 {
// Includes one mandatory predicate.
if len(schedPredicates) != 3 || len(schedPrioritizers) != 2 {
t.Errorf("Unexpected number of predicates or priority functions. Number of predicates: %v, number of prioritizers: %v", len(schedPredicates), len(schedPrioritizers))
}
// Check a predicate and a priority function.