Merge pull request #67788 from mohamed-mehany/inter-pod-affinity-optimization

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md.

Affinity/Anti-Affinity Optimization of Pod Being Scheduled

**What this PR does / why we need it**:
Following #66948, it was noticed that the optimizations applied to looking up the anti-affinity rules of existing pods could also be applied to checking the affinity and anti-affinity terms of the Pod being scheduled. This is done by mapping topology pairs to the pods that potentially match the pod being scheduled, instead of mapping nodes to matching pods, which reduces the search space.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #67738

**Special notes for your reviewer**:
/sig scheduling
/sig scalability

**Release note**:

```release-note
Improve performance of Pod affinity/anti-affinity in the scheduler
```
pull/8/head
Kubernetes Submit Queue 2018-09-03 11:40:09 -07:00 committed by GitHub
commit 7548764f96
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 151 additions and 166 deletions

View File

@ -71,15 +71,15 @@ type predicateMetadata struct {
podPorts []*v1.ContainerPort podPorts []*v1.ContainerPort
topologyPairsAntiAffinityPodsMap *topologyPairsMaps topologyPairsAntiAffinityPodsMap *topologyPairsMaps
// A map of node name to a list of Pods on the node that can potentially match // A map of topology pairs to a list of Pods that can potentially match
// the affinity rules of the "pod". // the affinity rules of the "pod" and its inverse.
nodeNameToMatchingAffinityPods map[string][]*v1.Pod topologyPairsPotentialAffinityPods *topologyPairsMaps
// A map of node name to a list of Pods on the node that can potentially match // A map of topology pairs to a list of Pods that can potentially match
// the anti-affinity rules of the "pod". // the anti-affinity rules of the "pod" and its inverse.
nodeNameToMatchingAntiAffinityPods map[string][]*v1.Pod topologyPairsPotentialAntiAffinityPods *topologyPairsMaps
serviceAffinityInUse bool serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service serviceAffinityMatchingPodServices []*v1.Service
// ignoredExtendedResources is a set of extended resource names that will // ignoredExtendedResources is a set of extended resource names that will
// be ignored in the PodFitsResources predicate. // be ignored in the PodFitsResources predicate.
// //
@ -134,19 +134,19 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
if err != nil { if err != nil {
return nil return nil
} }
affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap) topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap)
if err != nil { if err != nil {
glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err) glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
return nil return nil
} }
predicateMetadata := &predicateMetadata{ predicateMetadata := &predicateMetadata{
pod: pod, pod: pod,
podBestEffort: isPodBestEffort(pod), podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod), podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod), podPorts: schedutil.GetContainerPorts(pod),
nodeNameToMatchingAffinityPods: affinityPods, topologyPairsPotentialAffinityPods: topologyPairsAffinityPodsMaps,
nodeNameToMatchingAntiAffinityPods: antiAffinityPods, topologyPairsPotentialAntiAffinityPods: topologyPairsAntiAffinityPodsMaps,
topologyPairsAntiAffinityPodsMap: topologyPairsMaps, topologyPairsAntiAffinityPodsMap: topologyPairsMaps,
} }
for predicateName, precomputeFunc := range predicateMetadataProducers { for predicateName, precomputeFunc := range predicateMetadataProducers {
glog.V(10).Infof("Precompute: %v", predicateName) glog.V(10).Infof("Precompute: %v", predicateName)
@ -200,33 +200,9 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
return fmt.Errorf("deletedPod and meta.pod must not be the same") return fmt.Errorf("deletedPod and meta.pod must not be the same")
} }
meta.topologyPairsAntiAffinityPodsMap.removePod(deletedPod) meta.topologyPairsAntiAffinityPodsMap.removePod(deletedPod)
// Delete pod from the matching affinity or anti-affinity pods if exists. // Delete pod from the matching affinity or anti-affinity topology pairs maps.
affinity := meta.pod.Spec.Affinity meta.topologyPairsPotentialAffinityPods.removePod(deletedPod)
podNodeName := deletedPod.Spec.NodeName meta.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
if affinity != nil && len(podNodeName) > 0 {
if affinity.PodAffinity != nil {
for i, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAffinityPods[podNodeName] = s
break
}
}
}
if affinity.PodAntiAffinity != nil {
for i, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAntiAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = s
break
}
}
}
}
// All pods in the serviceAffinityMatchingPodList are in the same namespace. // All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the // So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list. // deletedPod, we don't need to check the list, as deletedPod isn't in the list.
@ -267,29 +243,27 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
affinity := meta.pod.Spec.Affinity affinity := meta.pod.Spec.Affinity
podNodeName := addedPod.Spec.NodeName podNodeName := addedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 { if affinity != nil && len(podNodeName) > 0 {
podNode := nodeInfo.Node()
// It is assumed that when the added pod matches affinity of the meta.pod, all the terms must match,
// this should be changed when the implementation of targetPodMatchesAffinityOfPod/podMatchesAffinityTermProperties
// is changed
if targetPodMatchesAffinityOfPod(meta.pod, addedPod) { if targetPodMatchesAffinityOfPod(meta.pod, addedPod) {
found := false affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
for _, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] { for _, term := range affinityTerms {
if p == addedPod { if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
found = true pair := topologyPair{key: term.TopologyKey, value: topologyValue}
break meta.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod)
} }
} }
if !found {
meta.nodeNameToMatchingAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAffinityPods[podNodeName], addedPod)
}
} }
if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) { if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) {
found := false antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
for _, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] { for _, term := range antiAffinityTerms {
if p == addedPod { if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
found = true pair := topologyPair{key: term.TopologyKey, value: topologyValue}
break meta.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod)
} }
} }
if !found {
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAntiAffinityPods[podNodeName], addedPod)
}
} }
} }
// If addedPod is in the same namespace as the meta.pod, update the list // If addedPod is in the same namespace as the meta.pod, update the list
@ -308,22 +282,17 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
// its maps and slices, but it does not copy the contents of pointer values. // its maps and slices, but it does not copy the contents of pointer values.
func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata { func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
newPredMeta := &predicateMetadata{ newPredMeta := &predicateMetadata{
pod: meta.pod, pod: meta.pod,
podBestEffort: meta.podBestEffort, podBestEffort: meta.podBestEffort,
podRequest: meta.podRequest, podRequest: meta.podRequest,
serviceAffinityInUse: meta.serviceAffinityInUse, serviceAffinityInUse: meta.serviceAffinityInUse,
ignoredExtendedResources: meta.ignoredExtendedResources, ignoredExtendedResources: meta.ignoredExtendedResources,
topologyPairsAntiAffinityPodsMap: meta.topologyPairsAntiAffinityPodsMap,
} }
newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...) newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...)
newPredMeta.nodeNameToMatchingAffinityPods = make(map[string][]*v1.Pod) newPredMeta.topologyPairsPotentialAffinityPods = newTopologyPairsMaps()
for k, v := range meta.nodeNameToMatchingAffinityPods { newPredMeta.topologyPairsPotentialAffinityPods.appendMaps(meta.topologyPairsPotentialAffinityPods)
newPredMeta.nodeNameToMatchingAffinityPods[k] = append([]*v1.Pod(nil), v...) newPredMeta.topologyPairsPotentialAntiAffinityPods = newTopologyPairsMaps()
} newPredMeta.topologyPairsPotentialAntiAffinityPods.appendMaps(meta.topologyPairsPotentialAntiAffinityPods)
newPredMeta.nodeNameToMatchingAntiAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAntiAffinityPods {
newPredMeta.nodeNameToMatchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.topologyPairsAntiAffinityPodsMap = newTopologyPairsMaps() newPredMeta.topologyPairsAntiAffinityPodsMap = newTopologyPairsMaps()
newPredMeta.topologyPairsAntiAffinityPodsMap.appendMaps(meta.topologyPairsAntiAffinityPodsMap) newPredMeta.topologyPairsAntiAffinityPodsMap.appendMaps(meta.topologyPairsAntiAffinityPodsMap)
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil), newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
@ -373,12 +342,12 @@ func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermPro
// It ignores topology. It returns a set of Pods that are checked later by the affinity // It ignores topology. It returns a set of Pods that are checked later by the affinity
// predicate. With this set of pods available, the affinity predicate does not // predicate. With this set of pods available, the affinity predicate does not
// need to check all the pods in the cluster. // need to check all the pods in the cluster.
func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (affinityPods map[string][]*v1.Pod, antiAffinityPods map[string][]*v1.Pod, err error) { func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) {
allNodeNames := make([]string, 0, len(nodeInfoMap)) allNodeNames := make([]string, 0, len(nodeInfoMap))
affinity := pod.Spec.Affinity affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) { if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return nil, nil, nil return newTopologyPairsMaps(), newTopologyPairsMaps(), nil
} }
for name := range nodeInfoMap { for name := range nodeInfoMap {
@ -387,16 +356,16 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache
var lock sync.Mutex var lock sync.Mutex
var firstError error var firstError error
affinityPods = make(map[string][]*v1.Pod) topologyPairsAffinityPodsMaps = newTopologyPairsMaps()
antiAffinityPods = make(map[string][]*v1.Pod) topologyPairsAntiAffinityPodsMaps = newTopologyPairsMaps()
appendResult := func(nodeName string, affPods, antiAffPods []*v1.Pod) { appendResult := func(nodeName string, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps *topologyPairsMaps) {
lock.Lock() lock.Lock()
defer lock.Unlock() defer lock.Unlock()
if len(affPods) > 0 { if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 {
affinityPods[nodeName] = affPods topologyPairsAffinityPodsMaps.appendMaps(nodeTopologyPairsAffinityPodsMaps)
} }
if len(antiAffPods) > 0 { if len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
antiAffinityPods[nodeName] = antiAffPods topologyPairsAntiAffinityPodsMaps.appendMaps(nodeTopologyPairsAntiAffinityPodsMaps)
} }
} }
@ -417,6 +386,8 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache
return nil, nil, err return nil, nil, err
} }
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
processNode := func(i int) { processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]] nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node() node := nodeInfo.Node()
@ -424,24 +395,34 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache
catchError(fmt.Errorf("nodeInfo.Node is nil")) catchError(fmt.Errorf("nodeInfo.Node is nil"))
return return
} }
affPods := make([]*v1.Pod, 0, len(nodeInfo.Pods())) nodeTopologyPairsAffinityPodsMaps := newTopologyPairsMaps()
antiAffPods := make([]*v1.Pod, 0, len(nodeInfo.Pods())) nodeTopologyPairsAntiAffinityPodsMaps := newTopologyPairsMaps()
for _, existingPod := range nodeInfo.Pods() { for _, existingPod := range nodeInfo.Pods() {
// Check affinity properties. // Check affinity properties.
if podMatchesAffinityTermProperties(existingPod, affinityProperties) { if podMatchesAffinityTermProperties(existingPod, affinityProperties) {
affPods = append(affPods, existingPod) for _, term := range affinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
} }
// Check anti-affinity properties. // Check anti-affinity properties.
if podMatchesAffinityTermProperties(existingPod, antiAffinityProperties) { if podMatchesAffinityTermProperties(existingPod, antiAffinityProperties) {
antiAffPods = append(antiAffPods, existingPod) for _, term := range antiAffinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAntiAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
} }
} }
if len(antiAffPods) > 0 || len(affPods) > 0 { if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 || len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
appendResult(node.Name, affPods, antiAffPods) appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps)
} }
} }
workqueue.Parallelize(16, len(allNodeNames), processNode) workqueue.Parallelize(16, len(allNodeNames), processNode)
return affinityPods, antiAffinityPods, firstError return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, firstError
} }
// podMatchesAffinity returns true if "targetPod" matches any affinity rule of // podMatchesAffinity returns true if "targetPod" matches any affinity rule of

View File

@ -52,13 +52,6 @@ func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortableServices{}) var _ = sort.Interface(&sortableServices{})
func sortNodePodMap(np map[string][]*v1.Pod) {
for _, pl := range np {
sortablePods := sortablePods(pl)
sort.Sort(sortablePods)
}
}
// predicateMetadataEquivalent returns true if the two metadata are equivalent. // predicateMetadataEquivalent returns true if the two metadata are equivalent.
// Note: this function does not compare podRequest. // Note: this function does not compare podRequest.
func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error { func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
@ -77,15 +70,11 @@ func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
for !reflect.DeepEqual(meta1.podPorts, meta2.podPorts) { for !reflect.DeepEqual(meta1.podPorts, meta2.podPorts) {
return fmt.Errorf("podPorts are not equal") return fmt.Errorf("podPorts are not equal")
} }
sortNodePodMap(meta1.nodeNameToMatchingAffinityPods) if !reflect.DeepEqual(meta1.topologyPairsPotentialAffinityPods, meta2.topologyPairsPotentialAffinityPods) {
sortNodePodMap(meta2.nodeNameToMatchingAffinityPods) return fmt.Errorf("topologyPairsPotentialAffinityPods are not equal")
if !reflect.DeepEqual(meta1.nodeNameToMatchingAffinityPods, meta2.nodeNameToMatchingAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAffinityPods are not euqal")
} }
sortNodePodMap(meta1.nodeNameToMatchingAntiAffinityPods) if !reflect.DeepEqual(meta1.topologyPairsPotentialAntiAffinityPods, meta2.topologyPairsPotentialAntiAffinityPods) {
sortNodePodMap(meta2.nodeNameToMatchingAntiAffinityPods) return fmt.Errorf("topologyPairsPotentialAntiAffinityPods are not equal")
if !reflect.DeepEqual(meta1.nodeNameToMatchingAntiAffinityPods, meta2.nodeNameToMatchingAntiAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAntiAffinityPods are not euqal")
} }
if !reflect.DeepEqual(meta1.topologyPairsAntiAffinityPodsMap.podToTopologyPairs, if !reflect.DeepEqual(meta1.topologyPairsAntiAffinityPodsMap.podToTopologyPairs,
meta2.topologyPairsAntiAffinityPodsMap.podToTopologyPairs) { meta2.topologyPairsAntiAffinityPodsMap.podToTopologyPairs) {
@ -454,42 +443,71 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
}, },
}, },
}, },
nodeNameToMatchingAffinityPods: map[string][]*v1.Pod{ topologyPairsPotentialAffinityPods: &topologyPairsMaps{
"nodeA": { topologyPairToPods: map[topologyPair]podSet{
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, {key: "name", value: "nodeA"}: {
Spec: v1.PodSpec{NodeName: "nodeA"}, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
}: struct{}{},
},
{key: "name", value: "nodeC"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
}: struct{}{},
}, },
}, },
"nodeC": { podToTopologyPairs: map[string]topologyPairSet{
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"}, "p1_": {
Spec: v1.PodSpec{ topologyPair{key: "name", value: "nodeA"}: struct{}{},
NodeName: "nodeC",
},
}, },
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1}, "p2_": {
Spec: v1.PodSpec{NodeName: "nodeC"}, topologyPair{key: "name", value: "nodeC"}: struct{}{},
},
"p6_": {
topologyPair{key: "name", value: "nodeC"}: struct{}{},
}, },
}, },
}, },
nodeNameToMatchingAntiAffinityPods: map[string][]*v1.Pod{ topologyPairsPotentialAntiAffinityPods: &topologyPairsMaps{
"nodeN": { topologyPairToPods: map[topologyPair]podSet{
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, {key: "name", value: "nodeN"}: {
Spec: v1.PodSpec{NodeName: "nodeN"}, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeN"},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
}: struct{}{},
}, },
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"}, {key: "name", value: "nodeM"}: {
Spec: v1.PodSpec{ &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
NodeName: "nodeM", Spec: v1.PodSpec{NodeName: "nodeM"},
}, }: struct{}{},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
}, },
}, },
"nodeM": { podToTopologyPairs: map[string]topologyPairSet{
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1}, "p1_": {
Spec: v1.PodSpec{NodeName: "nodeM"}, topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p2_": {
topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p3_": {
topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p6_": {
topologyPair{key: "name", value: "nodeM"}: struct{}{},
}, },
}, },
}, },

View File

@ -1393,30 +1393,21 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
return nil, nil return nil, nil
} }
// anyPodsMatchingTopologyTerms checks whether any of the nodes given via // nodeMatchesTopologyTerms checks whether "nodeInfo" matches
// "targetPods" matches topology of all the "terms" for the give "pod" and "nodeInfo". // topology of all the "terms" for the given "pod".
func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) { func (c *PodAffinityChecker) nodeMatchesTopologyTerms(pod *v1.Pod, topologyPairs *topologyPairsMaps, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) bool {
for nodeName, targetPods := range targetPods { node := nodeInfo.Node()
targetPodNodeInfo, err := c.info.GetNodeInfo(nodeName) for _, term := range terms {
if err != nil { if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
return false, err pair := topologyPair{key: term.TopologyKey, value: topologyValue}
} if _, ok := topologyPairs.topologyPairToPods[pair]; !ok {
if len(targetPods) > 0 { return false
allTermsMatched := true
for _, term := range terms {
if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNodeInfo, term.TopologyKey) {
allTermsMatched = false
break
}
}
if allTermsMatched {
// We have 1 or more pods on the target node that have already matched namespace and selector
// and all of the terms topologies matched the target node. So, there is at least 1 matching pod on the node.
return true, nil
} }
} else {
return false
} }
} }
return false, nil return true
} }
// Checks if scheduling the pod onto this node would break any rules of this pod. // Checks if scheduling the pod onto this node would break any rules of this pod.
@ -1429,20 +1420,15 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
} }
if predicateMeta, ok := meta.(*predicateMetadata); ok { if predicateMeta, ok := meta.(*predicateMetadata); ok {
// Check all affinity terms. // Check all affinity terms.
matchingPods := predicateMeta.nodeNameToMatchingAffinityPods topologyPairsPotentialAffinityPods := predicateMeta.topologyPairsPotentialAffinityPods
if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 { if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms) matchExists := c.nodeMatchesTopologyTerms(pod, topologyPairsPotentialAffinityPods, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Errorf(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
if !matchExists { if !matchExists {
// This pod may be the first pod in a series that have affinity to themselves. In order // This pod may be the first pod in a series that have affinity to themselves. In order
// to not leave such pods in pending state forever, we check that if no other pod // to not leave such pods in pending state forever, we check that if no other pod
// in the cluster matches the namespace and selector of this pod and the pod matches // in the cluster matches the namespace and selector of this pod and the pod matches
// its own terms, then we allow the pod to pass the affinity check. // its own terms, then we allow the pod to pass the affinity check.
if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) { if !(len(topologyPairsPotentialAffinityPods.topologyPairToPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity", glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name) podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil return ErrPodAffinityRulesNotMatch, nil
@ -1451,12 +1437,12 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
} }
// Check all anti-affinity terms. // Check all anti-affinity terms.
matchingPods = predicateMeta.nodeNameToMatchingAntiAffinityPods topologyPairsPotentialAntiAffinityPods := predicateMeta.topologyPairsPotentialAntiAffinityPods
if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 { if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms) matchExists := c.nodeMatchesTopologyTerms(pod, topologyPairsPotentialAntiAffinityPods, nodeInfo, antiAffinityTerms)
if err != nil || matchExists { if matchExists {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity, err: %v", glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity",
podName(pod), node.Name, err) podName(pod), node.Name)
return ErrPodAntiAffinityRulesNotMatch, nil return ErrPodAntiAffinityRulesNotMatch, nil
} }
} }