From ac4a082b33a5e116ed9931ef46376a23df8b59f6 Mon Sep 17 00:00:00 2001 From: Ahmad Diaa Date: Wed, 22 Aug 2018 00:58:40 +0200 Subject: [PATCH 1/2] use topologyPairsMaps for inter pod affinity/anti-affinity maps --- .../algorithm/predicates/metadata.go | 158 ++++++++---------- .../algorithm/predicates/metadata_test.go | 102 ++++++----- .../algorithm/predicates/predicates.go | 33 ++-- 3 files changed, 141 insertions(+), 152 deletions(-) diff --git a/pkg/scheduler/algorithm/predicates/metadata.go b/pkg/scheduler/algorithm/predicates/metadata.go index 163ad90ba2..69ab1c24e9 100644 --- a/pkg/scheduler/algorithm/predicates/metadata.go +++ b/pkg/scheduler/algorithm/predicates/metadata.go @@ -71,15 +71,15 @@ type predicateMetadata struct { podPorts []*v1.ContainerPort topologyPairsAntiAffinityPodsMap *topologyPairsMaps - // A map of node name to a list of Pods on the node that can potentially match - // the affinity rules of the "pod". - nodeNameToMatchingAffinityPods map[string][]*v1.Pod - // A map of node name to a list of Pods on the node that can potentially match - // the anti-affinity rules of the "pod". - nodeNameToMatchingAntiAffinityPods map[string][]*v1.Pod - serviceAffinityInUse bool - serviceAffinityMatchingPodList []*v1.Pod - serviceAffinityMatchingPodServices []*v1.Service + // A map of topology pairs to a list of Pods that can potentially match + // the affinity rules of the "pod" and its inverse. + topologyPairsPotentialAffinityPods *topologyPairsMaps + // A map of topology pairs to a list of Pods that can potentially match + // the anti-affinity rules of the "pod" and its inverse. + topologyPairsPotentialAntiAffinityPods *topologyPairsMaps + serviceAffinityInUse bool + serviceAffinityMatchingPodList []*v1.Pod + serviceAffinityMatchingPodServices []*v1.Service // ignoredExtendedResources is a set of extended resource names that will // be ignored in the PodFitsResources predicate. // @@ -134,19 +134,19 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf if err != nil { return nil } - affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap) + topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap) if err != nil { glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err) return nil } predicateMetadata := &predicateMetadata{ - pod: pod, - podBestEffort: isPodBestEffort(pod), - podRequest: GetResourceRequest(pod), - podPorts: schedutil.GetContainerPorts(pod), - nodeNameToMatchingAffinityPods: affinityPods, - nodeNameToMatchingAntiAffinityPods: antiAffinityPods, - topologyPairsAntiAffinityPodsMap: topologyPairsMaps, + pod: pod, + podBestEffort: isPodBestEffort(pod), + podRequest: GetResourceRequest(pod), + podPorts: schedutil.GetContainerPorts(pod), + topologyPairsPotentialAffinityPods: topologyPairsAffinityPodsMaps, + topologyPairsPotentialAntiAffinityPods: topologyPairsAntiAffinityPodsMaps, + topologyPairsAntiAffinityPodsMap: topologyPairsMaps, } for predicateName, precomputeFunc := range predicateMetadataProducers { glog.V(10).Infof("Precompute: %v", predicateName) @@ -200,33 +200,9 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error { return fmt.Errorf("deletedPod and meta.pod must not be the same") } meta.topologyPairsAntiAffinityPodsMap.removePod(deletedPod) - // Delete pod from the matching affinity or anti-affinity pods if exists. - affinity := meta.pod.Spec.Affinity - podNodeName := deletedPod.Spec.NodeName - if affinity != nil && len(podNodeName) > 0 { - if affinity.PodAffinity != nil { - for i, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] { - if p == deletedPod { - s := meta.nodeNameToMatchingAffinityPods[podNodeName] - s[i] = s[len(s)-1] - s = s[:len(s)-1] - meta.nodeNameToMatchingAffinityPods[podNodeName] = s - break - } - } - } - if affinity.PodAntiAffinity != nil { - for i, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] { - if p == deletedPod { - s := meta.nodeNameToMatchingAntiAffinityPods[podNodeName] - s[i] = s[len(s)-1] - s = s[:len(s)-1] - meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = s - break - } - } - } - } + // Delete pod from the matching affinity or anti-affinity topology pairs maps. + meta.topologyPairsPotentialAffinityPods.removePod(deletedPod) + meta.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod) // All pods in the serviceAffinityMatchingPodList are in the same namespace. // So, if the namespace of the first one is not the same as the namespace of the // deletedPod, we don't need to check the list, as deletedPod isn't in the list. @@ -267,29 +243,24 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache affinity := meta.pod.Spec.Affinity podNodeName := addedPod.Spec.NodeName if affinity != nil && len(podNodeName) > 0 { + podNode := nodeInfo.Node() + affinityTerms := GetPodAffinityTerms(affinity.PodAffinity) + antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity) if targetPodMatchesAffinityOfPod(meta.pod, addedPod) { - found := false - for _, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] { - if p == addedPod { - found = true - break + for _, term := range affinityTerms { + if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok { + pair := topologyPair{key: term.TopologyKey, value: topologyValue} + meta.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod) } } - if !found { - meta.nodeNameToMatchingAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAffinityPods[podNodeName], addedPod) - } } if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) { - found := false - for _, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] { - if p == addedPod { - found = true - break + for _, term := range antiAffinityTerms { + if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok { + pair := topologyPair{key: term.TopologyKey, value: topologyValue} + meta.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod) } } - if !found { - meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAntiAffinityPods[podNodeName], addedPod) - } } } // If addedPod is in the same namespace as the meta.pod, update the list @@ -308,22 +279,17 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache // its maps and slices, but it does not copy the contents of pointer values. func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata { newPredMeta := &predicateMetadata{ - pod: meta.pod, - podBestEffort: meta.podBestEffort, - podRequest: meta.podRequest, - serviceAffinityInUse: meta.serviceAffinityInUse, - ignoredExtendedResources: meta.ignoredExtendedResources, - topologyPairsAntiAffinityPodsMap: meta.topologyPairsAntiAffinityPodsMap, + pod: meta.pod, + podBestEffort: meta.podBestEffort, + podRequest: meta.podRequest, + serviceAffinityInUse: meta.serviceAffinityInUse, + ignoredExtendedResources: meta.ignoredExtendedResources, } newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...) - newPredMeta.nodeNameToMatchingAffinityPods = make(map[string][]*v1.Pod) - for k, v := range meta.nodeNameToMatchingAffinityPods { - newPredMeta.nodeNameToMatchingAffinityPods[k] = append([]*v1.Pod(nil), v...) - } - newPredMeta.nodeNameToMatchingAntiAffinityPods = make(map[string][]*v1.Pod) - for k, v := range meta.nodeNameToMatchingAntiAffinityPods { - newPredMeta.nodeNameToMatchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...) - } + newPredMeta.topologyPairsPotentialAffinityPods = newTopologyPairsMaps() + newPredMeta.topologyPairsPotentialAffinityPods.appendMaps(meta.topologyPairsPotentialAffinityPods) + newPredMeta.topologyPairsPotentialAntiAffinityPods = newTopologyPairsMaps() + newPredMeta.topologyPairsPotentialAntiAffinityPods.appendMaps(meta.topologyPairsPotentialAntiAffinityPods) newPredMeta.topologyPairsAntiAffinityPodsMap = newTopologyPairsMaps() newPredMeta.topologyPairsAntiAffinityPodsMap.appendMaps(meta.topologyPairsAntiAffinityPodsMap) newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil), @@ -373,12 +339,12 @@ func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermPro // It ignores topology. It returns a set of Pods that are checked later by the affinity // predicate. With this set of pods available, the affinity predicate does not // need to check all the pods in the cluster. -func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (affinityPods map[string][]*v1.Pod, antiAffinityPods map[string][]*v1.Pod, err error) { +func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) { allNodeNames := make([]string, 0, len(nodeInfoMap)) affinity := pod.Spec.Affinity if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) { - return nil, nil, nil + return newTopologyPairsMaps(), newTopologyPairsMaps(), nil } for name := range nodeInfoMap { @@ -387,16 +353,16 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache var lock sync.Mutex var firstError error - affinityPods = make(map[string][]*v1.Pod) - antiAffinityPods = make(map[string][]*v1.Pod) - appendResult := func(nodeName string, affPods, antiAffPods []*v1.Pod) { + topologyPairsAffinityPodsMaps = newTopologyPairsMaps() + topologyPairsAntiAffinityPodsMaps = newTopologyPairsMaps() + appendResult := func(nodeName string, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps *topologyPairsMaps) { lock.Lock() defer lock.Unlock() - if len(affPods) > 0 { - affinityPods[nodeName] = affPods + if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 { + topologyPairsAffinityPodsMaps.appendMaps(nodeTopologyPairsAffinityPodsMaps) } - if len(antiAffPods) > 0 { - antiAffinityPods[nodeName] = antiAffPods + if len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 { + topologyPairsAntiAffinityPodsMaps.appendMaps(nodeTopologyPairsAntiAffinityPodsMaps) } } @@ -417,6 +383,8 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache return nil, nil, err } + affinityTerms := GetPodAffinityTerms(affinity.PodAffinity) + antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity) processNode := func(i int) { nodeInfo := nodeInfoMap[allNodeNames[i]] node := nodeInfo.Node() @@ -424,24 +392,34 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache catchError(fmt.Errorf("nodeInfo.Node is nil")) return } - affPods := make([]*v1.Pod, 0, len(nodeInfo.Pods())) - antiAffPods := make([]*v1.Pod, 0, len(nodeInfo.Pods())) + nodeTopologyPairsAffinityPodsMaps := newTopologyPairsMaps() + nodeTopologyPairsAntiAffinityPodsMaps := newTopologyPairsMaps() for _, existingPod := range nodeInfo.Pods() { // Check affinity properties. if podMatchesAffinityTermProperties(existingPod, affinityProperties) { - affPods = append(affPods, existingPod) + for _, term := range affinityTerms { + if topologyValue, ok := node.Labels[term.TopologyKey]; ok { + pair := topologyPair{key: term.TopologyKey, value: topologyValue} + nodeTopologyPairsAffinityPodsMaps.addTopologyPair(pair, existingPod) + } + } } // Check anti-affinity properties. if podMatchesAffinityTermProperties(existingPod, antiAffinityProperties) { - antiAffPods = append(antiAffPods, existingPod) + for _, term := range antiAffinityTerms { + if topologyValue, ok := node.Labels[term.TopologyKey]; ok { + pair := topologyPair{key: term.TopologyKey, value: topologyValue} + nodeTopologyPairsAntiAffinityPodsMaps.addTopologyPair(pair, existingPod) + } + } } } - if len(antiAffPods) > 0 || len(affPods) > 0 { - appendResult(node.Name, affPods, antiAffPods) + if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 || len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 { + appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps) } } workqueue.Parallelize(16, len(allNodeNames), processNode) - return affinityPods, antiAffinityPods, firstError + return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, firstError } // podMatchesAffinity returns true if "targetPod" matches any affinity rule of diff --git a/pkg/scheduler/algorithm/predicates/metadata_test.go b/pkg/scheduler/algorithm/predicates/metadata_test.go index 04da8ed5c0..0d724e5ae8 100644 --- a/pkg/scheduler/algorithm/predicates/metadata_test.go +++ b/pkg/scheduler/algorithm/predicates/metadata_test.go @@ -52,13 +52,6 @@ func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] } var _ = sort.Interface(&sortableServices{}) -func sortNodePodMap(np map[string][]*v1.Pod) { - for _, pl := range np { - sortablePods := sortablePods(pl) - sort.Sort(sortablePods) - } -} - // predicateMetadataEquivalent returns true if the two metadata are equivalent. // Note: this function does not compare podRequest. func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error { @@ -77,15 +70,11 @@ func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error { for !reflect.DeepEqual(meta1.podPorts, meta2.podPorts) { return fmt.Errorf("podPorts are not equal") } - sortNodePodMap(meta1.nodeNameToMatchingAffinityPods) - sortNodePodMap(meta2.nodeNameToMatchingAffinityPods) - if !reflect.DeepEqual(meta1.nodeNameToMatchingAffinityPods, meta2.nodeNameToMatchingAffinityPods) { - return fmt.Errorf("nodeNameToMatchingAffinityPods are not euqal") + if !reflect.DeepEqual(meta1.topologyPairsPotentialAffinityPods, meta2.topologyPairsPotentialAffinityPods) { + return fmt.Errorf("topologyPairsPotentialAffinityPods are not equal") } - sortNodePodMap(meta1.nodeNameToMatchingAntiAffinityPods) - sortNodePodMap(meta2.nodeNameToMatchingAntiAffinityPods) - if !reflect.DeepEqual(meta1.nodeNameToMatchingAntiAffinityPods, meta2.nodeNameToMatchingAntiAffinityPods) { - return fmt.Errorf("nodeNameToMatchingAntiAffinityPods are not euqal") + if !reflect.DeepEqual(meta1.topologyPairsPotentialAntiAffinityPods, meta2.topologyPairsPotentialAntiAffinityPods) { + return fmt.Errorf("topologyPairsPotentialAntiAffinityPods are not equal") } if !reflect.DeepEqual(meta1.topologyPairsAntiAffinityPodsMap.podToTopologyPairs, meta2.topologyPairsAntiAffinityPodsMap.podToTopologyPairs) { @@ -454,42 +443,71 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) { }, }, }, - nodeNameToMatchingAffinityPods: map[string][]*v1.Pod{ - "nodeA": { - &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, - Spec: v1.PodSpec{NodeName: "nodeA"}, + topologyPairsPotentialAffinityPods: &topologyPairsMaps{ + topologyPairToPods: map[topologyPair]podSet{ + {key: "name", value: "nodeA"}: { + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeA"}, + }: struct{}{}, + }, + {key: "name", value: "nodeC"}: { + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"}, + Spec: v1.PodSpec{ + NodeName: "nodeC", + }, + }: struct{}{}, + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeC"}, + }: struct{}{}, }, }, - "nodeC": { - &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"}, - Spec: v1.PodSpec{ - NodeName: "nodeC", - }, + podToTopologyPairs: map[string]topologyPairSet{ + "p1_": { + topologyPair{key: "name", value: "nodeA"}: struct{}{}, }, - &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1}, - Spec: v1.PodSpec{NodeName: "nodeC"}, + "p2_": { + topologyPair{key: "name", value: "nodeC"}: struct{}{}, + }, + "p6_": { + topologyPair{key: "name", value: "nodeC"}: struct{}{}, }, }, }, - nodeNameToMatchingAntiAffinityPods: map[string][]*v1.Pod{ - "nodeN": { - &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, - Spec: v1.PodSpec{NodeName: "nodeN"}, + topologyPairsPotentialAntiAffinityPods: &topologyPairsMaps{ + topologyPairToPods: map[topologyPair]podSet{ + {key: "name", value: "nodeN"}: { + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeN"}, + }: struct{}{}, + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"}, + Spec: v1.PodSpec{ + NodeName: "nodeM", + }, + }: struct{}{}, + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"}, + Spec: v1.PodSpec{ + NodeName: "nodeM", + }, + }: struct{}{}, }, - &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"}, - Spec: v1.PodSpec{ - NodeName: "nodeM", - }, - }, - &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"}, - Spec: v1.PodSpec{ - NodeName: "nodeM", - }, + {key: "name", value: "nodeM"}: { + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1}, + Spec: v1.PodSpec{NodeName: "nodeM"}, + }: struct{}{}, }, }, - "nodeM": { - &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1}, - Spec: v1.PodSpec{NodeName: "nodeM"}, + podToTopologyPairs: map[string]topologyPairSet{ + "p1_": { + topologyPair{key: "name", value: "nodeN"}: struct{}{}, + }, + "p2_": { + topologyPair{key: "name", value: "nodeN"}: struct{}{}, + }, + "p3_": { + topologyPair{key: "name", value: "nodeN"}: struct{}{}, + }, + "p6_": { + topologyPair{key: "name", value: "nodeM"}: struct{}{}, }, }, }, diff --git a/pkg/scheduler/algorithm/predicates/predicates.go b/pkg/scheduler/algorithm/predicates/predicates.go index c919282c83..79cb008413 100644 --- a/pkg/scheduler/algorithm/predicates/predicates.go +++ b/pkg/scheduler/algorithm/predicates/predicates.go @@ -1395,23 +1395,16 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta // anyPodsMatchingTopologyTerms checks whether any of the nodes given via // "targetPods" matches topology of all the "terms" for the give "pod" and "nodeInfo". -func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) { - for nodeName, targetPods := range targetPods { - targetPodNodeInfo, err := c.info.GetNodeInfo(nodeName) - if err != nil { - return false, err - } - if len(targetPods) > 0 { - allTermsMatched := true - for _, term := range terms { - if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNodeInfo, term.TopologyKey) { - allTermsMatched = false - break - } - } - if allTermsMatched { - // We have 1 or more pods on the target node that have already matched namespace and selector - // and all of the terms topologies matched the target node. So, there is at least 1 matching pod on the node. +func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods *topologyPairsMaps, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) { + podNameToMatchingTermsCount := make(map[string]int) + node := nodeInfo.Node() + podTermsCount := len(terms) + for _, term := range terms { + pair := topologyPair{key: term.TopologyKey, value: node.Labels[term.TopologyKey]} + for existingPod := range targetPods.topologyPairToPods[pair] { + existingPodFullName := schedutil.GetPodFullName(existingPod) + podNameToMatchingTermsCount[existingPodFullName] = podNameToMatchingTermsCount[existingPodFullName] + 1 + if podNameToMatchingTermsCount[existingPodFullName] == podTermsCount { return true, nil } } @@ -1429,7 +1422,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, } if predicateMeta, ok := meta.(*predicateMetadata); ok { // Check all affinity terms. - matchingPods := predicateMeta.nodeNameToMatchingAffinityPods + matchingPods := predicateMeta.topologyPairsPotentialAffinityPods if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 { matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms) if err != nil { @@ -1442,7 +1435,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, // to not leave such pods in pending state forever, we check that if no other pod // in the cluster matches the namespace and selector of this pod and the pod matches // its own terms, then we allow the pod to pass the affinity check. - if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) { + if !(len(matchingPods.topologyPairToPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) { glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity", podName(pod), node.Name) return ErrPodAffinityRulesNotMatch, nil @@ -1451,7 +1444,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, } // Check all anti-affinity terms. - matchingPods = predicateMeta.nodeNameToMatchingAntiAffinityPods + matchingPods = predicateMeta.topologyPairsPotentialAntiAffinityPods if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 { matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms) if err != nil || matchExists { From ee393b4e06cc642f2e30232b51e8d0abe715e78e Mon Sep 17 00:00:00 2001 From: Ahmad Diaa Date: Tue, 28 Aug 2018 22:22:06 +0200 Subject: [PATCH 2/2] addressed reviewer comments --- .../algorithm/predicates/metadata.go | 7 ++- .../algorithm/predicates/predicates.go | 43 ++++++++----------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/pkg/scheduler/algorithm/predicates/metadata.go b/pkg/scheduler/algorithm/predicates/metadata.go index 69ab1c24e9..f3dd8c895d 100644 --- a/pkg/scheduler/algorithm/predicates/metadata.go +++ b/pkg/scheduler/algorithm/predicates/metadata.go @@ -244,9 +244,11 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache podNodeName := addedPod.Spec.NodeName if affinity != nil && len(podNodeName) > 0 { podNode := nodeInfo.Node() - affinityTerms := GetPodAffinityTerms(affinity.PodAffinity) - antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity) + // It is assumed that when the added pod matches affinity of the meta.pod, all the terms must match, + // this should be changed when the implementation of targetPodMatchesAffinityOfPod/podMatchesAffinityTermProperties + // is changed if targetPodMatchesAffinityOfPod(meta.pod, addedPod) { + affinityTerms := GetPodAffinityTerms(affinity.PodAffinity) for _, term := range affinityTerms { if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok { pair := topologyPair{key: term.TopologyKey, value: topologyValue} @@ -255,6 +257,7 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache } } if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) { + antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity) for _, term := range antiAffinityTerms { if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok { pair := topologyPair{key: term.TopologyKey, value: topologyValue} diff --git a/pkg/scheduler/algorithm/predicates/predicates.go b/pkg/scheduler/algorithm/predicates/predicates.go index 79cb008413..d67aafdc50 100644 --- a/pkg/scheduler/algorithm/predicates/predicates.go +++ b/pkg/scheduler/algorithm/predicates/predicates.go @@ -1393,23 +1393,21 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta return nil, nil } -// anyPodsMatchingTopologyTerms checks whether any of the nodes given via -// "targetPods" matches topology of all the "terms" for the give "pod" and "nodeInfo". -func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods *topologyPairsMaps, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) { - podNameToMatchingTermsCount := make(map[string]int) +// nodeMatchesTopologyTerms checks whether "nodeInfo" matches +// topology of all the "terms" for the given "pod". +func (c *PodAffinityChecker) nodeMatchesTopologyTerms(pod *v1.Pod, topologyPairs *topologyPairsMaps, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) bool { node := nodeInfo.Node() - podTermsCount := len(terms) for _, term := range terms { - pair := topologyPair{key: term.TopologyKey, value: node.Labels[term.TopologyKey]} - for existingPod := range targetPods.topologyPairToPods[pair] { - existingPodFullName := schedutil.GetPodFullName(existingPod) - podNameToMatchingTermsCount[existingPodFullName] = podNameToMatchingTermsCount[existingPodFullName] + 1 - if podNameToMatchingTermsCount[existingPodFullName] == podTermsCount { - return true, nil + if topologyValue, ok := node.Labels[term.TopologyKey]; ok { + pair := topologyPair{key: term.TopologyKey, value: topologyValue} + if _, ok := topologyPairs.topologyPairToPods[pair]; !ok { + return false } + } else { + return false } } - return false, nil + return true } // Checks if scheduling the pod onto this node would break any rules of this pod. @@ -1422,20 +1420,15 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, } if predicateMeta, ok := meta.(*predicateMetadata); ok { // Check all affinity terms. - matchingPods := predicateMeta.topologyPairsPotentialAffinityPods + topologyPairsPotentialAffinityPods := predicateMeta.topologyPairsPotentialAffinityPods if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 { - matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms) - if err != nil { - errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err) - glog.Errorf(errMessage) - return ErrPodAffinityRulesNotMatch, errors.New(errMessage) - } + matchExists := c.nodeMatchesTopologyTerms(pod, topologyPairsPotentialAffinityPods, nodeInfo, affinityTerms) if !matchExists { // This pod may the first pod in a series that have affinity to themselves. In order // to not leave such pods in pending state forever, we check that if no other pod // in the cluster matches the namespace and selector of this pod and the pod matches // its own terms, then we allow the pod to pass the affinity check. - if !(len(matchingPods.topologyPairToPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) { + if !(len(topologyPairsPotentialAffinityPods.topologyPairToPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) { glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity", podName(pod), node.Name) return ErrPodAffinityRulesNotMatch, nil @@ -1444,12 +1437,12 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, } // Check all anti-affinity terms. - matchingPods = predicateMeta.topologyPairsPotentialAntiAffinityPods + topologyPairsPotentialAntiAffinityPods := predicateMeta.topologyPairsPotentialAntiAffinityPods if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 { - matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms) - if err != nil || matchExists { - glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity, err: %v", - podName(pod), node.Name, err) + matchExists := c.nodeMatchesTopologyTerms(pod, topologyPairsPotentialAntiAffinityPods, nodeInfo, antiAffinityTerms) + if matchExists { + glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity", + podName(pod), node.Name) return ErrPodAntiAffinityRulesNotMatch, nil } }