add topologyValue map to reduce search space

pull/8/head
Mohamed Mehany 2018-07-30 04:59:26 +02:00 committed by Ahmad Diaa
parent 6b41352679
commit 3fb6912d08
3 changed files with 108 additions and 35 deletions

View File

@ -54,6 +54,9 @@ type predicateMetadata struct {
podPorts []*v1.ContainerPort podPorts []*v1.ContainerPort
//key is a pod full name with the anti-affinity rules. //key is a pod full name with the anti-affinity rules.
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
// A map of anti-affinity terms' topology key values to the pods' names
// that can potentially match the affinity rules of the pod
topologyValueToAntiAffinityPods map[string][]string
// A map of node name to a list of Pods on the node that can potentially match // A map of node name to a list of Pods on the node that can potentially match
// the affinity rules of the "pod". // the affinity rules of the "pod".
nodeNameToMatchingAffinityPods map[string][]*v1.Pod nodeNameToMatchingAffinityPods map[string][]*v1.Pod
@ -113,7 +116,7 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
if pod == nil { if pod == nil {
return nil return nil
} }
matchingTerms, err := getMatchingAntiAffinityTerms(pod, nodeNameToInfoMap) matchingTerms, topologyValues, err := getMatchingAntiAffinityTerms(pod, nodeNameToInfoMap)
if err != nil { if err != nil {
return nil return nil
} }
@ -130,6 +133,7 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
matchingAntiAffinityTerms: matchingTerms, matchingAntiAffinityTerms: matchingTerms,
nodeNameToMatchingAffinityPods: affinityPods, nodeNameToMatchingAffinityPods: affinityPods,
nodeNameToMatchingAntiAffinityPods: antiAffinityPods, nodeNameToMatchingAntiAffinityPods: antiAffinityPods,
topologyValueToAntiAffinityPods: topologyValues,
} }
for predicateName, precomputeFunc := range predicateMetadataProducers { for predicateName, precomputeFunc := range predicateMetadataProducers {
glog.V(10).Infof("Precompute: %v", predicateName) glog.V(10).Infof("Precompute: %v", predicateName)
@ -145,6 +149,21 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) { if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same") return fmt.Errorf("deletedPod and meta.pod must not be the same")
} }
// Delete pod from matching topology values map
for _, term := range meta.matchingAntiAffinityTerms[deletedPodFullName] {
if topologyValue, ok := term.node.Labels[term.term.TopologyKey]; ok {
for index, podName := range meta.topologyValueToAntiAffinityPods[topologyValue] {
if podName == deletedPodFullName {
podsList := meta.topologyValueToAntiAffinityPods[topologyValue]
meta.topologyValueToAntiAffinityPods[topologyValue] = append(podsList[:index],
podsList[index+1:]...)
break
}
}
}
}
// Delete any anti-affinity rule from the deletedPod. // Delete any anti-affinity rule from the deletedPod.
delete(meta.matchingAntiAffinityTerms, deletedPodFullName) delete(meta.matchingAntiAffinityTerms, deletedPodFullName)
// Delete pod from the matching affinity or anti-affinity pods if exists. // Delete pod from the matching affinity or anti-affinity pods if exists.
@ -203,7 +222,7 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
return fmt.Errorf("invalid node in nodeInfo") return fmt.Errorf("invalid node in nodeInfo")
} }
// Add matching anti-affinity terms of the addedPod to the map. // Add matching anti-affinity terms of the addedPod to the map.
podMatchingTerms, err := getMatchingAntiAffinityTermsOfExistingPod(meta.pod, addedPod, nodeInfo.Node()) podMatchingTerms, podTopologyValuesToMatchingPods, err := getMatchingAntiAffinityTermsOfExistingPod(meta.pod, addedPod, nodeInfo.Node())
if err != nil { if err != nil {
return err return err
} }
@ -215,6 +234,10 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
} else { } else {
meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms
} }
for topologyValue, pods := range podTopologyValuesToMatchingPods {
meta.topologyValueToAntiAffinityPods[topologyValue] = append(meta.topologyValueToAntiAffinityPods[topologyValue], pods...)
}
} }
// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed. // Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
affinity := meta.pod.Spec.Affinity affinity := meta.pod.Spec.Affinity
@ -280,10 +303,15 @@ func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
for k, v := range meta.nodeNameToMatchingAntiAffinityPods { for k, v := range meta.nodeNameToMatchingAntiAffinityPods {
newPredMeta.nodeNameToMatchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...) newPredMeta.nodeNameToMatchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...)
} }
newPredMeta.topologyValueToAntiAffinityPods = make(map[string][]string)
for k, v := range meta.topologyValueToAntiAffinityPods {
newPredMeta.topologyValueToAntiAffinityPods[k] = append([]string(nil), v...)
}
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil), newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
meta.serviceAffinityMatchingPodServices...) meta.serviceAffinityMatchingPodServices...)
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil), newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
meta.serviceAffinityMatchingPodList...) meta.serviceAffinityMatchingPodList...)
return (algorithm.PredicateMetadata)(newPredMeta) return (algorithm.PredicateMetadata)(newPredMeta)
} }

View File

@ -475,6 +475,10 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
}, },
}, },
}, },
topologyValueToAntiAffinityPods: map[string][]string{
"machine1": {"p1", "p2"},
"machine2": {"p3"},
},
nodeNameToMatchingAffinityPods: map[string][]*v1.Pod{ nodeNameToMatchingAffinityPods: map[string][]*v1.Pod{
"nodeA": { "nodeA": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1}, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},

View File

@ -1246,7 +1246,7 @@ func GetPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.Po
return terms return terms
} }
func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (map[string][]matchingPodAntiAffinityTerm, error) { func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (map[string][]matchingPodAntiAffinityTerm, map[string][]string, error) {
allNodeNames := make([]string, 0, len(nodeInfoMap)) allNodeNames := make([]string, 0, len(nodeInfoMap))
for name := range nodeInfoMap { for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name) allNodeNames = append(allNodeNames, name)
@ -1254,14 +1254,25 @@ func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*scheduler
var lock sync.Mutex var lock sync.Mutex
var firstError error var firstError error
result := make(map[string][]matchingPodAntiAffinityTerm) podsToMatchingAntiAffinityTerms := make(map[string][]matchingPodAntiAffinityTerm)
appendResult := func(toAppend map[string][]matchingPodAntiAffinityTerm) { topologyValuesToMatchingPods := make(map[string][]string)
appendPodsMatchingAntiAffinityTerms := func(toAppend map[string][]matchingPodAntiAffinityTerm) {
lock.Lock() lock.Lock()
defer lock.Unlock() defer lock.Unlock()
for uid, terms := range toAppend { for uid, terms := range toAppend {
result[uid] = append(result[uid], terms...) podsToMatchingAntiAffinityTerms[uid] = append(podsToMatchingAntiAffinityTerms[uid], terms...)
} }
} }
appendTopologyValuesMatchingPods := func(toAppend map[string][]string) {
lock.Lock()
defer lock.Unlock()
for topologyValue, pods := range toAppend {
topologyValuesToMatchingPods[topologyValue] = append(topologyValuesToMatchingPods[topologyValue], pods...)
}
}
catchError := func(err error) { catchError := func(err error) {
lock.Lock() lock.Lock()
defer lock.Unlock() defer lock.Unlock()
@ -1277,7 +1288,9 @@ func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*scheduler
catchError(fmt.Errorf("node not found")) catchError(fmt.Errorf("node not found"))
return return
} }
nodeResult := make(map[string][]matchingPodAntiAffinityTerm) nodePodsToMatchingAntiAffinityTerms := make(map[string][]matchingPodAntiAffinityTerm)
nodeTopologyValuesToMatchingPods := make(map[string][]string)
for _, existingPod := range nodeInfo.PodsWithAffinity() { for _, existingPod := range nodeInfo.PodsWithAffinity() {
affinity := existingPod.Spec.Affinity affinity := existingPod.Spec.Affinity
if affinity == nil { if affinity == nil {
@ -1292,40 +1305,55 @@ func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*scheduler
} }
if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector) { if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector) {
existingPodFullName := schedutil.GetPodFullName(existingPod) existingPodFullName := schedutil.GetPodFullName(existingPod)
nodeResult[existingPodFullName] = append( nodePodsToMatchingAntiAffinityTerms[existingPodFullName] = append(
nodeResult[existingPodFullName], nodePodsToMatchingAntiAffinityTerms[existingPodFullName],
matchingPodAntiAffinityTerm{term: &term, node: node}) matchingPodAntiAffinityTerm{term: &term, node: node})
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
nodeTopologyValuesToMatchingPods[topologyValue] = append(nodeTopologyValuesToMatchingPods[topologyValue], existingPodFullName)
}
} }
} }
} }
if len(nodeResult) > 0 { if len(nodePodsToMatchingAntiAffinityTerms) > 0 {
appendResult(nodeResult) appendPodsMatchingAntiAffinityTerms(nodePodsToMatchingAntiAffinityTerms)
}
if len(nodeTopologyValuesToMatchingPods) > 0 {
appendTopologyValuesMatchingPods(nodeTopologyValuesToMatchingPods)
} }
} }
workqueue.Parallelize(16, len(allNodeNames), processNode) workqueue.Parallelize(16, len(allNodeNames), processNode)
return result, firstError return podsToMatchingAntiAffinityTerms, topologyValuesToMatchingPods, firstError
} }
func getMatchingAntiAffinityTermsOfExistingPod(newPod *v1.Pod, existingPod *v1.Pod, node *v1.Node) ([]matchingPodAntiAffinityTerm, error) { func getMatchingAntiAffinityTermsOfExistingPod(newPod *v1.Pod, existingPod *v1.Pod, node *v1.Node) ([]matchingPodAntiAffinityTerm, map[string][]string, error) {
var result []matchingPodAntiAffinityTerm var podMatchingTerms []matchingPodAntiAffinityTerm
topologyValuesToMatchingPods := make(map[string][]string)
affinity := existingPod.Spec.Affinity affinity := existingPod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil { if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) { for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term) namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector) selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if priorityutil.PodMatchesTermsNamespaceAndSelector(newPod, namespaces, selector) { if priorityutil.PodMatchesTermsNamespaceAndSelector(newPod, namespaces, selector) {
result = append(result, matchingPodAntiAffinityTerm{term: &term, node: node}) podMatchingTerms = append(podMatchingTerms, matchingPodAntiAffinityTerm{term: &term, node: node})
existingPodFullName := schedutil.GetPodFullName(existingPod)
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
topologyValuesToMatchingPods[topologyValue] = append(topologyValuesToMatchingPods[topologyValue], existingPodFullName)
}
} }
} }
} }
return result, nil return podMatchingTerms, topologyValuesToMatchingPods, nil
} }
func (c *PodAffinityChecker) getMatchingAntiAffinityTerms(pod *v1.Pod, allPods []*v1.Pod) (map[string][]matchingPodAntiAffinityTerm, error) { func (c *PodAffinityChecker) getMatchingAntiAffinityTerms(pod *v1.Pod, allPods []*v1.Pod) (map[string][]matchingPodAntiAffinityTerm, map[string][]string, error) {
result := make(map[string][]matchingPodAntiAffinityTerm) result := make(map[string][]matchingPodAntiAffinityTerm)
topologyValuesToMatchingPods := make(map[string][]string)
for _, existingPod := range allPods { for _, existingPod := range allPods {
affinity := existingPod.Spec.Affinity affinity := existingPod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil { if affinity != nil && affinity.PodAntiAffinity != nil {
@ -1335,19 +1363,22 @@ func (c *PodAffinityChecker) getMatchingAntiAffinityTerms(pod *v1.Pod, allPods [
glog.Errorf("Node not found, %v", existingPod.Spec.NodeName) glog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
continue continue
} }
return nil, err return nil, nil, err
} }
existingPodMatchingTerms, err := getMatchingAntiAffinityTermsOfExistingPod(pod, existingPod, existingPodNode) existingPodMatchingTerms, podTopologyValuesToMatchingPods, err := getMatchingAntiAffinityTermsOfExistingPod(pod, existingPod, existingPodNode)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if len(existingPodMatchingTerms) > 0 { if len(existingPodMatchingTerms) > 0 {
existingPodFullName := schedutil.GetPodFullName(existingPod) existingPodFullName := schedutil.GetPodFullName(existingPod)
result[existingPodFullName] = existingPodMatchingTerms result[existingPodFullName] = existingPodMatchingTerms
} }
for topologyValue, pods := range podTopologyValuesToMatchingPods {
topologyValuesToMatchingPods[topologyValue] = append(topologyValuesToMatchingPods[topologyValue], pods...)
}
} }
} }
return result, nil return result, topologyValuesToMatchingPods, nil
} }
// Checks if scheduling the pod onto this node would break any anti-affinity // Checks if scheduling the pod onto this node would break any anti-affinity
@ -1358,8 +1389,11 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
return ErrExistingPodsAntiAffinityRulesNotMatch, fmt.Errorf("Node is nil") return ErrExistingPodsAntiAffinityRulesNotMatch, fmt.Errorf("Node is nil")
} }
var matchingTerms map[string][]matchingPodAntiAffinityTerm var matchingTerms map[string][]matchingPodAntiAffinityTerm
var topologyValuesToMatchingPods map[string][]string
if predicateMeta, ok := meta.(*predicateMetadata); ok { if predicateMeta, ok := meta.(*predicateMetadata); ok {
matchingTerms = predicateMeta.matchingAntiAffinityTerms matchingTerms = predicateMeta.matchingAntiAffinityTerms
topologyValuesToMatchingPods = predicateMeta.topologyValueToAntiAffinityPods
} else { } else {
// Filter out pods whose nodeName is equal to nodeInfo.node.Name, but are not // Filter out pods whose nodeName is equal to nodeInfo.node.Name, but are not
// present in nodeInfo. Pods on other nodes pass the filter. // present in nodeInfo. Pods on other nodes pass the filter.
@ -1369,24 +1403,31 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
glog.Error(errMessage) glog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage) return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
} }
if matchingTerms, err = c.getMatchingAntiAffinityTerms(pod, filteredPods); err != nil { if matchingTerms, topologyValuesToMatchingPods, err = c.getMatchingAntiAffinityTerms(pod, filteredPods); err != nil {
errMessage := fmt.Sprintf("Failed to get all terms that pod %+v matches, err: %+v", podName(pod), err) errMessage := fmt.Sprintf("Failed to get all terms that pod %+v matches, err: %+v", podName(pod), err)
glog.Error(errMessage) glog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage) return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
} }
} }
for _, terms := range matchingTerms {
for i := range terms { // Iterate over topology values, to get matching pods and get their matching terms to check for same topology key
term := &terms[i] // currently ignored if predicateMetadata is not precomputed
if len(term.term.TopologyKey) == 0 { for _, topologyValue := range node.Labels {
errMessage := fmt.Sprintf("Empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity") potentialPods := topologyValuesToMatchingPods[topologyValue]
glog.Error(errMessage) for _, matchingPod := range potentialPods {
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage) podTerms := matchingTerms[matchingPod]
} for i := range podTerms {
if priorityutil.NodesHaveSameTopologyKey(node, term.node, term.term.TopologyKey) { term := &podTerms[i]
glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAntiAffinityTerm %v", if len(term.term.TopologyKey) == 0 {
podName(pod), node.Name, term.term) errMessage := fmt.Sprintf("Empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
return ErrExistingPodsAntiAffinityRulesNotMatch, nil glog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
if priorityutil.NodesHaveSameTopologyKey(node, term.node, term.term.TopologyKey) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAntiAffinityTerm %v",
podName(pod), node.Name, term.term)
return ErrExistingPodsAntiAffinityRulesNotMatch, nil
}
} }
} }
} }