mirror of https://github.com/k3s-io/k3s
Improve performance of affinity/anti-affinity predicate
parent
9c40f5b5a6
commit
418c7502f0
|
@ -20,14 +20,17 @@ import (
|
|||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/util/workqueue"
|
||||
"k8s.io/kubernetes/pkg/scheduler/algorithm"
|
||||
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
|
||||
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
|
||||
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
// PredicateMetadataFactory defines a factory of predicate metadata.
|
||||
|
@ -50,7 +53,13 @@ type predicateMetadata struct {
|
|||
podRequest *schedulercache.Resource
|
||||
podPorts []*v1.ContainerPort
|
||||
//key is a pod full name with the anti-affinity rules.
|
||||
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
|
||||
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
|
||||
// A map of node name to a list of Pods on the node that can potentially match
|
||||
// the affinity rules of the "pod".
|
||||
matchingAffinityPods map[string][]*v1.Pod
|
||||
// A map of node name to a list of Pods on the node that can potentially match
|
||||
// the anti-affinity rules of the "pod".
|
||||
matchingAntiAffinityPods map[string][]*v1.Pod
|
||||
serviceAffinityInUse bool
|
||||
serviceAffinityMatchingPodList []*v1.Pod
|
||||
serviceAffinityMatchingPodServices []*v1.Service
|
||||
|
@ -108,12 +117,19 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
|
|||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap)
|
||||
if err != nil {
|
||||
glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms")
|
||||
return nil
|
||||
}
|
||||
predicateMetadata := &predicateMetadata{
|
||||
pod: pod,
|
||||
podBestEffort: isPodBestEffort(pod),
|
||||
podRequest: GetResourceRequest(pod),
|
||||
podPorts: schedutil.GetContainerPorts(pod),
|
||||
matchingAntiAffinityTerms: matchingTerms,
|
||||
matchingAffinityPods: affinityPods,
|
||||
matchingAntiAffinityPods: antiAffinityPods,
|
||||
}
|
||||
for predicateName, precomputeFunc := range predicateMetadataProducers {
|
||||
glog.V(10).Infof("Precompute: %v", predicateName)
|
||||
|
@ -131,6 +147,27 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
|
|||
}
|
||||
// Delete any anti-affinity rule from the deletedPod.
|
||||
delete(meta.matchingAntiAffinityTerms, deletedPodFullName)
|
||||
// Delete pod from the matching affinity or anti-affinity pods if exists.
|
||||
affinity := meta.pod.Spec.Affinity
|
||||
podNodeName := deletedPod.Spec.NodeName
|
||||
if affinity != nil && len(podNodeName) > 0 {
|
||||
if affinity.PodAffinity != nil {
|
||||
for i, p := range meta.matchingAffinityPods[podNodeName] {
|
||||
if p == deletedPod {
|
||||
meta.matchingAffinityPods[podNodeName] = append(meta.matchingAffinityPods[podNodeName][:i], meta.matchingAffinityPods[podNodeName][i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if affinity.PodAntiAffinity != nil {
|
||||
for i, p := range meta.matchingAntiAffinityPods[podNodeName] {
|
||||
if p == deletedPod {
|
||||
meta.matchingAntiAffinityPods[podNodeName] = append(meta.matchingAntiAffinityPods[podNodeName][:i], meta.matchingAntiAffinityPods[podNodeName][i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
|
||||
// So, if the namespace of the first one is not the same as the namespace of the
|
||||
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
|
||||
|
@ -173,6 +210,35 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
|
|||
meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms
|
||||
}
|
||||
}
|
||||
// Add the pod to matchingAffinityPods and matchingAntiAffinityPods if needed.
|
||||
affinity := meta.pod.Spec.Affinity
|
||||
podNodeName := addedPod.Spec.NodeName
|
||||
if affinity != nil && len(podNodeName) > 0 {
|
||||
if targetPodMatchesAffinityOfPod(meta.pod, addedPod) {
|
||||
found := false
|
||||
for _, p := range meta.matchingAffinityPods[podNodeName] {
|
||||
if p == addedPod {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
meta.matchingAffinityPods[podNodeName] = append(meta.matchingAffinityPods[podNodeName], addedPod)
|
||||
}
|
||||
}
|
||||
if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) {
|
||||
found := false
|
||||
for _, p := range meta.matchingAntiAffinityPods[podNodeName] {
|
||||
if p == addedPod {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
meta.matchingAntiAffinityPods[podNodeName] = append(meta.matchingAntiAffinityPods[podNodeName], addedPod)
|
||||
}
|
||||
}
|
||||
}
|
||||
// If addedPod is in the same namespace as the meta.pod, update the list
|
||||
// of matching pods if applicable.
|
||||
if meta.serviceAffinityInUse && addedPod.Namespace == meta.pod.Namespace {
|
||||
|
@ -200,9 +266,162 @@ func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
|
|||
for k, v := range meta.matchingAntiAffinityTerms {
|
||||
newPredMeta.matchingAntiAffinityTerms[k] = append([]matchingPodAntiAffinityTerm(nil), v...)
|
||||
}
|
||||
newPredMeta.matchingAffinityPods = make(map[string][]*v1.Pod)
|
||||
for k, v := range meta.matchingAffinityPods {
|
||||
newPredMeta.matchingAffinityPods[k] = append([]*v1.Pod(nil), v...)
|
||||
}
|
||||
newPredMeta.matchingAntiAffinityPods = make(map[string][]*v1.Pod)
|
||||
for k, v := range meta.matchingAntiAffinityPods {
|
||||
newPredMeta.matchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...)
|
||||
}
|
||||
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
|
||||
meta.serviceAffinityMatchingPodServices...)
|
||||
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
|
||||
meta.serviceAffinityMatchingPodList...)
|
||||
return (algorithm.PredicateMetadata)(newPredMeta)
|
||||
}
|
||||
|
||||
type affinityTermProperties struct {
|
||||
namespaces sets.String
|
||||
selector labels.Selector
|
||||
}
|
||||
|
||||
// getAffinityTermProperties receives a Pod and affinity terms and returns the namespaces and
|
||||
// selectors of the terms.
|
||||
func getAffinityTermProperties(pod *v1.Pod, terms []v1.PodAffinityTerm) (properties []*affinityTermProperties, err error) {
|
||||
if terms == nil {
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
for _, term := range terms {
|
||||
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
|
||||
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, &affinityTermProperties{namespaces: namespaces, selector: selector})
|
||||
}
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// podMatchesAffinityTermProperties return true IFF the given pod matches all the given properties.
|
||||
func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
|
||||
if len(properties) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, property := range properties {
|
||||
if !priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// getPodsMatchingAffinity finds existing Pods that match affinity terms of the given "pod".
|
||||
// It ignores topology. It returns a set of Pods that are checked later by the affinity
|
||||
// predicate. With this set of pods available, the affinity predicate does not
|
||||
// need to check all the pods in the cluster.
|
||||
func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (affinityPods map[string][]*v1.Pod, antiAffinityPods map[string][]*v1.Pod, err error) {
|
||||
allNodeNames := make([]string, 0, len(nodeInfoMap))
|
||||
|
||||
affinity := pod.Spec.Affinity
|
||||
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
for name := range nodeInfoMap {
|
||||
allNodeNames = append(allNodeNames, name)
|
||||
}
|
||||
|
||||
var lock sync.Mutex
|
||||
var firstError error
|
||||
affinityPods = make(map[string][]*v1.Pod)
|
||||
antiAffinityPods = make(map[string][]*v1.Pod)
|
||||
appendResult := func(nodeName string, affPods, antiAffPods []*v1.Pod) {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
if len(affPods) > 0 {
|
||||
affinityPods[nodeName] = affPods
|
||||
}
|
||||
if len(antiAffPods) > 0 {
|
||||
antiAffinityPods[nodeName] = antiAffPods
|
||||
}
|
||||
}
|
||||
|
||||
catchError := func(err error) {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
if firstError == nil {
|
||||
firstError = err
|
||||
}
|
||||
}
|
||||
|
||||
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
antiAffinityProperties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
processNode := func(i int) {
|
||||
nodeInfo := nodeInfoMap[allNodeNames[i]]
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
catchError(fmt.Errorf("nodeInfo.Node is nil"))
|
||||
return
|
||||
}
|
||||
affPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
|
||||
antiAffPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
|
||||
for _, existingPod := range nodeInfo.Pods() {
|
||||
// Check affinity properties.
|
||||
if podMatchesAffinityTermProperties(existingPod, affinityProperties) {
|
||||
affPods = append(affPods, existingPod)
|
||||
}
|
||||
// Check anti-affinity properties.
|
||||
if podMatchesAffinityTermProperties(existingPod, antiAffinityProperties) {
|
||||
antiAffPods = append(antiAffPods, existingPod)
|
||||
}
|
||||
}
|
||||
if len(antiAffPods) > 0 || len(affPods) > 0 {
|
||||
appendResult(node.Name, affPods, antiAffPods)
|
||||
}
|
||||
}
|
||||
workqueue.Parallelize(16, len(allNodeNames), processNode)
|
||||
return affinityPods, antiAffinityPods, firstError
|
||||
}
|
||||
|
||||
// podMatchesAffinity returns true if "targetPod" matches any affinity rule of
|
||||
// "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
|
||||
// So, whether the targetPod actually matches or not needs further checks for a specific
|
||||
// node.
|
||||
func targetPodMatchesAffinityOfPod(pod, targetPod *v1.Pod) bool {
|
||||
affinity := pod.Spec.Affinity
|
||||
if affinity == nil || affinity.PodAffinity == nil {
|
||||
return false
|
||||
}
|
||||
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
|
||||
if err != nil {
|
||||
glog.Errorf("error in getting affinity properties of Pod %v", pod.Name)
|
||||
return false
|
||||
}
|
||||
return podMatchesAffinityTermProperties(targetPod, affinityProperties)
|
||||
}
|
||||
|
||||
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches any anti-affinity
|
||||
// rule of "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
|
||||
// So, whether the targetPod actually matches or not needs further checks for a specific
|
||||
// node.
|
||||
func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
|
||||
affinity := pod.Spec.Affinity
|
||||
if affinity == nil || affinity.PodAntiAffinity == nil {
|
||||
return false
|
||||
}
|
||||
properties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
|
||||
if err != nil {
|
||||
glog.Errorf("error in getting anti-affinity properties of Pod %v", pod.Name)
|
||||
return false
|
||||
}
|
||||
return podMatchesAffinityTermProperties(targetPod, properties)
|
||||
}
|
||||
|
|
|
@ -88,6 +88,13 @@ func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|||
|
||||
var _ = sort.Interface(&sortableServices{})
|
||||
|
||||
func sortNodePodMap(np map[string][]*v1.Pod) {
|
||||
for _, pl := range np {
|
||||
sortablePods := sortablePods(pl)
|
||||
sort.Sort(sortablePods)
|
||||
}
|
||||
}
|
||||
|
||||
// predicateMetadataEquivalent returns true if the two metadata are equivalent.
|
||||
// Note: this function does not compare podRequest.
|
||||
func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
|
||||
|
@ -111,6 +118,16 @@ func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
|
|||
if !reflect.DeepEqual(meta1.matchingAntiAffinityTerms, meta2.matchingAntiAffinityTerms) {
|
||||
return fmt.Errorf("matchingAntiAffinityTerms are not euqal")
|
||||
}
|
||||
sortNodePodMap(meta1.matchingAffinityPods)
|
||||
sortNodePodMap(meta2.matchingAffinityPods)
|
||||
if !reflect.DeepEqual(meta1.matchingAffinityPods, meta2.matchingAffinityPods) {
|
||||
return fmt.Errorf("matchingAffinityPods are not euqal")
|
||||
}
|
||||
sortNodePodMap(meta1.matchingAntiAffinityPods)
|
||||
sortNodePodMap(meta2.matchingAntiAffinityPods)
|
||||
if !reflect.DeepEqual(meta1.matchingAntiAffinityPods, meta2.matchingAntiAffinityPods) {
|
||||
return fmt.Errorf("matchingAntiAffinityPods are not euqal")
|
||||
}
|
||||
if meta1.serviceAffinityInUse {
|
||||
sortablePods1 := sortablePods(meta1.serviceAffinityMatchingPodList)
|
||||
sort.Sort(sortablePods1)
|
||||
|
@ -189,6 +206,34 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
affinityComplex := &v1.PodAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
|
||||
{
|
||||
LabelSelector: &metav1.LabelSelector{
|
||||
MatchExpressions: []metav1.LabelSelectorRequirement{
|
||||
{
|
||||
Key: "foo",
|
||||
Operator: metav1.LabelSelectorOpIn,
|
||||
Values: []string{"bar", "buzz"},
|
||||
},
|
||||
},
|
||||
},
|
||||
TopologyKey: "region",
|
||||
},
|
||||
{
|
||||
LabelSelector: &metav1.LabelSelector{
|
||||
MatchExpressions: []metav1.LabelSelectorRequirement{
|
||||
{
|
||||
Key: "service",
|
||||
Operator: metav1.LabelSelectorOpNotIn,
|
||||
Values: []string{"bar", "security", "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
TopologyKey: "zone",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
description string
|
||||
|
@ -312,6 +357,41 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
|
|||
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
|
||||
pendingPod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
|
||||
},
|
||||
existingPods: []*v1.Pod{
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
|
||||
Spec: v1.PodSpec{NodeName: "nodeA"},
|
||||
},
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
|
||||
Spec: v1.PodSpec{
|
||||
NodeName: "nodeC",
|
||||
Affinity: &v1.Affinity{
|
||||
PodAntiAffinity: antiAffinityFooBar,
|
||||
PodAffinity: affinityComplex,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
addedPod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
|
||||
Spec: v1.PodSpec{
|
||||
NodeName: "nodeA",
|
||||
Affinity: &v1.Affinity{
|
||||
PodAntiAffinity: antiAffinityComplex,
|
||||
},
|
||||
},
|
||||
},
|
||||
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
|
||||
nodes: []*v1.Node{
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
@ -360,6 +440,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
|
|||
// on the idea that shallow-copy should produce an object that is deep-equal to the original
|
||||
// object.
|
||||
func TestPredicateMetadata_ShallowCopy(t *testing.T) {
|
||||
selector1 := map[string]string{"foo": "bar"}
|
||||
source := predicateMetadata{
|
||||
pod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
|
@ -392,6 +473,45 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
|
|||
},
|
||||
},
|
||||
},
|
||||
matchingAffinityPods: map[string][]*v1.Pod{
|
||||
"nodeA": {
|
||||
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
|
||||
Spec: v1.PodSpec{NodeName: "nodeA"},
|
||||
},
|
||||
},
|
||||
"nodeC": {
|
||||
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
|
||||
Spec: v1.PodSpec{
|
||||
NodeName: "nodeC",
|
||||
},
|
||||
},
|
||||
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
|
||||
Spec: v1.PodSpec{NodeName: "nodeC"},
|
||||
},
|
||||
},
|
||||
},
|
||||
matchingAntiAffinityPods: map[string][]*v1.Pod{
|
||||
"nodeN": {
|
||||
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
|
||||
Spec: v1.PodSpec{NodeName: "nodeN"},
|
||||
},
|
||||
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
|
||||
Spec: v1.PodSpec{
|
||||
NodeName: "nodeM",
|
||||
},
|
||||
},
|
||||
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
|
||||
Spec: v1.PodSpec{
|
||||
NodeName: "nodeM",
|
||||
},
|
||||
},
|
||||
},
|
||||
"nodeM": {
|
||||
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
|
||||
Spec: v1.PodSpec{NodeName: "nodeM"},
|
||||
},
|
||||
},
|
||||
},
|
||||
serviceAffinityInUse: true,
|
||||
serviceAffinityMatchingPodList: []*v1.Pod{
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}},
|
||||
|
|
|
@ -1150,7 +1150,7 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
|
|||
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
|
||||
return true, nil, nil
|
||||
}
|
||||
if failedPredicates, error := c.satisfiesPodsAffinityAntiAffinity(pod, nodeInfo, affinity); failedPredicates != nil {
|
||||
if failedPredicates, error := c.satisfiesPodsAffinityAntiAffinity(pod, meta, nodeInfo, affinity); failedPredicates != nil {
|
||||
failedPredicates := append([]algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, failedPredicates)
|
||||
return false, failedPredicates, error
|
||||
}
|
||||
|
@ -1380,60 +1380,129 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
// anyMatchingPodInTopology checks that any of the given Pods are in the
|
||||
// topology specified by the affinity term.
|
||||
func (c *PodAffinityChecker) anyMatchingPodInTopology(pod *v1.Pod, matchingPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, error) {
|
||||
if len(term.TopologyKey) == 0 {
|
||||
return false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
|
||||
}
|
||||
if len(matchingPods) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
// Special case: When the topological domain is node, we can limit our
|
||||
// search to pods on that node without searching the entire cluster.
|
||||
if term.TopologyKey == kubeletapis.LabelHostname {
|
||||
if pods, ok := matchingPods[nodeInfo.Node().Name]; ok {
|
||||
// It may seem odd that we are comparing a node with itself to see if it
|
||||
// has the same topology key, but it is necessary to check extra conditions
|
||||
// that the function performs, such as checking that node labels are not nil.
|
||||
return len(pods) > 0 && priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), nodeInfo.Node(), term.TopologyKey), nil
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
// Topology key is not "Hostname". Checking all matching pods.
|
||||
for nodeName, pods := range matchingPods {
|
||||
matchingPodNodeInfo, err := c.info.GetNodeInfo(nodeName)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if len(pods) > 0 && priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), matchingPodNodeInfo, term.TopologyKey) {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Checks if scheduling the pod onto this node would break any rules of this pod.
|
||||
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo, affinity *v1.Affinity) (algorithm.PredicateFailureReason, error) {
|
||||
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
|
||||
meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo,
|
||||
affinity *v1.Affinity) (algorithm.PredicateFailureReason, error) {
|
||||
node := nodeInfo.Node()
|
||||
if node == nil {
|
||||
return ErrPodAffinityRulesNotMatch, fmt.Errorf("Node is nil")
|
||||
}
|
||||
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
|
||||
if err != nil {
|
||||
return ErrPodAffinityRulesNotMatch, err
|
||||
}
|
||||
|
||||
// Check all affinity terms.
|
||||
for _, term := range GetPodAffinityTerms(affinity.PodAffinity) {
|
||||
termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
|
||||
if err != nil {
|
||||
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err)
|
||||
glog.Error(errMessage)
|
||||
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
|
||||
}
|
||||
if !termMatches {
|
||||
// If the requirement matches a pod's own labels are namespace, and there are
|
||||
// no other such pods, then disregard the requirement. This is necessary to
|
||||
// not block forever because the first pod of the collection can't be scheduled.
|
||||
if matchingPodExists {
|
||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
|
||||
podName(pod), node.Name, term)
|
||||
return ErrPodAffinityRulesNotMatch, nil
|
||||
}
|
||||
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
|
||||
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
|
||||
if predicateMeta, ok := meta.(*predicateMetadata); ok {
|
||||
// Check all affinity terms.
|
||||
matchingPods := predicateMeta.matchingAffinityPods
|
||||
for _, term := range GetPodAffinityTerms(affinity.PodAffinity) {
|
||||
termMatches, err := c.anyMatchingPodInTopology(pod, matchingPods, nodeInfo, &term)
|
||||
if err != nil {
|
||||
errMessage := fmt.Sprintf("Cannot parse selector on term %v for pod %v. Details %v", term, podName(pod), err)
|
||||
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err)
|
||||
glog.Errorf(errMessage)
|
||||
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
|
||||
}
|
||||
if !termMatches {
|
||||
// This pod may the first pod in a series that have affinity to themselves. In order
|
||||
// to not leave such pods in pending state forever, we check that if no other pod
|
||||
// in the cluster matches the namespace and selector of this pod and the pod matches
|
||||
// its own terms, then we allow the pod to pass the affinity check.
|
||||
if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
|
||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
|
||||
podName(pod), node.Name, term)
|
||||
return ErrPodAffinityRulesNotMatch, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check all anti-affinity terms.
|
||||
matchingPods = predicateMeta.matchingAntiAffinityPods
|
||||
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
|
||||
termMatches, err := c.anyMatchingPodInTopology(pod, matchingPods, nodeInfo, &term)
|
||||
if err != nil || termMatches {
|
||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
|
||||
podName(pod), node.Name, term, err)
|
||||
return ErrPodAntiAffinityRulesNotMatch, nil
|
||||
}
|
||||
}
|
||||
} else { // We don't have precomputed metadata. We have to follow a slow path to check affinity rules.
|
||||
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
|
||||
if err != nil {
|
||||
return ErrPodAffinityRulesNotMatch, err
|
||||
}
|
||||
|
||||
// Check all affinity terms.
|
||||
for _, term := range GetPodAffinityTerms(affinity.PodAffinity) {
|
||||
termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
|
||||
if err != nil {
|
||||
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err)
|
||||
glog.Error(errMessage)
|
||||
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
|
||||
}
|
||||
match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
|
||||
if !match {
|
||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
|
||||
podName(pod), node.Name, term)
|
||||
return ErrPodAffinityRulesNotMatch, nil
|
||||
if !termMatches {
|
||||
// If the requirement matches a pod's own labels are namespace, and there are
|
||||
// no other such pods, then disregard the requirement. This is necessary to
|
||||
// not block forever because the first pod of the collection can't be scheduled.
|
||||
if matchingPodExists {
|
||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
|
||||
podName(pod), node.Name, term)
|
||||
return ErrPodAffinityRulesNotMatch, nil
|
||||
}
|
||||
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
|
||||
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
|
||||
if err != nil {
|
||||
errMessage := fmt.Sprintf("Cannot parse selector on term %v for pod %v. Details %v", term, podName(pod), err)
|
||||
glog.Error(errMessage)
|
||||
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
|
||||
}
|
||||
match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
|
||||
if !match {
|
||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
|
||||
podName(pod), node.Name, term)
|
||||
return ErrPodAffinityRulesNotMatch, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check all anti-affinity terms.
|
||||
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
|
||||
termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
|
||||
if err != nil || termMatches {
|
||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
|
||||
podName(pod), node.Name, term, err)
|
||||
return ErrPodAntiAffinityRulesNotMatch, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check all anti-affinity terms.
|
||||
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
|
||||
termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
|
||||
if err != nil || termMatches {
|
||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
|
||||
podName(pod), node.Name, term, err)
|
||||
return ErrPodAntiAffinityRulesNotMatch, nil
|
||||
}
|
||||
}
|
||||
|
||||
if glog.V(10) {
|
||||
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
|
||||
// not logged. There is visible performance gain from it.
|
||||
|
|
|
@ -2793,7 +2793,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
|
|||
},
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelA}},
|
||||
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelA}},
|
||||
},
|
||||
nodes: []v1.Node{
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
|
||||
|
@ -3132,7 +3132,8 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
|
|||
|
||||
for indexTest, test := range tests {
|
||||
nodeListInfo := FakeNodeListInfo(test.nodes)
|
||||
for indexNode, node := range test.nodes {
|
||||
nodeInfoMap := make(map[string]*schedulercache.NodeInfo)
|
||||
for i, node := range test.nodes {
|
||||
var podsOnNode []*v1.Pod
|
||||
for _, pod := range test.pods {
|
||||
if pod.Spec.NodeName == node.Name {
|
||||
|
@ -3140,21 +3141,23 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
nodeInfo := schedulercache.NewNodeInfo(podsOnNode...)
|
||||
nodeInfo.SetNode(&test.nodes[i])
|
||||
nodeInfoMap[node.Name] = nodeInfo
|
||||
}
|
||||
|
||||
for indexNode, node := range test.nodes {
|
||||
testFit := PodAffinityChecker{
|
||||
info: nodeListInfo,
|
||||
podLister: schedulertesting.FakePodLister(test.pods),
|
||||
}
|
||||
nodeInfo := schedulercache.NewNodeInfo(podsOnNode...)
|
||||
nodeInfo.SetNode(&node)
|
||||
nodeInfoMap := map[string]*schedulercache.NodeInfo{node.Name: nodeInfo}
|
||||
|
||||
var meta algorithm.PredicateMetadata
|
||||
|
||||
if !test.nometa {
|
||||
meta = PredicateMetadata(test.pod, nodeInfoMap)
|
||||
}
|
||||
|
||||
fits, reasons, _ := testFit.InterPodAffinityMatches(test.pod, meta, nodeInfo)
|
||||
fits, reasons, _ := testFit.InterPodAffinityMatches(test.pod, meta, nodeInfoMap[node.Name])
|
||||
if !fits && !reflect.DeepEqual(reasons, test.nodesExpectAffinityFailureReasons[indexNode]) {
|
||||
t.Errorf("index: %d test: %s unexpected failure reasons: %v expect: %v", indexTest, test.test, reasons, test.nodesExpectAffinityFailureReasons[indexNode])
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue