mirror of https://github.com/k3s-io/k3s
Merge pull request #62715 from bsalamat/fix_antiaffinity
Automatic merge from submit-queue (batch tested with PRs 62761, 62715). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fix inter-pod anti-affinity check to consider a pod a match when all the anti-affinity terms match

**What this PR does / why we need it**: The inter-pod anti-affinity check used to incorrectly consider a pod a match when any of the anti-affinity terms matched the pod. This PR fixes the logic so that a pod is considered a match only when all of the terms match it.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes #62567

**Special notes for your reviewer**:

**Release note**:
```release-note
Fix inter-pod anti-affinity check to consider a pod a match when all the anti-affinity terms match.
```
/sig scheduling
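For readers skimming the diff below, here is a minimal standalone sketch of the semantic change only: it is not the scheduler's real code or types, just the "any term matches" versus "all terms match" quantifier, with the label data borrowed from the new test case added in this PR. The existing pod is assumed to already match each term's namespace and label selector, so only the topology comparison is shown.

```go
package main

import "fmt"

// term is a toy stand-in for a pod anti-affinity term: only the topology key
// matters here, because the existing pod is assumed to already match the
// term's namespace and label selector.
type term struct{ topologyKey string }

// sameTopology reports whether two nodes (given as label maps) carry the same
// value for the topology key.
func sameTopology(nodeA, nodeB map[string]string, key string) bool {
	a, okA := nodeA[key]
	b, okB := nodeB[key]
	return okA && okB && a == b
}

// anyTermMatches is the old semantics: one matching term was enough to treat
// the existing pod as an anti-affinity violation.
func anyTermMatches(candidate, existingPodNode map[string]string, terms []term) bool {
	for _, t := range terms {
		if sameTopology(candidate, existingPodNode, t.topologyKey) {
			return true
		}
	}
	return false
}

// allTermsMatch is the corrected semantics: every term's topology must match.
func allTermsMatch(candidate, existingPodNode map[string]string, terms []term) bool {
	for _, t := range terms {
		if !sameTopology(candidate, existingPodNode, t.topologyKey) {
			return false
		}
	}
	return len(terms) > 0
}

func main() {
	nodeA := map[string]string{"region": "r1", "zone": "z1"}
	nodeB := map[string]string{"region": "r1", "zone": "z2"}
	existingPodNode := nodeA // the selector-matched pod already runs on nodeA
	terms := []term{{"region"}, {"zone"}}

	// nodeA shares both region and zone with the existing pod, so it is
	// rejected under both the old and the new logic.
	fmt.Println(anyTermMatches(nodeA, existingPodNode, terms), allTermsMatch(nodeA, existingPodNode, terms)) // true true
	// nodeB shares only region: the old logic rejected it too, while the
	// fixed logic allows it because the zone term does not match.
	fmt.Println(anyTermMatches(nodeB, existingPodNode, terms), allTermsMatch(nodeB, existingPodNode, terms)) // true false
}
```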
commit 98968c36d8
@@ -1377,34 +1377,27 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
 	return nil, nil
 }
 
-// anyMatchingPodInTopology checks that any of the given Pods are in the
-// topology specified by the affinity term.
-func (c *PodAffinityChecker) anyMatchingPodInTopology(pod *v1.Pod, matchingPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, error) {
-	if len(term.TopologyKey) == 0 {
-		return false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
-	}
-	if len(matchingPods) == 0 {
-		return false, nil
-	}
-	// Special case: When the topological domain is node, we can limit our
-	// search to pods on that node without searching the entire cluster.
-	if term.TopologyKey == kubeletapis.LabelHostname {
-		if pods, ok := matchingPods[nodeInfo.Node().Name]; ok {
-			// It may seem odd that we are comparing a node with itself to see if it
-			// has the same topology key, but it is necessary to check extra conditions
-			// that the function performs, such as checking that node labels are not nil.
-			return len(pods) > 0 && priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), nodeInfo.Node(), term.TopologyKey), nil
-		}
-		return false, nil
-	}
-	// Topology key is not "Hostname". Checking all matching pods.
-	for nodeName, pods := range matchingPods {
-		matchingPodNodeInfo, err := c.info.GetNodeInfo(nodeName)
+// anyPodsMatchingTopologyTerms checks whether any of the nodes given via
+// "targetPods" matches topology of all the "terms" for the give "pod" and "nodeInfo".
+func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) {
+	for nodeName, targetPods := range targetPods {
+		targetPodNodeInfo, err := c.info.GetNodeInfo(nodeName)
 		if err != nil {
 			return false, err
 		}
-		if len(pods) > 0 && priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), matchingPodNodeInfo, term.TopologyKey) {
-			return true, nil
+		if len(targetPods) > 0 {
+			allTermsMatched := true
+			for _, term := range terms {
+				if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNodeInfo, term.TopologyKey) {
+					allTermsMatched = false
+					break
+				}
+			}
+			if allTermsMatched {
+				// We have 1 or more pods on the target node that have already matched namespace and selector
+				// and all of the terms topologies matched the target node. So, there is at least 1 matching pod on the node.
+				return true, nil
+			}
 		}
 	}
 	return false, nil
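Both the removed and the rewritten helper delegate the actual topology comparison to `priorityutil.NodesHaveSameTopologyKey`. As a rough sketch of what that comparison amounts to (an assumption based on how the diff uses it and on the removed comment about nil node labels, not a copy of the real helper), the check reduces to:

```go
// nodesHaveSameTopologyKey is an assumed, simplified rendition of the
// priorityutil helper: both nodes must carry the topology-key label and the
// two label values must be equal; nil or missing labels never match.
func nodesHaveSameTopologyKey(labelsA, labelsB map[string]string, topologyKey string) bool {
	if topologyKey == "" || labelsA == nil || labelsB == nil {
		return false
	}
	a, okA := labelsA[topologyKey]
	b, okB := labelsB[topologyKey]
	return okA && okB && a == b
}
```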
@@ -1421,21 +1414,21 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
 	if predicateMeta, ok := meta.(*predicateMetadata); ok {
 		// Check all affinity terms.
 		matchingPods := predicateMeta.nodeNameToMatchingAffinityPods
-		for _, term := range GetPodAffinityTerms(affinity.PodAffinity) {
-			termMatches, err := c.anyMatchingPodInTopology(pod, matchingPods, nodeInfo, &term)
+		if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 {
+			matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms)
 			if err != nil {
-				errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err)
+				errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
 				glog.Errorf(errMessage)
 				return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
 			}
-			if !termMatches {
+			if !matchExists {
 				// This pod may the first pod in a series that have affinity to themselves. In order
 				// to not leave such pods in pending state forever, we check that if no other pod
 				// in the cluster matches the namespace and selector of this pod and the pod matches
 				// its own terms, then we allow the pod to pass the affinity check.
 				if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
-					glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
-						podName(pod), node.Name, term)
+					glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
+						podName(pod), node.Name)
 					return ErrPodAffinityRulesNotMatch, nil
 				}
 			}
@@ -1443,11 +1436,11 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
 
 		// Check all anti-affinity terms.
 		matchingPods = predicateMeta.nodeNameToMatchingAntiAffinityPods
-		for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
-			termMatches, err := c.anyMatchingPodInTopology(pod, matchingPods, nodeInfo, &term)
-			if err != nil || termMatches {
-				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
-					podName(pod), node.Name, term, err)
+		if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 {
+			matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms)
+			if err != nil || matchExists {
+				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity, err: %v",
+					podName(pod), node.Name, err)
 				return ErrPodAntiAffinityRulesNotMatch, nil
 			}
 		}
@@ -2952,6 +2952,55 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
 			},
 			test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB.",
 		},
+		{
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					Affinity: &v1.Affinity{
+						PodAntiAffinity: &v1.PodAntiAffinity{
+							RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
+								{
+									LabelSelector: &metav1.LabelSelector{
+										MatchExpressions: []metav1.LabelSelectorRequirement{
+											{
+												Key:      "foo",
+												Operator: metav1.LabelSelectorOpIn,
+												Values:   []string{"abc"},
+											},
+										},
+									},
+									TopologyKey: "region",
+								},
+								{
+									LabelSelector: &metav1.LabelSelector{
+										MatchExpressions: []metav1.LabelSelectorRequirement{
+											{
+												Key:      "service",
+												Operator: metav1.LabelSelectorOpIn,
+												Values:   []string{"securityscan"},
+											},
+										},
+									},
+									TopologyKey: "zone",
+								},
+							},
+						},
+					},
+				},
+			},
+			pods: []*v1.Pod{
+				{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc", "service": "securityscan"}}},
+			},
+			nodes: []v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}},
+			},
+			nodesExpectAffinityFailureReasons: [][]algorithm.PredicateFailureReason{{ErrPodAffinityNotMatch, ErrPodAntiAffinityRulesNotMatch}},
+			fits: map[string]bool{
+				"nodeA": false,
+				"nodeB": true,
+			},
+			test: "This test ensures that anti-affinity matches a pod when all terms of the anti-affinity rule matches a pod.",
+		},
+		{
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
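The added test case walks the scenario described in the PR body: the existing pod on nodeA carries both labels (`foo=abc` and `service=securityscan`), so it matches the selectors of both anti-affinity terms. nodeA shares both the `region` and the `zone` label values with that pod, so all terms match and scheduling there is rejected (`"nodeA": false`); nodeB shares only `region`, so under the corrected all-terms rule it fits (`"nodeB": true`), whereas the old any-term logic would have rejected it as well.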