From e3f4e1e37817c28c78500e071e8f8a6f77d78858 Mon Sep 17 00:00:00 2001 From: "Bobby (Babak) Salamat" Date: Mon, 14 Jan 2019 11:12:42 -0800 Subject: [PATCH 1/2] Do not snapshot scheduler cache before starting preemption --- pkg/scheduler/core/generic_scheduler.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index 2f49b0733f..322468e07c 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -272,6 +272,13 @@ func (g *genericScheduler) selectHost(priorityList schedulerapi.HostPriorityList // returns 1) the node, 2) the list of preempted pods if such a node is found, // 3) A list of pods whose nominated node name should be cleared, and 4) any // possible error. +// Preempt does not update its snapshot. It uses the same snapshot used in the +// scheduling cycle. This is to avoid a scenario where preempt finds feasible +// nodes without preempting any pod. When there are many pending pods in the +// scheduling queue a nominated pod will go back to the queue and behind +// other pods with the same priority. The nominated pod prevents other pods from +// using the nominated resources and the nominated pod could take a long time +// before it is retried after many other pending pods. func (g *genericScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister, scheduleErr error) (*v1.Node, []*v1.Pod, []*v1.Pod, error) { // Scheduler may return various types of errors. Consider preemption only if // the error is of type FitError. @@ -279,10 +286,6 @@ func (g *genericScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister, if !ok || fitError == nil { return nil, nil, nil, nil } - err := g.snapshot() - if err != nil { - return nil, nil, nil, err - } if !podEligibleToPreemptOthers(pod, g.cachedNodeInfoMap) { klog.V(5).Infof("Pod %v/%v is not eligible for more preemption.", pod.Namespace, pod.Name) return nil, nil, nil, nil @@ -1073,7 +1076,7 @@ func nodesWherePreemptionMightHelp(nodes []*v1.Node, failedPredicatesMap FailedP potentialNodes := []*v1.Node{} for _, node := range nodes { unresolvableReasonExist := false - failedPredicates, found := failedPredicatesMap[node.Name] + failedPredicates, _ := failedPredicatesMap[node.Name] // If we assume that scheduler looks at all nodes and populates the failedPredicateMap // (which is the case today), the !found case should never happen, but we'd prefer // to rely less on such assumptions in the code when checking does not impose @@ -1104,7 +1107,7 @@ func nodesWherePreemptionMightHelp(nodes []*v1.Node, failedPredicatesMap FailedP break } } - if !found || !unresolvableReasonExist { + if !unresolvableReasonExist { klog.V(3).Infof("Node %v is a potential node for preemption.", node.Name) potentialNodes = append(potentialNodes, node) } From 127321296cc79da764701dcdabf99e6c87f17cef Mon Sep 17 00:00:00 2001 From: "Bobby (Babak) Salamat" Date: Mon, 14 Jan 2019 14:27:16 -0800 Subject: [PATCH 2/2] Fix and improve preemption test to work with the new logic --- pkg/scheduler/core/generic_scheduler_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/core/generic_scheduler_test.go b/pkg/scheduler/core/generic_scheduler_test.go index f72f2026ee..f8c9994be7 100644 --- a/pkg/scheduler/core/generic_scheduler_test.go +++ b/pkg/scheduler/core/generic_scheduler_test.go @@ -1407,6 +1407,7 @@ func TestPreempt(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { + t.Logf("===== Running test %v", t.Name()) stop := make(chan struct{}) cache := schedulerinternalcache.New(time.Duration(0), stop) for _, pod := range test.pods { @@ -1443,14 +1444,18 @@ func TestPreempt(t *testing.T) { false, false, schedulerapi.DefaultPercentageOfNodesToScore) + scheduler.(*genericScheduler).snapshot() // Call Preempt and check the expected results. node, victims, _, err := scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap})) if err != nil { t.Errorf("unexpected error in preemption: %v", err) } - if (node != nil && node.Name != test.expectedNode) || (node == nil && len(test.expectedNode) != 0) { + if node != nil && node.Name != test.expectedNode { t.Errorf("expected node: %v, got: %v", test.expectedNode, node.GetName()) } + if node == nil && len(test.expectedNode) != 0 { + t.Errorf("expected node: %v, got: nothing", test.expectedNode) + } if len(victims) != len(test.expectedPods) { t.Errorf("expected %v pods, got %v.", len(test.expectedPods), len(victims)) }