Merge pull request #72895 from bsalamat/no_refresh_preemption

Do not snapshot scheduler cache before starting preemption
Kubernetes Prow Robot 2019-01-14 22:06:14 -08:00 committed by GitHub
commit 148248353b
2 changed files with 15 additions and 7 deletions

@@ -275,6 +275,13 @@ func (g *genericScheduler) selectHost(priorityList schedulerapi.HostPriorityList
// returns 1) the node, 2) the list of preempted pods if such a node is found,
// 3) A list of pods whose nominated node name should be cleared, and 4) any
// possible error.
+// Preempt does not update its snapshot. It uses the same snapshot used in the
+// scheduling cycle. This is to avoid a scenario where preempt finds feasible
+// nodes without preempting any pod. When there are many pending pods in the
+// scheduling queue, a nominated pod will go back to the queue and land behind
+// other pods with the same priority. The nominated pod prevents other pods from
+// using the nominated resources and the nominated pod could take a long time
+// before it is retried after many other pending pods.
func (g *genericScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister, scheduleErr error) (*v1.Node, []*v1.Pod, []*v1.Pod, error) {
// Scheduler may return various types of errors. Consider preemption only if
// the error is of type FitError.
@@ -282,10 +289,6 @@ func (g *genericScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister,
if !ok || fitError == nil {
return nil, nil, nil, nil
}
-err := g.snapshot()
-if err != nil {
-return nil, nil, nil, err
-}
if !podEligibleToPreemptOthers(pod, g.cachedNodeInfoMap) {
klog.V(5).Infof("Pod %v/%v is not eligible for more preemption.", pod.Namespace, pod.Name)
return nil, nil, nil, nil
@@ -1076,7 +1079,7 @@ func nodesWherePreemptionMightHelp(nodes []*v1.Node, failedPredicatesMap FailedP
potentialNodes := []*v1.Node{}
for _, node := range nodes {
unresolvableReasonExist := false
-failedPredicates, found := failedPredicatesMap[node.Name]
+failedPredicates, _ := failedPredicatesMap[node.Name]
// If we assume that scheduler looks at all nodes and populates the failedPredicateMap
// (which is the case today), the !found case should never happen, but we'd prefer
// to rely less on such assumptions in the code when checking does not impose
@@ -1107,7 +1110,7 @@ func nodesWherePreemptionMightHelp(nodes []*v1.Node, failedPredicatesMap FailedP
break
}
}
-if !found || !unresolvableReasonExist {
+if !unresolvableReasonExist {
klog.V(3).Infof("Node %v is a potential node for preemption.", node.Name)
potentialNodes = append(potentialNodes, node)
}
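
The comment added in the first hunk describes the intended contract. Below is a minimal, standalone Go sketch of that contract -- not the real kube-scheduler types; nodeInfo, cache, schedule, and preempt are simplified stand-ins -- showing a scheduling cycle that takes one snapshot and a preemption step that reuses it, so preemption can never see a feasible node that the failed scheduling attempt did not:

package main

import (
	"errors"
	"fmt"
)

// nodeInfo, cache, and genericScheduler are simplified stand-ins for the real
// scheduler types; only the snapshot/preempt interaction is modeled.
type nodeInfo struct {
	capacityCPU int // total allocatable CPU on the node
	freeCPU     int // CPU not requested by pods already running there
}

type cache struct {
	nodes map[string]*nodeInfo
}

type genericScheduler struct {
	cache             *cache
	cachedNodeInfoMap map[string]*nodeInfo // snapshot shared by schedule and preempt
}

var errNoFit = errors.New("no node fits the pod")

// snapshot copies the cache into cachedNodeInfoMap once, at the start of a
// scheduling cycle.
func (g *genericScheduler) snapshot() {
	g.cachedNodeInfoMap = make(map[string]*nodeInfo, len(g.cache.nodes))
	for name, n := range g.cache.nodes {
		copied := *n
		g.cachedNodeInfoMap[name] = &copied
	}
}

// schedule succeeds only if some node in the snapshot has enough free CPU.
func (g *genericScheduler) schedule(cpu int) (string, error) {
	for name, n := range g.cachedNodeInfoMap {
		if n.freeCPU >= cpu {
			return name, nil
		}
	}
	return "", errNoFit
}

// preempt deliberately does not refresh the snapshot: it inspects the same
// cachedNodeInfoMap the failed scheduling attempt saw and asks whether evicting
// pods on a node could free enough CPU. Victim selection is elided.
func (g *genericScheduler) preempt(cpu int) (string, bool) {
	for name, n := range g.cachedNodeInfoMap {
		if n.capacityCPU >= cpu {
			return name, true
		}
	}
	return "", false
}

func main() {
	g := &genericScheduler{cache: &cache{nodes: map[string]*nodeInfo{
		"node-1": {capacityCPU: 4, freeCPU: 1},
	}}}

	g.snapshot() // one snapshot per scheduling cycle
	if _, err := g.schedule(4); err != nil {
		// Preemption reuses the snapshot, so it cannot find a feasible node
		// that the scheduling attempt itself did not see.
		if node, ok := g.preempt(4); ok {
			fmt.Println("nominated node:", node)
		}
	}
}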

@@ -1407,6 +1407,7 @@ func TestPreempt(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
t.Logf("===== Running test %v", t.Name())
stop := make(chan struct{})
cache := schedulerinternalcache.New(time.Duration(0), stop)
for _, pod := range test.pods {
@@ -1443,14 +1444,18 @@ func TestPreempt(t *testing.T) {
false,
false,
schedulerapi.DefaultPercentageOfNodesToScore)
+scheduler.(*genericScheduler).snapshot()
// Call Preempt and check the expected results.
node, victims, _, err := scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap}))
if err != nil {
t.Errorf("unexpected error in preemption: %v", err)
}
-if (node != nil && node.Name != test.expectedNode) || (node == nil && len(test.expectedNode) != 0) {
+if node != nil && node.Name != test.expectedNode {
t.Errorf("expected node: %v, got: %v", test.expectedNode, node.GetName())
}
+if node == nil && len(test.expectedNode) != 0 {
+t.Errorf("expected node: %v, got: nothing", test.expectedNode)
+}
if len(victims) != len(test.expectedPods) {
t.Errorf("expected %v pods, got %v.", len(test.expectedPods), len(victims))
}
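
The test now takes the snapshot itself -- scheduler.(*genericScheduler).snapshot() -- before calling Preempt, because Preempt no longer refreshes the cache, and the single combined node assertion is split into two branches so a failure reports whether the wrong node or no node came back (the nil case never formats a node name). A small, self-contained sketch of that assertion pattern, using a hypothetical fakeNode type and checkNominatedNode helper rather than the real test types:

package example

import "testing"

// fakeNode is a hypothetical stand-in for *v1.Node.
type fakeNode struct{ Name string }

// checkNominatedNode mirrors the two-branch check used in TestPreempt above:
// one branch for "wrong node", one for "expected a node but got none".
func checkNominatedNode(t *testing.T, node *fakeNode, expected string) {
	t.Helper()
	if node != nil && node.Name != expected {
		t.Errorf("expected node: %v, got: %v", expected, node.Name)
	}
	if node == nil && len(expected) != 0 {
		t.Errorf("expected node: %v, got: nothing", expected)
	}
}

func TestCheckNominatedNode(t *testing.T) {
	checkNominatedNode(t, &fakeNode{Name: "node-1"}, "node-1") // passes
	checkNominatedNode(t, nil, "")                             // passes: no node expected
}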