[scheduler] Modify the scheduling result struct and improve logging for successful binding

pull/564/head
Guoliang Wang 2018-12-07 11:40:45 +08:00
parent 3a6d4c10bf
commit 3e69638772
5 changed files with 115 additions and 67 deletions
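In short: ScheduleAlgorithm.Schedule now returns a ScheduleResult (the suggested host plus the number of nodes evaluated and the number found feasible) instead of a bare host name, the callers in the scheduler are updated to read result.SuggestedHost, the affected tests compare the full struct, and a successful bind is now logged at V(2) together with the evaluated and feasible node counts.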

View File

@@ -18,6 +18,7 @@ package core
 import (
 	"fmt"
+	"reflect"
 	"testing"
 	"time"
@@ -342,7 +343,7 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
 		prioritizers []algorithm.PriorityConfig
 		extenders    []FakeExtender
 		nodes        []string
-		expectedHost string
+		expectedResult ScheduleResult
 		expectsErr   bool
 	}{
 		{
@@ -387,7 +388,11 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
 				},
 			},
 			nodes:        []string{"machine1", "machine2"},
-			expectedHost: "machine1",
+			expectedResult: ScheduleResult{
+				SuggestedHost:  "machine1",
+				EvaluatedNodes: 2,
+				FeasibleNodes:  1,
+			},
 			name: "test 3",
 		},
 		{
@@ -416,7 +421,11 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
 				},
 			},
 			nodes:        []string{"machine1"},
-			expectedHost: "machine1",
+			expectedResult: ScheduleResult{
+				SuggestedHost:  "machine1",
+				EvaluatedNodes: 1,
+				FeasibleNodes:  1,
+			},
 			name: "test 5",
 		},
 		{
@@ -435,7 +444,11 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
 				},
 			},
 			nodes:        []string{"machine1", "machine2"},
-			expectedHost: "machine2",
+			expectedResult: ScheduleResult{
+				SuggestedHost:  "machine2",
+				EvaluatedNodes: 2,
+				FeasibleNodes:  2,
+			},
 			name: "test 6",
 		},
 		{
@@ -449,7 +462,11 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
 				},
 			},
 			nodes:        []string{"machine1", "machine2"},
-			expectedHost: "machine2", // machine2 has higher score
+			expectedResult: ScheduleResult{
+				SuggestedHost:  "machine2",
+				EvaluatedNodes: 2,
+				FeasibleNodes:  2,
+			}, // machine2 has higher score
 			name: "test 7",
 		},
 		{
@@ -471,7 +488,11 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
 			},
 			nodes:      []string{"machine1", "machine2"},
 			expectsErr: false,
-			expectedHost: "machine2", // machine2 has higher score
+			expectedResult: ScheduleResult{
+				SuggestedHost:  "machine2",
+				EvaluatedNodes: 2,
+				FeasibleNodes:  2,
+			}, // machine2 has higher score
 			name: "test 8",
 		},
 		{
@@ -493,7 +514,11 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
 			},
 			nodes:      []string{"machine1", "machine2"},
 			expectsErr: false,
-			expectedHost: "machine1",
+			expectedResult: ScheduleResult{
+				SuggestedHost:  "machine1",
+				EvaluatedNodes: 2,
+				FeasibleNodes:  1,
+			},
 			name: "test 9",
 		},
 	}
@@ -525,18 +550,19 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
 			false,
 			schedulerapi.DefaultPercentageOfNodesToScore)
 		podIgnored := &v1.Pod{}
-		machine, err := scheduler.Schedule(podIgnored, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
+		result, err := scheduler.Schedule(podIgnored, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
 		if test.expectsErr {
 			if err == nil {
-				t.Errorf("Unexpected non-error, machine %s", machine)
+				t.Errorf("Unexpected non-error, result %+v", result)
 			}
 		} else {
 			if err != nil {
 				t.Errorf("Unexpected error: %v", err)
 				return
 			}
-			if test.expectedHost != machine {
-				t.Errorf("Expected: %s, Saw: %s", test.expectedHost, machine)
+
+			if !reflect.DeepEqual(result, test.expectedResult) {
+				t.Errorf("Expected: %+v, Saw: %+v", test.expectedResult, result)
 			}
 		}
 	})

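For context on the expected values above: EvaluatedNodes counts every node the scheduler considered for the pod (feasible nodes plus nodes rejected by predicates or extenders), while FeasibleNodes counts only the nodes that passed. A minimal sketch of how the "test 3" expectation (2 evaluated, 1 feasible) comes about, with a hypothetical filter standing in for the FakeExtender:

package main

import "fmt"

func main() {
	// Hypothetical stand-ins: two candidate nodes and an extender-like filter
	// that only accepts machine1, mirroring "test 3" above.
	nodes := []string{"machine1", "machine2"}
	fits := func(n string) bool { return n == "machine1" }

	feasible := 0
	for _, n := range nodes {
		if fits(n) {
			feasible++
		}
	}
	// Every listed node was evaluated; only one passed.
	fmt.Printf("EvaluatedNodes=%d FeasibleNodes=%d\n", len(nodes), feasible) // EvaluatedNodes=2 FeasibleNodes=1
}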
View File

@@ -104,7 +104,7 @@ func (f *FitError) Error() string {
 // onto machines.
 // TODO: Rename this type.
 type ScheduleAlgorithm interface {
-	Schedule(*v1.Pod, algorithm.NodeLister) (selectedMachine string, err error)
+	Schedule(*v1.Pod, algorithm.NodeLister) (scheduleResult ScheduleResult, err error)
 	// Preempt receives scheduling errors for a pod and tries to create room for
 	// the pod by preempting lower priority pods if possible.
 	// It returns the node where preemption happened, a list of preempted pods, a
@@ -118,6 +118,17 @@ type ScheduleAlgorithm interface {
 	Prioritizers() []algorithm.PriorityConfig
 }

+// ScheduleResult represents the result of one pod scheduled. It will contain
+// the final selected Node, along with the selected intermediate information.
+type ScheduleResult struct {
+	// Name of the scheduler suggest host
+	SuggestedHost string
+	// Number of nodes scheduler evaluated on one pod scheduled
+	EvaluatedNodes int
+	// Number of feasible nodes on one pod scheduled
+	FeasibleNodes int
+}
+
 type genericScheduler struct {
 	cache           schedulerinternalcache.Cache
 	schedulingQueue internalqueue.SchedulingQueue
@@ -147,36 +158,35 @@ func (g *genericScheduler) snapshot() error {
 // Schedule tries to schedule the given pod to one of the nodes in the node list.
 // If it succeeds, it will return the name of the node.
 // If it fails, it will return a FitError error with reasons.
-func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister) (string, error) {
+func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister) (result ScheduleResult, err error) {
 	trace := utiltrace.New(fmt.Sprintf("Scheduling %s/%s", pod.Namespace, pod.Name))
 	defer trace.LogIfLong(100 * time.Millisecond)

 	if err := podPassesBasicChecks(pod, g.pvcLister); err != nil {
-		return "", err
+		return result, err
 	}

 	nodes, err := nodeLister.List()
 	if err != nil {
-		return "", err
+		return result, err
 	}
 	if len(nodes) == 0 {
-		return "", ErrNoNodesAvailable
+		return result, ErrNoNodesAvailable
 	}

-	err = g.snapshot()
-	if err != nil {
-		return "", err
+	if err := g.snapshot(); err != nil {
+		return result, err
 	}

 	trace.Step("Computing predicates")
 	startPredicateEvalTime := time.Now()
 	filteredNodes, failedPredicateMap, err := g.findNodesThatFit(pod, nodes)
 	if err != nil {
-		return "", err
+		return result, err
 	}

 	if len(filteredNodes) == 0 {
-		return "", &FitError{
+		return result, &FitError{
 			Pod:              pod,
 			NumAllNodes:      len(nodes),
 			FailedPredicates: failedPredicateMap,
@@ -190,19 +200,29 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
 	// When only one node after predicate, just use it.
 	if len(filteredNodes) == 1 {
 		metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
-		return filteredNodes[0].Name, nil
+		return ScheduleResult{
+			SuggestedHost:  filteredNodes[0].Name,
+			EvaluatedNodes: 1 + len(failedPredicateMap),
+			FeasibleNodes:  1,
+		}, nil
 	}

 	metaPrioritiesInterface := g.priorityMetaProducer(pod, g.cachedNodeInfoMap)
 	priorityList, err := PrioritizeNodes(pod, g.cachedNodeInfoMap, metaPrioritiesInterface, g.prioritizers, filteredNodes, g.extenders)
 	if err != nil {
-		return "", err
+		return result, err
 	}
 	metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
 	metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))

 	trace.Step("Selecting host")
-	return g.selectHost(priorityList)
+
+	host, err := g.selectHost(priorityList)
+	return ScheduleResult{
+		SuggestedHost:  host,
+		EvaluatedNodes: len(filteredNodes) + len(failedPredicateMap),
+		FeasibleNodes:  len(filteredNodes),
+	}, err
 }

 // Prioritizers returns a slice containing all the scheduler's priority

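Taken together, the new struct and the return sites above give callers of Schedule the chosen host plus a measure of how much filtering work the pass did. A minimal caller-side sketch, assuming only the ScheduleResult shape from this patch; the fakeSchedule stand-in and its values are illustrative:

package main

import "fmt"

// ScheduleResult mirrors the struct added above.
type ScheduleResult struct {
	SuggestedHost  string
	EvaluatedNodes int
	FeasibleNodes  int
}

// fakeSchedule stands in for genericScheduler.Schedule; the values are made up.
func fakeSchedule() (ScheduleResult, error) {
	return ScheduleResult{SuggestedHost: "machine1", EvaluatedNodes: 2, FeasibleNodes: 1}, nil
}

func main() {
	result, err := fakeSchedule()
	if err != nil {
		panic(err)
	}
	// Callers can now report how wide the search was, not just the chosen host.
	fmt.Printf("host=%s evaluated=%d feasible=%d\n",
		result.SuggestedHost, result.EvaluatedNodes, result.FeasibleNodes)
}

Note that the error paths above return the zero value of ScheduleResult (the named result is untouched when an error is returned), so callers should only read the fields after checking err.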
View File

@@ -482,13 +482,13 @@ func TestGenericScheduler(t *testing.T) {
 				test.alwaysCheckAllPredicates,
 				false,
 				schedulerapi.DefaultPercentageOfNodesToScore)
-			machine, err := scheduler.Schedule(test.pod, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
+			result, err := scheduler.Schedule(test.pod, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
 			if !reflect.DeepEqual(err, test.wErr) {
 				t.Errorf("Unexpected error: %v, expected: %v", err, test.wErr)
 			}
-			if test.expectedHosts != nil && !test.expectedHosts.Has(machine) {
-				t.Errorf("Expected: %s, got: %s", test.expectedHosts, machine)
+			if test.expectedHosts != nil && !test.expectedHosts.Has(result.SuggestedHost) {
+				t.Errorf("Expected: %s, got: %s", test.expectedHosts, result.SuggestedHost)
 			}
 		})
 	}

View File

@@ -291,15 +291,16 @@ func (sched *Scheduler) recordSchedulingFailure(pod *v1.Pod, err error, reason s
 	})
 }

-// schedule implements the scheduling algorithm and returns the suggested host.
-func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
-	host, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
+// schedule implements the scheduling algorithm and returns the suggested result(host,
+// evaluated nodes number,feasible nodes number).
+func (sched *Scheduler) schedule(pod *v1.Pod) (core.ScheduleResult, error) {
+	result, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
 	if err != nil {
 		pod = pod.DeepCopy()
 		sched.recordSchedulingFailure(pod, err, v1.PodReasonUnschedulable, err.Error())
-		return "", err
+		return core.ScheduleResult{}, err
 	}
-	return host, err
+	return result, err
 }

 // preempt tries to create room for a pod that has failed to schedule, by preempting lower priority pods if possible.
@@ -468,7 +469,7 @@ func (sched *Scheduler) scheduleOne() {

 	// Synchronously attempt to find a fit for the pod.
 	start := time.Now()
-	suggestedHost, err := sched.schedule(pod)
+	scheduleResult, err := sched.schedule(pod)
 	if err != nil {
 		// schedule() may have failed because the pod would not fit on any host, so we try to
 		// preempt, with the expectation that the next time the pod is tried for scheduling it
@@ -507,7 +508,7 @@ func (sched *Scheduler) scheduleOne() {
 	// Otherwise, binding of volumes is started after the pod is assumed, but before pod binding.
 	//
 	// This function modifies 'assumedPod' if volume binding is required.
-	allBound, err := sched.assumeVolumes(assumedPod, suggestedHost)
+	allBound, err := sched.assumeVolumes(assumedPod, scheduleResult.SuggestedHost)
 	if err != nil {
 		klog.Errorf("error assuming volumes: %v", err)
 		metrics.PodScheduleErrors.Inc()
@@ -516,7 +517,7 @@ func (sched *Scheduler) scheduleOne() {

 	// Run "reserve" plugins.
 	for _, pl := range plugins.ReservePlugins() {
-		if err := pl.Reserve(plugins, assumedPod, suggestedHost); err != nil {
+		if err := pl.Reserve(plugins, assumedPod, scheduleResult.SuggestedHost); err != nil {
 			klog.Errorf("error while running %v reserve plugin for pod %v: %v", pl.Name(), assumedPod.Name, err)
 			sched.recordSchedulingFailure(assumedPod, err, SchedulerError,
 				fmt.Sprintf("reserve plugin %v failed", pl.Name()))
@@ -524,8 +525,8 @@ func (sched *Scheduler) scheduleOne() {
 			return
 		}
 	}
-	// assume modifies `assumedPod` by setting NodeName=suggestedHost
-	err = sched.assume(assumedPod, suggestedHost)
+	// assume modifies `assumedPod` by setting NodeName=scheduleResult.SuggestedHost
+	err = sched.assume(assumedPod, scheduleResult.SuggestedHost)
 	if err != nil {
 		klog.Errorf("error assuming pod: %v", err)
 		metrics.PodScheduleErrors.Inc()
@@ -545,7 +546,7 @@ func (sched *Scheduler) scheduleOne() {

 	// Run "prebind" plugins.
 	for _, pl := range plugins.PrebindPlugins() {
-		approved, err := pl.Prebind(plugins, assumedPod, suggestedHost)
+		approved, err := pl.Prebind(plugins, assumedPod, scheduleResult.SuggestedHost)
 		if err != nil {
 			approved = false
 			klog.Errorf("error while running %v prebind plugin for pod %v: %v", pl.Name(), assumedPod.Name, err)
@@ -571,7 +572,7 @@ func (sched *Scheduler) scheduleOne() {
 			ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
 			Target: v1.ObjectReference{
 				Kind: "Node",
-				Name: suggestedHost,
+				Name: scheduleResult.SuggestedHost,
 			},
 		})
 		metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
@@ -579,6 +580,7 @@ func (sched *Scheduler) scheduleOne() {
 			klog.Errorf("error binding pod: %v", err)
 			metrics.PodScheduleErrors.Inc()
 		} else {
+			klog.V(2).Infof("pod %v/%v is bound successfully on node %v, %d nodes evaluated, %d nodes were found feasible", assumedPod.Namespace, assumedPod.Name, scheduleResult.SuggestedHost, scheduleResult.EvaluatedNodes, scheduleResult.FeasibleNodes)
 			metrics.PodScheduleSuccesses.Inc()
 		}
 	}()

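The added klog.V(2) line above is where the new counts become visible to operators. A small sketch of what the message renders to, reusing the format string from the patch with made-up pod and node names:

package main

import "fmt"

func main() {
	// Hypothetical values; the format string matches the klog.V(2) call above.
	namespace, name := "default", "nginx"
	host, evaluated, feasible := "machine1", 5, 3
	fmt.Printf("pod %v/%v is bound successfully on node %v, %d nodes evaluated, %d nodes were found feasible\n",
		namespace, name, host, evaluated, feasible)
}

Because the message is logged at verbosity level 2, it only appears when the scheduler runs with -v=2 or higher.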
View File

@@ -145,12 +145,12 @@ func PriorityOne(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeI
 }

 type mockScheduler struct {
-	machine string
+	result core.ScheduleResult
 	err    error
 }

-func (es mockScheduler) Schedule(pod *v1.Pod, ml algorithm.NodeLister) (string, error) {
-	return es.machine, es.err
+func (es mockScheduler) Schedule(pod *v1.Pod, ml algorithm.NodeLister) (core.ScheduleResult, error) {
+	return es.result, es.err
 }

 func (es mockScheduler) Predicates() map[string]predicates.FitPredicate {
@@ -222,7 +222,7 @@ func TestScheduler(t *testing.T) {
 		{
 			name:    "bind assumed pod scheduled",
 			sendPod: podWithID("foo", ""),
-			algo:    mockScheduler{testNode.Name, nil},
+			algo:    mockScheduler{core.ScheduleResult{SuggestedHost: testNode.Name, EvaluatedNodes: 1, FeasibleNodes: 1}, nil},
 			expectBind:       &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
 			expectAssumedPod: podWithID("foo", testNode.Name),
 			eventReason:      "Scheduled",
@@ -230,7 +230,7 @@ func TestScheduler(t *testing.T) {
 		{
 			name:    "error pod failed scheduling",
 			sendPod: podWithID("foo", ""),
-			algo:    mockScheduler{testNode.Name, errS},
+			algo:    mockScheduler{core.ScheduleResult{SuggestedHost: testNode.Name, EvaluatedNodes: 1, FeasibleNodes: 1}, errS},
 			expectError:    errS,
 			expectErrorPod: podWithID("foo", ""),
 			eventReason:    "FailedScheduling",
@@ -238,7 +238,7 @@ func TestScheduler(t *testing.T) {
 		{
 			name:    "error bind forget pod failed scheduling",
 			sendPod: podWithID("foo", ""),
-			algo:    mockScheduler{testNode.Name, nil},
+			algo:    mockScheduler{core.ScheduleResult{SuggestedHost: testNode.Name, EvaluatedNodes: 1, FeasibleNodes: 1}, nil},
 			expectBind:       &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
 			expectAssumedPod: podWithID("foo", testNode.Name),
 			injectBindError:  errB,
@@ -248,7 +248,7 @@ func TestScheduler(t *testing.T) {
 			eventReason: "FailedScheduling",
 		}, {
 			sendPod:     deletingPod("foo"),
-			algo:        mockScheduler{"", nil},
+			algo:        mockScheduler{core.ScheduleResult{}, nil},
 			eventReason: "FailedScheduling",
 		},
 	}