Add the logic to pick one node for preemption

pull/6/head
Bobby (Babak) Salamat 2017-08-09 11:36:35 -07:00
parent 2c63705e09
commit 1cec8bac9c
2 changed files with 301 additions and 42 deletions

View File

@ -18,6 +18,7 @@ package core
import (
"fmt"
"math"
"sort"
"strings"
"sync"
@ -176,8 +177,9 @@ func (g *genericScheduler) preempt(pod *v1.Pod, nodeLister algorithm.NodeLister)
if len(nodeToPods) == 0 {
return "", nil
}
// TODO: Add a node scoring mechanism and perform preemption
return "", nil
node := pickOneNodeForPreemption(nodeToPods)
// TODO: Add actual preemption of pods
return node, nil
}
// Filters the nodes to find the ones that fit based on the given predicate functions
@ -444,6 +446,89 @@ func EqualPriorityMap(_ *v1.Pod, _ interface{}, nodeInfo *schedulercache.NodeInf
}, nil
}
// pickOneNodeForPreemption chooses one node among the given nodes. It assumes
// pods in each map entry are ordered by decreasing priority.
// It picks a node based on the following criteria:
// 1. A node with minimum highest priority victim is picked.
// 2. Ties are broken by sum of priorities of all victims.
// 3. If there are still ties, node with the minimum number of victims is picked.
// 4. If there are still ties, the first such node in the map is picked (sort of randomly).
func pickOneNodeForPreemption(nodesToPods map[string][]*v1.Pod) string {
type nodeScore struct {
nodeName string
highestPriority int32
sumPriorities int64
numPods int
}
if len(nodesToPods) == 0 {
return ""
}
minHighestPriority := int32(math.MaxInt32)
nodeScores := []*nodeScore{}
for nodeName, pods := range nodesToPods {
if len(pods) == 0 {
// We found a node that doesn't need any preemption. Return it!
// This should happen rarely when one or more pods are terminated between
// the time that scheduler tries to schedule the pod and the time that
// preemption logic tries to find nodes for preemption.
return nodeName
}
// highestPodPriority is the highest priority among the victims on this node.
highestPodPriority := util.GetPodPriority(pods[0])
if highestPodPriority < minHighestPriority {
minHighestPriority = highestPodPriority
}
nodeScores = append(nodeScores, &nodeScore{nodeName: nodeName, highestPriority: highestPodPriority, numPods: len(pods)})
}
// Find the nodes with minimum highest priority victim.
minSumPriorities := int64(math.MaxInt64)
lowestHighPriorityNodes := []*nodeScore{}
for _, nodeScore := range nodeScores {
if nodeScore.highestPriority == minHighestPriority {
lowestHighPriorityNodes = append(lowestHighPriorityNodes, nodeScore)
var sumPriorities int64
for _, pod := range nodesToPods[nodeScore.nodeName] {
// We add MaxInt32+1 to all priorities to make all of them >= 0. This is
// needed so that a node with a few pods with negative priority is not
// picked over a node with a smaller number of pods with the same negative
// priority (and similar scenarios).
sumPriorities += int64(util.GetPodPriority(pod)) + int64(math.MaxInt32+1)
}
if sumPriorities < minSumPriorities {
minSumPriorities = sumPriorities
}
nodeScore.sumPriorities = sumPriorities
}
}
if len(lowestHighPriorityNodes) == 1 {
return lowestHighPriorityNodes[0].nodeName
}
// There are multiple nodes with the same minimum highest priority victim.
// Choose the one(s) with lowest sum of priorities.
minNumPods := math.MaxInt32
lowestSumPriorityNodes := []*nodeScore{}
for _, nodeScore := range lowestHighPriorityNodes {
if nodeScore.sumPriorities == minSumPriorities {
lowestSumPriorityNodes = append(lowestSumPriorityNodes, nodeScore)
if nodeScore.numPods < minNumPods {
minNumPods = nodeScore.numPods
}
}
}
if len(lowestSumPriorityNodes) == 1 {
return lowestSumPriorityNodes[0].nodeName
}
// There are still more than one node with minimum highest priority victim and
// lowest sum of victim priorities. Find the anyone with minimum number of victims.
for _, nodeScore := range lowestSumPriorityNodes {
if nodeScore.numPods == minNumPods {
return nodeScore.nodeName
}
}
glog.Errorf("We should never reach here!")
return ""
}
// selectNodesForPreemption finds all the nodes with possible victims for
// preemption in parallel.
func selectNodesForPreemption(pod *v1.Pod,
@ -461,7 +546,7 @@ func selectNodesForPreemption(pod *v1.Pod,
checkNode := func(i int) {
nodeName := nodes[i].Name
pods, fits := selectVictimsOnNode(pod, meta.ShallowCopy(), nodeNameToInfo[nodeName], predicates)
if fits && len(pods) != 0 {
if fits {
resultLock.Lock()
nodeNameToPods[nodeName] = pods
resultLock.Unlock()

View File

@ -587,46 +587,59 @@ func PredicateMetadata(p *v1.Pod, nodeInfo map[string]*schedulercache.NodeInfo)
return algorithmpredicates.NewPredicateMetadataFactory(schedulertesting.FakePodLister{p})(p, nodeInfo)
}
var smallContainers = []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMilliCpuRequest, 10) + "m"),
"memory": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMemoryRequest, 10)),
},
},
},
}
var mediumContainers = []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*2, 10) + "m"),
"memory": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMemoryRequest*2, 10)),
},
},
},
}
var largeContainers = []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*3, 10) + "m"),
"memory": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMemoryRequest*3, 10)),
},
},
},
}
var veryLargeContainers = []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*5, 10) + "m"),
"memory": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMemoryRequest*5, 10)),
},
},
},
}
var negPriority, lowPriority, midPriority, highPriority, veryHighPriority = int32(-100), int32(0), int32(100), int32(1000), int32(10000)
// TestSelectNodesForPreemption tests selectNodesForPreemption. This test assumes
// that podsFitsOnNode works correctly and is tested separately.
func TestSelectNodesForPreemption(t *testing.T) {
smallContainers := []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMilliCpuRequest, 10) + "m"),
"memory": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMemoryRequest, 10)),
},
},
},
}
mediumContainers := []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*2, 10) + "m"),
"memory": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMemoryRequest*2, 10)),
},
},
},
}
largeContainers := []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*3, 10) + "m"),
"memory": resource.MustParse(
strconv.FormatInt(priorityutil.DefaultMemoryRequest*3, 10)),
},
},
},
}
lowPriority, midPriority, highPriority := int32(0), int32(100), int32(1000)
tests := []struct {
name string
predicates map[string]algorithm.FitPredicate
@ -654,7 +667,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{},
expected: map[string]map[string]bool{"machine1": {}, "machine2": {}},
},
{
name: "a pod that fits on one machine with no preemption",
@ -664,7 +677,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{},
expected: map[string]map[string]bool{"machine1": {}},
},
{
name: "a pod that fits on both machines when lower priority pods are preempted",
@ -789,3 +802,164 @@ func TestSelectNodesForPreemption(t *testing.T) {
}
}
}
// TestPickOneNodeForPreemption tests pickOneNodeForPreemption.
func TestPickOneNodeForPreemption(t *testing.T) {
tests := []struct {
name string
predicates map[string]algorithm.FitPredicate
nodes []string
pod *v1.Pod
pods []*v1.Pod
expected []string // any of the items is valid
}{
{
name: "No node needs preemption",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}}},
expected: []string{"machine1"},
},
{
name: "a pod that fits on both machines when lower priority pods are preempted",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
expected: []string{"machine1", "machine2"},
},
{
name: "a pod that fits on a machine with no preemption",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
expected: []string{"machine3"},
},
{
name: "machine with min highest priority pod is picked",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
},
expected: []string{"machine3"},
},
{
name: "when highest priorities are the same, minimum sum of priorities is picked",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
},
expected: []string{"machine2"},
},
{
name: "when highest priority and sum are the same, minimum number of pods is picked",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.4"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &negPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
},
expected: []string{"machine2"},
},
{
// pickOneNodeForPreemption adjusts pod priorities when finding the sum of the victims. This
// test ensures that the logic works correctly.
name: "sum of adjusted priorities is considered",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &negPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
},
expected: []string{"machine2"},
},
{
name: "non-overlapping lowest high priority, sum priorities, and number of pods",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3", "machine4"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &veryHighPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.4"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.4"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine4"}},
},
expected: []string{"machine1"},
},
}
for _, test := range tests {
nodes := []*v1.Node{}
for _, n := range test.nodes {
nodes = append(nodes, makeNode(n, priorityutil.DefaultMilliCpuRequest*5, priorityutil.DefaultMemoryRequest*5))
}
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nodes)
node := pickOneNodeForPreemption(selectNodesForPreemption(test.pod, nodeNameToInfo, nodes, test.predicates, PredicateMetadata))
found := false
for _, nodeName := range test.expected {
if node == nodeName {
found = true
break
}
}
if !found {
t.Errorf("test [%v]: unexpected node: %v", test.name, node)
}
}
}