Add a node tree that allows iterating over nodes in regions and zones

pull/8/head
Bobby (Babak) Salamat 2018-07-25 14:36:16 -07:00
parent b9544382ba
commit c1896c97ea
2 changed files with 626 additions and 0 deletions

185
pkg/scheduler/cache/node_tree.go vendored Normal file
View File

@ -0,0 +1,185 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"fmt"
"sync"
"k8s.io/api/core/v1"
utilnode "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/staging/src/k8s.io/apimachinery/pkg/util/sets"
"github.com/golang/glog"
)
// NodeTree is a tree-like data structure that holds node names in each zone. Zone names are
// keys to "NodeTree.tree" and values of "NodeTree.tree" are arrays of node names.
type NodeTree struct {
tree map[string]*nodeArray // a map from zone (region-zone) to an array of nodes in the zone.
zones []string // a list of all the zones in the tree (keys)
zoneIndex int
ExhaustedZones sets.String // set of zones that all of their nodes are returned by next()
NumNodes int
mu sync.RWMutex
}
// nodeArray is a struct that has nodes that are in a zone.
// We use a slice (as opposed to a set/map) to store the nodes because iterating over the nodes is
// a lot more frequent than searching them by name.
type nodeArray struct {
nodes []string
lastIndex int
}
func (na *nodeArray) next() (nodeName string, exhausted bool) {
if len(na.nodes) == 0 {
glog.Error("The nodeArray is empty. It should have been deleted from NodeTree.")
return "", false
}
if na.lastIndex >= len(na.nodes) {
return "", true
}
nodeName = na.nodes[na.lastIndex]
na.lastIndex++
return nodeName, false
}
func newNodeTree(nodes []*v1.Node) *NodeTree {
nt := &NodeTree{
tree: make(map[string]*nodeArray),
ExhaustedZones: sets.NewString(),
}
for _, n := range nodes {
nt.AddNode(n)
}
return nt
}
// AddNode adds a node and its corresponding zone to the tree. If the zone already exists, the node
// is added to the array of nodes in that zone.
func (nt *NodeTree) AddNode(n *v1.Node) {
nt.mu.Lock()
defer nt.mu.Unlock()
nt.addNode(n)
}
func (nt *NodeTree) addNode(n *v1.Node) {
zone := utilnode.GetZoneKey(n)
if na, ok := nt.tree[zone]; ok {
for _, nodeName := range na.nodes {
if nodeName == n.Name {
return
}
}
na.nodes = append(na.nodes, n.Name)
nt.tree[zone] = na
} else {
nt.zones = append(nt.zones, zone)
nt.tree[zone] = &nodeArray{nodes: []string{n.Name}, lastIndex: 0}
}
nt.NumNodes++
}
func (nt *NodeTree) RemoveNode(n *v1.Node) error {
nt.mu.Lock()
defer nt.mu.Unlock()
return nt.removeNode(n)
}
func (nt *NodeTree) removeNode(n *v1.Node) error {
zone := utilnode.GetZoneKey(n)
if na, ok := nt.tree[zone]; ok {
for i, nodeName := range na.nodes {
if nodeName == n.Name {
// delete without preserving order
na.nodes[i] = na.nodes[len(na.nodes)-1]
na.nodes = na.nodes[:len(na.nodes)-1]
nt.tree[zone] = na
if len(na.nodes) == 0 {
nt.removeZone(zone)
}
nt.NumNodes--
return nil
}
}
}
return fmt.Errorf("node %v in zone %v was not found", n.Name, zone)
}
// removeZone removes a zone from tree.
// This function must be called while writer locks are hold.
func (nt *NodeTree) removeZone(zone string) {
delete(nt.tree, zone)
for i, z := range nt.zones {
if z == zone {
nt.zones = append(nt.zones[:i], nt.zones[i+1:]...)
}
}
}
func (nt *NodeTree) UpdateNode(old, new *v1.Node) {
var oldZone string
if old != nil {
oldZone = utilnode.GetZoneKey(old)
}
newZone := utilnode.GetZoneKey(new)
// If the zone ID of the node has not changed, we don't need to do anything. Name of the node
// cannot be changed in an update.
if oldZone == newZone {
return
}
nt.mu.Lock()
defer nt.mu.Unlock()
nt.removeNode(old) // No error checking. We ignore whether the old node exists or not.
nt.addNode(new)
}
func (nt *NodeTree) resetExhausted() {
for _, na := range nt.tree {
na.lastIndex = 0
}
nt.ExhaustedZones = sets.NewString()
}
// Next returns the name of the next node. NodeTree iterates over zones and in each zone iterates
// over nodes in a round robin fashion.
func (nt *NodeTree) Next() string {
nt.mu.Lock()
defer nt.mu.Unlock()
if len(nt.zones) == 0 {
return ""
}
for {
if nt.zoneIndex >= len(nt.zones) {
nt.zoneIndex = 0
}
zone := nt.zones[nt.zoneIndex]
nt.zoneIndex++
// We do not check the set of exhausted zones before calling next() on the zone. This ensures
// that if more nodes are added to a zone after it is exhausted, we iterate over the new nodes.
nodeName, exhausted := nt.tree[zone].next()
if exhausted {
nt.ExhaustedZones.Insert(zone)
if len(nt.ExhaustedZones) == len(nt.zones) { // all zones are exhausted. we should reset.
nt.resetExhausted()
}
} else {
return nodeName
}
}
}

441
pkg/scheduler/cache/node_tree_test.go vendored Normal file
View File

@ -0,0 +1,441 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
)
var allNodes = []*v1.Node{
// Node 0: a node without any region-zone label
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
},
},
// Node 1: a node with region label only
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
},
},
},
// Node 2: a node with zone label only
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-2",
Labels: map[string]string{
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 3: a node with proper region and zone labels
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-3",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 4: a node with proper region and zone labels
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-4",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 5: a node with proper region and zone labels in a different zone, same region as above
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-5",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-3",
},
},
},
// Node 6: a node with proper region and zone labels in a new region and zone
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-6",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-2",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 7: a node with proper region and zone labels in a region and zone as node-6
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-7",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-2",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
// Node 8: a node with proper region and zone labels in a region and zone as node-6
{
ObjectMeta: metav1.ObjectMeta{
Name: "node-8",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-2",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
}}
func verifyNodeTree(t *testing.T, nt *NodeTree, expectedTree map[string]*nodeArray) {
expectedNumNodes := int(0)
for _, na := range expectedTree {
expectedNumNodes += len(na.nodes)
}
if nt.NumNodes != expectedNumNodes {
t.Errorf("unexpected NodeTree.numNodes. Expected: %v, Got: %v", expectedNumNodes, nt.NumNodes)
}
if !reflect.DeepEqual(nt.tree, expectedTree) {
t.Errorf("The node tree is not the same as expected. Expected: %v, Got: %v", expectedTree, nt.tree)
}
if len(nt.zones) != len(expectedTree) {
t.Errorf("Number of zones in NodeTree.zones is not expected. Expected: %v, Got: %v", len(expectedTree), len(nt.zones))
}
for _, z := range nt.zones {
if _, ok := expectedTree[z]; !ok {
t.Errorf("zone %v is not expected to exist in NodeTree.zones", z)
}
}
}
func TestNodeTree_AddNode(t *testing.T) {
tests := []struct {
name string
nodesToAdd []*v1.Node
expectedTree map[string]*nodeArray
}{
{
name: "single node no labels",
nodesToAdd: allNodes[:1],
expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 0}},
},
{
name: "mix of nodes with and without proper labels",
nodesToAdd: allNodes[:4],
expectedTree: map[string]*nodeArray{
"": {[]string{"node-0"}, 0},
"region-1:\x00:": {[]string{"node-1"}, 0},
":\x00:zone-2": {[]string{"node-2"}, 0},
"region-1:\x00:zone-2": {[]string{"node-3"}, 0},
},
},
{
name: "mix of nodes with and without proper labels and some zones with multiple nodes",
nodesToAdd: allNodes[:7],
expectedTree: map[string]*nodeArray{
"": {[]string{"node-0"}, 0},
"region-1:\x00:": {[]string{"node-1"}, 0},
":\x00:zone-2": {[]string{"node-2"}, 0},
"region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0},
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(nil)
for _, n := range test.nodesToAdd {
nt.AddNode(n)
}
verifyNodeTree(t, nt, test.expectedTree)
})
}
}
func TestNodeTree_RemoveNode(t *testing.T) {
tests := []struct {
name string
existingNodes []*v1.Node
nodesToRemove []*v1.Node
expectedTree map[string]*nodeArray
expectError bool
}{
{
name: "remove a single node with no labels",
existingNodes: allNodes[:7],
nodesToRemove: allNodes[:1],
expectedTree: map[string]*nodeArray{
"region-1:\x00:": {[]string{"node-1"}, 0},
":\x00:zone-2": {[]string{"node-2"}, 0},
"region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0},
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
},
},
{
name: "remove a few nodes including one from a zone with multiple nodes",
existingNodes: allNodes[:7],
nodesToRemove: allNodes[1:4],
expectedTree: map[string]*nodeArray{
"": {[]string{"node-0"}, 0},
"region-1:\x00:zone-2": {[]string{"node-4"}, 0},
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
},
},
{
name: "remove all nodes",
existingNodes: allNodes[:7],
nodesToRemove: allNodes[:7],
expectedTree: map[string]*nodeArray{},
},
{
name: "remove non-existing node",
existingNodes: nil,
nodesToRemove: allNodes[:5],
expectedTree: map[string]*nodeArray{},
expectError: true,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(test.existingNodes)
for _, n := range test.nodesToRemove {
err := nt.RemoveNode(n)
if test.expectError == (err == nil) {
t.Errorf("unexpected returned error value: %v", err)
}
}
verifyNodeTree(t, nt, test.expectedTree)
})
}
}
func TestNodeTree_UpdateNode(t *testing.T) {
tests := []struct {
name string
existingNodes []*v1.Node
nodeToUpdate *v1.Node
expectedTree map[string]*nodeArray
}{
{
name: "update a node without label",
existingNodes: allNodes[:7],
nodeToUpdate: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
expectedTree: map[string]*nodeArray{
"region-1:\x00:": {[]string{"node-1"}, 0},
":\x00:zone-2": {[]string{"node-2"}, 0},
"region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 0},
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
},
},
{
name: "update the only existing node",
existingNodes: allNodes[:1],
nodeToUpdate: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
expectedTree: map[string]*nodeArray{
"region-1:\x00:zone-2": {[]string{"node-0"}, 0},
},
},
{
name: "update non-existing node",
existingNodes: allNodes[:1],
nodeToUpdate: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-new",
Labels: map[string]string{
kubeletapis.LabelZoneRegion: "region-1",
kubeletapis.LabelZoneFailureDomain: "zone-2",
},
},
},
expectedTree: map[string]*nodeArray{
"": {[]string{"node-0"}, 0},
"region-1:\x00:zone-2": {[]string{"node-new"}, 0},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(test.existingNodes)
var oldNode *v1.Node
for _, n := range allNodes {
if n.Name == test.nodeToUpdate.Name {
oldNode = n
break
}
}
if oldNode == nil {
oldNode = &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "nonexisting-node"}}
}
nt.UpdateNode(oldNode, test.nodeToUpdate)
verifyNodeTree(t, nt, test.expectedTree)
})
}
}
func TestNodeTree_Next(t *testing.T) {
tests := []struct {
name string
nodesToAdd []*v1.Node
numRuns int // number of times to run Next()
expectedOutput []string
}{
{
name: "empty tree",
nodesToAdd: nil,
numRuns: 2,
expectedOutput: []string{"", ""},
},
{
name: "should go back to the first node after finishing a round",
nodesToAdd: allNodes[:1],
numRuns: 2,
expectedOutput: []string{"node-0", "node-0"},
},
{
name: "should go back to the first node after going over all nodes",
nodesToAdd: allNodes[:4],
numRuns: 5,
expectedOutput: []string{"node-0", "node-1", "node-2", "node-3", "node-0"},
},
{
name: "should go to all zones before going to the second nodes in the same zone",
nodesToAdd: allNodes[:9],
numRuns: 11,
expectedOutput: []string{"node-0", "node-1", "node-2", "node-3", "node-5", "node-6", "node-4", "node-7", "node-8", "node-0", "node-1"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(test.nodesToAdd)
var output []string
for i := 0; i < test.numRuns; i++ {
output = append(output, nt.Next())
}
if !reflect.DeepEqual(output, test.expectedOutput) {
t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output)
}
})
}
}
func TestNodeTreeMultiOperations(t *testing.T) {
tests := []struct {
name string
nodesToAdd []*v1.Node
nodesToRemove []*v1.Node
operations []string
expectedOutput []string
}{
{
name: "add and remove all nodes between two Next operations",
nodesToAdd: allNodes[2:9],
nodesToRemove: allNodes[2:9],
operations: []string{"add", "add", "next", "add", "remove", "remove", "remove", "next"},
expectedOutput: []string{"node-2", ""},
},
{
name: "add and remove some nodes between two Next operations",
nodesToAdd: allNodes[2:9],
nodesToRemove: allNodes[2:9],
operations: []string{"add", "add", "next", "add", "remove", "remove", "next"},
expectedOutput: []string{"node-2", "node-4"},
},
{
name: "remove nodes already iterated on and add new nodes",
nodesToAdd: allNodes[2:9],
nodesToRemove: allNodes[2:9],
operations: []string{"add", "add", "next", "next", "add", "remove", "remove", "next"},
expectedOutput: []string{"node-2", "node-3", "node-4"},
},
{
name: "add more nodes to an exhausted zone",
nodesToAdd: append(allNodes[4:9], allNodes[3]),
nodesToRemove: nil,
operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "add", "next", "next", "next"},
expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-3", "node-8", "node-4"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nt := newNodeTree(nil)
addIndex := 0
removeIndex := 0
var output []string
for _, op := range test.operations {
switch op {
case "add":
if addIndex >= len(test.nodesToAdd) {
t.Error("more add operations than nodesToAdd")
} else {
nt.AddNode(test.nodesToAdd[addIndex])
addIndex++
}
case "remove":
if removeIndex >= len(test.nodesToRemove) {
t.Error("more remove operations than nodesToRemove")
} else {
nt.RemoveNode(test.nodesToRemove[removeIndex])
removeIndex++
}
case "next":
output = append(output, nt.Next())
default:
t.Errorf("unknow operation: %v", op)
}
}
if !reflect.DeepEqual(output, test.expectedOutput) {
t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output)
}
})
}
}