From c1896c97ea507ef5ac04bfa624f7d1bcd1692e24 Mon Sep 17 00:00:00 2001 From: "Bobby (Babak) Salamat" Date: Wed, 25 Jul 2018 14:36:16 -0700 Subject: [PATCH] Add a node tree that allows iterating over nodes in regions and zones --- pkg/scheduler/cache/node_tree.go | 185 +++++++++++ pkg/scheduler/cache/node_tree_test.go | 441 ++++++++++++++++++++++++++ 2 files changed, 626 insertions(+) create mode 100644 pkg/scheduler/cache/node_tree.go create mode 100644 pkg/scheduler/cache/node_tree_test.go diff --git a/pkg/scheduler/cache/node_tree.go b/pkg/scheduler/cache/node_tree.go new file mode 100644 index 0000000000..7ec3dd744e --- /dev/null +++ b/pkg/scheduler/cache/node_tree.go @@ -0,0 +1,185 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cache + +import ( + "fmt" + "sync" + + "k8s.io/api/core/v1" + utilnode "k8s.io/kubernetes/pkg/util/node" + "k8s.io/kubernetes/staging/src/k8s.io/apimachinery/pkg/util/sets" + + "github.com/golang/glog" +) + +// NodeTree is a tree-like data structure that holds node names in each zone. Zone names are +// keys to "NodeTree.tree" and values of "NodeTree.tree" are arrays of node names. +type NodeTree struct { + tree map[string]*nodeArray // a map from zone (region-zone) to an array of nodes in the zone. + zones []string // a list of all the zones in the tree (keys) + zoneIndex int + ExhaustedZones sets.String // set of zones that all of their nodes are returned by next() + NumNodes int + mu sync.RWMutex +} + +// nodeArray is a struct that has nodes that are in a zone. +// We use a slice (as opposed to a set/map) to store the nodes because iterating over the nodes is +// a lot more frequent than searching them by name. +type nodeArray struct { + nodes []string + lastIndex int +} + +func (na *nodeArray) next() (nodeName string, exhausted bool) { + if len(na.nodes) == 0 { + glog.Error("The nodeArray is empty. It should have been deleted from NodeTree.") + return "", false + } + if na.lastIndex >= len(na.nodes) { + return "", true + } + nodeName = na.nodes[na.lastIndex] + na.lastIndex++ + return nodeName, false +} + +func newNodeTree(nodes []*v1.Node) *NodeTree { + nt := &NodeTree{ + tree: make(map[string]*nodeArray), + ExhaustedZones: sets.NewString(), + } + for _, n := range nodes { + nt.AddNode(n) + } + return nt +} + +// AddNode adds a node and its corresponding zone to the tree. If the zone already exists, the node +// is added to the array of nodes in that zone. +func (nt *NodeTree) AddNode(n *v1.Node) { + nt.mu.Lock() + defer nt.mu.Unlock() + nt.addNode(n) +} + +func (nt *NodeTree) addNode(n *v1.Node) { + zone := utilnode.GetZoneKey(n) + if na, ok := nt.tree[zone]; ok { + for _, nodeName := range na.nodes { + if nodeName == n.Name { + return + } + } + na.nodes = append(na.nodes, n.Name) + nt.tree[zone] = na + } else { + nt.zones = append(nt.zones, zone) + nt.tree[zone] = &nodeArray{nodes: []string{n.Name}, lastIndex: 0} + } + nt.NumNodes++ +} + +func (nt *NodeTree) RemoveNode(n *v1.Node) error { + nt.mu.Lock() + defer nt.mu.Unlock() + return nt.removeNode(n) +} + +func (nt *NodeTree) removeNode(n *v1.Node) error { + zone := utilnode.GetZoneKey(n) + if na, ok := nt.tree[zone]; ok { + for i, nodeName := range na.nodes { + if nodeName == n.Name { + // delete without preserving order + na.nodes[i] = na.nodes[len(na.nodes)-1] + na.nodes = na.nodes[:len(na.nodes)-1] + nt.tree[zone] = na + if len(na.nodes) == 0 { + nt.removeZone(zone) + } + nt.NumNodes-- + return nil + } + } + } + return fmt.Errorf("node %v in zone %v was not found", n.Name, zone) +} + +// removeZone removes a zone from tree. +// This function must be called while writer locks are hold. +func (nt *NodeTree) removeZone(zone string) { + delete(nt.tree, zone) + for i, z := range nt.zones { + if z == zone { + nt.zones = append(nt.zones[:i], nt.zones[i+1:]...) + } + } +} + +func (nt *NodeTree) UpdateNode(old, new *v1.Node) { + var oldZone string + if old != nil { + oldZone = utilnode.GetZoneKey(old) + } + newZone := utilnode.GetZoneKey(new) + // If the zone ID of the node has not changed, we don't need to do anything. Name of the node + // cannot be changed in an update. + if oldZone == newZone { + return + } + nt.mu.Lock() + defer nt.mu.Unlock() + nt.removeNode(old) // No error checking. We ignore whether the old node exists or not. + nt.addNode(new) +} + +func (nt *NodeTree) resetExhausted() { + for _, na := range nt.tree { + na.lastIndex = 0 + } + nt.ExhaustedZones = sets.NewString() +} + +// Next returns the name of the next node. NodeTree iterates over zones and in each zone iterates +// over nodes in a round robin fashion. +func (nt *NodeTree) Next() string { + nt.mu.Lock() + defer nt.mu.Unlock() + if len(nt.zones) == 0 { + return "" + } + for { + if nt.zoneIndex >= len(nt.zones) { + nt.zoneIndex = 0 + } + zone := nt.zones[nt.zoneIndex] + nt.zoneIndex++ + // We do not check the set of exhausted zones before calling next() on the zone. This ensures + // that if more nodes are added to a zone after it is exhausted, we iterate over the new nodes. + nodeName, exhausted := nt.tree[zone].next() + if exhausted { + nt.ExhaustedZones.Insert(zone) + if len(nt.ExhaustedZones) == len(nt.zones) { // all zones are exhausted. we should reset. + nt.resetExhausted() + } + } else { + return nodeName + } + } +} diff --git a/pkg/scheduler/cache/node_tree_test.go b/pkg/scheduler/cache/node_tree_test.go new file mode 100644 index 0000000000..135c7c0bb4 --- /dev/null +++ b/pkg/scheduler/cache/node_tree_test.go @@ -0,0 +1,441 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cache + +import ( + "reflect" + "testing" + + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" +) + +var allNodes = []*v1.Node{ + // Node 0: a node without any region-zone label + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-0", + }, + }, + // Node 1: a node with region label only + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-1", + }, + }, + }, + // Node 2: a node with zone label only + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-2", + Labels: map[string]string{ + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }, + // Node 3: a node with proper region and zone labels + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-3", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-1", + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }, + // Node 4: a node with proper region and zone labels + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-4", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-1", + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }, + // Node 5: a node with proper region and zone labels in a different zone, same region as above + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-5", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-1", + kubeletapis.LabelZoneFailureDomain: "zone-3", + }, + }, + }, + // Node 6: a node with proper region and zone labels in a new region and zone + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-6", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-2", + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }, + // Node 7: a node with proper region and zone labels in a region and zone as node-6 + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-7", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-2", + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }, + // Node 8: a node with proper region and zone labels in a region and zone as node-6 + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node-8", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-2", + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }} + +func verifyNodeTree(t *testing.T, nt *NodeTree, expectedTree map[string]*nodeArray) { + expectedNumNodes := int(0) + for _, na := range expectedTree { + expectedNumNodes += len(na.nodes) + } + if nt.NumNodes != expectedNumNodes { + t.Errorf("unexpected NodeTree.numNodes. Expected: %v, Got: %v", expectedNumNodes, nt.NumNodes) + } + if !reflect.DeepEqual(nt.tree, expectedTree) { + t.Errorf("The node tree is not the same as expected. Expected: %v, Got: %v", expectedTree, nt.tree) + } + if len(nt.zones) != len(expectedTree) { + t.Errorf("Number of zones in NodeTree.zones is not expected. Expected: %v, Got: %v", len(expectedTree), len(nt.zones)) + } + for _, z := range nt.zones { + if _, ok := expectedTree[z]; !ok { + t.Errorf("zone %v is not expected to exist in NodeTree.zones", z) + } + } +} + +func TestNodeTree_AddNode(t *testing.T) { + tests := []struct { + name string + nodesToAdd []*v1.Node + expectedTree map[string]*nodeArray + }{ + { + name: "single node no labels", + nodesToAdd: allNodes[:1], + expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 0}}, + }, + { + name: "mix of nodes with and without proper labels", + nodesToAdd: allNodes[:4], + expectedTree: map[string]*nodeArray{ + "": {[]string{"node-0"}, 0}, + "region-1:\x00:": {[]string{"node-1"}, 0}, + ":\x00:zone-2": {[]string{"node-2"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-3"}, 0}, + }, + }, + { + name: "mix of nodes with and without proper labels and some zones with multiple nodes", + nodesToAdd: allNodes[:7], + expectedTree: map[string]*nodeArray{ + "": {[]string{"node-0"}, 0}, + "region-1:\x00:": {[]string{"node-1"}, 0}, + ":\x00:zone-2": {[]string{"node-2"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + nt := newNodeTree(nil) + for _, n := range test.nodesToAdd { + nt.AddNode(n) + } + verifyNodeTree(t, nt, test.expectedTree) + }) + } +} + +func TestNodeTree_RemoveNode(t *testing.T) { + tests := []struct { + name string + existingNodes []*v1.Node + nodesToRemove []*v1.Node + expectedTree map[string]*nodeArray + expectError bool + }{ + { + name: "remove a single node with no labels", + existingNodes: allNodes[:7], + nodesToRemove: allNodes[:1], + expectedTree: map[string]*nodeArray{ + "region-1:\x00:": {[]string{"node-1"}, 0}, + ":\x00:zone-2": {[]string{"node-2"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, + }, + }, + { + name: "remove a few nodes including one from a zone with multiple nodes", + existingNodes: allNodes[:7], + nodesToRemove: allNodes[1:4], + expectedTree: map[string]*nodeArray{ + "": {[]string{"node-0"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-4"}, 0}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, + }, + }, + { + name: "remove all nodes", + existingNodes: allNodes[:7], + nodesToRemove: allNodes[:7], + expectedTree: map[string]*nodeArray{}, + }, + { + name: "remove non-existing node", + existingNodes: nil, + nodesToRemove: allNodes[:5], + expectedTree: map[string]*nodeArray{}, + expectError: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + nt := newNodeTree(test.existingNodes) + for _, n := range test.nodesToRemove { + err := nt.RemoveNode(n) + if test.expectError == (err == nil) { + t.Errorf("unexpected returned error value: %v", err) + } + } + verifyNodeTree(t, nt, test.expectedTree) + }) + } +} + +func TestNodeTree_UpdateNode(t *testing.T) { + tests := []struct { + name string + existingNodes []*v1.Node + nodeToUpdate *v1.Node + expectedTree map[string]*nodeArray + }{ + { + name: "update a node without label", + existingNodes: allNodes[:7], + nodeToUpdate: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-0", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-1", + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }, + expectedTree: map[string]*nodeArray{ + "region-1:\x00:": {[]string{"node-1"}, 0}, + ":\x00:zone-2": {[]string{"node-2"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 0}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, + }, + }, + { + name: "update the only existing node", + existingNodes: allNodes[:1], + nodeToUpdate: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-0", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-1", + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }, + expectedTree: map[string]*nodeArray{ + "region-1:\x00:zone-2": {[]string{"node-0"}, 0}, + }, + }, + { + name: "update non-existing node", + existingNodes: allNodes[:1], + nodeToUpdate: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-new", + Labels: map[string]string{ + kubeletapis.LabelZoneRegion: "region-1", + kubeletapis.LabelZoneFailureDomain: "zone-2", + }, + }, + }, + expectedTree: map[string]*nodeArray{ + "": {[]string{"node-0"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-new"}, 0}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + nt := newNodeTree(test.existingNodes) + var oldNode *v1.Node + for _, n := range allNodes { + if n.Name == test.nodeToUpdate.Name { + oldNode = n + break + } + } + if oldNode == nil { + oldNode = &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "nonexisting-node"}} + } + nt.UpdateNode(oldNode, test.nodeToUpdate) + verifyNodeTree(t, nt, test.expectedTree) + }) + } +} + +func TestNodeTree_Next(t *testing.T) { + tests := []struct { + name string + nodesToAdd []*v1.Node + numRuns int // number of times to run Next() + expectedOutput []string + }{ + { + name: "empty tree", + nodesToAdd: nil, + numRuns: 2, + expectedOutput: []string{"", ""}, + }, + { + name: "should go back to the first node after finishing a round", + nodesToAdd: allNodes[:1], + numRuns: 2, + expectedOutput: []string{"node-0", "node-0"}, + }, + { + name: "should go back to the first node after going over all nodes", + nodesToAdd: allNodes[:4], + numRuns: 5, + expectedOutput: []string{"node-0", "node-1", "node-2", "node-3", "node-0"}, + }, + { + name: "should go to all zones before going to the second nodes in the same zone", + nodesToAdd: allNodes[:9], + numRuns: 11, + expectedOutput: []string{"node-0", "node-1", "node-2", "node-3", "node-5", "node-6", "node-4", "node-7", "node-8", "node-0", "node-1"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + nt := newNodeTree(test.nodesToAdd) + + var output []string + for i := 0; i < test.numRuns; i++ { + output = append(output, nt.Next()) + } + if !reflect.DeepEqual(output, test.expectedOutput) { + t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output) + } + }) + } +} + +func TestNodeTreeMultiOperations(t *testing.T) { + tests := []struct { + name string + nodesToAdd []*v1.Node + nodesToRemove []*v1.Node + operations []string + expectedOutput []string + }{ + { + name: "add and remove all nodes between two Next operations", + nodesToAdd: allNodes[2:9], + nodesToRemove: allNodes[2:9], + operations: []string{"add", "add", "next", "add", "remove", "remove", "remove", "next"}, + expectedOutput: []string{"node-2", ""}, + }, + { + name: "add and remove some nodes between two Next operations", + nodesToAdd: allNodes[2:9], + nodesToRemove: allNodes[2:9], + operations: []string{"add", "add", "next", "add", "remove", "remove", "next"}, + expectedOutput: []string{"node-2", "node-4"}, + }, + { + name: "remove nodes already iterated on and add new nodes", + nodesToAdd: allNodes[2:9], + nodesToRemove: allNodes[2:9], + operations: []string{"add", "add", "next", "next", "add", "remove", "remove", "next"}, + expectedOutput: []string{"node-2", "node-3", "node-4"}, + }, + { + name: "add more nodes to an exhausted zone", + nodesToAdd: append(allNodes[4:9], allNodes[3]), + nodesToRemove: nil, + operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "add", "next", "next", "next"}, + expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-3", "node-8", "node-4"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + nt := newNodeTree(nil) + addIndex := 0 + removeIndex := 0 + var output []string + for _, op := range test.operations { + switch op { + case "add": + if addIndex >= len(test.nodesToAdd) { + t.Error("more add operations than nodesToAdd") + } else { + nt.AddNode(test.nodesToAdd[addIndex]) + addIndex++ + } + case "remove": + if removeIndex >= len(test.nodesToRemove) { + t.Error("more remove operations than nodesToRemove") + } else { + nt.RemoveNode(test.nodesToRemove[removeIndex]) + removeIndex++ + } + case "next": + output = append(output, nt.Next()) + default: + t.Errorf("unknow operation: %v", op) + } + } + if !reflect.DeepEqual(output, test.expectedOutput) { + t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output) + } + }) + } +}