diff --git a/pkg/controller/node/nodecontroller.go b/pkg/controller/node/nodecontroller.go
index 84fbd49870..ad2605870a 100644
--- a/pkg/controller/node/nodecontroller.go
+++ b/pkg/controller/node/nodecontroller.go
@@ -510,6 +510,26 @@ func (nc *NodeController) Run(stopCh <-chan struct{}) {
 	<-stopCh
 }
 
+// addPodEvictorForNewZone checks if a new zone appeared, and if so adds a new evictor.
+func (nc *NodeController) addPodEvictorForNewZone(node *v1.Node) {
+	zone := utilnode.GetZoneKey(node)
+	if _, found := nc.zoneStates[zone]; !found {
+		nc.zoneStates[zone] = stateInitial
+		if !nc.useTaintBasedEvictions {
+			nc.zonePodEvictor[zone] =
+				NewRateLimitedTimedQueue(
+					flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
+		} else {
+			nc.zoneNotReadyOrUnreachableTainer[zone] =
+				NewRateLimitedTimedQueue(
+					flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
+		}
+		// Init the metric for the new zone.
+		glog.Infof("Initializing eviction metric for zone: %v", zone)
+		EvictionsNumber.WithLabelValues(zone).Add(0)
+	}
+}
+
 // monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
 // post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
 // not reachable for a long period of time.
@@ -520,28 +540,17 @@ func (nc *NodeController) monitorNodeStatus() error {
 	if err != nil {
 		return err
 	}
-	added, deleted := nc.checkForNodeAddedDeleted(nodes)
+	added, deleted, newZoneRepresentatives := nc.classifyNodes(nodes)
+
+	for i := range newZoneRepresentatives {
+		nc.addPodEvictorForNewZone(newZoneRepresentatives[i])
+	}
+
 	for i := range added {
 		glog.V(1).Infof("NodeController observed a new Node: %#v", added[i].Name)
 		recordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in NodeController", added[i].Name))
 		nc.knownNodeSet[added[i].Name] = added[i]
-		// When adding new Nodes we need to check if new zone appeared, and if so add new evictor.
-		zone := utilnode.GetZoneKey(added[i])
-		if _, found := nc.zoneStates[zone]; !found {
-			nc.zoneStates[zone] = stateInitial
-			if !nc.useTaintBasedEvictions {
-				nc.zonePodEvictor[zone] =
-					NewRateLimitedTimedQueue(
-						flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
-			} else {
-				nc.zoneNotReadyOrUnreachableTainer[zone] =
-					NewRateLimitedTimedQueue(
-						flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
-			}
-			// Init the metric for the new zone.
-			glog.Infof("Initializing eviction metric for zone: %v", zone)
-			EvictionsNumber.WithLabelValues(zone).Add(0)
-		}
+		nc.addPodEvictorForNewZone(added[i])
 		if nc.useTaintBasedEvictions {
 			nc.markNodeAsHealthy(added[i])
 		} else {
@@ -991,21 +1000,32 @@ func (nc *NodeController) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.
 	return gracePeriod, observedReadyCondition, currentReadyCondition, err
 }
 
-func (nc *NodeController) checkForNodeAddedDeleted(nodes []*v1.Node) (added, deleted []*v1.Node) {
-	for i := range nodes {
-		if _, has := nc.knownNodeSet[nodes[i].Name]; !has {
-			added = append(added, nodes[i])
+// classifyNodes classifies the allNodes into three categories:
+// 1. added: the nodes that are in 'allNodes', but not in 'knownNodeSet'
+// 2. deleted: the nodes that are in 'knownNodeSet', but not in 'allNodes'
+// 3. newZoneRepresentatives: the nodes that are in both 'knownNodeSet' and 'allNodes', but have no zone states
+func (nc *NodeController) classifyNodes(allNodes []*v1.Node) (added, deleted, newZoneRepresentatives []*v1.Node) {
+	for i := range allNodes {
+		if _, has := nc.knownNodeSet[allNodes[i].Name]; !has {
+			added = append(added, allNodes[i])
+		} else {
+			// Currently, we only consider a new zone as updated.
+			zone := utilnode.GetZoneKey(allNodes[i])
+			if _, found := nc.zoneStates[zone]; !found {
+				newZoneRepresentatives = append(newZoneRepresentatives, allNodes[i])
+			}
 		}
 	}
+
 	// If there's a difference between lengths of known Nodes and observed nodes
 	// we must have removed some Node.
-	if len(nc.knownNodeSet)+len(added) != len(nodes) {
+	if len(nc.knownNodeSet)+len(added) != len(allNodes) {
 		knowSetCopy := map[string]*v1.Node{}
 		for k, v := range nc.knownNodeSet {
 			knowSetCopy[k] = v
 		}
-		for i := range nodes {
-			delete(knowSetCopy, nodes[i].Name)
+		for i := range allNodes {
+			delete(knowSetCopy, allNodes[i].Name)
 		}
 		for i := range knowSetCopy {
 			deleted = append(deleted, knowSetCopy[i])
diff --git a/pkg/controller/node/nodecontroller_test.go b/pkg/controller/node/nodecontroller_test.go
index c3b5ef9c39..4f9631efe1 100644
--- a/pkg/controller/node/nodecontroller_test.go
+++ b/pkg/controller/node/nodecontroller_test.go
@@ -133,6 +133,10 @@ func syncNodeStore(nc *nodeController, fakeNodeHandler *testutil.FakeNodeHandler
 func TestMonitorNodeStatusEvictPods(t *testing.T) {
 	fakeNow := metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
 	evictionTimeout := 10 * time.Minute
+	labels := map[string]string{
+		kubeletapis.LabelZoneRegion: "region1",
+		kubeletapis.LabelZoneFailureDomain: "zone1",
+	}
 
 	// Because of the logic that prevents NC from evicting anything when all Nodes are NotReady
 	// we need second healthy node in tests. Because of how the tests are written we need to update
@@ -202,6 +206,42 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 			expectedEvictPods: false,
 			description: "Node created recently, with no status.",
 		},
+		// Node created recently without FailureDomain labels which are added back later, with no status (happens only at cluster startup).
+		{
+			fakeNodeHandler: &testutil.FakeNodeHandler{
+				Existing: []*v1.Node{
+					{
+						ObjectMeta: metav1.ObjectMeta{
+							Name: "node0",
+							CreationTimestamp: fakeNow,
+						},
+					},
+					{
+						ObjectMeta: metav1.ObjectMeta{
+							Name: "node1",
+							CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
+						},
+						Status: v1.NodeStatus{
+							Conditions: []v1.NodeCondition{
+								{
+									Type: v1.NodeReady,
+									Status: v1.ConditionTrue,
+									LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+									LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
+								},
+							},
+						},
+					},
+				},
+				Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}),
+			},
+			daemonSets: nil,
+			timeToPass: 0,
+			newNodeStatus: v1.NodeStatus{},
+			secondNodeNewStatus: healthyNodeNewStatus,
+			expectedEvictPods: false,
+			description: "Node created recently without FailureDomain labels which are added back later, with no status (happens only at cluster startup).",
+		},
 		// Node created long time ago, and kubelet posted NotReady for a short period of time.
 		{
 			fakeNodeHandler: &testutil.FakeNodeHandler{
@@ -584,6 +624,10 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 			item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
 			item.fakeNodeHandler.Existing[1].Status = item.secondNodeNewStatus
 		}
+		if len(item.fakeNodeHandler.Existing[0].Labels) == 0 && len(item.fakeNodeHandler.Existing[1].Labels) == 0 {
+			item.fakeNodeHandler.Existing[0].Labels = labels
+			item.fakeNodeHandler.Existing[1].Labels = labels
+		}
 		if err := syncNodeStore(nodeController, item.fakeNodeHandler); err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
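
The core of the change above is that node classification is now separated from evictor bookkeeping: `classifyNodes` additionally reports already-known nodes whose zone has no state yet, and `monitorNodeStatus` feeds those through `addPodEvictorForNewZone`, so a zone that only becomes visible after a node gains its failure-domain labels (the cluster-startup case the new test covers) still gets its evictor and metric initialized. Below is a minimal, self-contained Go sketch of that classification logic; the `node` struct and the `known`/`zoneStates` maps are simplified stand-ins invented for illustration, not the controller's real `*v1.Node`, `knownNodeSet`, or `zoneStates` fields.

```go
package main

import "fmt"

// node and the maps below are simplified stand-ins for the controller's
// *v1.Node objects, knownNodeSet and zoneStates; the names are illustrative only.
type node struct {
	name string
	zone string // zone key; empty until failure-domain labels are set
}

// classify mirrors the shape of the new classifyNodes helper: it partitions the
// observed nodes into "added" (not yet known), "newZoneRepresentatives" (known,
// but their zone has no state yet) and "deleted" (known but no longer observed).
func classify(allNodes []node, known map[string]node, zoneStates map[string]bool) (added, deleted, newZoneRepresentatives []node) {
	for _, n := range allNodes {
		if _, has := known[n.name]; !has {
			added = append(added, n)
		} else if !zoneStates[n.zone] {
			// A known node whose zone has no state yet represents a new zone,
			// e.g. a node that only received its failure-domain labels after registering.
			newZoneRepresentatives = append(newZoneRepresentatives, n)
		}
	}
	// If the counts differ, at least one known node disappeared from the observed set.
	if len(known)+len(added) != len(allNodes) {
		seen := map[string]bool{}
		for _, n := range allNodes {
			seen[n.name] = true
		}
		for _, n := range known {
			if !seen[n.name] {
				deleted = append(deleted, n)
			}
		}
	}
	return added, deleted, newZoneRepresentatives
}

func main() {
	// node0 registered before its zone labels were set; node2 has since been removed.
	known := map[string]node{
		"node0": {name: "node0", zone: ""},
		"node2": {name: "node2", zone: "region1:zoneX"},
	}
	zoneStates := map[string]bool{"": true, "region1:zoneX": true}

	// node0 now carries its labels (a zone with no state yet); node1 is brand new.
	observed := []node{
		{name: "node0", zone: "region1:zone1"},
		{name: "node1", zone: "region1:zone1"},
	}

	added, deleted, newZone := classify(observed, known, zoneStates)
	fmt.Println("added:", added)                    // [{node1 region1:zone1}]
	fmt.Println("deleted:", deleted)                // [{node2 region1:zoneX}]
	fmt.Println("newZoneRepresentatives:", newZone) // [{node0 region1:zone1}]
}
```

In the real controller the same partitioning runs over `*v1.Node` objects using `utilnode.GetZoneKey`, and `addPodEvictorForNewZone` performs the per-zone initialization shown in the first hunk (rate-limited queue or tainter queue plus the `EvictionsNumber` metric).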