/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node
import (
	"errors"
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/golang/glog"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/apis/extensions"
	"k8s.io/kubernetes/pkg/client/cache"
	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
	unversionedcore "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/unversioned"
	"k8s.io/kubernetes/pkg/client/record"
	"k8s.io/kubernetes/pkg/cloudprovider"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/framework"
	"k8s.io/kubernetes/pkg/fields"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/pkg/runtime"
	"k8s.io/kubernetes/pkg/util/flowcontrol"
	"k8s.io/kubernetes/pkg/util/metrics"
	utilnode "k8s.io/kubernetes/pkg/util/node"
	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
	"k8s.io/kubernetes/pkg/util/system"
	"k8s.io/kubernetes/pkg/util/wait"
	"k8s.io/kubernetes/pkg/version"
	"k8s.io/kubernetes/pkg/watch"
)
var (
	ErrCloudInstance        = errors.New("cloud provider doesn't support instances.")
	gracefulDeletionVersion = version.MustParse("v1.1.0")
)

const (
	// nodeStatusUpdateRetry controls the number of retries of writing NodeStatus update.
	nodeStatusUpdateRetry = 5
	// controls how often NodeController will try to evict Pods from non-responsive Nodes.
	nodeEvictionPeriod = 100 * time.Millisecond
	// Burst value for all eviction rate limiters.
	evictionRateLimiterBurst = 1
	// How long the nodecontroller polls the list-nodes endpoint while waiting for the
	// apiserver to become available at startup.
	apiserverStartupGracePeriod = 10 * time.Minute
)
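// zoneState describes the NodeController's view of a failure-domain zone. It is
// derived from the Ready conditions of the zone's Nodes by computeZoneStateFunc
// and drives whether evictions in that zone are allowed, paused, or cancelled.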
type zoneState string

const (
	stateNormal              = zoneState("Normal")
	stateFullSegmentation    = zoneState("FullSegmentation")
	statePartialSegmentation = zoneState("PartialSegmentation")
)
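// nodeStatusData stores the last observed NodeStatus of a Node together with local
// timestamps of when it was probed and when its Ready condition last transitioned.
// Local timestamps are used instead of the timestamps reported by the kubelet to
// avoid problems caused by time skew across the cluster.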
type nodeStatusData struct {
	probeTimestamp           unversioned.Time
	readyTransitionTimestamp unversioned.Time
	status                   api.NodeStatus
}
type NodeController struct {
	allocateNodeCIDRs bool
	cloud             cloudprovider.Interface
	clusterCIDR       *net.IPNet
	serviceCIDR       *net.IPNet
	knownNodeSet      map[string]*api.Node
	kubeClient        clientset.Interface
	// Method for easy mocking in unittest.
	lookupIP func(host string) ([]net.IP, error)
	// Value used if sync_nodes_status=False. NodeController will not proactively
	// sync node status in this case, but will monitor node status updates from kubelet. If
	// it doesn't receive an update for this amount of time, it will start posting "NodeReady==
	// ConditionUnknown". The amount of time before which NodeController starts evicting pods
	// is controlled via the flag 'pod-eviction-timeout'.
	// Note: be cautious when changing the constant, it must work with nodeStatusUpdateFrequency
	// in kubelet. There are several constraints:
	// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
	//    N means number of retries allowed for kubelet to post node status. It is pointless
	//    to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
	//    will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
	//    The constant must be less than podEvictionTimeout.
	// 2. nodeMonitorGracePeriod can't be too large for user experience - a larger value takes
	//    longer for the user to see up-to-date node status.
	nodeMonitorGracePeriod time.Duration
	// Value controlling NodeController monitoring period, i.e. how often NodeController
	// checks node status posted from kubelet. This value should be lower than nodeMonitorGracePeriod.
	// TODO: Change node status monitor to watch based.
	nodeMonitorPeriod time.Duration
	// Value used if sync_nodes_status=False, only for node startup. When a node
	// is just created, e.g. cluster bootstrap or node creation, we give a longer grace period.
	nodeStartupGracePeriod time.Duration
	// per Node map storing last observed Status together with a local time when it was observed.
	// This timestamp is to be used instead of LastProbeTime stored in Condition. We do this
	// to avoid the problem with time skew across the cluster.
	nodeStatusMap map[string]nodeStatusData
	now           func() unversioned.Time
	// Lock to access evictor workers
	evictorLock *sync.Mutex
	// workers that evict pods from unresponsive nodes.
	zonePodEvictor         map[string]*RateLimitedTimedQueue
	zoneTerminationEvictor map[string]*RateLimitedTimedQueue
	evictionLimiterQPS     float32
	podEvictionTimeout     time.Duration
	// The maximum duration before a pod evicted from a node can be forcefully terminated.
	maximumGracePeriod time.Duration
	recorder           record.EventRecorder
	// Pod framework and store
	podController *framework.Controller
	podStore      cache.StoreToPodLister
	// Node framework and store
	nodeController *framework.Controller
	nodeStore      cache.StoreToNodeLister
	// DaemonSet framework and store
	daemonSetController *framework.Controller
	daemonSetStore      cache.StoreToDaemonSetLister
	// allocate/recycle CIDRs for node if allocateNodeCIDRs == true
	cidrAllocator CIDRAllocator

	forcefullyDeletePod       func(*api.Pod) error
	nodeExistsInCloudProvider func(string) (bool, error)
	computeZoneStateFunc      func(nodeConditions []*api.NodeCondition) zoneState

	zoneStates map[string]zoneState
}
// NewNodeController returns a new node controller to sync instances from cloudprovider.
// This method returns an error if it is unable to initialize the CIDR bitmap with
// podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes
// currently, this should be handled as a fatal error.
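//
// A rough usage sketch from a controller-manager-like caller (the durations, QPS and
// mask size shown are illustrative only, not recommendations):
//
//	nc, err := NewNodeController(cloud, kubeClient,
//		5*time.Minute,  // podEvictionTimeout
//		0.1,            // evictionLimiterQPS
//		40*time.Second, // nodeMonitorGracePeriod
//		60*time.Second, // nodeStartupGracePeriod
//		5*time.Second,  // nodeMonitorPeriod
//		clusterCIDR, serviceCIDR, 24, true)
//	if err != nil {
//		glog.Fatalf("Failed to initialize nodecontroller: %v", err)
//	}
//	nc.Run(5 * time.Second)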
func NewNodeController(
	cloud cloudprovider.Interface,
	kubeClient clientset.Interface,
	podEvictionTimeout time.Duration,
	evictionLimiterQPS float32,
	nodeMonitorGracePeriod time.Duration,
	nodeStartupGracePeriod time.Duration,
	nodeMonitorPeriod time.Duration,
	clusterCIDR *net.IPNet,
	serviceCIDR *net.IPNet,
	nodeCIDRMaskSize int,
	allocateNodeCIDRs bool) (*NodeController, error) {
	eventBroadcaster := record.NewBroadcaster()
	recorder := eventBroadcaster.NewRecorder(api.EventSource{Component: "controllermanager"})
	eventBroadcaster.StartLogging(glog.Infof)
	if kubeClient != nil {
		glog.V(0).Infof("Sending events to api server.")
		eventBroadcaster.StartRecordingToSink(&unversionedcore.EventSinkImpl{Interface: kubeClient.Core().Events("")})
	} else {
		glog.V(0).Infof("No api server defined - no events will be sent to API server.")
	}

	if kubeClient != nil && kubeClient.Core().GetRESTClient().GetRateLimiter() != nil {
		metrics.RegisterMetricAndTrackRateLimiterUsage("node_controller", kubeClient.Core().GetRESTClient().GetRateLimiter())
	}

	if allocateNodeCIDRs {
		if clusterCIDR == nil {
			glog.Fatal("NodeController: Must specify clusterCIDR if allocateNodeCIDRs == true.")
		}
		mask := clusterCIDR.Mask
		if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSize {
			glog.Fatal("NodeController: Invalid clusterCIDR, mask size of clusterCIDR must be less than nodeCIDRMaskSize.")
		}
	}

	evictorLock := sync.Mutex{}

	nc := &NodeController{
		cloud:                     cloud,
		knownNodeSet:              make(map[string]*api.Node),
		kubeClient:                kubeClient,
		recorder:                  recorder,
		podEvictionTimeout:        podEvictionTimeout,
		maximumGracePeriod:        5 * time.Minute,
		evictorLock:               &evictorLock,
		zonePodEvictor:            make(map[string]*RateLimitedTimedQueue),
		zoneTerminationEvictor:    make(map[string]*RateLimitedTimedQueue),
		nodeStatusMap:             make(map[string]nodeStatusData),
		nodeMonitorGracePeriod:    nodeMonitorGracePeriod,
		nodeMonitorPeriod:         nodeMonitorPeriod,
		nodeStartupGracePeriod:    nodeStartupGracePeriod,
		lookupIP:                  net.LookupIP,
		now:                       unversioned.Now,
		clusterCIDR:               clusterCIDR,
		serviceCIDR:               serviceCIDR,
		allocateNodeCIDRs:         allocateNodeCIDRs,
		forcefullyDeletePod:       func(p *api.Pod) error { return forcefullyDeletePod(kubeClient, p) },
		nodeExistsInCloudProvider: func(nodeName string) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
		computeZoneStateFunc:      ComputeZoneState,
		evictionLimiterQPS:        evictionLimiterQPS,
		zoneStates:                make(map[string]zoneState),
	}
	nc.podStore.Indexer, nc.podController = framework.NewIndexerInformer(
		&cache.ListWatch{
			ListFunc: func(options api.ListOptions) (runtime.Object, error) {
				return nc.kubeClient.Core().Pods(api.NamespaceAll).List(options)
			},
			WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
				return nc.kubeClient.Core().Pods(api.NamespaceAll).Watch(options)
			},
		},
		&api.Pod{},
		controller.NoResyncPeriodFunc(),
		framework.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				nc.maybeDeleteTerminatingPod(obj, nc.nodeStore.Store, nc.forcefullyDeletePod)
			},
			UpdateFunc: func(_, obj interface{}) {
				nc.maybeDeleteTerminatingPod(obj, nc.nodeStore.Store, nc.forcefullyDeletePod)
			},
		},
		// We don't actually need to build an index for podStore here, but we build one for consistency.
		// It will ensure that if people start making use of the podStore in more specific ways,
		// they'll get the benefits they expect. It will also reserve the name for future refactorings.
		cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc},
	)
	nodeEventHandlerFuncs := framework.ResourceEventHandlerFuncs{}
	if nc.allocateNodeCIDRs {
		nodeEventHandlerFuncs = framework.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				node := obj.(*api.Node)
				err := nc.cidrAllocator.AllocateOrOccupyCIDR(node)
				if err != nil {
					glog.Errorf("Error allocating CIDR: %v", err)
				}
			},
			UpdateFunc: func(_, obj interface{}) {
				node := obj.(*api.Node)
				// If the PodCIDR is not empty we either:
				// - already processed a Node that already had a CIDR after NC restarted
				//   (cidr is marked as used),
				// - already processed a Node successfully and allocated a CIDR for it
				//   (cidr is marked as used),
				// - already processed a Node but only saw a "timeout" response while the
				//   request eventually got through; in this case we haven't released
				//   the allocated CIDR (cidr is still marked as used).
				// There's a possible error here:
				// - NC sees a new Node and assigns CIDR X to it,
				// - the Update Node call fails with a timeout,
				// - the Node is updated by some other component, NC sees an update and
				//   assigns CIDR Y to the Node,
				// - both CIDR X and CIDR Y are marked as used in the local cache,
				//   even though the Node sees only CIDR Y.
				// The problem here is that the in-memory cache sees CIDR X as marked,
				// which prevents it from being assigned to any new node. The cluster
				// state itself is correct.
				// Restarting NC fixes the issue.
				if node.Spec.PodCIDR == "" {
					err := nc.cidrAllocator.AllocateOrOccupyCIDR(node)
					if err != nil {
						glog.Errorf("Error allocating CIDR: %v", err)
					}
				}
			},
			DeleteFunc: func(obj interface{}) {
				node := obj.(*api.Node)
				err := nc.cidrAllocator.ReleaseCIDR(node)
				if err != nil {
					glog.Errorf("Error releasing CIDR: %v", err)
				}
			},
		}
	}
	nc.nodeStore.Store, nc.nodeController = framework.NewInformer(
		&cache.ListWatch{
			ListFunc: func(options api.ListOptions) (runtime.Object, error) {
				return nc.kubeClient.Core().Nodes().List(options)
			},
			WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
				return nc.kubeClient.Core().Nodes().Watch(options)
			},
		},
		&api.Node{},
		controller.NoResyncPeriodFunc(),
		nodeEventHandlerFuncs,
	)

	nc.daemonSetStore.Store, nc.daemonSetController = framework.NewInformer(
		&cache.ListWatch{
			ListFunc: func(options api.ListOptions) (runtime.Object, error) {
				return nc.kubeClient.Extensions().DaemonSets(api.NamespaceAll).List(options)
			},
			WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
				return nc.kubeClient.Extensions().DaemonSets(api.NamespaceAll).Watch(options)
			},
		},
		&extensions.DaemonSet{},
		controller.NoResyncPeriodFunc(),
		framework.ResourceEventHandlerFuncs{},
	)
	if allocateNodeCIDRs {
		var nodeList *api.NodeList
		var err error
		// We must poll because apiserver might not be up. This error causes
		// controller manager to restart.
		if pollErr := wait.Poll(10*time.Second, apiserverStartupGracePeriod, func() (bool, error) {
			nodeList, err = kubeClient.Core().Nodes().List(api.ListOptions{
				FieldSelector: fields.Everything(),
				LabelSelector: labels.Everything(),
			})
			if err != nil {
				glog.Errorf("Failed to list all nodes: %v", err)
				return false, nil
			}
			return true, nil
		}); pollErr != nil {
			return nil, fmt.Errorf("Failed to list all nodes in %v, cannot proceed without updating CIDR map", apiserverStartupGracePeriod)
		}
		nc.cidrAllocator, err = NewCIDRRangeAllocator(kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList)
		if err != nil {
			return nil, err
		}
	}

	return nc, nil
}
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *NodeController) Run(period time.Duration) {
	go nc.nodeController.Run(wait.NeverStop)
	go nc.podController.Run(wait.NeverStop)
	go nc.daemonSetController.Run(wait.NeverStop)

	// Incorporate the results of node status pushed from kubelet to master.
	go wait.Until(func() {
		if err := nc.monitorNodeStatus(); err != nil {
			glog.Errorf("Error monitoring node status: %v", err)
		}
	}, nc.nodeMonitorPeriod, wait.NeverStop)

	// Managing eviction of nodes:
	// 1. when we delete pods off a node, if the node was not empty at the time we then
	//    queue a termination watcher
	//    a. If we hit an error, retry deletion
	// 2. The terminator loop ensures that pods are eventually cleaned and we never
	//    terminate a pod in a time period less than nc.maximumGracePeriod. AddedAt
	//    is the time from which we measure "has this pod been terminating too long",
	//    after which we will delete the pod with grace period 0 (force delete).
	//    a. If we hit errors, retry instantly
	//    b. If there are no pods left terminating, exit
	//    c. If there are pods still terminating, wait for their estimated completion
	//       before retrying
	go wait.Until(func() {
		nc.evictorLock.Lock()
		defer nc.evictorLock.Unlock()
		for k := range nc.zonePodEvictor {
			nc.zonePodEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
				remaining, err := deletePods(nc.kubeClient, nc.recorder, value.Value, nc.daemonSetStore)
				if err != nil {
					utilruntime.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
					return false, 0
				}

				if remaining {
					nc.zoneTerminationEvictor[k].Add(value.Value)
				}
				return true, 0
			})
		}
	}, nodeEvictionPeriod, wait.NeverStop)

	// TODO: replace with a controller that ensures pods that are terminating complete
	// in a particular time period
	go wait.Until(func() {
		nc.evictorLock.Lock()
		defer nc.evictorLock.Unlock()
		for k := range nc.zoneTerminationEvictor {
			nc.zoneTerminationEvictor[k].Try(func(value TimedValue) (bool, time.Duration) {
				completed, remaining, err := terminatePods(nc.kubeClient, nc.recorder, value.Value, value.AddedAt, nc.maximumGracePeriod)
				if err != nil {
					utilruntime.HandleError(fmt.Errorf("unable to terminate pods on node %q: %v", value.Value, err))
					return false, 0
				}

				if completed {
					glog.V(2).Infof("All pods terminated on %s", value.Value)
					recordNodeEvent(nc.recorder, value.Value, api.EventTypeNormal, "TerminatedAllPods", fmt.Sprintf("Terminated all Pods on Node %s.", value.Value))
					return true, 0
				}

				glog.V(2).Infof("Pods terminating since %s on %q, estimated completion %s", value.AddedAt, value.Value, remaining)
				// clamp very short intervals
				if remaining < nodeEvictionPeriod {
					remaining = nodeEvictionPeriod
				}
				return false, remaining
			})
		}
	}, nodeEvictionPeriod, wait.NeverStop)
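	// Periodically clean up orphaned pods, i.e. pods bound to Nodes that no longer
	// exist in the node store, by force-deleting them.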
	go wait.Until(func() {
		pods, err := nc.podStore.List(labels.Everything())
		if err != nil {
			utilruntime.HandleError(err)
			return
		}
		cleanupOrphanedPods(pods, nc.nodeStore.Store, nc.forcefullyDeletePod)
	}, 30*time.Second, wait.NeverStop)
}
// monitorNodeStatus verifies that node status is constantly updated by kubelet, and if not,
// posts "NodeReady==ConditionUnknown". It also evicts all pods if a node is not ready or
// not reachable for a long period of time.
func (nc *NodeController) monitorNodeStatus() error {
	nodes, err := nc.kubeClient.Core().Nodes().List(api.ListOptions{})
	if err != nil {
		return err
	}
	added, deleted := nc.checkForNodeAddedDeleted(nodes)
	for i := range added {
		glog.V(1).Infof("NodeController observed a new Node: %#v", added[i].Name)
		recordNodeEvent(nc.recorder, added[i].Name, api.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in NodeController", added[i].Name))
		nc.knownNodeSet[added[i].Name] = added[i]
		// When adding new Nodes we need to check if a new zone appeared, and if so add a new evictor.
		zone := utilnode.GetZoneKey(added[i])
		if _, found := nc.zonePodEvictor[zone]; !found {
			nc.zonePodEvictor[zone] =
				NewRateLimitedTimedQueue(
					flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
		}
		if _, found := nc.zoneTerminationEvictor[zone]; !found {
			nc.zoneTerminationEvictor[zone] = NewRateLimitedTimedQueue(
				flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, evictionRateLimiterBurst))
		}
		nc.cancelPodEviction(added[i])
	}

	for i := range deleted {
		glog.V(1).Infof("NodeController observed a Node deletion: %v", deleted[i].Name)
		recordNodeEvent(nc.recorder, deleted[i].Name, api.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from NodeController", deleted[i].Name))
		nc.evictPods(deleted[i])
		delete(nc.knownNodeSet, deleted[i].Name)
	}
	zoneToNodeConditions := map[string][]*api.NodeCondition{}
	for i := range nodes.Items {
		var gracePeriod time.Duration
		var observedReadyCondition api.NodeCondition
		var currentReadyCondition *api.NodeCondition
		node := &nodes.Items[i]
		for rep := 0; rep < nodeStatusUpdateRetry; rep++ {
			gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
			if err == nil {
				break
			}
			name := node.Name
			node, err = nc.kubeClient.Core().Nodes().Get(name)
			if err != nil {
				glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
				break
			}
		}
		if err != nil {
			glog.Errorf("Update status of Node %v from NodeController exceeds retry count. "+
				"Skipping - no pods will be evicted.", node.Name)
			continue
		}
		// We do not treat a master node as a part of the cluster for network segmentation checking.
		if !system.IsMasterNode(node) {
			zoneToNodeConditions[utilnode.GetZoneKey(node)] = append(zoneToNodeConditions[utilnode.GetZoneKey(node)], currentReadyCondition)
		}

		decisionTimestamp := nc.now()
		if currentReadyCondition != nil {
			// Check eviction timeout against decisionTimestamp
			if observedReadyCondition.Status == api.ConditionFalse &&
				decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
				if nc.evictPods(node) {
					glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
				}
			}
			if observedReadyCondition.Status == api.ConditionUnknown &&
				decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
				if nc.evictPods(node) {
					glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
				}
			}
			if observedReadyCondition.Status == api.ConditionTrue {
				if nc.cancelPodEviction(node) {
					glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
				}
			}
			// Report node event.
			if currentReadyCondition.Status != api.ConditionTrue && observedReadyCondition.Status == api.ConditionTrue {
				recordNodeStatusChange(nc.recorder, node, "NodeNotReady")
				if err = markAllPodsNotReady(nc.kubeClient, node.Name); err != nil {
					utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err))
				}
			}
			// Check with the cloud provider to see if the node still exists. If it
			// doesn't, delete the node immediately.
			if currentReadyCondition.Status != api.ConditionTrue && nc.cloud != nil {
				exists, err := nc.nodeExistsInCloudProvider(node.Name)
				if err != nil {
					glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
					continue
				}
				if !exists {
					glog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
					recordNodeEvent(nc.recorder, node.Name, api.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
					go func(nodeName string) {
						defer utilruntime.HandleCrash()
						// Kubelet is not reporting and Cloud Provider says node
						// is gone. Delete it without worrying about grace
						// periods.
						if err := forcefullyDeleteNode(nc.kubeClient, nodeName, nc.forcefullyDeletePod); err != nil {
							glog.Errorf("Unable to forcefully delete node %q: %v", nodeName, err)
						}
					}(node.Name)
					continue
				}
			}
		}
	}
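	// Adjust per-zone behavior based on the freshly computed zone states: when a zone
	// becomes fully segmented we stop evicting there, and when it recovers we reset
	// probe timestamps so its Nodes get a fresh grace period.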
	for k, v := range zoneToNodeConditions {
		newState := nc.computeZoneStateFunc(v)
		if newState == nc.zoneStates[k] {
			continue
		}
		if newState == stateFullSegmentation {
			glog.V(2).Infof("NodeController is entering network segmentation mode in zone %v.", k)
		} else if newState == stateNormal {
			glog.V(2).Infof("NodeController exited network segmentation mode in zone %v.", k)
		}
		for i := range nodes.Items {
			if utilnode.GetZoneKey(&nodes.Items[i]) == k {
				if newState == stateFullSegmentation {
					// When a zone is fully segmented we stop the eviction altogether.
					nc.cancelPodEviction(&nodes.Items[i])
				}
				if newState == stateNormal && nc.zoneStates[k] == stateFullSegmentation {
					// When exiting segmentation mode update probe timestamps on all Nodes.
					now := nc.now()
					v := nc.nodeStatusMap[nodes.Items[i].Name]
					v.probeTimestamp = now
					v.readyTransitionTimestamp = now
					nc.nodeStatusMap[nodes.Items[i].Name] = v
				}
			}
		}
		nc.zoneStates[k] = newState
	}

	return nil
}
// tryUpdateNodeStatus checks a given node's conditions and tries to update it. It returns
// the grace period to which the node is entitled, the last observed and the current Ready
// conditions, and an error if one occurred.
func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, api.NodeCondition, *api.NodeCondition, error) {
	var err error
	var gracePeriod time.Duration
	var observedReadyCondition api.NodeCondition
	_, currentReadyCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
	if currentReadyCondition == nil {
		// If ready condition is nil, then kubelet (or nodecontroller) never posted node status.
		// A fake ready condition is created, where LastHeartbeatTime and LastTransitionTime are set
		// to node.CreationTimestamp to avoid handling the corner case.
		observedReadyCondition = api.NodeCondition{
			Type:               api.NodeReady,
			Status:             api.ConditionUnknown,
			LastHeartbeatTime:  node.CreationTimestamp,
			LastTransitionTime: node.CreationTimestamp,
		}
		gracePeriod = nc.nodeStartupGracePeriod
		nc.nodeStatusMap[node.Name] = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           node.CreationTimestamp,
			readyTransitionTimestamp: node.CreationTimestamp,
		}
	} else {
		// If ready condition is not nil, make a copy of it, since we may modify it in place later.
		observedReadyCondition = *currentReadyCondition
		gracePeriod = nc.nodeMonitorGracePeriod
	}

	savedNodeStatus, found := nc.nodeStatusMap[node.Name]
	// There are the following cases to check:
	// - both saved and new status have no Ready Condition set - we leave everything as it is,
	// - saved status has no Ready Condition, but current one does - NodeController was restarted with Node data already present in etcd,
	// - saved status has some Ready Condition, but current one does not - it's an error, but we fill it up because that's probably a good thing to do,
	// - both saved and current statuses have Ready Conditions and they have the same LastProbeTime - nothing happened on that Node, it may be
	//   unresponsive, so we leave it as it is,
	// - both saved and current statuses have Ready Conditions, they have different LastProbeTimes, but the same Ready Condition State -
	//   everything's in order, no transition occurred, we update only probeTimestamp,
	// - both saved and current statuses have Ready Conditions, different LastProbeTimes and different Ready Condition State -
	//   Ready Condition changed its state since we last saw it, so we update both probeTimestamp and readyTransitionTimestamp.
	// TODO: things to consider:
	// - if 'LastProbeTime' has gone back in time it's probably an error, currently we ignore it,
	// - currently the only correct Ready State transition outside of Node Controller is marking it ready by Kubelet, we don't check
	//   if that's the case, but it does not seem necessary.
	var savedCondition *api.NodeCondition
	if found {
		_, savedCondition = api.GetNodeCondition(&savedNodeStatus.status, api.NodeReady)
	}
	_, observedCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
	if !found {
		glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition == nil && observedCondition != nil {
		glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition != nil && observedCondition == nil {
		glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
		// TODO: figure out what to do in this case. For now we do the same thing as above.
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
		var transitionTime unversioned.Time
		// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
		// otherwise we leave it as it is.
		if savedCondition.LastTransitionTime != observedCondition.LastTransitionTime {
			glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition.Status, observedCondition)
			transitionTime = nc.now()
		} else {
			transitionTime = savedNodeStatus.readyTransitionTimestamp
		}
		if glog.V(5) {
			glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
		} else {
			glog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
		}
		savedNodeStatus = nodeStatusData{
			status:                   node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: transitionTime,
		}
	}
	nc.nodeStatusMap[node.Name] = savedNodeStatus

	if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
		// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
		// (regardless of its current value) in the master.
		if currentReadyCondition == nil {
			glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeReady,
				Status:             api.ConditionUnknown,
				Reason:             "NodeStatusNeverUpdated",
				Message:            fmt.Sprintf("Kubelet never posted node status."),
				LastHeartbeatTime:  node.CreationTimestamp,
				LastTransitionTime: nc.now(),
			})
		} else {
			glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
				node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition)
			if observedReadyCondition.Status != api.ConditionUnknown {
				currentReadyCondition.Status = api.ConditionUnknown
				currentReadyCondition.Reason = "NodeStatusUnknown"
				currentReadyCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
				// LastHeartbeatTime is the last time we heard from kubelet.
				currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
				currentReadyCondition.LastTransitionTime = nc.now()
			}
		}

		// Like the NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update
		// it to Unknown (regardless of its current value) in the master.
		// TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code.
		_, oodCondition := api.GetNodeCondition(&node.Status, api.NodeOutOfDisk)
		if oodCondition == nil {
			glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name)
			node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
				Type:               api.NodeOutOfDisk,
				Status:             api.ConditionUnknown,
				Reason:             "NodeStatusNeverUpdated",
				Message:            fmt.Sprintf("Kubelet never posted node status."),
				LastHeartbeatTime:  node.CreationTimestamp,
				LastTransitionTime: nc.now(),
			})
		} else {
			glog.V(4).Infof("node %v hasn't been updated for %+v. Last out of disk condition is: %+v",
				node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), oodCondition)
			if oodCondition.Status != api.ConditionUnknown {
				oodCondition.Status = api.ConditionUnknown
				oodCondition.Reason = "NodeStatusUnknown"
				oodCondition.Message = fmt.Sprintf("Kubelet stopped posting node status.")
				oodCondition.LastTransitionTime = nc.now()
			}
		}

		_, currentCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
		if !api.Semantic.DeepEqual(currentCondition, &observedReadyCondition) {
			if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil {
				glog.Errorf("Error updating node %s: %v", node.Name, err)
				return gracePeriod, observedReadyCondition, currentReadyCondition, err
			} else {
				nc.nodeStatusMap[node.Name] = nodeStatusData{
					status:                   node.Status,
					probeTimestamp:           nc.nodeStatusMap[node.Name].probeTimestamp,
					readyTransitionTimestamp: nc.now(),
				}
				return gracePeriod, observedReadyCondition, currentReadyCondition, nil
			}
		}
	}

	return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
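// checkForNodeAddedDeleted compares the listed Nodes against the controller's
// knownNodeSet and returns the Nodes that appeared and disappeared since the
// last call.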
func (nc *NodeController) checkForNodeAddedDeleted(nodes *api.NodeList) (added, deleted []*api.Node) {
	for i := range nodes.Items {
		if _, has := nc.knownNodeSet[nodes.Items[i].Name]; !has {
			added = append(added, &nodes.Items[i])
		}
	}
	// If there's a difference between the number of known Nodes and the number of observed Nodes
	// we must have removed some Node.
	if len(nc.knownNodeSet)+len(added) != len(nodes.Items) {
		knownSetCopy := map[string]*api.Node{}
		for k, v := range nc.knownNodeSet {
			knownSetCopy[k] = v
		}
		for i := range nodes.Items {
			delete(knownSetCopy, nodes.Items[i].Name)
		}
		for i := range knownSetCopy {
			deleted = append(deleted, knownSetCopy[i])
		}
	}
	return
}
// cancelPodEviction removes any queued evictions, typically because the node is available again. It
// returns true if an eviction was queued.
func (nc *NodeController) cancelPodEviction(node *api.Node) bool {
	zone := utilnode.GetZoneKey(node)
	nc.evictorLock.Lock()
	defer nc.evictorLock.Unlock()
	wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
	wasTerminating := nc.zoneTerminationEvictor[zone].Remove(node.Name)
	if wasDeleting || wasTerminating {
		glog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name)
		return true
	}
	return false
}
// evictPods queues an eviction for the provided node. It returns false if the node is
// already queued for eviction, or if all known zones are fully segmented (in which case
// no evictions are queued at all).
func (nc *NodeController) evictPods(node *api.Node) bool {
	nc.evictorLock.Lock()
	defer nc.evictorLock.Unlock()
	foundHealthy := false
	for _, state := range nc.zoneStates {
		if state != stateFullSegmentation {
			foundHealthy = true
			break
		}
	}
	if !foundHealthy {
		return false
	}
	zone := utilnode.GetZoneKey(node)
	return nc.zonePodEvictor[zone].Add(node.Name)
}