2019-08-30 18:33:25 +00:00
/ *
Copyright 2015 The Kubernetes Authors .
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
package service
import (
"context"
"fmt"
2020-08-10 17:43:49 +00:00
"reflect"
2019-08-30 18:33:25 +00:00
"sync"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
2020-03-26 21:07:15 +00:00
"k8s.io/apimachinery/pkg/labels"
2019-08-30 18:33:25 +00:00
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
cloudprovider "k8s.io/cloud-provider"
servicehelper "k8s.io/cloud-provider/service/helpers"
2020-08-10 17:43:49 +00:00
"k8s.io/component-base/featuregate"
2019-12-12 01:27:03 +00:00
"k8s.io/component-base/metrics/prometheus/ratelimiter"
2020-08-10 17:43:49 +00:00
"k8s.io/klog/v2"
2019-08-30 18:33:25 +00:00
)
const (
// serviceSyncPeriod is the resync period used when registering the service
// informer event handlers.
serviceSyncPeriod = 30 * time . Second
// nodeSyncPeriod is the interval at which Run periodically invokes
// triggerNodeSync.
nodeSyncPeriod = 100 * time . Second
// minRetryDelay and maxRetryDelay bound the per-item exponential backoff of
// the service work queue, i.e. how long to wait before retrying the
// processing of a service change.
// If this changes, the sleep in hack/jenkins/e2e.sh before downing a cluster
// should be changed appropriately.
minRetryDelay = 5 * time . Second
maxRetryDelay = 300 * time . Second
)
// cachedService is the value type stored in serviceCache; it wraps the last
// Service object recorded for a key.
type cachedService struct {
// state is the cached state of the service. It is nil for an entry that was
// created by getOrCreate and not yet populated.
state * v1 . Service
}
type serviceCache struct {
2019-09-27 21:51:53 +00:00
mu sync . RWMutex // protects serviceMap
2019-08-30 18:33:25 +00:00
serviceMap map [ string ] * cachedService
}
2019-12-12 01:27:03 +00:00
// Controller keeps cloud provider service resources
2019-08-30 18:33:25 +00:00
// (like load balancers) in sync with the registry.
2019-12-12 01:27:03 +00:00
type Controller struct {
cloud cloudprovider . Interface
knownHosts [ ] * v1 . Node
servicesToUpdate [ ] * v1 . Service
kubeClient clientset . Interface
clusterName string
balancer cloudprovider . LoadBalancer
// TODO(#85155): Stop relying on this and remove the cache completely.
2019-08-30 18:33:25 +00:00
cache * serviceCache
serviceLister corelisters . ServiceLister
serviceListerSynced cache . InformerSynced
eventBroadcaster record . EventBroadcaster
eventRecorder record . EventRecorder
nodeLister corelisters . NodeLister
nodeListerSynced cache . InformerSynced
// services that need to be synced
queue workqueue . RateLimitingInterface
2021-03-18 22:40:29 +00:00
// nodeSyncLock ensures there is only one instance of triggerNodeSync getting executed at one time
// and protects internal states (needFullSync) of nodeSync
nodeSyncLock sync . Mutex
// nodeSyncCh triggers nodeSyncLoop to run
nodeSyncCh chan interface { }
// needFullSync indicates if the nodeSyncInternal will do a full node sync on all LB services.
needFullSync bool
2019-08-30 18:33:25 +00:00
}
// New returns a new service controller to keep cloud provider service resources
// (like load balancers) in sync with the registry.
func New (
cloud cloudprovider . Interface ,
kubeClient clientset . Interface ,
serviceInformer coreinformers . ServiceInformer ,
nodeInformer coreinformers . NodeInformer ,
clusterName string ,
2020-08-10 17:43:49 +00:00
featureGate featuregate . FeatureGate ,
2019-12-12 01:27:03 +00:00
) ( * Controller , error ) {
2019-08-30 18:33:25 +00:00
broadcaster := record . NewBroadcaster ( )
2020-08-10 17:43:49 +00:00
broadcaster . StartStructuredLogging ( 0 )
2019-08-30 18:33:25 +00:00
broadcaster . StartRecordingToSink ( & v1core . EventSinkImpl { Interface : kubeClient . CoreV1 ( ) . Events ( "" ) } )
recorder := broadcaster . NewRecorder ( scheme . Scheme , v1 . EventSource { Component : "service-controller" } )
if kubeClient != nil && kubeClient . CoreV1 ( ) . RESTClient ( ) . GetRateLimiter ( ) != nil {
2021-03-18 22:40:29 +00:00
if err := ratelimiter . RegisterMetricAndTrackRateLimiterUsage ( subSystemName , kubeClient . CoreV1 ( ) . RESTClient ( ) . GetRateLimiter ( ) ) ; err != nil {
2019-08-30 18:33:25 +00:00
return nil , err
}
}
2021-03-18 22:40:29 +00:00
registerMetrics ( )
2019-12-12 01:27:03 +00:00
s := & Controller {
2021-03-18 22:40:29 +00:00
cloud : cloud ,
knownHosts : [ ] * v1 . Node { } ,
kubeClient : kubeClient ,
clusterName : clusterName ,
cache : & serviceCache { serviceMap : make ( map [ string ] * cachedService ) } ,
eventBroadcaster : broadcaster ,
eventRecorder : recorder ,
nodeLister : nodeInformer . Lister ( ) ,
nodeListerSynced : nodeInformer . Informer ( ) . HasSynced ,
queue : workqueue . NewNamedRateLimitingQueue ( workqueue . NewItemExponentialFailureRateLimiter ( minRetryDelay , maxRetryDelay ) , "service" ) ,
// nodeSyncCh has a size 1 buffer. Only one pending sync signal would be cached.
nodeSyncCh : make ( chan interface { } , 1 ) ,
2019-08-30 18:33:25 +00:00
}
serviceInformer . Informer ( ) . AddEventHandlerWithResyncPeriod (
cache . ResourceEventHandlerFuncs {
AddFunc : func ( cur interface { } ) {
svc , ok := cur . ( * v1 . Service )
2019-09-27 21:51:53 +00:00
// Check cleanup here can provide a remedy when controller failed to handle
// changes before it exiting (e.g. crashing, restart, etc.).
2019-08-30 18:33:25 +00:00
if ok && ( wantsLoadBalancer ( svc ) || needsCleanup ( svc ) ) {
s . enqueueService ( cur )
}
} ,
UpdateFunc : func ( old , cur interface { } ) {
oldSvc , ok1 := old . ( * v1 . Service )
curSvc , ok2 := cur . ( * v1 . Service )
if ok1 && ok2 && ( s . needsUpdate ( oldSvc , curSvc ) || needsCleanup ( curSvc ) ) {
s . enqueueService ( cur )
}
} ,
2019-12-12 01:27:03 +00:00
// No need to handle deletion event because the deletion would be handled by
// the update path when the deletion timestamp is added.
2019-08-30 18:33:25 +00:00
} ,
serviceSyncPeriod ,
)
s . serviceLister = serviceInformer . Lister ( )
s . serviceListerSynced = serviceInformer . Informer ( ) . HasSynced
2020-08-10 17:43:49 +00:00
nodeInformer . Informer ( ) . AddEventHandlerWithResyncPeriod (
cache . ResourceEventHandlerFuncs {
AddFunc : func ( cur interface { } ) {
2021-03-18 22:40:29 +00:00
s . triggerNodeSync ( )
2020-08-10 17:43:49 +00:00
} ,
UpdateFunc : func ( old , cur interface { } ) {
oldNode , ok := old . ( * v1 . Node )
if ! ok {
return
}
curNode , ok := cur . ( * v1 . Node )
if ! ok {
return
}
if ! shouldSyncNode ( oldNode , curNode ) {
return
}
2021-03-18 22:40:29 +00:00
s . triggerNodeSync ( )
2020-08-10 17:43:49 +00:00
} ,
DeleteFunc : func ( old interface { } ) {
2021-03-18 22:40:29 +00:00
s . triggerNodeSync ( )
2020-08-10 17:43:49 +00:00
} ,
} ,
time . Duration ( 0 ) ,
)
2019-08-30 18:33:25 +00:00
if err := s . init ( ) ; err != nil {
return nil , err
}
2020-08-10 17:43:49 +00:00
2019-08-30 18:33:25 +00:00
return s , nil
}
2021-03-18 22:40:29 +00:00
// needFullSyncAndUnmark returns the current value of needFullSync and resets
// the field to false, holding nodeSyncLock for the whole read-and-clear.
func (s *Controller) needFullSyncAndUnmark() bool {
	s.nodeSyncLock.Lock()
	defer s.nodeSyncLock.Unlock()

	pending := s.needFullSync
	s.needFullSync = false
	return pending
}
2019-08-30 18:33:25 +00:00
// obj could be an *v1.Service, or a DeletionFinalStateUnknown marker item.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) enqueueService ( obj interface { } ) {
2019-09-27 21:51:53 +00:00
key , err := cache . DeletionHandlingMetaNamespaceKeyFunc ( obj )
2019-08-30 18:33:25 +00:00
if err != nil {
runtime . HandleError ( fmt . Errorf ( "couldn't get key for object %#v: %v" , obj , err ) )
return
}
s . queue . Add ( key )
}
// Run starts a background goroutine that watches for changes to services that
// have (or had) LoadBalancers=true and ensures that they have
// load balancers created and deleted appropriately.
// serviceSyncPeriod controls how often we check the cluster's services to
// ensure that the correct load balancers exist.
// nodeSyncPeriod controls how often we check the cluster's nodes to determine
// if load balancers need to be updated to point to a new set.
//
// It's an error to call Run() more than once for a given ServiceController
// object.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) Run ( stopCh <- chan struct { } , workers int ) {
2019-08-30 18:33:25 +00:00
defer runtime . HandleCrash ( )
defer s . queue . ShutDown ( )
klog . Info ( "Starting service controller" )
defer klog . Info ( "Shutting down service controller" )
2019-09-27 21:51:53 +00:00
if ! cache . WaitForNamedCacheSync ( "service" , stopCh , s . serviceListerSynced , s . nodeListerSynced ) {
2019-08-30 18:33:25 +00:00
return
}
for i := 0 ; i < workers ; i ++ {
go wait . Until ( s . worker , time . Second , stopCh )
}
2021-03-18 22:40:29 +00:00
go s . nodeSyncLoop ( workers )
go wait . Until ( s . triggerNodeSync , nodeSyncPeriod , stopCh )
2019-08-30 18:33:25 +00:00
<- stopCh
}
2021-03-18 22:40:29 +00:00
// triggerNodeSync triggers a nodeSync asynchronously. While holding
// nodeSyncLock it re-lists the eligible nodes, marks a full sync as needed
// when the listing fails or the node set differs from knownHosts, and then
// posts a signal on nodeSyncCh without blocking.
func (s *Controller) triggerNodeSync() {
	s.nodeSyncLock.Lock()
	defer s.nodeSyncLock.Unlock()

	newHosts, err := listWithPredicate(s.nodeLister, s.getNodeConditionPredicate())
	switch {
	case err != nil:
		runtime.HandleError(fmt.Errorf("Failed to retrieve current set of nodes from node lister: %v", err))
		// The node list could not be retrieved; request a full node sync to be safe.
		s.needFullSync = true
	case !nodeSlicesEqualForLB(newHosts, s.knownHosts):
		// Only the last known state is recorded in knownHosts. Each LB update
		// retrieves the latest node list itself, which prevents a stale set of
		// nodes from being used when there are many load balancers in the
		// cluster. nodeSyncInternal keeps being triggered until all load
		// balancers are updated to the new state.
		klog.V(2).Infof("Node changes detected, triggering a full node sync on all loadbalancer services")
		s.needFullSync = true
		s.knownHosts = newHosts
	}

	// Non-blocking send: the channel buffers a single pending signal.
	select {
	case s.nodeSyncCh <- struct{}{}:
		klog.V(4).Info("Triggering nodeSync")
	default:
		klog.V(4).Info("A pending nodeSync is already in queue")
	}
}
2019-08-30 18:33:25 +00:00
// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) worker ( ) {
2019-08-30 18:33:25 +00:00
for s . processNextWorkItem ( ) {
}
}
2021-03-18 22:40:29 +00:00
// nodeSyncLoop runs nodeSyncInternal once for every signal received on
// nodeSyncCh and returns when that channel is closed.
func (s *Controller) nodeSyncLoop(workers int) {
	klog.V(4).Info("nodeSyncLoop Started")
	signals := s.nodeSyncCh
	for {
		if _, open := <-signals; !open {
			break
		}
		klog.V(4).Info("nodeSync has been triggered")
		s.nodeSyncInternal(workers)
	}
	klog.V(2).Info("s.nodeSyncCh is closed. Exiting nodeSyncLoop")
}
2019-12-12 01:27:03 +00:00
func ( s * Controller ) processNextWorkItem ( ) bool {
2019-08-30 18:33:25 +00:00
key , quit := s . queue . Get ( )
if quit {
return false
}
defer s . queue . Done ( key )
err := s . syncService ( key . ( string ) )
if err == nil {
s . queue . Forget ( key )
return true
}
runtime . HandleError ( fmt . Errorf ( "error processing service %v (will retry): %v" , key , err ) )
s . queue . AddRateLimited ( key )
return true
}
2019-12-12 01:27:03 +00:00
func ( s * Controller ) init ( ) error {
2019-08-30 18:33:25 +00:00
if s . cloud == nil {
return fmt . Errorf ( "WARNING: no cloud provider provided, services of type LoadBalancer will fail" )
}
balancer , ok := s . cloud . LoadBalancer ( )
if ! ok {
return fmt . Errorf ( "the cloud provider does not support external load balancers" )
}
s . balancer = balancer
return nil
}
// processServiceCreateOrUpdate operates loadbalancers for the incoming service accordingly.
// Returns an error if processing the service update failed.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) processServiceCreateOrUpdate ( service * v1 . Service , key string ) error {
2019-08-30 18:33:25 +00:00
// TODO(@MrHohn): Remove the cache once we get rid of the non-finalizer deletion
// path. Ref https://github.com/kubernetes/enhancements/issues/980.
cachedService := s . cache . getOrCreate ( key )
if cachedService . state != nil && cachedService . state . UID != service . UID {
// This happens only when a service is deleted and re-created
// in a short period, which is only possible when it doesn't
// contain finalizer.
if err := s . processLoadBalancerDelete ( cachedService . state , key ) ; err != nil {
return err
}
}
// Always cache the service, we need the info for service deletion in case
// when load balancer cleanup is not handled via finalizer.
cachedService . state = service
op , err := s . syncLoadBalancerIfNeeded ( service , key )
if err != nil {
s . eventRecorder . Eventf ( service , v1 . EventTypeWarning , "SyncLoadBalancerFailed" , "Error syncing load balancer: %v" , err )
return err
}
if op == deleteLoadBalancer {
// Only delete the cache upon successful load balancer deletion.
s . cache . delete ( key )
}
return nil
}
// loadBalancerOperation names the action syncLoadBalancerIfNeeded took (or
// attempted) for a service.
type loadBalancerOperation int
const (
// deleteLoadBalancer: the load balancer was (to be) removed.
deleteLoadBalancer loadBalancerOperation = iota
// ensureLoadBalancer: the load balancer was (to be) created or updated.
ensureLoadBalancer
)
// syncLoadBalancerIfNeeded ensures that service's status is synced up with loadbalancer
// i.e. creates loadbalancer for service if requested and deletes loadbalancer if the service
// doesn't want a loadbalancer no more. Returns whatever error occurred.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) syncLoadBalancerIfNeeded ( service * v1 . Service , key string ) ( loadBalancerOperation , error ) {
2019-08-30 18:33:25 +00:00
// Note: It is safe to just call EnsureLoadBalancer. But, on some clouds that requires a delete & create,
// which may involve service interruption. Also, we would like user-friendly events.
// Save the state so we can avoid a write if it doesn't change
2019-12-12 01:27:03 +00:00
previousStatus := service . Status . LoadBalancer . DeepCopy ( )
2019-08-30 18:33:25 +00:00
var newStatus * v1 . LoadBalancerStatus
var op loadBalancerOperation
var err error
if ! wantsLoadBalancer ( service ) || needsCleanup ( service ) {
// Delete the load balancer if service no longer wants one, or if service needs cleanup.
op = deleteLoadBalancer
newStatus = & v1 . LoadBalancerStatus { }
_ , exists , err := s . balancer . GetLoadBalancer ( context . TODO ( ) , s . clusterName , service )
if err != nil {
return op , fmt . Errorf ( "failed to check if load balancer exists before cleanup: %v" , err )
}
if exists {
klog . V ( 2 ) . Infof ( "Deleting existing load balancer for service %s" , key )
s . eventRecorder . Event ( service , v1 . EventTypeNormal , "DeletingLoadBalancer" , "Deleting load balancer" )
if err := s . balancer . EnsureLoadBalancerDeleted ( context . TODO ( ) , s . clusterName , service ) ; err != nil {
return op , fmt . Errorf ( "failed to delete load balancer: %v" , err )
}
}
2019-12-12 01:27:03 +00:00
// Always remove finalizer when load balancer is deleted, this ensures Services
// can be deleted after all corresponding load balancer resources are deleted.
2019-08-30 18:33:25 +00:00
if err := s . removeFinalizer ( service ) ; err != nil {
return op , fmt . Errorf ( "failed to remove load balancer cleanup finalizer: %v" , err )
}
s . eventRecorder . Event ( service , v1 . EventTypeNormal , "DeletedLoadBalancer" , "Deleted load balancer" )
} else {
// Create or update the load balancer if service wants one.
op = ensureLoadBalancer
klog . V ( 2 ) . Infof ( "Ensuring load balancer for service %s" , key )
s . eventRecorder . Event ( service , v1 . EventTypeNormal , "EnsuringLoadBalancer" , "Ensuring load balancer" )
2019-12-12 01:27:03 +00:00
// Always add a finalizer prior to creating load balancers, this ensures Services
// can't be deleted until all corresponding load balancer resources are also deleted.
if err := s . addFinalizer ( service ) ; err != nil {
return op , fmt . Errorf ( "failed to add load balancer cleanup finalizer: %v" , err )
2019-08-30 18:33:25 +00:00
}
newStatus , err = s . ensureLoadBalancer ( service )
if err != nil {
2019-09-27 21:51:53 +00:00
if err == cloudprovider . ImplementedElsewhere {
// ImplementedElsewhere indicates that the ensureLoadBalancer is a nop and the
// functionality is implemented by a different controller. In this case, we
// return immediately without doing anything.
klog . V ( 4 ) . Infof ( "LoadBalancer for service %s implemented by a different controller %s, Ignoring error" , key , s . cloud . ProviderName ( ) )
return op , nil
}
2019-08-30 18:33:25 +00:00
return op , fmt . Errorf ( "failed to ensure load balancer: %v" , err )
}
2020-03-26 21:07:15 +00:00
if newStatus == nil {
return op , fmt . Errorf ( "service status returned by EnsureLoadBalancer is nil" )
}
2019-08-30 18:33:25 +00:00
s . eventRecorder . Event ( service , v1 . EventTypeNormal , "EnsuredLoadBalancer" , "Ensured load balancer" )
}
if err := s . patchStatus ( service , previousStatus , newStatus ) ; err != nil {
// Only retry error that isn't not found:
// - Not found error mostly happens when service disappears right after
// we remove the finalizer.
// - We can't patch status on non-exist service anyway.
if ! errors . IsNotFound ( err ) {
return op , fmt . Errorf ( "failed to update load balancer status: %v" , err )
}
}
return op , nil
}
2019-12-12 01:27:03 +00:00
func ( s * Controller ) ensureLoadBalancer ( service * v1 . Service ) ( * v1 . LoadBalancerStatus , error ) {
2020-08-10 17:43:49 +00:00
nodes , err := listWithPredicate ( s . nodeLister , s . getNodeConditionPredicate ( ) )
2019-08-30 18:33:25 +00:00
if err != nil {
return nil , err
}
// If there are no available nodes for LoadBalancer service, make a EventTypeWarning event for it.
if len ( nodes ) == 0 {
s . eventRecorder . Event ( service , v1 . EventTypeWarning , "UnAvailableLoadBalancer" , "There are no available nodes for LoadBalancer" )
}
// - Only one protocol supported per service
// - Not all cloud providers support all protocols and the next step is expected to return
// an error for unsupported protocols
return s . balancer . EnsureLoadBalancer ( context . TODO ( ) , s . clusterName , service , nodes )
}
// ListKeys implements the interface required by DeltaFIFO to list the keys we
// already know about.
func ( s * serviceCache ) ListKeys ( ) [ ] string {
2019-09-27 21:51:53 +00:00
s . mu . RLock ( )
defer s . mu . RUnlock ( )
2019-08-30 18:33:25 +00:00
keys := make ( [ ] string , 0 , len ( s . serviceMap ) )
for k := range s . serviceMap {
keys = append ( keys , k )
}
return keys
}
// GetByKey returns the value stored in the serviceMap under the given key
func ( s * serviceCache ) GetByKey ( key string ) ( interface { } , bool , error ) {
2019-09-27 21:51:53 +00:00
s . mu . RLock ( )
defer s . mu . RUnlock ( )
2019-08-30 18:33:25 +00:00
if v , ok := s . serviceMap [ key ] ; ok {
return v , true , nil
}
return nil , false , nil
}
// ListKeys implements the interface required by DeltaFIFO to list the keys we
// already know about.
func ( s * serviceCache ) allServices ( ) [ ] * v1 . Service {
2019-09-27 21:51:53 +00:00
s . mu . RLock ( )
defer s . mu . RUnlock ( )
2019-08-30 18:33:25 +00:00
services := make ( [ ] * v1 . Service , 0 , len ( s . serviceMap ) )
for _ , v := range s . serviceMap {
services = append ( services , v . state )
}
return services
}
func ( s * serviceCache ) get ( serviceName string ) ( * cachedService , bool ) {
2019-09-27 21:51:53 +00:00
s . mu . RLock ( )
defer s . mu . RUnlock ( )
2019-08-30 18:33:25 +00:00
service , ok := s . serviceMap [ serviceName ]
return service , ok
}
// getOrCreate returns the cache entry for serviceName, inserting an empty
// entry (nil state) first when none exists. Runs under the write lock.
func (s *serviceCache) getOrCreate(serviceName string) *cachedService {
	s.mu.Lock()
	defer s.mu.Unlock()

	if existing, found := s.serviceMap[serviceName]; found {
		return existing
	}
	created := &cachedService{}
	s.serviceMap[serviceName] = created
	return created
}
// set stores service as the cache entry for serviceName, replacing any
// previous entry. Runs under the write lock.
func (s *serviceCache) set(serviceName string, service *cachedService) {
	s.mu.Lock()
	s.serviceMap[serviceName] = service
	s.mu.Unlock()
}
// delete removes the cache entry for serviceName, if any. Runs under the
// write lock.
func (s *serviceCache) delete(serviceName string) {
	s.mu.Lock()
	delete(s.serviceMap, serviceName)
	s.mu.Unlock()
}
// needsCleanup checks if load balancer needs to be cleaned up as indicated by finalizer.
func needsCleanup ( service * v1 . Service ) bool {
2019-09-27 21:51:53 +00:00
if ! servicehelper . HasLBFinalizer ( service ) {
return false
}
if service . ObjectMeta . DeletionTimestamp != nil {
return true
}
// Service doesn't want loadBalancer but owns loadBalancer finalizer also need to be cleaned up.
if service . Spec . Type != v1 . ServiceTypeLoadBalancer {
return true
}
return false
2019-08-30 18:33:25 +00:00
}
// needsUpdate checks if load balancer needs to be updated due to change in attributes.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) needsUpdate ( oldService * v1 . Service , newService * v1 . Service ) bool {
2019-08-30 18:33:25 +00:00
if ! wantsLoadBalancer ( oldService ) && ! wantsLoadBalancer ( newService ) {
return false
}
if wantsLoadBalancer ( oldService ) != wantsLoadBalancer ( newService ) {
s . eventRecorder . Eventf ( newService , v1 . EventTypeNormal , "Type" , "%v -> %v" ,
oldService . Spec . Type , newService . Spec . Type )
return true
}
if wantsLoadBalancer ( newService ) && ! reflect . DeepEqual ( oldService . Spec . LoadBalancerSourceRanges , newService . Spec . LoadBalancerSourceRanges ) {
s . eventRecorder . Eventf ( newService , v1 . EventTypeNormal , "LoadBalancerSourceRanges" , "%v -> %v" ,
oldService . Spec . LoadBalancerSourceRanges , newService . Spec . LoadBalancerSourceRanges )
return true
}
if ! portsEqualForLB ( oldService , newService ) || oldService . Spec . SessionAffinity != newService . Spec . SessionAffinity {
return true
}
2019-09-27 21:51:53 +00:00
if ! reflect . DeepEqual ( oldService . Spec . SessionAffinityConfig , newService . Spec . SessionAffinityConfig ) {
return true
}
2019-08-30 18:33:25 +00:00
if ! loadBalancerIPsAreEqual ( oldService , newService ) {
s . eventRecorder . Eventf ( newService , v1 . EventTypeNormal , "LoadbalancerIP" , "%v -> %v" ,
oldService . Spec . LoadBalancerIP , newService . Spec . LoadBalancerIP )
return true
}
if len ( oldService . Spec . ExternalIPs ) != len ( newService . Spec . ExternalIPs ) {
s . eventRecorder . Eventf ( newService , v1 . EventTypeNormal , "ExternalIP" , "Count: %v -> %v" ,
len ( oldService . Spec . ExternalIPs ) , len ( newService . Spec . ExternalIPs ) )
return true
}
for i := range oldService . Spec . ExternalIPs {
if oldService . Spec . ExternalIPs [ i ] != newService . Spec . ExternalIPs [ i ] {
s . eventRecorder . Eventf ( newService , v1 . EventTypeNormal , "ExternalIP" , "Added: %v" ,
newService . Spec . ExternalIPs [ i ] )
return true
}
}
if ! reflect . DeepEqual ( oldService . Annotations , newService . Annotations ) {
return true
}
if oldService . UID != newService . UID {
s . eventRecorder . Eventf ( newService , v1 . EventTypeNormal , "UID" , "%v -> %v" ,
oldService . UID , newService . UID )
return true
}
if oldService . Spec . ExternalTrafficPolicy != newService . Spec . ExternalTrafficPolicy {
s . eventRecorder . Eventf ( newService , v1 . EventTypeNormal , "ExternalTrafficPolicy" , "%v -> %v" ,
oldService . Spec . ExternalTrafficPolicy , newService . Spec . ExternalTrafficPolicy )
return true
}
if oldService . Spec . HealthCheckNodePort != newService . Spec . HealthCheckNodePort {
s . eventRecorder . Eventf ( newService , v1 . EventTypeNormal , "HealthCheckNodePort" , "%v -> %v" ,
oldService . Spec . HealthCheckNodePort , newService . Spec . HealthCheckNodePort )
return true
}
return false
}
2019-12-12 01:27:03 +00:00
func getPortsForLB ( service * v1 . Service ) [ ] * v1 . ServicePort {
2019-08-30 18:33:25 +00:00
ports := [ ] * v1 . ServicePort { }
for i := range service . Spec . Ports {
sp := & service . Spec . Ports [ i ]
ports = append ( ports , sp )
}
2019-12-12 01:27:03 +00:00
return ports
2019-08-30 18:33:25 +00:00
}
func portsEqualForLB ( x , y * v1 . Service ) bool {
2019-12-12 01:27:03 +00:00
xPorts := getPortsForLB ( x )
yPorts := getPortsForLB ( y )
2019-08-30 18:33:25 +00:00
return portSlicesEqualForLB ( xPorts , yPorts )
}
// portSlicesEqualForLB reports whether x and y have the same length and every
// port at the same index is equal per portEqualForLB. Order matters.
func portSlicesEqualForLB(x, y []*v1.ServicePort) bool {
	if len(x) != len(y) {
		return false
	}
	for i, xPort := range x {
		if !portEqualForLB(xPort, y[i]) {
			return false
		}
	}
	return true
}
func portEqualForLB ( x , y * v1 . ServicePort ) bool {
// TODO: Should we check name? (In theory, an LB could expose it)
if x . Name != y . Name {
return false
}
if x . Protocol != y . Protocol {
return false
}
if x . Port != y . Port {
return false
}
if x . NodePort != y . NodePort {
return false
}
2019-09-27 21:51:53 +00:00
if x . TargetPort != y . TargetPort {
return false
}
2019-08-30 18:33:25 +00:00
return true
}
// nodeNames collects the Name of every node in nodes into a string set.
func nodeNames(nodes []*v1.Node) sets.String {
	names := sets.NewString()
	for i := range nodes {
		names.Insert(nodes[i].Name)
	}
	return names
}
// nodeSlicesEqualForLB reports whether x and y have the same length and the
// same set of node names. Only names are compared.
func nodeSlicesEqualForLB(x, y []*v1.Node) bool {
	return len(x) == len(y) && nodeNames(x).Equal(nodeNames(y))
}
2020-08-10 17:43:49 +00:00
func ( s * Controller ) getNodeConditionPredicate ( ) NodeConditionPredicate {
2019-08-30 18:33:25 +00:00
return func ( node * v1 . Node ) bool {
2021-03-18 22:40:29 +00:00
if _ , hasExcludeBalancerLabel := node . Labels [ v1 . LabelNodeExcludeBalancers ] ; hasExcludeBalancerLabel {
return false
2019-08-30 18:33:25 +00:00
}
// If we have no info, don't accept
if len ( node . Status . Conditions ) == 0 {
return false
}
for _ , cond := range node . Status . Conditions {
// We consider the node for load balancing only when its NodeReady condition status
// is ConditionTrue
if cond . Type == v1 . NodeReady && cond . Status != v1 . ConditionTrue {
klog . V ( 4 ) . Infof ( "Ignoring node %v with %v condition status %v" , node . Name , cond . Type , cond . Status )
return false
}
}
return true
}
}
2020-08-10 17:43:49 +00:00
// shouldSyncNode reports whether a node update warrants a node sync: the
// Unschedulable flag changed, the labels changed, or the NodeReady condition
// status changed.
func shouldSyncNode(oldNode, newNode *v1.Node) bool {
	switch {
	case oldNode.Spec.Unschedulable != newNode.Spec.Unschedulable:
		return true
	case !reflect.DeepEqual(oldNode.Labels, newNode.Labels):
		return true
	default:
		return nodeReadyConditionStatus(oldNode) != nodeReadyConditionStatus(newNode)
	}
}
// nodeReadyConditionStatus returns the status of the first NodeReady
// condition on node, or the empty string when the node has none.
func nodeReadyConditionStatus(node *v1.Node) v1.ConditionStatus {
	for i := range node.Status.Conditions {
		if cond := node.Status.Conditions[i]; cond.Type == v1.NodeReady {
			return cond.Status
		}
	}
	return ""
}
2021-03-18 22:40:29 +00:00
// nodeSyncInternal handles updating the hosts pointed to by all load
2019-08-30 18:33:25 +00:00
// balancers whenever the set of nodes in the cluster changes.
2021-03-18 22:40:29 +00:00
func ( s * Controller ) nodeSyncInternal ( workers int ) {
startTime := time . Now ( )
defer func ( ) {
latency := time . Now ( ) . Sub ( startTime ) . Seconds ( )
klog . V ( 4 ) . Infof ( "It took %v seconds to finish nodeSyncInternal" , latency )
nodeSyncLatency . Observe ( latency )
} ( )
if ! s . needFullSyncAndUnmark ( ) {
2019-08-30 18:33:25 +00:00
// The set of nodes in the cluster hasn't changed, but we can retry
// updating any services that we failed to update last time around.
2021-03-18 22:40:29 +00:00
s . servicesToUpdate = s . updateLoadBalancerHosts ( s . servicesToUpdate , workers )
2019-08-30 18:33:25 +00:00
return
}
2021-03-18 22:40:29 +00:00
klog . V ( 2 ) . Infof ( "Syncing backends for all LB services." )
2019-08-30 18:33:25 +00:00
// Try updating all services, and save the ones that fail to try again next
// round.
s . servicesToUpdate = s . cache . allServices ( )
numServices := len ( s . servicesToUpdate )
2021-03-18 22:40:29 +00:00
s . servicesToUpdate = s . updateLoadBalancerHosts ( s . servicesToUpdate , workers )
2019-08-30 18:33:25 +00:00
klog . V ( 2 ) . Infof ( "Successfully updated %d out of %d load balancers to direct traffic to the updated set of nodes" ,
numServices - len ( s . servicesToUpdate ) , numServices )
2021-03-18 22:40:29 +00:00
}
// nodeSyncService syncs the nodes for one load balancer type service
func ( s * Controller ) nodeSyncService ( svc * v1 . Service ) bool {
if svc == nil || ! wantsLoadBalancer ( svc ) {
return false
}
klog . V ( 4 ) . Infof ( "nodeSyncService started for service %s/%s" , svc . Namespace , svc . Name )
hosts , err := listWithPredicate ( s . nodeLister , s . getNodeConditionPredicate ( ) )
if err != nil {
runtime . HandleError ( fmt . Errorf ( "failed to retrieve node list: %v" , err ) )
return true
}
2019-08-30 18:33:25 +00:00
2021-03-18 22:40:29 +00:00
if err := s . lockedUpdateLoadBalancerHosts ( svc , hosts ) ; err != nil {
runtime . HandleError ( fmt . Errorf ( "failed to update load balancer hosts for service %s/%s: %v" , svc . Namespace , svc . Name , err ) )
return true
}
klog . V ( 4 ) . Infof ( "nodeSyncService finished successfully for service %s/%s" , svc . Namespace , svc . Name )
return false
2019-08-30 18:33:25 +00:00
}
// updateLoadBalancerHosts updates all existing load balancers so that
2021-03-18 22:40:29 +00:00
// they will match the latest list of nodes with input number of workers.
2019-08-30 18:33:25 +00:00
// Returns the list of services that couldn't be updated.
2021-03-18 22:40:29 +00:00
func ( s * Controller ) updateLoadBalancerHosts ( services [ ] * v1 . Service , workers int ) ( servicesToRetry [ ] * v1 . Service ) {
klog . V ( 4 ) . Infof ( "Running updateLoadBalancerHosts(len(services)==%d, workers==%d)" , len ( services ) , workers )
// lock for servicesToRetry
lock := sync . Mutex { }
doWork := func ( piece int ) {
if shouldRetry := s . nodeSyncService ( services [ piece ] ) ; ! shouldRetry {
return
}
lock . Lock ( )
defer lock . Unlock ( )
servicesToRetry = append ( servicesToRetry , services [ piece ] )
2019-08-30 18:33:25 +00:00
}
2021-03-18 22:40:29 +00:00
workqueue . ParallelizeUntil ( context . TODO ( ) , workers , len ( services ) , doWork )
klog . V ( 4 ) . Infof ( "Finished updateLoadBalancerHosts" )
2019-08-30 18:33:25 +00:00
return servicesToRetry
}
// Updates the load balancer of a service, assuming we hold the mutex
// associated with the service.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) lockedUpdateLoadBalancerHosts ( service * v1 . Service , hosts [ ] * v1 . Node ) error {
2021-03-18 22:40:29 +00:00
startTime := time . Now ( )
defer func ( ) {
latency := time . Now ( ) . Sub ( startTime ) . Seconds ( )
klog . V ( 4 ) . Infof ( "It took %v seconds to update load balancer hosts for service %s/%s" , latency , service . Namespace , service . Name )
updateLoadBalancerHostLatency . Observe ( latency )
} ( )
2019-08-30 18:33:25 +00:00
2021-03-18 22:40:29 +00:00
klog . V ( 2 ) . Infof ( "Updating backends for load balancer %s/%s with node set: %v" , service . Namespace , service . Name , nodeNames ( hosts ) )
2019-08-30 18:33:25 +00:00
// This operation doesn't normally take very long (and happens pretty often), so we only record the final event
err := s . balancer . UpdateLoadBalancer ( context . TODO ( ) , s . clusterName , service , hosts )
if err == nil {
// If there are no available nodes for LoadBalancer service, make a EventTypeWarning event for it.
if len ( hosts ) == 0 {
s . eventRecorder . Event ( service , v1 . EventTypeWarning , "UnAvailableLoadBalancer" , "There are no available nodes for LoadBalancer" )
} else {
s . eventRecorder . Event ( service , v1 . EventTypeNormal , "UpdatedLoadBalancer" , "Updated load balancer with new hosts" )
}
return nil
}
2019-09-27 21:51:53 +00:00
if err == cloudprovider . ImplementedElsewhere {
// ImplementedElsewhere indicates that the UpdateLoadBalancer is a nop and the
// functionality is implemented by a different controller. In this case, we
// return immediately without doing anything.
return nil
}
2019-08-30 18:33:25 +00:00
// It's only an actual error if the load balancer still exists.
if _ , exists , err := s . balancer . GetLoadBalancer ( context . TODO ( ) , s . clusterName , service ) ; err != nil {
runtime . HandleError ( fmt . Errorf ( "failed to check if load balancer exists for service %s/%s: %v" , service . Namespace , service . Name , err ) )
} else if ! exists {
return nil
}
s . eventRecorder . Eventf ( service , v1 . EventTypeWarning , "UpdateLoadBalancerFailed" , "Error updating load balancer with new hosts %v: %v" , nodeNames ( hosts ) , err )
return err
}
func wantsLoadBalancer ( service * v1 . Service ) bool {
2021-03-18 22:40:29 +00:00
// if LoadBalancerClass is set, the user does not want the default cloud-provider Load Balancer
return service . Spec . Type == v1 . ServiceTypeLoadBalancer && service . Spec . LoadBalancerClass == nil
2019-08-30 18:33:25 +00:00
}
// loadBalancerIPsAreEqual reports whether both services carry the same
// Spec.LoadBalancerIP value.
func loadBalancerIPsAreEqual(oldService, newService *v1.Service) bool {
	oldIP, newIP := oldService.Spec.LoadBalancerIP, newService.Spec.LoadBalancerIP
	return oldIP == newIP
}
// syncService will sync the Service with the given key if it has had its expectations fulfilled,
// meaning it did not expect to see any more of its pods created or deleted. This function is not meant to be
// invoked concurrently with the same key.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) syncService ( key string ) error {
2019-08-30 18:33:25 +00:00
startTime := time . Now ( )
defer func ( ) {
klog . V ( 4 ) . Infof ( "Finished syncing service %q (%v)" , key , time . Since ( startTime ) )
} ( )
namespace , name , err := cache . SplitMetaNamespaceKey ( key )
if err != nil {
return err
}
// service holds the latest service info from apiserver
service , err := s . serviceLister . Services ( namespace ) . Get ( name )
switch {
case errors . IsNotFound ( err ) :
// service absence in store means watcher caught the deletion, ensure LB info is cleaned
err = s . processServiceDeletion ( key )
case err != nil :
runtime . HandleError ( fmt . Errorf ( "Unable to retrieve service %v from store: %v" , key , err ) )
default :
err = s . processServiceCreateOrUpdate ( service , key )
}
return err
}
2019-12-12 01:27:03 +00:00
func ( s * Controller ) processServiceDeletion ( key string ) error {
2019-08-30 18:33:25 +00:00
cachedService , ok := s . cache . get ( key )
if ! ok {
// Cache does not contains the key means:
// - We didn't create a Load Balancer for the deleted service at all.
// - We already deleted the Load Balancer that was created for the service.
// In both cases we have nothing left to do.
return nil
}
klog . V ( 2 ) . Infof ( "Service %v has been deleted. Attempting to cleanup load balancer resources" , key )
if err := s . processLoadBalancerDelete ( cachedService . state , key ) ; err != nil {
return err
}
s . cache . delete ( key )
return nil
}
2019-12-12 01:27:03 +00:00
func ( s * Controller ) processLoadBalancerDelete ( service * v1 . Service , key string ) error {
2019-08-30 18:33:25 +00:00
// delete load balancer info only if the service type is LoadBalancer
if ! wantsLoadBalancer ( service ) {
return nil
}
s . eventRecorder . Event ( service , v1 . EventTypeNormal , "DeletingLoadBalancer" , "Deleting load balancer" )
if err := s . balancer . EnsureLoadBalancerDeleted ( context . TODO ( ) , s . clusterName , service ) ; err != nil {
s . eventRecorder . Eventf ( service , v1 . EventTypeWarning , "DeleteLoadBalancerFailed" , "Error deleting load balancer: %v" , err )
return err
}
s . eventRecorder . Event ( service , v1 . EventTypeNormal , "DeletedLoadBalancer" , "Deleted load balancer" )
return nil
}
// addFinalizer patches the service to add finalizer.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) addFinalizer ( service * v1 . Service ) error {
2019-08-30 18:33:25 +00:00
if servicehelper . HasLBFinalizer ( service ) {
return nil
}
// Make a copy so we don't mutate the shared informer cache.
updated := service . DeepCopy ( )
updated . ObjectMeta . Finalizers = append ( updated . ObjectMeta . Finalizers , servicehelper . LoadBalancerCleanupFinalizer )
klog . V ( 2 ) . Infof ( "Adding finalizer to service %s/%s" , updated . Namespace , updated . Name )
2020-03-26 21:07:15 +00:00
_ , err := servicehelper . PatchService ( s . kubeClient . CoreV1 ( ) , service , updated )
2019-08-30 18:33:25 +00:00
return err
}
// removeFinalizer patches the service to remove finalizer.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) removeFinalizer ( service * v1 . Service ) error {
2019-08-30 18:33:25 +00:00
if ! servicehelper . HasLBFinalizer ( service ) {
return nil
}
// Make a copy so we don't mutate the shared informer cache.
updated := service . DeepCopy ( )
2019-09-27 21:51:53 +00:00
updated . ObjectMeta . Finalizers = removeString ( updated . ObjectMeta . Finalizers , servicehelper . LoadBalancerCleanupFinalizer )
2019-08-30 18:33:25 +00:00
klog . V ( 2 ) . Infof ( "Removing finalizer from service %s/%s" , updated . Namespace , updated . Name )
2020-03-26 21:07:15 +00:00
_ , err := servicehelper . PatchService ( s . kubeClient . CoreV1 ( ) , service , updated )
2019-08-30 18:33:25 +00:00
return err
}
2019-09-27 21:51:53 +00:00
// removeString builds a fresh []string holding every element of slice that
// differs from s, in the original order. The input slice is not modified. The
// result is nil when no element is kept.
func removeString(slice []string, s string) []string {
	var kept []string
	for i := range slice {
		if slice[i] == s {
			continue
		}
		kept = append(kept, slice[i])
	}
	return kept
}
2019-08-30 18:33:25 +00:00
// patchStatus patches the service with the given LoadBalancerStatus.
2019-12-12 01:27:03 +00:00
func ( s * Controller ) patchStatus ( service * v1 . Service , previousStatus , newStatus * v1 . LoadBalancerStatus ) error {
if servicehelper . LoadBalancerStatusEqual ( previousStatus , newStatus ) {
2019-08-30 18:33:25 +00:00
return nil
}
// Make a copy so we don't mutate the shared informer cache.
updated := service . DeepCopy ( )
updated . Status . LoadBalancer = * newStatus
klog . V ( 2 ) . Infof ( "Patching status for service %s/%s" , updated . Namespace , updated . Name )
2020-03-26 21:07:15 +00:00
_ , err := servicehelper . PatchService ( s . kubeClient . CoreV1 ( ) , service , updated )
2019-08-30 18:33:25 +00:00
return err
}
2020-03-26 21:07:15 +00:00
// NodeConditionPredicate is the signature of a node filter: it receives a node
// and returns true when that node satisfies whatever criteria the function
// implements.
type NodeConditionPredicate func(node *v1.Node) bool
// listWithPredicate lists every node known to nodeLister and returns those for
// which predicate returns true, in lister order. A lister error is returned
// as-is together with a nil slice. The result is nil when no node matches.
func listWithPredicate(nodeLister corelisters.NodeLister, predicate NodeConditionPredicate) ([]*v1.Node, error) {
	all, err := nodeLister.List(labels.Everything())
	if err != nil {
		return nil, err
	}

	var matched []*v1.Node
	for _, node := range all {
		if !predicate(node) {
			continue
		}
		matched = append(matched, node)
	}
	return matched, nil
}