/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package garbagecollector

import (
	"fmt"
	"reflect"
	"sync"
	"time"

	"k8s.io/klog"

	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/discovery"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/kubernetes/pkg/controller"

	// import known versions
	_ "k8s.io/client-go/kubernetes"
)

// ResourceResyncTime is the resync period of the garbage collector's shared informers.
const ResourceResyncTime time.Duration = 0

// GarbageCollector runs reflectors to watch for changes of managed API
// objects, and funnels the results to a single-threaded dependencyGraphBuilder,
// which builds a graph caching the dependencies among objects. Triggered by
// graph changes, the dependencyGraphBuilder enqueues objects that can
// potentially be garbage-collected to the `attemptToDelete` queue, and enqueues
// objects whose dependents need to be orphaned to the `attemptToOrphan` queue.
// The GarbageCollector has workers that consume these two queues and send
// requests to the API server to delete or update the objects accordingly.
// Note that having the dependencyGraphBuilder notify the garbage collector
// ensures that the garbage collector operates with a graph that is at least as
// up to date as the notification.
type GarbageCollector struct {
	restMapper    resettableRESTMapper
	dynamicClient dynamic.Interface
	// garbage collector attempts to delete the items in the attemptToDelete queue when the time is ripe.
	attemptToDelete workqueue.RateLimitingInterface
	// garbage collector attempts to orphan the dependents of the items in the attemptToOrphan queue, then deletes the items.
	attemptToOrphan        workqueue.RateLimitingInterface
	dependencyGraphBuilder *GraphBuilder
	// GC caches the owners that do not exist according to the API server.
	absentOwnerCache *UIDCache
	sharedInformers  informers.SharedInformerFactory

	workerLock sync.RWMutex
}

// NewGarbageCollector constructs a GarbageCollector along with the dependency
// graph builder it drives, and performs an initial sync of resource monitors
// for the given deletable resources.
func NewGarbageCollector(
	dynamicClient dynamic.Interface,
	mapper resettableRESTMapper,
	deletableResources map[schema.GroupVersionResource]struct{},
	ignoredResources map[schema.GroupResource]struct{},
	sharedInformers informers.SharedInformerFactory,
	informersStarted <-chan struct{},
) (*GarbageCollector, error) {
	attemptToDelete := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "garbage_collector_attempt_to_delete")
	attemptToOrphan := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "garbage_collector_attempt_to_orphan")
	absentOwnerCache := NewUIDCache(500)
	gc := &GarbageCollector{
		dynamicClient:    dynamicClient,
		restMapper:       mapper,
		attemptToDelete:  attemptToDelete,
		attemptToOrphan:  attemptToOrphan,
		absentOwnerCache: absentOwnerCache,
	}
	gb := &GraphBuilder{
		dynamicClient:    dynamicClient,
		informersStarted: informersStarted,
		restMapper:       mapper,
		graphChanges:     workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "garbage_collector_graph_changes"),
		uidToNode: &concurrentUIDToNode{
			uidToNode: make(map[types.UID]*node),
		},
		attemptToDelete:  attemptToDelete,
		attemptToOrphan:  attemptToOrphan,
		absentOwnerCache: absentOwnerCache,
		sharedInformers:  sharedInformers,
		ignoredResources: ignoredResources,
	}
	if err := gb.syncMonitors(deletableResources); err != nil {
		utilruntime.HandleError(fmt.Errorf("failed to sync all monitors: %v", err))
	}
	gc.dependencyGraphBuilder = gb

	return gc, nil
}
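
// A minimal wiring sketch (illustrative only; names such as dynamicClient,
// restMapper, discoveryClient, workers, and stopCh are assumptions, not part
// of this file). A controller manager would wire the collector roughly like:
//
//	gc, err := NewGarbageCollector(dynamicClient, restMapper,
//		deletableResources, ignoredResources, sharedInformers, informersStarted)
//	if err != nil {
//		return err
//	}
//	go gc.Run(workers, stopCh)
//	go gc.Sync(discoveryClient, 30*time.Second, stopCh)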

// resyncMonitors starts or stops resource monitors as needed to ensure that all
// (and only) those resources present in the map are monitored.
func (gc *GarbageCollector) resyncMonitors(deletableResources map[schema.GroupVersionResource]struct{}) error {
	if err := gc.dependencyGraphBuilder.syncMonitors(deletableResources); err != nil {
		return err
	}
	gc.dependencyGraphBuilder.startMonitors()
	return nil
}

// Run starts garbage collector workers once the dependency graph builder's
// caches have synced, and blocks until stopCh is closed.
func (gc *GarbageCollector) Run(workers int, stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()
	defer gc.attemptToDelete.ShutDown()
	defer gc.attemptToOrphan.ShutDown()
	defer gc.dependencyGraphBuilder.graphChanges.ShutDown()

	klog.Infof("Starting garbage collector controller")
	defer klog.Infof("Shutting down garbage collector controller")

	go gc.dependencyGraphBuilder.Run(stopCh)

	if !controller.WaitForCacheSync("garbage collector", stopCh, gc.dependencyGraphBuilder.IsSynced) {
		return
	}

	klog.Infof("Garbage collector: all resource monitors have synced. Proceeding to collect garbage")

	// gc workers
	for i := 0; i < workers; i++ {
		go wait.Until(gc.runAttemptToDeleteWorker, 1*time.Second, stopCh)
		go wait.Until(gc.runAttemptToOrphanWorker, 1*time.Second, stopCh)
	}

	<-stopCh
}

// resettableRESTMapper is a RESTMapper which is capable of resetting itself
// from discovery.
type resettableRESTMapper interface {
	meta.RESTMapper
	Reset()
}
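
// For reference (an assumption about typical wiring, not enforced here):
// client-go's restmapper.DeferredDiscoveryRESTMapper satisfies this interface,
// since it implements meta.RESTMapper and exposes a Reset method that
// invalidates its cached discovery information.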

// Sync periodically resyncs the garbage collector when new resources are
// observed from discovery. When new resources are detected, Sync will stop all
// GC workers, reset gc.restMapper, and resync the monitors.
//
// Note that discoveryClient should NOT be shared with gc.restMapper, otherwise
// the mapper's underlying discovery client will be unnecessarily reset during
// the course of detecting new resources.
func (gc *GarbageCollector) Sync(discoveryClient discovery.ServerResourcesInterface, period time.Duration, stopCh <-chan struct{}) {
	oldResources := make(map[schema.GroupVersionResource]struct{})
	wait.Until(func() {
		// Get the current resource list from discovery.
		newResources := GetDeletableResources(discoveryClient)

		// This can occur if there is an internal error in GetDeletableResources.
		if len(newResources) == 0 {
			klog.V(2).Infof("no resources reported by discovery, skipping garbage collector sync")
			return
		}

		// Decide whether discovery has reported a change.
		if reflect.DeepEqual(oldResources, newResources) {
			klog.V(5).Infof("no resource updates from discovery, skipping garbage collector sync")
			return
		}

		// Ensure workers are paused to avoid processing events before informers
		// have resynced.
		gc.workerLock.Lock()
		defer gc.workerLock.Unlock()

		// Once we get here, we should not unpause workers until we've successfully synced.
		attempt := 0
		wait.PollImmediateUntil(100*time.Millisecond, func() (bool, error) {
			attempt++

			// On a reattempt, check whether available resources have changed.
			if attempt > 1 {
				newResources = GetDeletableResources(discoveryClient)
				if len(newResources) == 0 {
					klog.V(2).Infof("no resources reported by discovery (attempt %d)", attempt)
					return false, nil
				}
			}

			klog.V(2).Infof("syncing garbage collector with updated resources from discovery (attempt %d): %s", attempt, printDiff(oldResources, newResources))

			// Resetting the REST mapper will also invalidate the underlying discovery
			// client. This is a leaky abstraction and assumes behavior about the REST
			// mapper, but we'll deal with it for now.
			gc.restMapper.Reset()
			klog.V(4).Infof("reset restmapper")

			// Perform the monitor resync and wait for controllers to report cache sync.
			//
			// NOTE: It's possible that newResources will diverge from the resources
			// discovered by restMapper during the call to Reset, since they are
			// distinct discovery clients invalidated at different times. For example,
			// newResources may contain resources not returned in the restMapper's
			// discovery call if the resources appeared in-between the calls. In that
			// case, the restMapper will fail to map some of newResources until the next
			// attempt.
			if err := gc.resyncMonitors(newResources); err != nil {
				utilruntime.HandleError(fmt.Errorf("failed to sync resource monitors (attempt %d): %v", attempt, err))
				return false, nil
			}
			klog.V(4).Infof("resynced monitors")

			// Wait for caches to fill for a while (our sync period) before attempting to rediscover resources and retry syncing.
			// This protects us from deadlocks where available resources changed and one of our informer caches will never fill.
			// Informers keep attempting to sync in the background, so retrying doesn't interrupt them.
			// The call to resyncMonitors on the reattempt will no-op for resources that still exist.
			// Note that workers stay paused until we successfully resync.
			if !controller.WaitForCacheSync("garbage collector", waitForStopOrTimeout(stopCh, period), gc.dependencyGraphBuilder.IsSynced) {
				utilruntime.HandleError(fmt.Errorf("timed out waiting for dependency graph builder sync during GC sync (attempt %d)", attempt))
				return false, nil
			}

			// Success; break out of the loop.
			return true, nil
		}, stopCh)

		// Finally, keep track of our new state. Do this after all preceding steps
		// have succeeded to ensure we'll retry on subsequent syncs if an error
		// occurred.
		oldResources = newResources
		klog.V(2).Infof("synced garbage collector")
	}, period, stopCh)
}

// printDiff returns a human-readable summary of what resources were added and removed.
func printDiff(oldResources, newResources map[schema.GroupVersionResource]struct{}) string {
	removed := sets.NewString()
	for oldResource := range oldResources {
		if _, ok := newResources[oldResource]; !ok {
			removed.Insert(fmt.Sprintf("%+v", oldResource))
		}
	}
	added := sets.NewString()
	for newResource := range newResources {
		if _, ok := oldResources[newResource]; !ok {
			added.Insert(fmt.Sprintf("%+v", newResource))
		}
	}
	return fmt.Sprintf("added: %v, removed: %v", added.List(), removed.List())
}
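
// For example (illustrative output): if apps/v1 deployments newly appeared in
// discovery and nothing disappeared, printDiff would return roughly:
//
//	added: [{Group:apps Version:v1 Resource:deployments}], removed: []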

// waitForStopOrTimeout returns a stop channel that closes when the provided stop channel closes or when the specified timeout is reached.
func waitForStopOrTimeout(stopCh <-chan struct{}, timeout time.Duration) <-chan struct{} {
	stopChWithTimeout := make(chan struct{})
	go func() {
		select {
		case <-stopCh:
		case <-time.After(timeout):
		}
		close(stopChWithTimeout)
	}()
	return stopChWithTimeout
}
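
// A minimal usage sketch (illustrative; stopCh and period are assumptions):
//
//	bounded := waitForStopOrTimeout(stopCh, period)
//	<-bounded // closes after period elapses, or earlier if stopCh closes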

// IsSynced reports whether the dependency graph builder's monitors have synced.
func (gc *GarbageCollector) IsSynced() bool {
	return gc.dependencyGraphBuilder.IsSynced()
}

func (gc *GarbageCollector) runAttemptToDeleteWorker() {
	for gc.attemptToDeleteWorker() {
	}
}

func (gc *GarbageCollector) attemptToDeleteWorker() bool {
	item, quit := gc.attemptToDelete.Get()
	gc.workerLock.RLock()
	defer gc.workerLock.RUnlock()
	if quit {
		return false
	}
	defer gc.attemptToDelete.Done(item)
	n, ok := item.(*node)
	if !ok {
		utilruntime.HandleError(fmt.Errorf("expect *node, got %#v", item))
		return true
	}
	err := gc.attemptToDeleteItem(n)
	if err != nil {
		if _, ok := err.(*restMappingError); ok {
			// There are at least two ways this can happen:
			// 1. The reference is to an object of a custom type that has not yet been
			//    recognized by gc.restMapper (this is a transient error).
			// 2. The reference is to an invalid group/version. We don't currently
			//    have a way to distinguish this from a valid type we will recognize
			//    after the next discovery sync.
			// For now, record the error and retry.
			klog.V(5).Infof("error syncing item %s: %v", n, err)
		} else {
			utilruntime.HandleError(fmt.Errorf("error syncing item %s: %v", n, err))
		}
		// retry if garbage collection of an object failed.
		gc.attemptToDelete.AddRateLimited(item)
	} else if !n.isObserved() {
		// requeue if item hasn't been observed via an informer event yet.
		// otherwise a virtual node for an item added AND removed during watch
		// reestablishment can get stuck in the graph and never be removed.
		// see https://issue.k8s.io/56121
		klog.V(5).Infof("item %s hasn't been observed via informer yet", n.identity)
		gc.attemptToDelete.AddRateLimited(item)
	}
	return true
}

// isDangling checks whether a reference points to an object that doesn't exist.
// If isDangling looks up the referenced object at the API server, it also
// returns the owner's latest state.
func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *node) (
	dangling bool, owner *unstructured.Unstructured, err error) {
	if gc.absentOwnerCache.Has(reference.UID) {
		klog.V(5).Infof("according to the absentOwnerCache, object %s's owner %s/%s, %s does not exist", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
		return true, nil, nil
	}
	// TODO: we need to verify the reference resource is supported by the
	// system. If it's not a valid resource, the garbage collector should i)
	// ignore the reference when deciding whether the object should be deleted,
	// and ii) update the object to remove such references. This is to
	// prevent objects having references to an old resource from being
	// deleted during a cluster upgrade.
	resource, namespaced, err := gc.apiResource(reference.APIVersion, reference.Kind)
	if err != nil {
		return false, nil, err
	}

	// TODO: It's only necessary to talk to the API server if the owner node
	// is a "virtual" node. The local graph could lag behind the real
	// status, but in practice, the difference is small.
	owner, err = gc.dynamicClient.Resource(resource).Namespace(resourceDefaultNamespace(namespaced, item.identity.Namespace)).Get(reference.Name, metav1.GetOptions{})
	switch {
	case errors.IsNotFound(err):
		gc.absentOwnerCache.Add(reference.UID)
		klog.V(5).Infof("object %s's owner %s/%s, %s is not found", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
		return true, nil, nil
	case err != nil:
		return false, nil, err
	}

	if owner.GetUID() != reference.UID {
		klog.V(5).Infof("object %s's owner %s/%s, %s is not found, UID mismatch", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
		gc.absentOwnerCache.Add(reference.UID)
		return true, nil, nil
	}
	return false, owner, nil
}
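
// For illustration: an ownerReference such as
// {APIVersion: "apps/v1", Kind: "ReplicaSet", Name: "rs-1", UID: "abc-123"}
// is dangling if the lookup returns NotFound, or if it returns a ReplicaSet
// named "rs-1" with a different UID (the owner was deleted and recreated
// under the same name).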

// classifyReferences sorts the latestReferences into three categories:
// solid: the owner exists, and is not "waitingForDependentsDeletion"
// dangling: the owner does not exist
// waitingForDependentsDeletion: the owner exists, its deletionTimestamp is non-nil, and it has
// FinalizerDeletingDependents
// This function communicates with the server.
func (gc *GarbageCollector) classifyReferences(item *node, latestReferences []metav1.OwnerReference) (
	solid, dangling, waitingForDependentsDeletion []metav1.OwnerReference, err error) {
	for _, reference := range latestReferences {
		isDangling, owner, err := gc.isDangling(reference, item)
		if err != nil {
			return nil, nil, nil, err
		}
		if isDangling {
			dangling = append(dangling, reference)
			continue
		}

		ownerAccessor, err := meta.Accessor(owner)
		if err != nil {
			return nil, nil, nil, err
		}
		if ownerAccessor.GetDeletionTimestamp() != nil && hasDeleteDependentsFinalizer(ownerAccessor) {
			waitingForDependentsDeletion = append(waitingForDependentsDeletion, reference)
		} else {
			solid = append(solid, reference)
		}
	}
	return solid, dangling, waitingForDependentsDeletion, nil
}

func ownerRefsToUIDs(refs []metav1.OwnerReference) []types.UID {
	var ret []types.UID
	for _, ref := range refs {
		ret = append(ret, ref.UID)
	}
	return ret
}

func (gc *GarbageCollector) attemptToDeleteItem(item *node) error {
	klog.V(2).Infof("processing item %s", item.identity)

	// "being deleted" is a one-way trip to the final deletion. We'll just wait for the final deletion, and then process the object's dependents.
	if item.isBeingDeleted() && !item.isDeletingDependents() {
		klog.V(5).Infof("processing item %s returned at once, because its DeletionTimestamp is non-nil", item.identity)
		return nil
	}

	// TODO: It's only necessary to talk to the API server if this is a
	// "virtual" node. The local graph could lag behind the real status, but in
	// practice, the difference is small.
	latest, err := gc.getObject(item.identity)
	switch {
	case errors.IsNotFound(err):
		// the GraphBuilder can add a "virtual" node for an owner that doesn't
		// exist yet, so we need to enqueue a virtual Delete event to remove
		// the virtual node from GraphBuilder.uidToNode.
		klog.V(5).Infof("item %v not found, generating a virtual delete event", item.identity)
		gc.dependencyGraphBuilder.enqueueVirtualDeleteEvent(item.identity)
		// since we're manually inserting a delete event to remove this node,
		// we don't need to keep tracking it as a virtual node and requeueing in attemptToDelete
		item.markObserved()
		return nil
	case err != nil:
		return err
	}

	if latest.GetUID() != item.identity.UID {
		klog.V(5).Infof("UID doesn't match, item %v not found, generating a virtual delete event", item.identity)
		gc.dependencyGraphBuilder.enqueueVirtualDeleteEvent(item.identity)
		// since we're manually inserting a delete event to remove this node,
		// we don't need to keep tracking it as a virtual node and requeueing in attemptToDelete
		item.markObserved()
		return nil
	}

	// TODO: attemptToOrphanWorker() routine is similar. Consider merging
	// attemptToOrphanWorker() into attemptToDeleteItem() as well.
	if item.isDeletingDependents() {
		return gc.processDeletingDependentsItem(item)
	}

	// compute if we should delete the item
	ownerReferences := latest.GetOwnerReferences()
	if len(ownerReferences) == 0 {
		klog.V(2).Infof("object %s doesn't have an owner, continue on next item", item.identity)
		return nil
	}

	solid, dangling, waitingForDependentsDeletion, err := gc.classifyReferences(item, ownerReferences)
	if err != nil {
		return err
	}
	klog.V(5).Infof("classify references of %s.\nsolid: %#v\ndangling: %#v\nwaitingForDependentsDeletion: %#v\n", item.identity, solid, dangling, waitingForDependentsDeletion)

	switch {
	case len(solid) != 0:
		klog.V(2).Infof("object %s has at least one existing owner: %#v, will not garbage collect", item.identity, solid)
		if len(dangling) == 0 && len(waitingForDependentsDeletion) == 0 {
			return nil
		}
		klog.V(2).Infof("remove dangling references %#v and waiting references %#v for object %s", dangling, waitingForDependentsDeletion, item.identity)
		// waitingForDependentsDeletion needs to be deleted from the
		// ownerReferences, otherwise the referenced objects will be stuck with
		// the FinalizerDeletingDependents and never get deleted.
		ownerUIDs := append(ownerRefsToUIDs(dangling), ownerRefsToUIDs(waitingForDependentsDeletion)...)
		patch := deleteOwnerRefStrategicMergePatch(item.identity.UID, ownerUIDs...)
		_, err = gc.patch(item, patch, func(n *node) ([]byte, error) {
			return gc.deleteOwnerRefJSONMergePatch(n, ownerUIDs...)
		})
		return err
	case len(waitingForDependentsDeletion) != 0 && item.dependentsLength() != 0:
		deps := item.getDependents()
		for _, dep := range deps {
			if dep.isDeletingDependents() {
				// this cycle detection has false positives; we need to apply a
				// more rigorous detection if this turns out to be a problem.
				// multiple workers run attemptToDeleteItem in parallel, so the
				// cycle detection can fail in a race condition.
				klog.V(2).Infof("processing object %s, some of its owners and its dependent [%s] have FinalizerDeletingDependents, to prevent potential cycle, its ownerReferences are going to be modified to be non-blocking, then the object is going to be deleted with Foreground", item.identity, dep.identity)
				patch, err := item.unblockOwnerReferencesStrategicMergePatch()
				if err != nil {
					return err
				}
				if _, err := gc.patch(item, patch, gc.unblockOwnerReferencesJSONMergePatch); err != nil {
					return err
				}
				break
			}
		}
		klog.V(2).Infof("at least one owner of object %s has FinalizerDeletingDependents, and the object itself has dependents, so it is going to be deleted in Foreground", item.identity)
		// the deletion event will be observed by the graphBuilder, so the item
		// will be processed again in processDeletingDependentsItem. If it
		// doesn't have dependents, the function will remove the
		// FinalizerDeletingDependents from the item, resulting in the final
		// deletion of the item.
		policy := metav1.DeletePropagationForeground
		return gc.deleteObject(item.identity, &policy)
	default:
		// item doesn't have any solid owner, so it needs to be garbage
		// collected. Also, none of item's owners is waiting for the deletion of
		// the dependents, so set propagationPolicy based on existing finalizers.
		var policy metav1.DeletionPropagation
		switch {
		case hasOrphanFinalizer(latest):
			// if an existing orphan finalizer is already on the object, honor it.
			policy = metav1.DeletePropagationOrphan
		case hasDeleteDependentsFinalizer(latest):
			// if an existing foreground finalizer is already on the object, honor it.
			policy = metav1.DeletePropagationForeground
		default:
			// otherwise, default to background.
			policy = metav1.DeletePropagationBackground
		}
		klog.V(2).Infof("delete object %s with propagation policy %s", item.identity, policy)
		return gc.deleteObject(item.identity, &policy)
	}
}
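
// For illustration (a hedged sketch of what this package's patch helpers
// produce; the exact format lives in patch.go and is an assumption here):
// deleteOwnerRefStrategicMergePatch builds a strategic-merge-patch body
// roughly of the form
//
//	{"metadata":{"ownerReferences":[{"$patch":"delete","uid":"<owner-uid>"}],"uid":"<dependent-uid>"}}
//
// where the dependent's own UID acts as a precondition, so the patch fails if
// the object was deleted and recreated in the meantime.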

// processDeletingDependentsItem processes an item that's waiting for its dependents to be deleted.
func (gc *GarbageCollector) processDeletingDependentsItem(item *node) error {
	blockingDependents := item.blockingDependents()
	if len(blockingDependents) == 0 {
		klog.V(2).Infof("remove DeleteDependents finalizer for item %s", item.identity)
		return gc.removeFinalizer(item, metav1.FinalizerDeleteDependents)
	}
	for _, dep := range blockingDependents {
		if !dep.isDeletingDependents() {
			klog.V(2).Infof("adding %s to attemptToDelete, because its owner %s is deletingDependents", dep.identity, item.identity)
			gc.attemptToDelete.Add(dep)
		}
	}
	return nil
}

// orphanDependents removes the owner from the OwnerReferences of each dependent.
// dependents are copies of pointers to the owner's dependents, so they don't need to be locked.
func (gc *GarbageCollector) orphanDependents(owner objectReference, dependents []*node) error {
	errCh := make(chan error, len(dependents))
	wg := sync.WaitGroup{}
	wg.Add(len(dependents))
	for i := range dependents {
		go func(dependent *node) {
			defer wg.Done()
			// the dependent.identity.UID is used as a precondition
			patch := deleteOwnerRefStrategicMergePatch(dependent.identity.UID, owner.UID)
			_, err := gc.patch(dependent, patch, func(n *node) ([]byte, error) {
				return gc.deleteOwnerRefJSONMergePatch(n, owner.UID)
			})
			// note that if the target ownerReference doesn't exist in the
			// dependent, strategic merge patch will NOT return an error.
			if err != nil && !errors.IsNotFound(err) {
				errCh <- fmt.Errorf("orphaning %s failed, %v", dependent.identity, err)
			}
		}(dependents[i])
	}
	wg.Wait()
	close(errCh)

	var errorsSlice []error
	for e := range errCh {
		errorsSlice = append(errorsSlice, e)
	}

	if len(errorsSlice) != 0 {
		return fmt.Errorf("failed to orphan dependents of owner %s, got errors: %s", owner, utilerrors.NewAggregate(errorsSlice).Error())
	}
	klog.V(5).Infof("successfully updated all dependents of owner %s", owner)
	return nil
}

func (gc *GarbageCollector) runAttemptToOrphanWorker() {
	for gc.attemptToOrphanWorker() {
	}
}

// attemptToOrphanWorker dequeues a node from the attemptToOrphan queue, finds its
// dependents based on the graph maintained by the GC, removes the owner from the
// OwnerReferences of its dependents, and finally updates the owner to remove
// the "Orphan" finalizer. The node is added back into the attemptToOrphan queue
// if any of these steps fail.
func (gc *GarbageCollector) attemptToOrphanWorker() bool {
	item, quit := gc.attemptToOrphan.Get()
	gc.workerLock.RLock()
	defer gc.workerLock.RUnlock()
	if quit {
		return false
	}
	defer gc.attemptToOrphan.Done(item)
	owner, ok := item.(*node)
	if !ok {
		utilruntime.HandleError(fmt.Errorf("expect *node, got %#v", item))
		return true
	}
	// we don't need to lock each element, because they never get updated
	owner.dependentsLock.RLock()
	dependents := make([]*node, 0, len(owner.dependents))
	for dependent := range owner.dependents {
		dependents = append(dependents, dependent)
	}
	owner.dependentsLock.RUnlock()

	err := gc.orphanDependents(owner.identity, dependents)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("orphanDependents for %s failed with %v", owner.identity, err))
		gc.attemptToOrphan.AddRateLimited(item)
		return true
	}
	// update the owner, remove the FinalizerOrphanDependents from its finalizers list
	err = gc.removeFinalizer(owner, metav1.FinalizerOrphanDependents)
	if err != nil {
		utilruntime.HandleError(fmt.Errorf("removeOrphanFinalizer for %s failed with %v", owner.identity, err))
		gc.attemptToOrphan.AddRateLimited(item)
	}
	return true
}

// *FOR TEST USE ONLY*
// GraphHasUID returns whether the GraphBuilder has a particular UID stored in
// its uidToNode graph. It's useful for debugging.
// This method is used by integration tests.
func (gc *GarbageCollector) GraphHasUID(UIDs []types.UID) bool {
	for _, u := range UIDs {
		if _, ok := gc.dependencyGraphBuilder.uidToNode.Read(u); ok {
			return true
		}
	}
	return false
}

// GetDeletableResources returns all resources from discoveryClient that the
// garbage collector should recognize and work with. More specifically, all
// preferred resources which support the 'delete', 'list', and 'watch' verbs.
//
// All discovery errors are considered temporary. Upon encountering any error,
// GetDeletableResources will log and return any discovered resources it was
// able to process (which may be none).
func GetDeletableResources(discoveryClient discovery.ServerResourcesInterface) map[schema.GroupVersionResource]struct{} {
	preferredResources, err := discoveryClient.ServerPreferredResources()
	if err != nil {
		if discovery.IsGroupDiscoveryFailedError(err) {
			klog.Warningf("failed to discover some groups: %v", err.(*discovery.ErrGroupDiscoveryFailed).Groups)
		} else {
			klog.Warningf("failed to discover preferred resources: %v", err)
		}
	}
	if preferredResources == nil {
		return map[schema.GroupVersionResource]struct{}{}
	}

	// This is extracted from discovery.GroupVersionResources to allow tolerating
	// failures on a per-resource basis.
	deletableResources := discovery.FilteredBy(discovery.SupportsAllVerbs{Verbs: []string{"delete", "list", "watch"}}, preferredResources)
	deletableGroupVersionResources := map[schema.GroupVersionResource]struct{}{}
	for _, rl := range deletableResources {
		gv, err := schema.ParseGroupVersion(rl.GroupVersion)
		if err != nil {
			klog.Warningf("ignoring invalid discovered resource %q: %v", rl.GroupVersion, err)
			continue
		}
		for i := range rl.APIResources {
			deletableGroupVersionResources[schema.GroupVersionResource{Group: gv.Group, Version: gv.Version, Resource: rl.APIResources[i].Name}] = struct{}{}
		}
	}

	return deletableGroupVersionResources
}
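
// A minimal usage sketch (illustrative; discoveryClient is an assumption):
//
//	deletable := GetDeletableResources(discoveryClient)
//	if _, ok := deletable[schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"}]; ok {
//		// apps/v1 deployments support delete, list, and watch.
//	}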