Merge pull request #57503 from liggitt/gc-virtual-node-fix

Automatic merge from submit-queue (batch tested with PRs 57735, 57503). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Ensure virtual nodes aren't stranded in GC graph

Fixes #56121

See https://github.com/kubernetes/kubernetes/issues/56121#issuecomment-353265160 for details on the sequence of events that can lead to virtual nodes getting stranded in the graph

```release-note
Fixed garbage collection hang
```

(a branch with a commit that reliably triggers the cascading deletion test failure is at https://github.com/liggitt/kubernetes/commits/gc-debug-cascading... it's not easily made into a permanent test case because it only works when that test is run in isolation, and requires plumbing test hooks deep into the watch cache layer)
pull/6/head
Kubernetes Submit Queue 2018-01-02 07:51:30 -08:00 committed by GitHub
commit ff58401257
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 54 additions and 33 deletions

View File

@ -38,7 +38,6 @@ import (
"k8s.io/client-go/informers"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/garbagecollector/metaonly"
_ "k8s.io/kubernetes/pkg/util/reflector/prometheus" // for reflector metric registration
// install the prometheus plugin
_ "k8s.io/kubernetes/pkg/util/workqueue/prometheus"
@ -259,24 +258,16 @@ func (gc *GarbageCollector) attemptToDeleteWorker() bool {
}
// retry if garbage collection of an object failed.
gc.attemptToDelete.AddRateLimited(item)
} else if !n.isObserved() {
// requeue if item hasn't been observed via an informer event yet.
// otherwise a virtual node for an item added AND removed during watch reestablishment can get stuck in the graph and never removed.
// see https://issue.k8s.io/56121
glog.V(5).Infof("item %s hasn't been observed via informer yet", n.identity)
gc.attemptToDelete.AddRateLimited(item)
}
return true
}
func objectReferenceToMetadataOnlyObject(ref objectReference) *metaonly.MetadataOnlyObject {
return &metaonly.MetadataOnlyObject{
TypeMeta: metav1.TypeMeta{
APIVersion: ref.APIVersion,
Kind: ref.Kind,
},
ObjectMeta: metav1.ObjectMeta{
Namespace: ref.Namespace,
UID: ref.UID,
Name: ref.Name,
},
}
}
// isDangling check if a reference is pointing to an object that doesn't exist.
// If isDangling looks up the referenced object at the API server, it also
// returns its latest state.
@ -353,15 +344,6 @@ func (gc *GarbageCollector) classifyReferences(item *node, latestReferences []me
return solid, dangling, waitingForDependentsDeletion, nil
}
func (gc *GarbageCollector) generateVirtualDeleteEvent(identity objectReference) {
event := &event{
eventType: deleteEvent,
obj: objectReferenceToMetadataOnlyObject(identity),
}
glog.V(5).Infof("generating virtual delete event for %s\n\n", event.obj)
gc.dependencyGraphBuilder.enqueueChanges(event)
}
func ownerRefsToUIDs(refs []metav1.OwnerReference) []types.UID {
var ret []types.UID
for _, ref := range refs {
@ -387,7 +369,10 @@ func (gc *GarbageCollector) attemptToDeleteItem(item *node) error {
// exist yet, so we need to enqueue a virtual Delete event to remove
// the virtual node from GraphBuilder.uidToNode.
glog.V(5).Infof("item %v not found, generating a virtual delete event", item.identity)
gc.generateVirtualDeleteEvent(item.identity)
gc.dependencyGraphBuilder.enqueueVirtualDeleteEvent(item.identity)
// since we're manually inserting a delete event to remove this node,
// we don't need to keep tracking it as a virtual node and requeueing in attemptToDelete
item.markObserved()
return nil
case err != nil:
return err
@ -395,7 +380,10 @@ func (gc *GarbageCollector) attemptToDeleteItem(item *node) error {
if latest.GetUID() != item.identity.UID {
glog.V(5).Infof("UID doesn't match, item %v not found, generating a virtual delete event", item.identity)
gc.generateVirtualDeleteEvent(item.identity)
gc.dependencyGraphBuilder.enqueueVirtualDeleteEvent(item.identity)
// since we're manually inserting a delete event to remove this node,
// we don't need to keep tracking it as a virtual node and requeueing in attemptToDelete
item.markObserved()
return nil
}

View File

@ -53,6 +53,9 @@ type node struct {
// this records if the object's deletionTimestamp is non-nil.
beingDeleted bool
beingDeletedLock sync.RWMutex
// this records if the object was constructed virtually and never observed via informer event
virtual bool
virtualLock sync.RWMutex
// when processing an Update event, we need to compare the updated
// ownerReferences with the owners recorded in the graph.
owners []metav1.OwnerReference
@ -72,6 +75,17 @@ func (n *node) isBeingDeleted() bool {
return n.beingDeleted
}
func (n *node) markObserved() {
n.virtualLock.Lock()
defer n.virtualLock.Unlock()
n.virtual = false
}
func (n *node) isObserved() bool {
n.virtualLock.RLock()
defer n.virtualLock.RUnlock()
return n.virtual == false
}
func (n *node) markDeletingDependents() {
n.deletingDependentsLock.Lock()
defer n.deletingDependentsLock.Unlock()

View File

@ -37,6 +37,7 @@ import (
"k8s.io/client-go/informers"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/controller/garbagecollector/metaonly"
)
type eventType int
@ -369,8 +370,16 @@ func DefaultIgnoredResources() map[schema.GroupResource]struct{} {
return ignoredResources
}
func (gb *GraphBuilder) enqueueChanges(e *event) {
gb.graphChanges.Add(e)
// enqueueVirtualDeleteEvent is used to add a virtual delete event to be processed for virtual nodes
// once it is determined they do not have backing objects in storage
func (gb *GraphBuilder) enqueueVirtualDeleteEvent(ref objectReference) {
gb.graphChanges.Add(&event{
eventType: deleteEvent,
obj: &metaonly.MetadataOnlyObject{
TypeMeta: metav1.TypeMeta{APIVersion: ref.APIVersion, Kind: ref.Kind},
ObjectMeta: metav1.ObjectMeta{Namespace: ref.Namespace, UID: ref.UID, Name: ref.Name},
},
})
}
// addDependentToOwners adds n to owners' dependents list. If the owner does not
@ -382,22 +391,26 @@ func (gb *GraphBuilder) addDependentToOwners(n *node, owners []metav1.OwnerRefer
ownerNode, ok := gb.uidToNode.Read(owner.UID)
if !ok {
// Create a "virtual" node in the graph for the owner if it doesn't
// exist in the graph yet. Then enqueue the virtual node into the
// attemptToDelete. The garbage processor will enqueue a virtual delete
// event to delete it from the graph if API server confirms this
// owner doesn't exist.
// exist in the graph yet.
ownerNode = &node{
identity: objectReference{
OwnerReference: owner,
Namespace: n.identity.Namespace,
},
dependents: make(map[*node]struct{}),
virtual: true,
}
glog.V(5).Infof("add virtual node.identity: %s\n\n", ownerNode.identity)
gb.uidToNode.Write(ownerNode)
gb.attemptToDelete.Add(ownerNode)
}
ownerNode.addDependent(n)
if !ok {
// Enqueue the virtual node into attemptToDelete.
// The garbage processor will enqueue a virtual delete
// event to delete it from the graph if API server confirms this
// owner doesn't exist.
gb.attemptToDelete.Add(ownerNode)
}
}
}
@ -588,6 +601,12 @@ func (gb *GraphBuilder) processGraphChanges() bool {
glog.V(5).Infof("GraphBuilder process object: %s/%s, namespace %s, name %s, uid %s, event type %v", event.gvk.GroupVersion().String(), event.gvk.Kind, accessor.GetNamespace(), accessor.GetName(), string(accessor.GetUID()), event.eventType)
// Check if the node already exsits
existingNode, found := gb.uidToNode.Read(accessor.GetUID())
if found {
// this marks the node as having been observed via an informer event
// 1. this depends on graphChanges only containing add/update events from the actual informer
// 2. this allows things tracking virtual nodes' existence to stop polling and rely on informer events
existingNode.markObserved()
}
switch {
case (event.eventType == addEvent || event.eventType == updateEvent) && !found:
newNode := &node{