From c3dd82c30c8ea89c3125e7b09b0d414a94a55e02 Mon Sep 17 00:00:00 2001 From: Dan Mace Date: Tue, 7 Nov 2017 13:19:43 -0500 Subject: [PATCH] Tolerate partial discovery in garbage collector Allow the garbage collector to tolerate partial discovery failures. On a partial failure, use whatever was discovered, log the failures, and allow the resync logic to try again later. Fixes #55022. --- cmd/kube-controller-manager/app/core.go | 5 +- pkg/controller/garbagecollector/BUILD | 1 + .../garbagecollector/garbagecollector.go | 40 +++++-- .../garbagecollector/garbagecollector_test.go | 108 ++++++++++++++++++ .../garbage_collector_test.go | 5 +- 5 files changed, 140 insertions(+), 19 deletions(-) diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index 0d43b8de75..3c58265b74 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -341,10 +341,7 @@ func startGarbageCollectorController(ctx ControllerContext) (bool, error) { clientPool := dynamic.NewClientPool(config, restMapper, dynamic.LegacyAPIPathResolverFunc) // Get an initial set of deletable resources to prime the garbage collector. - deletableResources, err := garbagecollector.GetDeletableResources(discoveryClient) - if err != nil { - return true, err - } + deletableResources := garbagecollector.GetDeletableResources(discoveryClient) ignoredResources := make(map[schema.GroupResource]struct{}) for _, r := range ctx.Options.GCIgnoredResources { ignoredResources[schema.GroupResource{Group: r.Group, Resource: r.Resource}] = struct{}{} diff --git a/pkg/controller/garbagecollector/BUILD b/pkg/controller/garbagecollector/BUILD index 12f9019648..9464b69dc6 100644 --- a/pkg/controller/garbagecollector/BUILD +++ b/pkg/controller/garbagecollector/BUILD @@ -66,6 +66,7 @@ go_test( "//vendor/k8s.io/apimachinery/pkg/util/json:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/strategicpatch:go_default_library", + "//vendor/k8s.io/client-go/discovery:go_default_library", "//vendor/k8s.io/client-go/dynamic:go_default_library", "//vendor/k8s.io/client-go/informers:go_default_library", "//vendor/k8s.io/client-go/kubernetes/fake:go_default_library", diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index 8853640871..ef9a69473c 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -171,11 +171,7 @@ func (gc *GarbageCollector) Sync(discoveryClient discovery.DiscoveryInterface, p oldResources := make(map[schema.GroupVersionResource]struct{}) wait.Until(func() { // Get the current resource list from discovery. - newResources, err := GetDeletableResources(discoveryClient) - if err != nil { - utilruntime.HandleError(err) - return - } + newResources := GetDeletableResources(discoveryClient) // Detect first or abnormal sync and try again later. if oldResources == nil || len(oldResources) == 0 { @@ -592,15 +588,37 @@ func (gc *GarbageCollector) GraphHasUID(UIDs []types.UID) bool { // GetDeletableResources returns all resources from discoveryClient that the // garbage collector should recognize and work with. More specifically, all // preferred resources which support the 'delete' verb. -func GetDeletableResources(discoveryClient discovery.DiscoveryInterface) (map[schema.GroupVersionResource]struct{}, error) { +// +// All discovery errors are considered temporary. Upon encountering any error, +// GetDeletableResources will log and return any discovered resources it was +// able to process (which may be none). +func GetDeletableResources(discoveryClient discovery.ServerResourcesInterface) map[schema.GroupVersionResource]struct{} { preferredResources, err := discoveryClient.ServerPreferredResources() if err != nil { - return nil, fmt.Errorf("failed to get supported resources from server: %v", err) + if discovery.IsGroupDiscoveryFailedError(err) { + glog.Warning("failed to discover some groups: %v", err.(*discovery.ErrGroupDiscoveryFailed).Groups) + } else { + glog.Warning("failed to discover preferred resources: %v", err) + } } + if preferredResources == nil { + return map[schema.GroupVersionResource]struct{}{} + } + + // This is extracted from discovery.GroupVersionResources to allow tolerating + // failures on a per-resource basis. deletableResources := discovery.FilteredBy(discovery.SupportsAllVerbs{Verbs: []string{"delete"}}, preferredResources) - deletableGroupVersionResources, err := discovery.GroupVersionResources(deletableResources) - if err != nil { - return nil, fmt.Errorf("Failed to parse resources from server: %v", err) + deletableGroupVersionResources := map[schema.GroupVersionResource]struct{}{} + for _, rl := range deletableResources { + gv, err := schema.ParseGroupVersion(rl.GroupVersion) + if err != nil { + glog.Warning("ignoring invalid discovered resource %q: %v", rl.GroupVersion, err) + continue + } + for i := range rl.APIResources { + deletableGroupVersionResources[schema.GroupVersionResource{Group: gv.Group, Version: gv.Version, Resource: rl.APIResources[i].Name}] = struct{}{} + } } - return deletableGroupVersionResources, nil + + return deletableGroupVersionResources } diff --git a/pkg/controller/garbagecollector/garbagecollector_test.go b/pkg/controller/garbagecollector/garbagecollector_test.go index 96e52509f7..7b46279e81 100644 --- a/pkg/controller/garbagecollector/garbagecollector_test.go +++ b/pkg/controller/garbagecollector/garbagecollector_test.go @@ -17,6 +17,7 @@ limitations under the License. package garbagecollector import ( + "fmt" "net/http" "net/http/httptest" "reflect" @@ -37,6 +38,7 @@ import ( "k8s.io/apimachinery/pkg/util/json" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/strategicpatch" + "k8s.io/client-go/discovery" "k8s.io/client-go/dynamic" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes/fake" @@ -671,3 +673,109 @@ func TestOrphanDependentsFailure(t *testing.T) { t.Errorf("expected error contains text %s, got %v", expected, err) } } + +// TestGetDeletableResources ensures GetDeletableResources always returns +// something usable regardless of discovery output. +func TestGetDeletableResources(t *testing.T) { + tests := map[string]struct { + serverResources []*metav1.APIResourceList + err error + deletableResources map[schema.GroupVersionResource]struct{} + }{ + "no error": { + serverResources: []*metav1.APIResourceList{ + { + // Valid GroupVersion + GroupVersion: "apps/v1", + APIResources: []metav1.APIResource{ + {Name: "pods", Namespaced: true, Kind: "Pod", Verbs: metav1.Verbs{"delete"}}, + {Name: "services", Namespaced: true, Kind: "Service"}, + }, + }, + { + // Invalid GroupVersion, should be ignored + GroupVersion: "foo//whatever", + APIResources: []metav1.APIResource{ + {Name: "bars", Namespaced: true, Kind: "Bar", Verbs: metav1.Verbs{"delete"}}, + }, + }, + }, + err: nil, + deletableResources: map[schema.GroupVersionResource]struct{}{ + {Group: "apps", Version: "v1", Resource: "pods"}: {}, + }, + }, + "nonspecific failure, includes usable results": { + serverResources: []*metav1.APIResourceList{ + { + GroupVersion: "apps/v1", + APIResources: []metav1.APIResource{ + {Name: "pods", Namespaced: true, Kind: "Pod", Verbs: metav1.Verbs{"delete"}}, + {Name: "services", Namespaced: true, Kind: "Service"}, + }, + }, + }, + err: fmt.Errorf("internal error"), + deletableResources: map[schema.GroupVersionResource]struct{}{ + {Group: "apps", Version: "v1", Resource: "pods"}: {}, + }, + }, + "partial discovery failure, includes usable results": { + serverResources: []*metav1.APIResourceList{ + { + GroupVersion: "apps/v1", + APIResources: []metav1.APIResource{ + {Name: "pods", Namespaced: true, Kind: "Pod", Verbs: metav1.Verbs{"delete"}}, + {Name: "services", Namespaced: true, Kind: "Service"}, + }, + }, + }, + err: &discovery.ErrGroupDiscoveryFailed{ + Groups: map[schema.GroupVersion]error{ + {Group: "foo", Version: "v1"}: fmt.Errorf("discovery failure"), + }, + }, + deletableResources: map[schema.GroupVersionResource]struct{}{ + {Group: "apps", Version: "v1", Resource: "pods"}: {}, + }, + }, + "discovery failure, no results": { + serverResources: nil, + err: fmt.Errorf("internal error"), + deletableResources: map[schema.GroupVersionResource]struct{}{}, + }, + } + + for name, test := range tests { + t.Logf("testing %q", name) + client := &fakeServerResources{ + PreferredResources: test.serverResources, + Error: test.err, + } + actual := GetDeletableResources(client) + if !reflect.DeepEqual(test.deletableResources, actual) { + t.Errorf("expected resources:\n%v\ngot:\n%v", test.deletableResources, actual) + } + } +} + +type fakeServerResources struct { + PreferredResources []*metav1.APIResourceList + Error error +} + +func (_ *fakeServerResources) ServerResourcesForGroupVersion(groupVersion string) (*metav1.APIResourceList, error) { + return nil, nil +} + +func (_ *fakeServerResources) ServerResources() ([]*metav1.APIResourceList, error) { + return nil, nil +} + +func (f *fakeServerResources) ServerPreferredResources() ([]*metav1.APIResourceList, error) { + return f.PreferredResources, f.Error +} + +func (_ *fakeServerResources) ServerPreferredNamespacedResources() ([]*metav1.APIResourceList, error) { + return nil, nil +} diff --git a/test/integration/garbagecollector/garbage_collector_test.go b/test/integration/garbagecollector/garbage_collector_test.go index 030951e82b..632a75890a 100644 --- a/test/integration/garbagecollector/garbage_collector_test.go +++ b/test/integration/garbagecollector/garbage_collector_test.go @@ -233,10 +233,7 @@ func setup(t *testing.T, workerCount int) *testContext { discoveryClient := cacheddiscovery.NewMemCacheClient(clientSet.Discovery()) restMapper := discovery.NewDeferredDiscoveryRESTMapper(discoveryClient, meta.InterfacesForUnstructured) restMapper.Reset() - deletableResources, err := garbagecollector.GetDeletableResources(discoveryClient) - if err != nil { - t.Fatalf("unable to get deletable resources: %v", err) - } + deletableResources := garbagecollector.GetDeletableResources(discoveryClient) config := *masterConfig config.ContentConfig = dynamic.ContentConfig() metaOnlyClientPool := dynamic.NewClientPool(&config, restMapper, dynamic.LegacyAPIPathResolverFunc)