v2tenancy: namespace deletion using finalizers (#19714)

pull/19722/head
Semir Patel 2023-11-22 10:06:11 -06:00 committed by GitHub
parent f69c68e777
commit 0fdc2ac5e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 457 additions and 1 deletions

View File

@ -0,0 +1,196 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
// Common code shared by the partition and namespace controllers.
package common
import (
"context"
"errors"
"fmt"
"github.com/hashicorp/consul/internal/controller"
"github.com/hashicorp/consul/internal/resource"
"github.com/hashicorp/consul/proto-public/pbresource"
)
const (
// ConditionAccepted indicates the tenancy unit has a finalizer
// and contains a default namespace if a partition.
ConditionAccepted = "accepted"
ReasonAcceptedOK = "Ok"
ReasonEnsureHasFinalizerFailed = "EnsureHasFinalizerFailed"
// ConditionDeleted indicates that the units tenants have been
// deleted. It never has a state other than false because the
// resource no longer exists at that point.
ConditionDeleted = "deleted"
ReasonDeletionInProgress = "DeletionInProgress"
)
var (
ErrStillHasTenants = errors.New("still has tenants")
)
func EnsureHasFinalizer(ctx context.Context, rt controller.Runtime, res *pbresource.Resource, statusKey string) error {
// The statusKey doubles as the finalizer name for tenancy resources.
if resource.HasFinalizer(res, statusKey) {
rt.Logger.Trace("already has finalizer")
return nil
}
// Finalizer hasn't been written, so add it.
resource.AddFinalizer(res, statusKey)
_, err := rt.Client.Write(ctx, &pbresource.WriteRequest{Resource: res})
if err != nil {
return WriteStatus(ctx, rt, res, statusKey, ConditionAccepted, ReasonEnsureHasFinalizerFailed, err)
}
rt.Logger.Trace("added finalizer")
return err
}
func EnsureTenantsDeleted(ctx context.Context, rt controller.Runtime, registry resource.Registry, res *pbresource.Resource, tenantScope resource.Scope, tenancy *pbresource.Tenancy) error {
// Useful stats to keep track of on every sweep
numExistingHasFinalizer := 0
numExistingOwned := 0
numImmediateDeletes := 0
numDeferredDeletes := 0
// List doesn't support querying across all types so iterate through each one.
for _, reg := range registry.Types() {
// Skip tenants that aren't scoped to the tenancy unit.
if reg.Scope != tenantScope {
continue
}
// Get all tenants of the current type.
rsp, err := rt.Client.List(ctx, &pbresource.ListRequest{Type: reg.Type, Tenancy: tenancy})
if err != nil {
return err
}
if len(rsp.Resources) > 0 {
rt.Logger.Trace(fmt.Sprintf("found %d tenant %s", len(rsp.Resources), reg.Type.Kind))
}
// Delete each qualified tenant.
for _, tenant := range rsp.Resources {
// Owned resources will be deleted when the parent resource is deleted (tombstone reaper)
// so just skip over them.
if tenant.Owner != nil {
numExistingOwned++
continue
}
// Skip anything that is already marked for deletion and has finalizers
// since deletion of those resource is out of our control.
if resource.IsMarkedForDeletion(tenant) && resource.HasFinalizers(tenant) {
numExistingHasFinalizer++
continue
}
// Delete tenant with a blanket non-CAS delete since we don't care about the version that
// is deleted. Since we don't know whether the delete was immediate or deferred due to the
// presense of a finalizer, we can't assume that the tenant is really deleted.
_, err = rt.Client.Delete(ctx, &pbresource.DeleteRequest{Id: tenant.Id, Version: ""})
if err != nil {
// Bail on the first sign of trouble and retry in future reconciles.
return err
}
// Classify the just deleted tenant since we're not fully vacated if a deferred delete occurred.
if resource.HasFinalizers(tenant) {
rt.Logger.Trace(fmt.Sprintf("deferred delete of %s tenant %q", res.Id.Type.Kind, tenant.Id.Name))
numDeferredDeletes++
} else {
rt.Logger.Trace(fmt.Sprintf("immediate delete of %s tenant %q", res.Id.Type.Kind, tenant.Id.Name))
numImmediateDeletes++
}
}
}
// Force re-reconcile if we have any lingering tenants by returning an error.
if numExistingOwned+numExistingHasFinalizer+numDeferredDeletes > 0 {
if numExistingOwned > 0 {
rt.Logger.Debug(fmt.Sprintf("delete blocked on %d remaining owned tenants", numExistingOwned))
}
if numExistingHasFinalizer > 0 {
rt.Logger.Debug(fmt.Sprintf("delete blocked on %d remaining tenants with finalizers", numExistingHasFinalizer))
}
if numDeferredDeletes > 0 {
rt.Logger.Debug(fmt.Sprintf("delete blocked on %d tenants which were just marked for deletion", numDeferredDeletes))
}
return ErrStillHasTenants
}
// We should have zero tenants and be good to continue.
rt.Logger.Debug("no tenants - green light the delete")
return nil
}
// EnsureResourceDelete makes sure a tenancy unit (partition or namespace) with no tenants is finally deleted.
func EnsureResourceDeleted(ctx context.Context, rt controller.Runtime, res *pbresource.Resource, statusKey string) error {
// Remove finalizer if present
if resource.HasFinalizer(res, statusKey) {
resource.RemoveFinalizer(res, statusKey)
_, err := rt.Client.Write(ctx, &pbresource.WriteRequest{Resource: res})
if err != nil {
rt.Logger.Error("failed write to remove finalizer")
return WriteStatus(ctx, rt, res, statusKey, ConditionDeleted, ReasonDeletionInProgress, err)
}
rt.Logger.Trace("removed finalizer")
}
// Finally, delete the tenancy unit.
_, err := rt.Client.Delete(ctx, &pbresource.DeleteRequest{Id: res.Id})
if err != nil {
rt.Logger.Error("failed final delete", "error", err)
return WriteStatus(ctx, rt, res, statusKey, ConditionDeleted, ReasonDeletionInProgress, err)
}
// Success
rt.Logger.Trace("finally deleted")
return nil
}
// WriteStatus writes the tenancy resource status only if the status has changed.
// The state and message are based on whether the passed in error is nil
// (state=TRUE, message="") or not (state=FALSE, message=error). The passed in
// error is always returned unless the delegated call to client.WriteStatus
// itself fails.
func WriteStatus(ctx context.Context, rt controller.Runtime, res *pbresource.Resource, statusKey string, condition string, reason string, err error) error {
state := pbresource.Condition_STATE_TRUE
message := ""
if err != nil {
state = pbresource.Condition_STATE_FALSE
message = err.Error()
}
newStatus := &pbresource.Status{
ObservedGeneration: res.Generation,
Conditions: []*pbresource.Condition{{
Type: condition,
State: state,
Reason: reason,
Message: message,
}},
}
// Skip the write if the status hasn't changed to keep write amplificiation in check.
if resource.EqualStatus(res.Status[statusKey], newStatus, false) {
return err
}
_, statusErr := rt.Client.WriteStatus(ctx, &pbresource.WriteStatusRequest{
Id: res.Id,
Key: statusKey,
Status: newStatus,
})
if statusErr != nil {
rt.Logger.Error("failed writing status", "error", statusErr)
return statusErr
}
rt.Logger.Trace("wrote status", "status", newStatus)
return err
}

View File

@ -0,0 +1,95 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package namespace
import (
"context"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"github.com/hashicorp/consul/internal/controller"
"github.com/hashicorp/consul/internal/resource"
"github.com/hashicorp/consul/internal/tenancy/internal/controllers/common"
"github.com/hashicorp/consul/proto-public/pbresource"
pbtenancy "github.com/hashicorp/consul/proto-public/pbtenancy/v2beta1"
)
const (
// StatusKey also serves as the finalizer name.
StatusKey = "consul.io/namespace-controller"
// Conditions and reasons are shared with partitions. See
// common.go for the full list.
)
func Controller(registry resource.Registry) controller.Controller {
return controller.ForType(pbtenancy.NamespaceType).
WithReconciler(&Reconciler{Registry: registry})
}
type Reconciler struct {
Registry resource.Registry
}
// Reconcile is responsible for reconciling a namespace resource.
//
// When a namespace is created, ensures a finalizer is added for cleanup.
//
// When a namespace is marked for deletion, ensures tenants are deleted and
// the finalizer is removed.
func (r *Reconciler) Reconcile(ctx context.Context, rt controller.Runtime, req controller.Request) error {
rt.Logger = rt.Logger.With("resource", req.ID.Name, "controller", "namespace")
// Never reconcile the default namespace in the default partition since it is
// created on system startup or snapshot restoration. Resource validation rules
// protect them being deleted.
if req.ID.Tenancy.Partition == resource.DefaultPartitionName && req.ID.Name == resource.DefaultNamespaceName {
rt.Logger.Trace("skipping reconcile of default namespace")
return nil
}
// Read namespace to make sure we have the latest version.
rsp, err := rt.Client.Read(ctx, &pbresource.ReadRequest{Id: req.ID})
switch {
case status.Code(err) == codes.NotFound:
// Namespace deleted - nothing to do.
rt.Logger.Trace("namespace not found, nothing to do")
return nil
case err != nil:
rt.Logger.Error("failed read", "error", err)
return err
}
res := rsp.Resource
if resource.IsMarkedForDeletion(res) {
return ensureDeleted(ctx, rt, r.Registry, res)
}
if err = common.EnsureHasFinalizer(ctx, rt, res, StatusKey); err != nil {
return err
}
return common.WriteStatus(ctx, rt, res, StatusKey, common.ConditionAccepted, common.ReasonAcceptedOK, err)
}
func ensureDeleted(ctx context.Context, rt controller.Runtime, registry resource.Registry, res *pbresource.Resource) error {
tenancy := &pbresource.Tenancy{
Partition: res.Id.Tenancy.Partition,
Namespace: res.Id.Name,
PeerName: resource.DefaultPeerName,
}
// Delete namespace scoped tenants
if err := common.EnsureTenantsDeleted(ctx, rt, registry, res, resource.ScopeNamespace, tenancy); err != nil {
rt.Logger.Error("failed deleting tenants", "error", err)
return common.WriteStatus(ctx, rt, res, StatusKey, common.ConditionDeleted, common.ReasonDeletionInProgress, err)
}
// Delete namespace resource since all namespace scoped tenants are deleted
if err := common.EnsureResourceDeleted(ctx, rt, res, StatusKey); err != nil {
rt.Logger.Error("failed deleting namespace", "error", err)
return err
}
return nil
}

View File

@ -7,8 +7,9 @@ package controllers
import (
"github.com/hashicorp/consul/internal/controller"
"github.com/hashicorp/consul/internal/tenancy/internal/controllers/namespace"
)
func Register(mgr *controller.Manager, deps Dependencies) {
//mgr.Register(namespace.NamespaceController())
mgr.Register(namespace.Controller(deps.Registry))
}

View File

@ -0,0 +1,164 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package tenancytest
import (
"context"
"testing"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
svc "github.com/hashicorp/consul/agent/grpc-external/services/resource"
svctest "github.com/hashicorp/consul/agent/grpc-external/services/resource/testing"
"github.com/hashicorp/consul/internal/controller"
"github.com/hashicorp/consul/internal/resource"
"github.com/hashicorp/consul/internal/resource/demo"
rtest "github.com/hashicorp/consul/internal/resource/resourcetest"
"github.com/hashicorp/consul/internal/tenancy"
"github.com/hashicorp/consul/internal/tenancy/internal/controllers/common"
"github.com/hashicorp/consul/internal/tenancy/internal/controllers/namespace"
"github.com/hashicorp/consul/proto-public/pbresource"
pbtenancy "github.com/hashicorp/consul/proto-public/pbtenancy/v2beta1"
pbdemo "github.com/hashicorp/consul/proto/private/pbdemo/v1"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/consul/sdk/testutil/retry"
)
// Due to a circular dependency, this test can't reside in the package next to the controller it is testing.
type nsTestSuite struct {
suite.Suite
client *rtest.Client
runtime controller.Runtime
ctx context.Context
registry resource.Registry
}
func (ts *nsTestSuite) SetupTest() {
ts.registry = resource.NewRegistry()
tenancyBridge := tenancy.NewV2TenancyBridge()
config := svc.Config{TenancyBridge: tenancyBridge, Registry: ts.registry, UseV2Tenancy: true}
resourceClient := svctest.RunResourceServiceWithConfig(ts.T(), config, tenancy.RegisterTypes, demo.RegisterTypes)
tenancyBridge.WithClient(resourceClient)
ts.client = rtest.NewClient(resourceClient)
ts.runtime = controller.Runtime{Client: resourceClient, Logger: testutil.Logger(ts.T())}
ts.ctx = testutil.TestContext(ts.T())
mgr := controller.NewManager(ts.client, testutil.Logger(ts.T()))
mgr.Register(namespace.Controller(ts.registry))
mgr.SetRaftLeader(true)
ctx, cancel := context.WithCancel(context.Background())
ts.T().Cleanup(cancel)
go mgr.Run(ctx)
}
func (ts *nsTestSuite) waitForReconciliation(id *pbresource.ID, reason string) {
ts.T().Helper()
retry.Run(ts.T(), func(r *retry.R) {
rsp, err := ts.client.Read(context.Background(), &pbresource.ReadRequest{
Id: id,
})
require.NoError(r, err)
status, found := rsp.Resource.Status[namespace.StatusKey]
require.True(r, found)
require.Len(r, status.Conditions, 1)
require.Equal(r, reason, status.Conditions[0].Reason)
})
}
func (ts *nsTestSuite) TestNamespaceController_HappyPath() {
// Create namespace ns1
ns1 := rtest.Resource(pbtenancy.NamespaceType, "ns1").
// Keep this CE friendly by using default partition
WithTenancy(resource.DefaultPartitionedTenancy()).
WithData(ts.T(), &pbtenancy.Namespace{Description: "namespace ns1"}).
Write(ts.T(), ts.client)
// Wait for it to be accepted
ts.waitForReconciliation(ns1.Id, common.ReasonAcceptedOK)
// Verify namespace finalizer added
ns1 = ts.client.RequireResourceMeta(ts.T(), ns1.Id, resource.FinalizerKey, namespace.StatusKey)
// Add a namespace scoped tenant to the namespace
artist1 := rtest.Resource(demo.TypeV1Artist, "moonchild").
WithTenancy(&pbresource.Tenancy{
Partition: resource.DefaultPartitionName,
Namespace: ns1.Id.Name,
PeerName: resource.DefaultPeerName,
}).
WithData(ts.T(), &pbdemo.Artist{Name: "Moonchild"}).
Write(ts.T(), ts.client)
// Delete the namespace
_, err := ts.client.Delete(ts.ctx, &pbresource.DeleteRequest{Id: ns1.Id})
require.NoError(ts.T(), err)
// Wait for the namespace to be deleted
ts.client.WaitForDeletion(ts.T(), ns1.Id)
// Verify tenants deleted.
ts.client.RequireResourceNotFound(ts.T(), artist1.Id)
}
func (ts *nsTestSuite) TestNamespaceController_DeleteBlockedByTenantsWithFinalizers() {
// Create namespace ns1
ns1 := rtest.Resource(pbtenancy.NamespaceType, "ns1").
WithTenancy(resource.DefaultPartitionedTenancy()).
WithData(ts.T(), &pbtenancy.Namespace{Description: "namespace ns1"}).
Write(ts.T(), ts.client)
// Wait for it to be accepted
ts.waitForReconciliation(ns1.Id, common.ReasonAcceptedOK)
// Add artist to namespace
_ = rtest.Resource(demo.TypeV1Artist, "weezer").
WithTenancy(&pbresource.Tenancy{
Partition: resource.DefaultPartitionName,
Namespace: ns1.Id.Name,
PeerName: resource.DefaultPeerName,
}).
WithData(ts.T(), &pbdemo.Artist{Name: "Weezer"}).
Write(ts.T(), ts.client)
// Add another artist to namespace with a finalizer so that is blocks namespace deletion.
artist2 := rtest.Resource(demo.TypeV1Artist, "foofighters").
WithTenancy(&pbresource.Tenancy{
Partition: resource.DefaultPartitionName,
Namespace: ns1.Id.Name,
PeerName: resource.DefaultPeerName,
}).
WithData(ts.T(), &pbdemo.Artist{Name: "Foo Fighters"}).
WithMeta(resource.FinalizerKey, "finalizer2").
Write(ts.T(), ts.client)
// Delete the namespace - this activates the controller logic to delete all tenants
ts.client.Delete(ts.ctx, &pbresource.DeleteRequest{Id: ns1.Id})
// Delete should be blocked by artist2 tenant with finalizer
ts.client.WaitForStatusConditionAnyGen(ts.T(), ns1.Id, namespace.StatusKey, &pbresource.Condition{
Type: common.ConditionDeleted,
State: pbresource.Condition_STATE_FALSE,
Reason: common.ReasonDeletionInProgress,
Message: common.ErrStillHasTenants.Error(),
})
// Remove the finalizer on artist2 to unblock deletion of ns1
artist2 = ts.client.RequireResourceExists(ts.T(), artist2.Id)
resource.RemoveFinalizer(artist2, "finalizer2")
_, err := ts.client.Write(ts.ctx, &pbresource.WriteRequest{Resource: artist2})
require.NoError(ts.T(), err)
// The final reconcile should delete artist since it was marked for deletion and
// and has no finalizers. Given no more tenants, wait for namespace to be deleted.
ts.client.WaitForDeletion(ts.T(), ns1.Id)
}
func TestNamespaceControllerSuite(t *testing.T) {
suite.Run(t, new(nsTestSuite))
}