From 15bc463a87dd7a13ca3b5681ba67cabb1654dd7a Mon Sep 17 00:00:00 2001 From: "John A. Lotoski" <39752197+johnalotoski@users.noreply.github.com> Date: Wed, 16 Mar 2022 15:44:28 -0500 Subject: [PATCH] state: sunken token fallback_req logging and case fixup (#1) --- agent/local/state.go | 103 +++++++++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 43 deletions(-) diff --git a/agent/local/state.go b/agent/local/state.go index ad7f7efa17..888e8b58d7 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -1311,30 +1311,39 @@ func (l *State) deleteService(key structs.ServiceID) error { l.logger.Info("Deregistered service", "service", key.ID) return nil case acl.IsErrNotFound(err): - // token might have sunken already, fallback to the default token - var out struct{} - err := l.Delegate.RPC("Catalog.Deregister", &fallback_req, &out) - switch { - case err == nil: - delete(l.services, key) - // service deregister also deletes associated checks - for _, c := range l.checks { - if c.Deleted && c.Check != nil { - sid := c.Check.CompoundServiceID() - if sid.Matches(key) { - l.pruneCheck(c.Check.CompoundCheckID()) - } - } - } - l.logger.Info("Deregistered service", "service", key.ID) - return nil - default: - l.logger.Warn("Deregistering service failed.", - "service", key.String(), - "error", err, - ) - return err - } + // token might have sunken already, fallback to the default token + var out struct{} + err := l.Delegate.RPC("Catalog.Deregister", &fallback_req, &out) + switch { + case err == nil || strings.Contains(err.Error(), "Unknown service"): + delete(l.services, key) + // service deregister also deletes associated checks + for _, c := range l.checks { + if c.Deleted && c.Check != nil { + sid := c.Check.CompoundServiceID() + if sid.Matches(key) { + l.pruneCheck(c.Check.CompoundCheckID()) + } + } + } + l.logger.Info("Deregistered service with fallback_req", "service", key.ID) + return nil + case acl.IsErrPermissionDenied(err): + // todo(fs): mark the service to be in sync to prevent excessive retrying before next full sync + // todo(fs): some backoff strategy might be a better solution + l.services[key].InSync = true + accessorID := l.aclAccessorID(st) + l.logger.Warn("Service deregistration blocked by ACLs with fallback_req", "service", key.String(), "accessorID", accessorID) + metrics.IncrCounter([]string{"acl", "blocked", "service", "deregistration"}, 1) + return nil + + default: + l.logger.Warn("Deregistering service failed with fallback_req.", + "service", key.String(), + "error", err, + ) + return err + } case acl.IsErrPermissionDenied(err): // todo(fs): mark the service to be in sync to prevent excessive retrying before next full sync // todo(fs): some backoff strategy might be a better solution @@ -1533,26 +1542,34 @@ func (l *State) syncCheck(key structs.CheckID) error { return nil case acl.IsErrNotFound(err): - // token might have sunken already, fallback to the default token - var out struct{} - err := l.Delegate.RPC("Catalog.Register", &fallback_req, &out) - switch { - case err == nil: - l.checks[key].InSync = true - // Given how the register API works, this info is also updated - // every time we sync a check. - l.nodeInfoInSync = true - l.logger.Info("Synced check", "check", key.String()) - return nil - default: - l.logger.Warn("Syncing check failed.", - "check", key.String(), - "error", err, - ) - return err - } + // token might have sunken already, fallback to the default token + var out struct{} + err := l.Delegate.RPC("Catalog.Register", &fallback_req, &out) + switch { + case err == nil: + l.checks[key].InSync = true + // Given how the register API works, this info is also updated + // every time we sync a check. + l.nodeInfoInSync = true + l.logger.Info("Synced check with fallback_req", "check", key.String()) + return nil + case acl.IsErrPermissionDenied(err): + // todo(fs): mark the check to be in sync to prevent excessive retrying before next full sync + // todo(fs): some backoff strategy might be a better solution + l.checks[key].InSync = true + accessorID := l.aclAccessorID(ct) + l.logger.Warn("Check registration blocked by ACLs with fallback_req", "check", key.String(), "accessorID", accessorID) + metrics.IncrCounter([]string{"acl", "blocked", "check", "registration"}, 1) + return nil + default: + l.logger.Warn("Syncing check failed with fallback_req.", + "check", key.String(), + "error", err, + ) + return err + } - case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err): + case acl.IsErrPermissionDenied(err): // todo(fs): mark the check to be in sync to prevent excessive retrying before next full sync // todo(fs): some backoff strategy might be a better solution l.checks[key].InSync = true