diff --git a/agent/proxycfg/api_gateway.go b/agent/proxycfg/api_gateway.go
index 28a6c42300..c9b5ad1007 100644
--- a/agent/proxycfg/api_gateway.go
+++ b/agent/proxycfg/api_gateway.go
@@ -73,6 +73,7 @@ func (h *handlerAPIGateway) initialize(ctx context.Context) (ConfigSnapshot, err
 	snap.APIGateway.WatchedDiscoveryChains = make(map[UpstreamID]context.CancelFunc)
 	snap.APIGateway.WatchedGateways = make(map[UpstreamID]map[string]context.CancelFunc)
 	snap.APIGateway.WatchedGatewayEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
+	snap.APIGateway.WatchedLocalGWEndpoints = watch.NewMap[string, structs.CheckServiceNodes]()
 	snap.APIGateway.WatchedUpstreams = make(map[UpstreamID]map[string]context.CancelFunc)
 	snap.APIGateway.WatchedUpstreamEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
 
diff --git a/agent/proxycfg/ingress_gateway.go b/agent/proxycfg/ingress_gateway.go
index b6c1cd6e02..f0f75d8107 100644
--- a/agent/proxycfg/ingress_gateway.go
+++ b/agent/proxycfg/ingress_gateway.go
@@ -67,6 +67,7 @@ func (s *handlerIngressGateway) initialize(ctx context.Context) (ConfigSnapshot,
 	snap.IngressGateway.WatchedUpstreamEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
 	snap.IngressGateway.WatchedGateways = make(map[UpstreamID]map[string]context.CancelFunc)
 	snap.IngressGateway.WatchedGatewayEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
+	snap.IngressGateway.WatchedLocalGWEndpoints = watch.NewMap[string, structs.CheckServiceNodes]()
 	snap.IngressGateway.Listeners = make(map[IngressListenerKey]structs.IngressListener)
 	snap.IngressGateway.UpstreamPeerTrustBundles = watch.NewMap[string, *pbpeering.PeeringTrustBundle]()
 	snap.IngressGateway.PeerUpstreamEndpoints = watch.NewMap[UpstreamID, structs.CheckServiceNodes]()
diff --git a/agent/proxycfg/manager.go b/agent/proxycfg/manager.go
index eb5df5a855..c58268e7e0 100644
--- a/agent/proxycfg/manager.go
+++ b/agent/proxycfg/manager.go
@@ -2,6 +2,7 @@ package proxycfg
 
 import (
 	"errors"
+	"runtime/debug"
 	"sync"
 
 	"github.com/hashicorp/go-hclog"
@@ -142,6 +143,20 @@ func (m *Manager) Register(id ProxyID, ns *structs.NodeService, source ProxySour
 	m.mu.Lock()
 	defer m.mu.Unlock()
 
+	defer func() {
+		if r := recover(); r != nil {
+			m.Logger.Error("unexpected panic during service manager registration",
+				"node", id.NodeName,
+				"service", id.ServiceID,
+				"message", r,
+				"stacktrace", string(debug.Stack()),
+			)
+		}
+	}()
+	return m.register(id, ns, source, token, overwrite)
+}
+
+func (m *Manager) register(id ProxyID, ns *structs.NodeService, source ProxySource, token string, overwrite bool) error {
 	state, ok := m.proxies[id]
 	if ok {
 		if state.source != source && !overwrite {
diff --git a/agent/proxycfg/state.go b/agent/proxycfg/state.go
index 57964d54fe..d312c3b4c1 100644
--- a/agent/proxycfg/state.go
+++ b/agent/proxycfg/state.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"net"
 	"reflect"
+	"runtime/debug"
 	"sync/atomic"
 	"time"
 
@@ -298,6 +299,21 @@ func newConfigSnapshotFromServiceInstance(s serviceInstance, config stateConfig)
 }
 
 func (s *state) run(ctx context.Context, snap *ConfigSnapshot) {
+	// Add a recover here so that any panics do not make their way up
+	// into the server / agent.
+	defer func() {
+		if r := recover(); r != nil {
+			s.logger.Error("unexpected panic while running proxycfg",
+				"node", s.serviceInstance.proxyID.NodeName,
+				"service", s.serviceInstance.proxyID.ServiceID,
+				"message", r,
+				"stacktrace", string(debug.Stack()))
+		}
+	}()
+	s.unsafeRun(ctx, snap)
+}
+
+func (s *state) unsafeRun(ctx context.Context, snap *ConfigSnapshot) {
 	// Close the channel we return from Watch when we stop so consumers can stop
 	// watching and clean up their goroutines. It's important we do this here and
 	// not in Close since this routine sends on this chan and so might panic if it
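
Both recovery hunks follow the same Go pattern: the public entry point installs a deferred recover(), then delegates to an inner function holding the original body. Below is a minimal, self-contained sketch of that pattern; safeRun, doWork-style closures, and the plain fmt logging are illustrative stand-ins, not Consul code (the real diff uses state.run/state.unsafeRun, Manager.Register/Manager.register, and a structured hclog.Logger).

```go
package main

import (
	"fmt"
	"runtime/debug"
)

// safeRun is a hypothetical wrapper mirroring the diff's structure: a
// deferred recover logs any panic in the workload, with its stack trace,
// instead of letting it unwind into the caller (here, the agent).
func safeRun(do func() error) error {
	defer func() {
		if r := recover(); r != nil {
			// The real code logs via hclog with structured fields;
			// fmt keeps this sketch dependency-free.
			fmt.Printf("unexpected panic: message=%v\nstacktrace=%s", r, debug.Stack())
		}
	}()
	return do()
}

func main() {
	_ = safeRun(func() error { panic("boom") }) // logged, not fatal
	fmt.Println("agent keeps running")
}
```

Two consequences worth noting. First, when the inner function panics, the outer function returns the zero value of its results, so Manager.Register would return a nil error for a registration that never completed; the diff evidently accepts that trade-off to keep one misbehaving proxy watch from crashing the whole server/agent. Second, the run/unsafeRun split is not required for recover to work (a deferred recover also catches panics raised in callees); it simply keeps the wrapper small and leaves the original function body untouched.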