mirror of https://github.com/hashicorp/consul
Fix nil-pointer panics from proxycfg package.
Prior to this PR, servers / agents would panic and crash if an ingress or api gateway were configured to use a discovery chain that both: 1. Referenced a peered service 2. Had a mesh gateway mode of local This could occur, because code for handling upstream watches was shared between both connect-proxy and the gateways. As a short-term fix, this PR ensures that the maps are always initialized for these gateway services. For agentless deployments, this behavior is particularly bad. The server would essentially get stuck in an endless crash-loop, because it would attempt to execute proxycfg immediately from its persisted state. To help prevent this, the PR also wraps the proxycfg execution and service registration calls with recover statements to ensure that future issues like this do not put the server into an unrecoverable state.pull/16277/head
parent
fd61605ffb
commit
e5adc43315
|
@ -73,6 +73,7 @@ func (h *handlerAPIGateway) initialize(ctx context.Context) (ConfigSnapshot, err
|
|||
snap.APIGateway.WatchedDiscoveryChains = make(map[UpstreamID]context.CancelFunc)
|
||||
snap.APIGateway.WatchedGateways = make(map[UpstreamID]map[string]context.CancelFunc)
|
||||
snap.APIGateway.WatchedGatewayEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
|
||||
snap.APIGateway.WatchedLocalGWEndpoints = watch.NewMap[string, structs.CheckServiceNodes]()
|
||||
snap.APIGateway.WatchedUpstreams = make(map[UpstreamID]map[string]context.CancelFunc)
|
||||
snap.APIGateway.WatchedUpstreamEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
|
||||
|
||||
|
|
|
@ -67,6 +67,7 @@ func (s *handlerIngressGateway) initialize(ctx context.Context) (ConfigSnapshot,
|
|||
snap.IngressGateway.WatchedUpstreamEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
|
||||
snap.IngressGateway.WatchedGateways = make(map[UpstreamID]map[string]context.CancelFunc)
|
||||
snap.IngressGateway.WatchedGatewayEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
|
||||
snap.IngressGateway.WatchedLocalGWEndpoints = watch.NewMap[string, structs.CheckServiceNodes]()
|
||||
snap.IngressGateway.Listeners = make(map[IngressListenerKey]structs.IngressListener)
|
||||
snap.IngressGateway.UpstreamPeerTrustBundles = watch.NewMap[string, *pbpeering.PeeringTrustBundle]()
|
||||
snap.IngressGateway.PeerUpstreamEndpoints = watch.NewMap[UpstreamID, structs.CheckServiceNodes]()
|
||||
|
|
|
@ -2,6 +2,7 @@ package proxycfg
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"runtime/debug"
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/go-hclog"
|
||||
|
@ -142,6 +143,20 @@ func (m *Manager) Register(id ProxyID, ns *structs.NodeService, source ProxySour
|
|||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
m.Logger.Error("unexpected panic during service manager registration",
|
||||
"node", id.NodeName,
|
||||
"service", id.ServiceID,
|
||||
"message", r,
|
||||
"stacktrace", string(debug.Stack()),
|
||||
)
|
||||
}
|
||||
}()
|
||||
return m.register(id, ns, source, token, overwrite)
|
||||
}
|
||||
|
||||
func (m *Manager) register(id ProxyID, ns *structs.NodeService, source ProxySource, token string, overwrite bool) error {
|
||||
state, ok := m.proxies[id]
|
||||
if ok {
|
||||
if state.source != source && !overwrite {
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"fmt"
|
||||
"net"
|
||||
"reflect"
|
||||
"runtime/debug"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
|
@ -298,6 +299,21 @@ func newConfigSnapshotFromServiceInstance(s serviceInstance, config stateConfig)
|
|||
}
|
||||
|
||||
func (s *state) run(ctx context.Context, snap *ConfigSnapshot) {
|
||||
// Add a recover here so than any panics do not make their way up
|
||||
// into the server / agent.
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
s.logger.Error("unexpected panic while running proxycfg",
|
||||
"node", s.serviceInstance.proxyID.NodeName,
|
||||
"service", s.serviceInstance.proxyID.ServiceID,
|
||||
"message", r,
|
||||
"stacktrace", string(debug.Stack()))
|
||||
}
|
||||
}()
|
||||
s.unsafeRun(ctx, snap)
|
||||
}
|
||||
|
||||
func (s *state) unsafeRun(ctx context.Context, snap *ConfigSnapshot) {
|
||||
// Close the channel we return from Watch when we stop so consumers can stop
|
||||
// watching and clean up their goroutines. It's important we do this here and
|
||||
// not in Close since this routine sends on this chan and so might panic if it
|
||||
|
|
Loading…
Reference in New Issue