Backport of Fix to not create a watch to `Internal.ServiceDump` when mesh gateway is not used into release/1.17.x (#20268)

This add a fix to properly verify the gateway mode before creating a watch specific to mesh gateways. This watch have a high performance cost and when mesh gateways are not used is not used.

This also adds an optimization to only return the nodes when watching the Internal.ServiceDump RPC to avoid unnecessary disco chain compilation. As watches in proxy config only need the nodes.

* backport of commit b0ce20b5e2
* backport of commit 3d4bde00cf
* backport of commit b2c77246b9
* backport of commit e7ab4d418d
* backport of commit d00d9c5da4
* backport of commit b2db3d5eb4
* backport of commit 50fb45ac74
* backport of commit 7b41a61c17
* backport of commit 2fa0e0a629
* backport of commit 88849c9030
* backport of commit 4ac54f10bc
* backport of commit 2a9dfc37f2

---------

Co-authored-by: Dhia Ayachi <dhia@hashicorp.com>
pull/20287/head
hc-github-team-consul-core 2024-01-19 08:17:22 -06:00 committed by GitHub
parent ec8d1f75c7
commit 993f2d21c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 350 additions and 41 deletions

7
.changelog/20168.txt Normal file
View File

@ -0,0 +1,7 @@
```release-note:enhancement
ProxyCfg: avoid setting a watch on `Internal.ServiceDump` when mesh gateway is not used.
```
```release-note:enhancement
ProxyCfg: only return the nodes list when querying the `Internal.ServiceDump` watch from proxycfg
```

View File

@ -197,43 +197,44 @@ func (m *Internal) ServiceDump(args *structs.ServiceDumpRequest, reply *structs.
}
reply.Nodes = nodes
// get a list of all peerings
index, listedPeerings, err := state.PeeringList(ws, args.EnterpriseMeta)
if err != nil {
return fmt.Errorf("could not list peers for service dump %w", err)
}
if index > maxIndex {
maxIndex = index
}
for _, p := range listedPeerings {
// Note we fetch imported services with wildcard namespace because imported services' namespaces
// are in a different locality; regardless of our local namespace, we return all imported services
// of the local partition.
index, importedNodes, err := state.ServiceDump(ws, args.ServiceKind, args.UseServiceKind, args.EnterpriseMeta.WithWildcardNamespace(), p.Name)
if !args.NodesOnly {
// get a list of all peerings
index, listedPeerings, err := state.PeeringList(ws, args.EnterpriseMeta)
if err != nil {
return fmt.Errorf("could not get a service dump for peer %q: %w", p.Name, err)
return fmt.Errorf("could not list peers for service dump %w", err)
}
if index > maxIndex {
maxIndex = index
}
reply.ImportedNodes = append(reply.ImportedNodes, importedNodes...)
}
// Get, store, and filter gateway services
idx, gatewayServices, err := state.DumpGatewayServices(ws)
if err != nil {
return err
}
reply.Gateways = gatewayServices
for _, p := range listedPeerings {
// Note we fetch imported services with wildcard namespace because imported services' namespaces
// are in a different locality; regardless of our local namespace, we return all imported services
// of the local partition.
index, importedNodes, err := state.ServiceDump(ws, args.ServiceKind, args.UseServiceKind, args.EnterpriseMeta.WithWildcardNamespace(), p.Name)
if err != nil {
return fmt.Errorf("could not get a service dump for peer %q: %w", p.Name, err)
}
if idx > maxIndex {
maxIndex = idx
if index > maxIndex {
maxIndex = index
}
reply.ImportedNodes = append(reply.ImportedNodes, importedNodes...)
}
// Get, store, and filter gateway services
idx, gatewayServices, err := state.DumpGatewayServices(ws)
if err != nil {
return err
}
reply.Gateways = gatewayServices
if idx > maxIndex {
maxIndex = idx
}
}
reply.Index = maxIndex
raw, err := filter.Execute(reply.Nodes)
if err != nil {
return fmt.Errorf("could not filter local service dump: %w", err)
@ -241,12 +242,13 @@ func (m *Internal) ServiceDump(args *structs.ServiceDumpRequest, reply *structs.
reply.Nodes = raw.(structs.CheckServiceNodes)
}
importedRaw, err := filter.Execute(reply.ImportedNodes)
if err != nil {
return fmt.Errorf("could not filter peer service dump: %w", err)
if !args.NodesOnly {
importedRaw, err := filter.Execute(reply.ImportedNodes)
if err != nil {
return fmt.Errorf("could not filter peer service dump: %w", err)
}
reply.ImportedNodes = importedRaw.(structs.CheckServiceNodes)
}
reply.ImportedNodes = importedRaw.(structs.CheckServiceNodes)
// Note: we filter the results with ACLs *after* applying the user-supplied
// bexpr filter, to ensure QueryMeta.ResultsFilteredByACLs does not include
// results that would be filtered out even if the user did have permission.

View File

@ -1779,10 +1779,11 @@ func TestInternal_ServiceDump_Peering(t *testing.T) {
// prep the cluster with some data we can use in our filters
registerTestCatalogEntries(t, codec)
doRequest := func(t *testing.T, filter string) structs.IndexedNodesWithGateways {
doRequest := func(t *testing.T, filter string, onlyNodes bool) structs.IndexedNodesWithGateways {
t.Helper()
args := structs.DCSpecificRequest{
args := structs.ServiceDumpRequest{
QueryOptions: structs.QueryOptions{Filter: filter},
NodesOnly: onlyNodes,
}
var out structs.IndexedNodesWithGateways
@ -1792,7 +1793,7 @@ func TestInternal_ServiceDump_Peering(t *testing.T) {
}
t.Run("No peerings", func(t *testing.T) {
nodes := doRequest(t, "")
nodes := doRequest(t, "", false)
// redis (3), web (3), critical (1), warning (1) and consul (1)
require.Len(t, nodes.Nodes, 9)
require.Len(t, nodes.ImportedNodes, 0)
@ -1809,19 +1810,27 @@ func TestInternal_ServiceDump_Peering(t *testing.T) {
require.NoError(t, err)
t.Run("peerings", func(t *testing.T) {
nodes := doRequest(t, "")
nodes := doRequest(t, "", false)
// redis (3), web (3), critical (1), warning (1) and consul (1)
require.Len(t, nodes.Nodes, 9)
// service (1)
require.Len(t, nodes.ImportedNodes, 1)
})
t.Run("peerings onlynodes", func(t *testing.T) {
nodes := doRequest(t, "", true)
// redis (3), web (3), critical (1), warning (1) and consul (1)
require.Len(t, nodes.Nodes, 9)
// service (1)
require.Len(t, nodes.ImportedNodes, 0)
})
t.Run("peerings w filter", func(t *testing.T) {
nodes := doRequest(t, "Node.PeerName == foo")
nodes := doRequest(t, "Node.PeerName == foo", false)
require.Len(t, nodes.Nodes, 0)
require.Len(t, nodes.ImportedNodes, 0)
nodes2 := doRequest(t, "Node.PeerName == peer1")
nodes2 := doRequest(t, "Node.PeerName == peer1", false)
require.Len(t, nodes2.Nodes, 0)
require.Len(t, nodes2.ImportedNodes, 1)
})

View File

@ -295,6 +295,7 @@ func (s *handlerMeshGateway) handleUpdate(ctx context.Context, u UpdateEvent, sn
QueryOptions: structs.QueryOptions{Token: s.token},
ServiceKind: structs.ServiceKindMeshGateway,
UseServiceKind: true,
NodesOnly: true,
Source: *s.source,
EnterpriseMeta: *entMeta,
}, fmt.Sprintf("mesh-gateway:%s", gk.String()), s.ch)

View File

@ -544,6 +544,7 @@ func watchMeshGateway(ctx context.Context, opts gatewayWatchOpts) error {
QueryOptions: structs.QueryOptions{Token: opts.token},
ServiceKind: structs.ServiceKindMeshGateway,
UseServiceKind: true,
NodesOnly: true,
Source: opts.source,
EnterpriseMeta: *structs.DefaultEnterpriseMetaInPartition(opts.key.Partition),
}, correlationId, opts.notifyCh)

View File

@ -838,6 +838,282 @@ func TestState_WatchesAndUpdates(t *testing.T) {
stages: []verificationStage{stage0, stage1},
}
}
newConnectProxyCaseMeshDefault := func() testCase {
ns := structs.NodeService{
Kind: structs.ServiceKindConnectProxy,
ID: "web-sidecar-proxy",
Service: "web-sidecar-proxy",
Address: "10.0.1.1",
Port: 443,
Proxy: structs.ConnectProxyConfig{
DestinationServiceName: "web",
Upstreams: structs.Upstreams{
structs.Upstream{
DestinationType: structs.UpstreamDestTypePreparedQuery,
DestinationName: "query",
LocalBindPort: 10001,
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api",
LocalBindPort: 10002,
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api-failover-direct",
Datacenter: "dc2",
LocalBindPort: 10005,
MeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeNone,
},
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api-failover-to-peer",
LocalBindPort: 10007,
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api-dc2",
LocalBindPort: 10006,
},
},
},
}
ixnMatch := TestIntentions()
stage0 := verificationStage{
requiredWatches: map[string]verifyWatchRequest{
intentionsWatchID: genVerifyIntentionWatch("web", "dc1"),
meshConfigEntryID: genVerifyMeshConfigWatch("dc1"),
fmt.Sprintf("discovery-chain:%s", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api",
EvaluateInDatacenter: "dc1",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeDefault,
},
QueryOptions: structs.QueryOptions{
Token: aclToken,
},
}),
fmt.Sprintf("discovery-chain:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api-failover-direct",
EvaluateInDatacenter: "dc2",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeNone,
},
QueryOptions: structs.QueryOptions{
Token: aclToken,
},
}),
fmt.Sprintf("discovery-chain:%s-failover-to-peer", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api-failover-to-peer",
EvaluateInDatacenter: "dc1",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeDefault,
},
QueryOptions: structs.QueryOptions{
Token: aclToken,
},
}),
fmt.Sprintf("discovery-chain:%s-dc2", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api-dc2",
EvaluateInDatacenter: "dc1",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeDefault,
},
QueryOptions: structs.QueryOptions{
Token: aclToken,
},
}),
"upstream:" + pqUID.String(): genVerifyPreparedQueryWatch("query", "dc1"),
rootsWatchID: genVerifyDCSpecificWatch("dc1"),
leafWatchID: genVerifyLeafWatch("web", "dc1"),
},
events: []UpdateEvent{
rootWatchEvent(),
{
CorrelationID: leafWatchID,
Result: issuedCert,
Err: nil,
},
{
CorrelationID: intentionsWatchID,
Result: ixnMatch,
Err: nil,
},
{
CorrelationID: meshConfigEntryID,
Result: &structs.ConfigEntryResponse{},
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api", "default", "default", "dc1", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, nil),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-remote?dc=dc2", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-remote", "default", "default", "dc2", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, nil),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-local?dc=dc2", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-local", "default", "default", "dc2", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, nil),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-direct?dc=dc2", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-direct", "default", "default", "dc2", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, nil),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-dc2", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-dc2", "default", "default", "dc1", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, discoChainSetWithEntries(&structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "api-dc2",
Redirect: &structs.ServiceResolverRedirect{
Service: "api",
Datacenter: "dc2",
},
})),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-to-peer", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-to-peer", "default", "default", "dc1", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, discoChainSetWithEntries(&structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "api-failover-to-peer",
Failover: map[string]structs.ServiceResolverFailover{
"*": {
Targets: []structs.ServiceResolverFailoverTarget{
{Peer: "cluster-01"},
},
},
},
})),
},
Err: nil,
},
{
CorrelationID: "mesh-gateway:dc1",
Result: &structs.IndexedCheckServiceNodes{
Nodes: structs.CheckServiceNodes{
{
Node: &structs.Node{
Node: "node1",
Address: "10.1.2.3",
},
Service: structs.TestNodeServiceMeshGateway(t),
},
},
},
},
},
verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) {
require.True(t, snap.Valid())
require.True(t, snap.MeshGateway.isEmpty())
require.Equal(t, indexedRoots, snap.Roots)
require.Equal(t, issuedCert, snap.ConnectProxy.Leaf)
require.Len(t, snap.ConnectProxy.DiscoveryChain, 4, "%+v", snap.ConnectProxy.DiscoveryChain)
require.Len(t, snap.ConnectProxy.WatchedUpstreams, 4, "%+v", snap.ConnectProxy.WatchedUpstreams)
require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 4, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints)
require.Len(t, snap.ConnectProxy.WatchedGateways, 4, "%+v", snap.ConnectProxy.WatchedGateways)
require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 4, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints)
require.Len(t, snap.ConnectProxy.WatchedServiceChecks, 0, "%+v", snap.ConnectProxy.WatchedServiceChecks)
require.Len(t, snap.ConnectProxy.PreparedQueryEndpoints, 0, "%+v", snap.ConnectProxy.PreparedQueryEndpoints)
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.PeerUpstreamEndpoints.Len())
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.UpstreamPeerTrustBundles.Len())
require.True(t, snap.ConnectProxy.IntentionsSet)
require.Equal(t, ixnMatch, snap.ConnectProxy.Intentions)
require.True(t, snap.ConnectProxy.MeshConfigSet)
// No event is expected as all services use default or remote mode
require.Equal(t, 0, snap.ConnectProxy.WatchedLocalGWEndpoints.Len())
},
}
stage1 := verificationStage{
requiredWatches: map[string]verifyWatchRequest{
fmt.Sprintf("upstream-target:api.default.default.dc1:%s", apiUID.String()): genVerifyServiceSpecificRequest("api", "", "dc1", true),
fmt.Sprintf("upstream-target:api-failover-direct.default.default.dc2:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-direct", "", "dc2", true),
upstreamPeerWatchIDPrefix + fmt.Sprintf("%s-failover-to-peer?peer=cluster-01", apiUID.String()): genVerifyServiceSpecificPeeredRequest("api-failover-to-peer", "", "dc1", "cluster-01", true),
},
verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) {
require.True(t, snap.Valid())
require.True(t, snap.MeshGateway.isEmpty())
require.Equal(t, indexedRoots, snap.Roots)
require.Equal(t, issuedCert, snap.ConnectProxy.Leaf)
require.Len(t, snap.ConnectProxy.DiscoveryChain, 4, "%+v", snap.ConnectProxy.DiscoveryChain)
require.Len(t, snap.ConnectProxy.WatchedUpstreams, 4, "%+v", snap.ConnectProxy.WatchedUpstreams)
require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 4, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints)
require.Len(t, snap.ConnectProxy.WatchedGateways, 4, "%+v", snap.ConnectProxy.WatchedGateways)
require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 4, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints)
require.Len(t, snap.ConnectProxy.WatchedServiceChecks, 0, "%+v", snap.ConnectProxy.WatchedServiceChecks)
require.Len(t, snap.ConnectProxy.PreparedQueryEndpoints, 0, "%+v", snap.ConnectProxy.PreparedQueryEndpoints)
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.PeerUpstreamEndpoints.Len())
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.UpstreamPeerTrustBundles.Len())
require.True(t, snap.ConnectProxy.IntentionsSet)
require.Equal(t, ixnMatch, snap.ConnectProxy.Intentions)
},
}
return testCase{
ns: ns,
sourceDC: "dc1",
stages: []verificationStage{stage0, stage1},
}
}
dbIxnMatch := structs.SimplifiedIntentions{
{
@ -3444,8 +3720,9 @@ func TestState_WatchesAndUpdates(t *testing.T) {
},
},
},
"connect-proxy": newConnectProxyCase(structs.MeshGatewayModeDefault),
"connect-proxy-mesh-gateway-local": newConnectProxyCase(structs.MeshGatewayModeLocal),
"connect-proxy": newConnectProxyCase(structs.MeshGatewayModeDefault),
"connect-proxy-mesh-gateway-default": newConnectProxyCaseMeshDefault(),
"connect-proxy-mesh-gateway-local": newConnectProxyCase(structs.MeshGatewayModeLocal),
"connect-proxy-with-peers": {
ns: structs.NodeService{
Kind: structs.ServiceKindConnectProxy,

View File

@ -354,6 +354,14 @@ func (s *handlerUpstreams) resetWatchesFromChain(
Partition: s.proxyID.PartitionOrDefault(),
Datacenter: s.source.Datacenter,
}
default:
// if target.MeshGateway.Mode is not set and target is not peered we don't want to set up watches for the gateway.
// This is important specifically in wan-fed without mesh gateway use case, as for this case
//the source and target DC could be different but there is not mesh-gateway so no need to watch
// a costly watch (Internal.ServiceDump)
if target.Peer == "" {
continue
}
}
if s.source.Datacenter != target.Datacenter || s.proxyID.PartitionOrDefault() != target.Partition {
needGateways[gk.String()] = struct{}{}
@ -393,6 +401,7 @@ func (s *handlerUpstreams) resetWatchesFromChain(
if _, ok := snap.WatchedGateways[uid][key]; ok {
continue
}
gwKey := gatewayKeyFromString(key)
s.logger.Trace("initializing watch of mesh gateway",
@ -411,13 +420,14 @@ func (s *handlerUpstreams) resetWatchesFromChain(
key: gwKey,
upstreamID: uid,
}
err := watchMeshGateway(ctx, opts)
if err != nil {
cancel()
return err
}
snap.WatchedGateways[uid][key] = cancel
}
for key, cancelFn := range snap.WatchedGateways[uid] {

View File

@ -662,6 +662,7 @@ type ServiceDumpRequest struct {
Datacenter string
ServiceKind ServiceKind
UseServiceKind bool
NodesOnly bool
Source QuerySource
acl.EnterpriseMeta `hcl:",squash" mapstructure:",squash"`
PeerName string
@ -694,6 +695,7 @@ func (r *ServiceDumpRequest) CacheInfo() cache.RequestInfo {
v, err := hashstructure.Hash([]interface{}{
keyKind,
r.UseServiceKind,
r.NodesOnly,
r.Filter,
r.EnterpriseMeta,
}, nil)