consul/agent/proxycfg/snapshot.go

920 lines
32 KiB
Go
Raw Normal View History

package proxycfg
import (
"context"
"fmt"
"sort"
"strings"
"github.com/hashicorp/consul/acl"
2022-07-13 16:14:57 +00:00
"github.com/hashicorp/consul/agent/proxycfg/internal/watch"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/proto/pbpeering"
)
// TODO(ingress): Can we think of a better for this bag of data?
// A shared data structure that contains information about discovered upstreams
type ConfigSnapshotUpstreams struct {
Leaf *structs.IssuedCert
2021-12-13 22:13:33 +00:00
MeshConfig *structs.MeshConfigEntry
MeshConfigSet bool
// DiscoveryChain is a map of UpstreamID -> CompiledDiscoveryChain's, and
// is used to determine what services could be targeted by this upstream.
// We then instantiate watches for those targets.
DiscoveryChain map[UpstreamID]*structs.CompiledDiscoveryChain
// WatchedDiscoveryChains is a map of UpstreamID -> CancelFunc's
2021-03-17 19:40:39 +00:00
// in order to cancel any watches when the proxy's configuration is
// changed. Ingress gateways and transparent proxies need this because
// discovery chain watches are added and removed through the lifecycle
// of a single proxycfg state instance.
WatchedDiscoveryChains map[UpstreamID]context.CancelFunc
2021-03-17 19:40:39 +00:00
// WatchedUpstreams is a map of UpstreamID -> (map of TargetID ->
// CancelFunc's) in order to cancel any watches when the configuration is
// changed.
WatchedUpstreams map[UpstreamID]map[string]context.CancelFunc
// WatchedUpstreamEndpoints is a map of UpstreamID -> (map of
// TargetID -> CheckServiceNodes) and is used to determine the backing
// endpoints of an upstream.
WatchedUpstreamEndpoints map[UpstreamID]map[string]structs.CheckServiceNodes
connect: fix failover through a mesh gateway to a remote datacenter (#6259) Failover is pushed entirely down to the data plane by creating envoy clusters and putting each successive destination in a different load assignment priority band. For example this shows that normally requests go to 1.2.3.4:8080 but when that fails they go to 6.7.8.9:8080: - name: foo load_assignment: cluster_name: foo policy: overprovisioning_factor: 100000 endpoints: - priority: 0 lb_endpoints: - endpoint: address: socket_address: address: 1.2.3.4 port_value: 8080 - priority: 1 lb_endpoints: - endpoint: address: socket_address: address: 6.7.8.9 port_value: 8080 Mesh gateways route requests based solely on the SNI header tacked onto the TLS layer. Envoy currently only lets you configure the outbound SNI header at the cluster layer. If you try to failover through a mesh gateway you ideally would configure the SNI value per endpoint, but that's not possible in envoy today. This PR introduces a simpler way around the problem for now: 1. We identify any target of failover that will use mesh gateway mode local or remote and then further isolate any resolver node in the compiled discovery chain that has a failover destination set to one of those targets. 2. For each of these resolvers we will perform a small measurement of comparative healths of the endpoints that come back from the health API for the set of primary target and serial failover targets. We walk the list of targets in order and if any endpoint is healthy we return that target, otherwise we move on to the next target. 3. The CDS and EDS endpoints both perform the measurements in (2) for the affected resolver nodes. 4. For CDS this measurement selects which TLS SNI field to use for the cluster (note the cluster is always going to be named for the primary target) 5. For EDS this measurement selects which set of endpoints will populate the cluster. Priority tiered failover is ignored. One of the big downsides to this approach to failover is that the failover detection and correction is going to be controlled by consul rather than deferring that entirely to the data plane as with the prior version. This also means that we are bound to only failover using official health signals and cannot make use of data plane signals like outlier detection to affect failover. In this specific scenario the lack of data plane signals is ok because the effectiveness is already muted by the fact that the ultimate destination endpoints will have their data plane signals scrambled when they pass through the mesh gateway wrapper anyway so we're not losing much. Another related fix is that we now use the endpoint health from the underlying service, not the health of the gateway (regardless of failover mode).
2019-08-05 18:30:35 +00:00
// UpstreamPeerTrustBundles is a map of (PeerName -> PeeringTrustBundle).
// It is used to store trust bundles for upstream TLS transport sockets.
2022-07-13 16:14:57 +00:00
UpstreamPeerTrustBundles watch.Map[PeerName, *pbpeering.PeeringTrustBundle]
// WatchedGateways is a map of UpstreamID -> (map of GatewayKey.String() ->
// CancelFunc) in order to cancel watches for mesh gateways
WatchedGateways map[UpstreamID]map[string]context.CancelFunc
// WatchedGatewayEndpoints is a map of UpstreamID -> (map of
// GatewayKey.String() -> CheckServiceNodes) and is used to determine the
// backing endpoints of a mesh gateway.
WatchedGatewayEndpoints map[UpstreamID]map[string]structs.CheckServiceNodes
2021-03-17 19:40:39 +00:00
// WatchedLocalGWEndpoints is used to store the backing endpoints of
// a local mesh gateway. Currently, this is used by peered upstreams
// configured with local mesh gateway mode so that they can watch for
// gateway endpoints.
//
// Note that the string form of GatewayKey is used as the key so empty
// fields can be normalized in OSS.
// GatewayKey.String() -> structs.CheckServiceNodes
WatchedLocalGWEndpoints watch.Map[string, structs.CheckServiceNodes]
2021-03-17 19:40:39 +00:00
// UpstreamConfig is a map to an upstream's configuration.
UpstreamConfig map[UpstreamID]*structs.Upstream
// PassthroughEndpoints is a map of: UpstreamID -> (map of TargetID ->
// (set of IP addresses)). It contains the upstream endpoints that
// can be dialed directly by a transparent proxy.
PassthroughUpstreams map[UpstreamID]map[string]map[string]struct{}
2021-12-13 22:13:33 +00:00
// PassthroughIndices is a map of: address -> indexedTarget.
// It is used to track the modify index associated with a passthrough address.
// Tracking this index helps break ties when a single address is shared by
// more than one upstream due to a race.
PassthroughIndices map[string]indexedTarget
2021-12-13 22:13:33 +00:00
// IntentionUpstreams is a set of upstreams inferred from intentions.
//
2021-12-13 22:13:33 +00:00
// This list only applies to proxies registered in 'transparent' mode.
IntentionUpstreams map[UpstreamID]struct{}
2022-07-13 16:14:57 +00:00
// PeeredUpstreams is a set of all upstream targets in a local partition.
//
// This list only applies to proxies registered in 'transparent' mode.
PeeredUpstreams map[UpstreamID]struct{}
// PeerUpstreamEndpoints is a map of UpstreamID -> (set of IP addresses)
// and used to determine the backing endpoints of an upstream in another
// peer.
2022-07-13 16:14:57 +00:00
PeerUpstreamEndpoints watch.Map[UpstreamID, structs.CheckServiceNodes]
PeerUpstreamEndpointsUseHostnames map[UpstreamID]struct{}
}
// indexedTarget is used to associate the Raft modify index of a resource
// with the corresponding upstream target.
type indexedTarget struct {
upstreamID UpstreamID
targetID string
idx uint64
}
type GatewayKey struct {
Datacenter string
Partition string
}
func (k GatewayKey) String() string {
resp := k.Datacenter
if !acl.IsDefaultPartition(k.Partition) {
resp = k.Partition + "." + resp
}
return resp
}
func (k GatewayKey) IsEmpty() bool {
return k.Partition == "" && k.Datacenter == ""
}
func (k GatewayKey) Matches(dc, partition string) bool {
return acl.EqualPartitions(k.Partition, partition) && k.Datacenter == dc
}
func gatewayKeyFromString(s string) GatewayKey {
2021-10-26 21:32:12 +00:00
split := strings.SplitN(s, ".", 2)
if len(split) == 1 {
return GatewayKey{Datacenter: split[0], Partition: acl.DefaultPartitionName}
}
return GatewayKey{Partition: split[0], Datacenter: split[1]}
}
type configSnapshotConnectProxy struct {
ConfigSnapshotUpstreams
InboundPeerTrustBundlesSet bool
InboundPeerTrustBundles []*pbpeering.PeeringTrustBundle
WatchedServiceChecks map[structs.ServiceID][]structs.CheckType // TODO: missing garbage collection
PreparedQueryEndpoints map[UpstreamID]structs.CheckServiceNodes // DEPRECATED:see:WatchedUpstreamEndpoints
// NOTE: Intentions stores a list of lists as returned by the Intentions
// Match RPC. So far we only use the first list as the list of matching
// intentions.
Intentions structs.Intentions
IntentionsSet bool
DestinationsUpstream watch.Map[UpstreamID, *structs.ServiceConfigEntry]
DestinationGateways watch.Map[UpstreamID, structs.CheckServiceNodes]
}
// isEmpty is a test helper
func (c *configSnapshotConnectProxy) isEmpty() bool {
if c == nil {
return true
}
return c.Leaf == nil &&
!c.IntentionsSet &&
len(c.DiscoveryChain) == 0 &&
2021-03-17 19:40:39 +00:00
len(c.WatchedDiscoveryChains) == 0 &&
len(c.WatchedUpstreams) == 0 &&
len(c.WatchedUpstreamEndpoints) == 0 &&
2022-07-13 16:14:57 +00:00
c.UpstreamPeerTrustBundles.Len() == 0 &&
len(c.WatchedGateways) == 0 &&
len(c.WatchedGatewayEndpoints) == 0 &&
len(c.WatchedServiceChecks) == 0 &&
2021-03-17 19:40:39 +00:00
len(c.PreparedQueryEndpoints) == 0 &&
len(c.UpstreamConfig) == 0 &&
len(c.PassthroughUpstreams) == 0 &&
2021-12-13 22:13:33 +00:00
len(c.IntentionUpstreams) == 0 &&
c.DestinationGateways.Len() == 0 &&
c.DestinationsUpstream.Len() == 0 &&
2022-07-13 16:14:57 +00:00
len(c.PeeredUpstreams) == 0 &&
!c.InboundPeerTrustBundlesSet &&
!c.MeshConfigSet &&
2022-07-13 16:14:57 +00:00
c.PeerUpstreamEndpoints.Len() == 0 &&
len(c.PeerUpstreamEndpointsUseHostnames) == 0
}
func (c *configSnapshotConnectProxy) IsImplicitUpstream(uid UpstreamID) bool {
_, intentionImplicit := c.IntentionUpstreams[uid]
_, peeringImplicit := c.PeeredUpstreams[uid]
return intentionImplicit || peeringImplicit
}
type configSnapshotTerminatingGateway struct {
MeshConfig *structs.MeshConfigEntry
MeshConfigSet bool
// WatchedServices is a map of service name to a cancel function. This cancel
// function is tied to the watch of linked service instances for the given
// id. If the linked services watch would indicate the removal of
// a service altogether we then cancel watching that service for its endpoints.
WatchedServices map[structs.ServiceName]context.CancelFunc
// WatchedIntentions is a map of service name to a cancel function.
// This cancel function is tied to the watch of intentions for linked services.
// As with WatchedServices, intention watches will be cancelled when services
// are no longer linked to the gateway.
WatchedIntentions map[structs.ServiceName]context.CancelFunc
// NOTE: Intentions stores a map of list of lists as returned by the Intentions
// Match RPC. So far we only use the first list as the list of matching
// intentions.
//
// A key being present implies that we have gotten at least one watch reply for the
// service. This is logically the same as ConnectProxy.IntentionsSet==true
Intentions map[structs.ServiceName]structs.Intentions
// WatchedLeaves is a map of ServiceName to a cancel function.
// This cancel function is tied to the watch of leaf certs for linked services.
// As with WatchedServices, leaf watches will be cancelled when services
// are no longer linked to the gateway.
WatchedLeaves map[structs.ServiceName]context.CancelFunc
// ServiceLeaves is a map of ServiceName to a leaf cert.
// Terminating gateways will present different certificates depending
// on the service that the caller is trying to reach.
ServiceLeaves map[structs.ServiceName]*structs.IssuedCert
// WatchedConfigs is a map of ServiceName to a cancel function. This cancel
// function is tied to the watch of service configs for linked services. As
// with WatchedServices, service config watches will be cancelled when
// services are no longer linked to the gateway.
WatchedConfigs map[structs.ServiceName]context.CancelFunc
// ServiceConfigs is a map of service name to the resolved service config
// for that service.
ServiceConfigs map[structs.ServiceName]*structs.ServiceConfigResponse
// WatchedResolvers is a map of ServiceName to a cancel function.
2020-04-14 14:59:23 +00:00
// This cancel function is tied to the watch of resolvers for linked services.
// As with WatchedServices, resolver watches will be cancelled when services
// are no longer linked to the gateway.
WatchedResolvers map[structs.ServiceName]context.CancelFunc
2020-04-14 14:59:23 +00:00
// ServiceResolvers is a map of service name to an associated
2020-04-14 14:59:23 +00:00
// service-resolver config entry for that service.
ServiceResolvers map[structs.ServiceName]*structs.ServiceResolverConfigEntry
ServiceResolversSet map[structs.ServiceName]bool
2020-04-14 14:59:23 +00:00
// ServiceGroups is a map of service name to the service instances of that
// service in the local datacenter.
ServiceGroups map[structs.ServiceName]structs.CheckServiceNodes
// GatewayServices is a map of service name to the config entry association
// between the gateway and a service. TLS configuration stored here is
// used for TLS origination from the gateway to the linked service.
// This map does not include GatewayServices that represent Endpoints to external
// destinations.
GatewayServices map[structs.ServiceName]structs.GatewayService
// DestinationServices is a map of service name to GatewayServices that represent
// a destination to an external destination of the service mesh.
DestinationServices map[structs.ServiceName]structs.GatewayService
// HostnameServices is a map of service name to service instances with a hostname as the address.
// If hostnames are configured they must be provided to Envoy via CDS not EDS.
HostnameServices map[structs.ServiceName]structs.CheckServiceNodes
}
// ValidServices returns the list of service keys that have enough data to be emitted.
func (c *configSnapshotTerminatingGateway) ValidServices() []structs.ServiceName {
out := make([]structs.ServiceName, 0, len(c.ServiceGroups))
for svc := range c.ServiceGroups {
// It only counts if ALL of our watches have come back (with data or not).
// Skip the service if we don't know if there is a resolver or not.
if _, ok := c.ServiceResolversSet[svc]; !ok {
continue
}
// Skip the service if we don't have a cert to present for mTLS.
if cert, ok := c.ServiceLeaves[svc]; !ok || cert == nil {
continue
}
// Skip the service if we haven't gotten our intentions yet.
if _, intentionsSet := c.Intentions[svc]; !intentionsSet {
continue
}
// Skip the service if we haven't gotten our service config yet to know
// the protocol.
if _, ok := c.ServiceConfigs[svc]; !ok {
continue
}
out = append(out, svc)
}
return out
}
// ValidDestinations returns the list of service keys (that represent exclusively endpoints) that have enough data to be emitted.
func (c *configSnapshotTerminatingGateway) ValidDestinations() []structs.ServiceName {
out := make([]structs.ServiceName, 0, len(c.DestinationServices))
for svc := range c.DestinationServices {
// It only counts if ALL of our watches have come back (with data or not).
// Skip the service if we don't have a cert to present for mTLS.
if cert, ok := c.ServiceLeaves[svc]; !ok || cert == nil {
continue
}
// Skip the service if we haven't gotten our intentions yet.
if _, intentionsSet := c.Intentions[svc]; !intentionsSet {
continue
}
// Skip the service if we haven't gotten our service config yet to know
// the protocol.
if conf, ok := c.ServiceConfigs[svc]; !ok || len(conf.Destination.Addresses) == 0 {
continue
}
out = append(out, svc)
}
return out
}
// isEmpty is a test helper
func (c *configSnapshotTerminatingGateway) isEmpty() bool {
if c == nil {
return true
}
return len(c.ServiceLeaves) == 0 &&
len(c.WatchedLeaves) == 0 &&
len(c.WatchedIntentions) == 0 &&
len(c.Intentions) == 0 &&
len(c.ServiceGroups) == 0 &&
2020-04-14 14:59:23 +00:00
len(c.WatchedServices) == 0 &&
2020-04-24 00:16:09 +00:00
len(c.ServiceResolvers) == 0 &&
len(c.ServiceResolversSet) == 0 &&
len(c.WatchedResolvers) == 0 &&
len(c.ServiceConfigs) == 0 &&
len(c.WatchedConfigs) == 0 &&
len(c.GatewayServices) == 0 &&
len(c.DestinationServices) == 0 &&
len(c.HostnameServices) == 0 &&
!c.MeshConfigSet
}
type PeerServersValue struct {
Addresses []structs.ServiceAddress
Index uint64
UseCDS bool
}
type PeeringServiceValue struct {
Nodes structs.CheckServiceNodes
UseCDS bool
}
type configSnapshotMeshGateway struct {
// WatchedServices is a map of service name to a cancel function. This cancel
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// function is tied to the watch of connect enabled services for the given
// id. If the main datacenter services watch would indicate the removal of
// a service altogether we then cancel watching that service for its
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// connect endpoints.
WatchedServices map[structs.ServiceName]context.CancelFunc
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// WatchedServicesSet indicates that the watch on the datacenters services
// has completed. Even when there are no connect services, this being set
// (and the Connect roots being available) will be enough for the config
// snapshot to be considered valid. In the case of Envoy, this allows it to
// start its listeners even when no services would be proxied and allow its
// health check to pass.
WatchedServicesSet bool
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// WatchedGateways is a map of GatewayKeys to a cancel function.
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// This cancel function is tied to the watch of mesh-gateway services in
// that datacenter/partition.
WatchedGateways map[string]context.CancelFunc
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// ServiceGroups is a map of service name to the service instances of that
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// service in the local datacenter.
ServiceGroups map[structs.ServiceName]structs.CheckServiceNodes
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// PeeringServices is a map of peer name -> (map of
// service name -> CheckServiceNodes) and is used to determine the backing
// endpoints of a service on a peer.
PeeringServices map[string]map[structs.ServiceName]PeeringServiceValue
// WatchedPeeringServices is a map of peer name -> (map of service name ->
// cancel function) and is used to track watches on services within a peer.
WatchedPeeringServices map[string]map[structs.ServiceName]context.CancelFunc
// WatchedPeers is a map of peer name -> cancel functions. It is used to
// track watches on peers.
WatchedPeers map[string]context.CancelFunc
// ServiceResolvers is a map of service name to an associated
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// service-resolver config entry for that service.
ServiceResolvers map[structs.ServiceName]*structs.ServiceResolverConfigEntry
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// GatewayGroups is a map of datacenter names to services of kind
// mesh-gateway in that datacenter.
GatewayGroups map[string]structs.CheckServiceNodes
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
// FedStateGateways is a map of datacenter names to mesh gateways in that
// datacenter.
FedStateGateways map[string]structs.CheckServiceNodes
// WatchedLocalServers is a map of (structs.ConsulServiceName -> structs.CheckServiceNodes)`
// Mesh gateways can spin up watches for local servers both for
// WAN federation and for peering. This map ensures we only have one
// watch at a time.
WatchedLocalServers watch.Map[string, structs.CheckServiceNodes]
// HostnameDatacenters is a map of datacenters to mesh gateway instances with a hostname as the address.
// If hostnames are configured they must be provided to Envoy via CDS not EDS.
HostnameDatacenters map[string]structs.CheckServiceNodes
// ExportedServicesSlice is a sorted slice of services that are exported to
// connected peers.
ExportedServicesSlice []structs.ServiceName
// ExportedServicesWithPeers is a map of exported service name to a sorted
// slice of peers that they are exported to.
ExportedServicesWithPeers map[structs.ServiceName][]string
// ExportedServicesSet indicates that the watch on the list of
// peer-exported services has completed at least once.
ExportedServicesSet bool
// DiscoveryChain is a map of the peer-exported service names to their
// local compiled discovery chain. This will be populated regardless of
// L4/L7 status of the chain.
DiscoveryChain map[structs.ServiceName]*structs.CompiledDiscoveryChain
// WatchedDiscoveryChains is a map of peer-exported service names to a
// cancel function.
WatchedDiscoveryChains map[structs.ServiceName]context.CancelFunc
// MeshConfig is the mesh config entry that should be used for services
// fronted by this mesh gateway.
MeshConfig *structs.MeshConfigEntry
// MeshConfigSet indicates that the watch on the mesh config entry has
// completed at least once.
MeshConfigSet bool
// Leaf is the leaf cert to be used by this mesh gateway.
Leaf *structs.IssuedCert
// LeafCertWatchCancel is a CancelFunc to use when refreshing this gateway's
// leaf cert watch with different parameters.
LeafCertWatchCancel context.CancelFunc
// PeerServers is the map of peering server names to their addresses.
PeerServers map[string]PeerServersValue
// PeerServersWatchCancel is a CancelFunc to use when resetting the watch
// on all peerings as it is enabled/disabled.
PeerServersWatchCancel context.CancelFunc
// PeeringTrustBundles is the list of trust bundles for peers where
// services have been exported to using this mesh gateway.
PeeringTrustBundles []*pbpeering.PeeringTrustBundle
// PeeringTrustBundlesSet indicates that the watch on the peer trust
// bundles has completed at least once.
PeeringTrustBundlesSet bool
}
// MeshGatewayValidExportedServices ensures that the following data is present
// if it exists for a service before it returns that in the set of services to
// expose.
//
// - peering info
// - discovery chain
func (c *ConfigSnapshot) MeshGatewayValidExportedServices() []structs.ServiceName {
out := make([]structs.ServiceName, 0, len(c.MeshGateway.ExportedServicesSlice))
for _, svc := range c.MeshGateway.ExportedServicesSlice {
if _, ok := c.MeshGateway.ExportedServicesWithPeers[svc]; !ok {
continue // not possible
}
if _, ok := c.MeshGateway.ServiceGroups[svc]; !ok {
continue // unregistered services
}
chain, ok := c.MeshGateway.DiscoveryChain[svc]
if !ok {
continue // ignore; not ready
}
if structs.IsProtocolHTTPLike(chain.Protocol) {
if c.MeshGateway.Leaf == nil {
continue // ignore; not ready
}
}
out = append(out, svc)
}
return out
}
func (c *ConfigSnapshot) GetMeshGatewayEndpoints(key GatewayKey) structs.CheckServiceNodes {
// Mesh gateways in remote DCs are discovered in two ways:
//
// 1. Via an Internal.ServiceDump RPC in the remote DC (GatewayGroups).
// 2. In the federation state that is replicated from the primary DC (FedStateGateways).
//
// We determine which set to use based on whichever contains the highest
// raft ModifyIndex (and is therefore most up-to-date).
//
// Previously, GatewayGroups was always given presedence over FedStateGateways
// but this was problematic when using mesh gateways for WAN federation.
//
// Consider the following example:
//
// - Primary and Secondary DCs are WAN Federated via local mesh gateways.
//
// - Secondary DC's mesh gateway is running on an ephemeral compute instance
// and is abruptly terminated and rescheduled with a *new IP address*.
//
// - Primary DC's mesh gateway is no longer able to connect to the Secondary
// DC as its proxy is configured with the old IP address. Therefore any RPC
// from the Primary to the Secondary DC will fail (including the one to
// discover the gateway's new IP address).
//
// - Secondary DC performs its regular anti-entropy of federation state data
// to the Primary DC (this succeeds as there is still connectivity in this
// direction).
//
// - At this point the Primary DC's mesh gateway should observe the new IP
// address and reconfigure its proxy, however as we always prioritised
// GatewayGroups this didn't happen and the connection remained severed.
maxModifyIndex := func(vals structs.CheckServiceNodes) uint64 {
var max uint64
for _, v := range vals {
if i := v.Service.RaftIndex.ModifyIndex; i > max {
max = i
}
}
return max
}
endpoints := c.MeshGateway.GatewayGroups[key.String()]
fedStateEndpoints := c.MeshGateway.FedStateGateways[key.String()]
if maxModifyIndex(fedStateEndpoints) > maxModifyIndex(endpoints) {
return fedStateEndpoints
}
return endpoints
}
func (c *configSnapshotMeshGateway) IsServiceExported(svc structs.ServiceName) bool {
if c == nil || len(c.ExportedServicesWithPeers) == 0 {
return false
}
_, ok := c.ExportedServicesWithPeers[svc]
return ok
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
}
2021-10-26 21:58:23 +00:00
func (c *configSnapshotMeshGateway) GatewayKeys() []GatewayKey {
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
sz1, sz2 := len(c.GatewayGroups), len(c.FedStateGateways)
sz := sz1
if sz2 > sz1 {
sz = sz2
}
keys := make([]GatewayKey, 0, sz)
for key := range c.FedStateGateways {
keys = append(keys, gatewayKeyFromString(key))
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
}
for key := range c.GatewayGroups {
gk := gatewayKeyFromString(key)
if _, ok := c.FedStateGateways[gk.Datacenter]; !ok {
keys = append(keys, gk)
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
}
}
// Always sort the results to ensure we generate deterministic things over
// xDS, such as mesh-gateway listener filter chains.
sort.Slice(keys, func(i, j int) bool {
if keys[i].Datacenter != keys[j].Datacenter {
return keys[i].Datacenter < keys[j].Datacenter
}
return keys[i].Partition < keys[j].Partition
})
return keys
}
// isEmpty is a test helper
func (c *configSnapshotMeshGateway) isEmpty() bool {
if c == nil {
return true
}
return len(c.WatchedServices) == 0 &&
!c.WatchedServicesSet &&
len(c.WatchedGateways) == 0 &&
len(c.ServiceGroups) == 0 &&
len(c.ServiceResolvers) == 0 &&
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
len(c.GatewayGroups) == 0 &&
len(c.FedStateGateways) == 0 &&
len(c.HostnameDatacenters) == 0 &&
c.WatchedLocalServers.Len() == 0 &&
c.isEmptyPeering()
}
// isEmptyPeering is a test helper
func (c *configSnapshotMeshGateway) isEmptyPeering() bool {
if c == nil {
return true
}
return len(c.ExportedServicesSlice) == 0 &&
len(c.ExportedServicesWithPeers) == 0 &&
!c.ExportedServicesSet &&
len(c.DiscoveryChain) == 0 &&
len(c.WatchedDiscoveryChains) == 0 &&
!c.MeshConfigSet &&
c.LeafCertWatchCancel == nil &&
c.Leaf == nil &&
len(c.PeeringTrustBundles) == 0 &&
!c.PeeringTrustBundlesSet
}
type configSnapshotIngressGateway struct {
ConfigSnapshotUpstreams
2021-09-13 12:03:16 +00:00
// TLSConfig is the gateway-level TLS configuration. Listener/service level
// config is preserved in the Listeners map below.
TLSConfig structs.GatewayTLSConfig
2021-08-17 11:27:31 +00:00
// GatewayConfigLoaded is used to determine if we have received the initial
// ingress-gateway config entry yet.
GatewayConfigLoaded bool
// Hosts is the list of extra host entries to add to our leaf cert's DNS SANs.
Hosts []string
HostsSet bool
// LeafCertWatchCancel is a CancelFunc to use when refreshing this gateway's
// leaf cert watch with different parameters.
LeafCertWatchCancel context.CancelFunc
// Upstreams is a list of upstreams this ingress gateway should serve traffic
// to. This is constructed from the ingress-gateway config entry, and uses
// the GatewayServices RPC to retrieve them.
Upstreams map[IngressListenerKey]structs.Upstreams
2021-07-13 12:53:59 +00:00
// UpstreamsSet is the unique set of UpstreamID the gateway routes to.
UpstreamsSet map[UpstreamID]struct{}
2021-07-13 12:53:59 +00:00
// Listeners is the original listener config from the ingress-gateway config
// entry to save us trying to pass fields through Upstreams
Listeners map[IngressListenerKey]structs.IngressListener
// Defaults is the default configuration for upstream service instances
Defaults structs.IngressServiceConfig
}
// isEmpty is a test helper
func (c *configSnapshotIngressGateway) isEmpty() bool {
if c == nil {
return true
}
return len(c.Upstreams) == 0 &&
len(c.UpstreamsSet) == 0 &&
len(c.DiscoveryChain) == 0 &&
len(c.WatchedUpstreams) == 0 &&
len(c.WatchedUpstreamEndpoints) == 0 &&
!c.MeshConfigSet
}
type IngressListenerKey struct {
Protocol string
Port int
}
func (k *IngressListenerKey) RouteName() string {
return fmt.Sprintf("%d", k.Port)
}
func IngressListenerKeyFromGWService(s structs.GatewayService) IngressListenerKey {
return IngressListenerKey{Protocol: s.Protocol, Port: s.Port}
}
func IngressListenerKeyFromListener(l structs.IngressListener) IngressListenerKey {
return IngressListenerKey{Protocol: l.Protocol, Port: l.Port}
}
// ConfigSnapshot captures all the resulting config needed for a proxy instance.
// It is meant to be point-in-time coherent and is used to deliver the current
// config state to observers who need it to be pushed in (e.g. XDS server).
type ConfigSnapshot struct {
Kind structs.ServiceKind
Service string
ProxyID ProxyID
Address string
Port int
ServiceMeta map[string]string
TaggedAddresses map[string]structs.ServiceAddress
Proxy structs.ConnectProxyConfig
Datacenter string
IntentionDefaultAllow bool
Locality GatewayKey
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
ServerSNIFn ServerSNIFunc
Roots *structs.IndexedCARoots
// connect-proxy specific
ConnectProxy configSnapshotConnectProxy
// terminating-gateway specific
TerminatingGateway configSnapshotTerminatingGateway
// mesh-gateway specific
MeshGateway configSnapshotMeshGateway
// ingress-gateway specific
IngressGateway configSnapshotIngressGateway
}
// Valid returns whether or not the snapshot has all required fields filled yet.
func (s *ConfigSnapshot) Valid() bool {
switch s.Kind {
case structs.ServiceKindConnectProxy:
if s.Proxy.Mode == structs.ProxyModeTransparent && !s.ConnectProxy.MeshConfigSet {
return false
}
return s.Roots != nil &&
s.ConnectProxy.Leaf != nil &&
s.ConnectProxy.IntentionsSet &&
s.ConnectProxy.MeshConfigSet
case structs.ServiceKindTerminatingGateway:
return s.Roots != nil &&
s.TerminatingGateway.MeshConfigSet
case structs.ServiceKindMeshGateway:
if s.MeshGateway.WatchedLocalServers.Len() == 0 {
if s.ServiceMeta[structs.MetaWANFederationKey] == "1" {
return false
}
if cfg := s.MeshConfig(); cfg.PeerThroughMeshGateways() {
wan federation via mesh gateways (#6884) This is like a Möbius strip of code due to the fact that low-level components (serf/memberlist) are connected to high-level components (the catalog and mesh-gateways) in a twisty maze of references which make it hard to dive into. With that in mind here's a high level summary of what you'll find in the patch: There are several distinct chunks of code that are affected: * new flags and config options for the server * retry join WAN is slightly different * retry join code is shared to discover primary mesh gateways from secondary datacenters * because retry join logic runs in the *agent* and the results of that operation for primary mesh gateways are needed in the *server* there are some methods like `RefreshPrimaryGatewayFallbackAddresses` that must occur at multiple layers of abstraction just to pass the data down to the right layer. * new cache type `FederationStateListMeshGatewaysName` for use in `proxycfg/xds` layers * the function signature for RPC dialing picked up a new required field (the node name of the destination) * several new RPCs for manipulating a FederationState object: `FederationState:{Apply,Get,List,ListMeshGateways}` * 3 read-only internal APIs for debugging use to invoke those RPCs from curl * raft and fsm changes to persist these FederationStates * replication for FederationStates as they are canonically stored in the Primary and replicated to the Secondaries. * a special derivative of anti-entropy that runs in secondaries to snapshot their local mesh gateway `CheckServiceNodes` and sync them into their upstream FederationState in the primary (this works in conjunction with the replication to distribute addresses for all mesh gateways in all DCs to all other DCs) * a "gateway locator" convenience object to make use of this data to choose the addresses of gateways to use for any given RPC or gossip operation to a remote DC. This gets data from the "retry join" logic in the agent and also directly calls into the FSM. * RPC (`:8300`) on the server sniffs the first byte of a new connection to determine if it's actually doing native TLS. If so it checks the ALPN header for protocol determination (just like how the existing system uses the type-byte marker). * 2 new kinds of protocols are exclusively decoded via this native TLS mechanism: one for ferrying "packet" operations (udp-like) from the gossip layer and one for "stream" operations (tcp-like). The packet operations re-use sockets (using length-prefixing) to cut down on TLS re-negotiation overhead. * the server instances specially wrap the `memberlist.NetTransport` when running with gateway federation enabled (in a `wanfed.Transport`). The general gist is that if it tries to dial a node in the SAME datacenter (deduced by looking at the suffix of the node name) there is no change. If dialing a DIFFERENT datacenter it is wrapped up in a TLS+ALPN blob and sent through some mesh gateways to eventually end up in a server's :8300 port. * a new flag when launching a mesh gateway via `consul connect envoy` to indicate that the servers are to be exposed. This sets a special service meta when registering the gateway into the catalog. * `proxycfg/xds` notice this metadata blob to activate additional watches for the FederationState objects as well as the location of all of the consul servers in that datacenter. * `xds:` if the extra metadata is in place additional clusters are defined in a DC to bulk sink all traffic to another DC's gateways. For the current datacenter we listen on a wildcard name (`server.<dc>.consul`) that load balances all servers as well as one mini-cluster per node (`<node>.server.<dc>.consul`) * the `consul tls cert create` command got a new flag (`-node`) to help create an additional SAN in certs that can be used with this flavor of federation.
2020-03-09 20:59:02 +00:00
return false
}
}
return s.Roots != nil &&
(s.MeshGateway.WatchedServicesSet || len(s.MeshGateway.ServiceGroups) > 0) &&
s.MeshGateway.ExportedServicesSet &&
s.MeshGateway.MeshConfigSet &&
s.MeshGateway.PeeringTrustBundlesSet
case structs.ServiceKindIngressGateway:
return s.Roots != nil &&
s.IngressGateway.Leaf != nil &&
s.IngressGateway.GatewayConfigLoaded &&
s.IngressGateway.HostsSet &&
s.IngressGateway.MeshConfigSet
default:
return false
}
}
// Clone makes a deep copy of the snapshot we can send to other goroutines
// without worrying that they will racily read or mutate shared maps etc.
func (s *ConfigSnapshot) Clone() *ConfigSnapshot {
snap := s.DeepCopy()
// nil these out as anything receiving one of these clones does not need them and should never "cancel" our watches
switch s.Kind {
case structs.ServiceKindConnectProxy:
// common with connect-proxy and ingress-gateway
snap.ConnectProxy.WatchedUpstreams = nil
connect: fix failover through a mesh gateway to a remote datacenter (#6259) Failover is pushed entirely down to the data plane by creating envoy clusters and putting each successive destination in a different load assignment priority band. For example this shows that normally requests go to 1.2.3.4:8080 but when that fails they go to 6.7.8.9:8080: - name: foo load_assignment: cluster_name: foo policy: overprovisioning_factor: 100000 endpoints: - priority: 0 lb_endpoints: - endpoint: address: socket_address: address: 1.2.3.4 port_value: 8080 - priority: 1 lb_endpoints: - endpoint: address: socket_address: address: 6.7.8.9 port_value: 8080 Mesh gateways route requests based solely on the SNI header tacked onto the TLS layer. Envoy currently only lets you configure the outbound SNI header at the cluster layer. If you try to failover through a mesh gateway you ideally would configure the SNI value per endpoint, but that's not possible in envoy today. This PR introduces a simpler way around the problem for now: 1. We identify any target of failover that will use mesh gateway mode local or remote and then further isolate any resolver node in the compiled discovery chain that has a failover destination set to one of those targets. 2. For each of these resolvers we will perform a small measurement of comparative healths of the endpoints that come back from the health API for the set of primary target and serial failover targets. We walk the list of targets in order and if any endpoint is healthy we return that target, otherwise we move on to the next target. 3. The CDS and EDS endpoints both perform the measurements in (2) for the affected resolver nodes. 4. For CDS this measurement selects which TLS SNI field to use for the cluster (note the cluster is always going to be named for the primary target) 5. For EDS this measurement selects which set of endpoints will populate the cluster. Priority tiered failover is ignored. One of the big downsides to this approach to failover is that the failover detection and correction is going to be controlled by consul rather than deferring that entirely to the data plane as with the prior version. This also means that we are bound to only failover using official health signals and cannot make use of data plane signals like outlier detection to affect failover. In this specific scenario the lack of data plane signals is ok because the effectiveness is already muted by the fact that the ultimate destination endpoints will have their data plane signals scrambled when they pass through the mesh gateway wrapper anyway so we're not losing much. Another related fix is that we now use the endpoint health from the underlying service, not the health of the gateway (regardless of failover mode).
2019-08-05 18:30:35 +00:00
snap.ConnectProxy.WatchedGateways = nil
snap.ConnectProxy.WatchedDiscoveryChains = nil
case structs.ServiceKindTerminatingGateway:
snap.TerminatingGateway.WatchedServices = nil
snap.TerminatingGateway.WatchedIntentions = nil
snap.TerminatingGateway.WatchedLeaves = nil
snap.TerminatingGateway.WatchedConfigs = nil
snap.TerminatingGateway.WatchedResolvers = nil
case structs.ServiceKindMeshGateway:
snap.MeshGateway.WatchedGateways = nil
snap.MeshGateway.WatchedServices = nil
case structs.ServiceKindIngressGateway:
// common with connect-proxy and ingress-gateway
snap.IngressGateway.WatchedUpstreams = nil
snap.IngressGateway.WatchedGateways = nil
snap.IngressGateway.WatchedDiscoveryChains = nil
// only ingress-gateway
snap.IngressGateway.LeafCertWatchCancel = nil
}
return snap
}
func (s *ConfigSnapshot) Leaf() *structs.IssuedCert {
switch s.Kind {
case structs.ServiceKindConnectProxy:
return s.ConnectProxy.Leaf
case structs.ServiceKindIngressGateway:
return s.IngressGateway.Leaf
case structs.ServiceKindMeshGateway:
return s.MeshGateway.Leaf
default:
return nil
}
}
func (s *ConfigSnapshot) PeeringTrustBundles() []*pbpeering.PeeringTrustBundle {
switch s.Kind {
case structs.ServiceKindConnectProxy:
return s.ConnectProxy.InboundPeerTrustBundles
case structs.ServiceKindMeshGateway:
return s.MeshGateway.PeeringTrustBundles
default:
return nil
}
}
// RootPEMs returns all PEM-encoded public certificates for the root CA.
func (s *ConfigSnapshot) RootPEMs() string {
var rootPEMs string
for _, root := range s.Roots.Roots {
rootPEMs += lib.EnsureTrailingNewline(root.RootCert)
}
return rootPEMs
}
func (s *ConfigSnapshot) MeshConfig() *structs.MeshConfigEntry {
switch s.Kind {
case structs.ServiceKindConnectProxy:
return s.ConnectProxy.MeshConfig
case structs.ServiceKindIngressGateway:
return s.IngressGateway.MeshConfig
case structs.ServiceKindTerminatingGateway:
return s.TerminatingGateway.MeshConfig
case structs.ServiceKindMeshGateway:
return s.MeshGateway.MeshConfig
default:
return nil
}
}
func (s *ConfigSnapshot) MeshConfigTLSIncoming() *structs.MeshDirectionalTLSConfig {
mesh := s.MeshConfig()
if mesh == nil || mesh.TLS == nil {
return nil
}
return mesh.TLS.Incoming
}
func (s *ConfigSnapshot) MeshConfigTLSOutgoing() *structs.MeshDirectionalTLSConfig {
mesh := s.MeshConfig()
if mesh == nil || mesh.TLS == nil {
return nil
}
return mesh.TLS.Outgoing
}
func (s *ConfigSnapshot) ToConfigSnapshotUpstreams() (*ConfigSnapshotUpstreams, error) {
switch s.Kind {
case structs.ServiceKindConnectProxy:
return &s.ConnectProxy.ConfigSnapshotUpstreams, nil
case structs.ServiceKindIngressGateway:
return &s.IngressGateway.ConfigSnapshotUpstreams, nil
default:
// This is a coherence check and should never fail
return nil, fmt.Errorf("No upstream snapshot for gateway mode %q", s.Kind)
}
}
func (u *ConfigSnapshotUpstreams) UpstreamPeerMeta(uid UpstreamID) structs.PeeringServiceMeta {
2022-07-13 16:14:57 +00:00
nodes, _ := u.PeerUpstreamEndpoints.Get(uid)
if len(nodes) == 0 {
return structs.PeeringServiceMeta{}
}
// In agent/rpc/peering/subscription_manager.go we denormalize the
// PeeringServiceMeta data onto each replicated service instance to convey
// this information back to the importing side of the peering.
//
// This data is guaranteed (subject to any eventual consistency lag around
// updates) to be the same across all instances, so we only need to take
// the first item.
//
// TODO(peering): consider replicating this "common to all instances" data
// using a different replication type and persist it separately in the
// catalog to avoid this weird construction.
csn := nodes[0]
if csn.Service == nil {
return structs.PeeringServiceMeta{}
}
return *csn.Service.Connect.PeerMeta
}
// PeeredUpstreamIDs returns a slice of peered UpstreamIDs from explicit config entries
// and implicit imported services.
// Upstreams whose trust bundles have not been stored in the snapshot are ignored.
func (u *ConfigSnapshotUpstreams) PeeredUpstreamIDs() []UpstreamID {
out := make([]UpstreamID, 0, u.PeerUpstreamEndpoints.Len())
u.PeerUpstreamEndpoints.ForEachKey(func(uid UpstreamID) bool {
if _, ok := u.PeerUpstreamEndpoints.Get(uid); !ok {
// uid might exist in the map but if Set hasn't been called, skip for now.
return true
}
2022-07-13 16:14:57 +00:00
if _, ok := u.UpstreamPeerTrustBundles.Get(uid.Peer); !ok {
// The trust bundle for this upstream is not available yet, skip for now.
return true
}
out = append(out, uid)
return true
})
return out
}