mirror of https://github.com/hashicorp/consul
Browse Source
Treat each exported service as a "discovery chain" and replicate one synthetic CheckServiceNode for each chain and remote mesh gateway. The health will be a flattened, generated check derived from the checks for that mesh gateway node. (pull/13150/head)
R.B. Boyer
3 years ago
15 changed files with 1585 additions and 481 deletions
@ -0,0 +1,108 @@
|
||||
package peering |
||||
|
||||
import ( |
||||
"context" |
||||
"errors" |
||||
"fmt" |
||||
"time" |
||||
|
||||
"github.com/hashicorp/go-memdb" |
||||
|
||||
"github.com/hashicorp/consul/agent/cache" |
||||
"github.com/hashicorp/consul/agent/structs" |
||||
"github.com/hashicorp/consul/lib/retry" |
||||
"github.com/hashicorp/consul/proto/pbservice" |
||||
) |
||||
|
||||
// This file contains direct state store functions that need additional
|
||||
// management to have them emit events. Ideally these would go through
|
||||
// streaming machinery instead to be cheaper.
|
||||
|
||||
func (m *subscriptionManager) notifyExportedServicesForPeerID(ctx context.Context, state *subscriptionState, peerID string) { |
||||
// syncSubscriptionsAndBlock ensures that the subscriptions to the subscription backend
|
||||
// match the list of services exported to the peer.
|
||||
m.syncViaBlockingQuery(ctx, "exported-services", func(ctx context.Context, store Store, ws memdb.WatchSet) (interface{}, error) { |
||||
// Get exported services for peer id
|
||||
_, list, err := store.ExportedServicesForPeer(ws, peerID) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("failed to watch exported services for peer %q: %w", peerID, err) |
||||
} |
||||
|
||||
return list, nil |
||||
}, subExportedServiceList, state.updateCh) |
||||
} |
||||
|
||||
// TODO: add a new streaming subscription type to list-by-kind-and-partition since we're getting evictions
|
||||
func (m *subscriptionManager) notifyMeshGatewaysForPartition(ctx context.Context, state *subscriptionState, partition string) { |
||||
m.syncViaBlockingQuery(ctx, "mesh-gateways", func(ctx context.Context, store Store, ws memdb.WatchSet) (interface{}, error) { |
||||
// Fetch our current list of all mesh gateways.
|
||||
entMeta := structs.DefaultEnterpriseMetaInPartition(partition) |
||||
idx, nodes, err := store.ServiceDump(ws, structs.ServiceKindMeshGateway, true, entMeta, structs.DefaultPeerKeyword) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("failed to watch mesh gateways services for partition %q: %w", partition, err) |
||||
} |
||||
if idx == 0 { |
||||
idx = 1 |
||||
} |
||||
|
||||
// convert back to a protobuf flavor
|
||||
result := &pbservice.IndexedCheckServiceNodes{ |
||||
Index: idx, |
||||
Nodes: make([]*pbservice.CheckServiceNode, len(nodes)), |
||||
} |
||||
for i, csn := range nodes { |
||||
result.Nodes[i] = pbservice.NewCheckServiceNodeFromStructs(&csn) |
||||
} |
||||
|
||||
return result, nil |
||||
}, subMeshGateway+partition, state.updateCh) |
||||
} |
||||
|
||||
// syncViaBlockingQuery repeatedly evaluates queryFn against the state store
// and pushes each successful result onto updateCh tagged with correlationID.
// After a successful send it blocks until the watch set fires (the store
// changed, the store was abandoned, or ctx was cancelled). The loop exits
// only once ctx is done.
//
// queryType is used solely to annotate log output; it may be empty.
func (m *subscriptionManager) syncViaBlockingQuery(
	ctx context.Context,
	queryType string,
	queryFn func(ctx context.Context, store Store, ws memdb.WatchSet) (interface{}, error),
	correlationID string,
	updateCh chan<- cache.UpdateEvent,
) {
	// Backoff applied between loop iterations: 500ms factor, capped at 60s.
	// NOTE(review): waiter is never Reset() after a successful query, so the
	// per-iteration wait keeps growing even when queryFn succeeds — confirm
	// that this throttling behavior is intentional.
	waiter := &retry.Waiter{
		MinFailures: 1,
		Factor:      500 * time.Millisecond,
		MaxWait:     60 * time.Second,
		Jitter:      retry.NewJitter(100),
	}

	logger := m.logger
	if queryType != "" {
		logger = m.logger.With("queryType", queryType)
	}

	store := m.backend.Store()

	for {
		// Build a fresh watch set each iteration; also wake on store
		// abandonment or context cancellation.
		ws := memdb.NewWatchSet()
		ws.Add(store.AbandonCh())
		ws.Add(ctx.Done())

		if result, err := queryFn(ctx, store, ws); err != nil {
			logger.Error("failed to sync from query", "error", err)
		} else {
			// Deliver the result, then block for any changes to the state store.
			updateCh <- cache.UpdateEvent{
				CorrelationID: correlationID,
				Result:        result,
			}
			// Blocks until something in ws fires or ctx is done. The returned
			// error is deliberately ignored; cancellation is handled by the
			// select at the bottom of the loop.
			ws.WatchCtx(ctx)
		}

		// Context cancellation/deadline errors are expected during shutdown
		// and not worth logging.
		if err := waiter.Wait(ctx); err != nil && !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) {
			logger.Error("failed to wait before re-trying sync", "error", err)
		}

		// Exit once the context has been cancelled.
		select {
		case <-ctx.Done():
			return
		default:
		}
	}
}
@ -0,0 +1,165 @@
|
||||
package peering |
||||
|
||||
import ( |
||||
"context" |
||||
"crypto/sha256" |
||||
"encoding/hex" |
||||
"fmt" |
||||
"strings" |
||||
|
||||
"github.com/golang/protobuf/proto" |
||||
"github.com/hashicorp/go-hclog" |
||||
|
||||
"github.com/hashicorp/consul/agent/cache" |
||||
"github.com/hashicorp/consul/agent/structs" |
||||
"github.com/hashicorp/consul/proto/pbservice" |
||||
) |
||||
|
||||
// subscriptionState is a collection of working state tied to a peerID subscription.
type subscriptionState struct {
	// partition is immutable
	partition string

	// plain data

	// exportList is the most recently observed list of services exported to
	// the peer.
	exportList *structs.ExportedServiceList

	// watchedServices holds the cancel function for each running per-service
	// watch, keyed by service name.
	watchedServices map[structs.ServiceName]context.CancelFunc
	// connectServices is the set of service names with connect (discovery
	// chain) subscriptions; cleanupEventVersions uses it to decide which
	// "chain:"-keyed versions to retain.
	connectServices map[structs.ServiceName]struct{}

	// eventVersions is a duplicate event suppression system keyed by the "id"
	// not the "correlationID"
	eventVersions map[string]string

	// meshGateway holds a mesh-gateway payload for this subscription.
	// NOTE(review): not read or written within this file chunk — confirm its
	// lifecycle in the manager.
	meshGateway *pbservice.IndexedCheckServiceNodes

	// updateCh is an internal implementation detail for the machinery of the
	// manager.
	updateCh chan<- cache.UpdateEvent

	// publicUpdateCh is the channel the manager uses to pass data back to the
	// caller.
	publicUpdateCh chan<- cache.UpdateEvent
}
||||
|
||||
func newSubscriptionState(partition string) *subscriptionState { |
||||
return &subscriptionState{ |
||||
partition: partition, |
||||
watchedServices: make(map[structs.ServiceName]context.CancelFunc), |
||||
connectServices: make(map[structs.ServiceName]struct{}), |
||||
eventVersions: make(map[string]string), |
||||
} |
||||
} |
||||
|
||||
func (s *subscriptionState) sendPendingEvents( |
||||
ctx context.Context, |
||||
logger hclog.Logger, |
||||
pending *pendingPayload, |
||||
) { |
||||
for _, pendingEvt := range pending.Events { |
||||
cID := pendingEvt.CorrelationID |
||||
newVersion := pendingEvt.Version |
||||
|
||||
oldVersion, ok := s.eventVersions[pendingEvt.ID] |
||||
if ok && newVersion == oldVersion { |
||||
logger.Trace("skipping send of duplicate public event", "correlationID", cID) |
||||
continue |
||||
} |
||||
|
||||
logger.Trace("sending public event", "correlationID", cID) |
||||
s.eventVersions[pendingEvt.ID] = newVersion |
||||
|
||||
evt := cache.UpdateEvent{ |
||||
CorrelationID: cID, |
||||
Result: pendingEvt.Result, |
||||
} |
||||
|
||||
select { |
||||
case s.publicUpdateCh <- evt: |
||||
case <-ctx.Done(): |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (s *subscriptionState) cleanupEventVersions(logger hclog.Logger) { |
||||
for id := range s.eventVersions { |
||||
keep := false |
||||
switch { |
||||
case id == meshGatewayPayloadID: |
||||
keep = true |
||||
|
||||
case strings.HasPrefix(id, servicePayloadIDPrefix): |
||||
name := strings.TrimPrefix(id, servicePayloadIDPrefix) |
||||
sn := structs.ServiceNameFromString(name) |
||||
|
||||
if _, ok := s.watchedServices[sn]; ok { |
||||
keep = true |
||||
} |
||||
|
||||
case strings.HasPrefix(id, discoveryChainPayloadIDPrefix): |
||||
name := strings.TrimPrefix(id, discoveryChainPayloadIDPrefix) |
||||
sn := structs.ServiceNameFromString(name) |
||||
|
||||
if _, ok := s.connectServices[sn]; ok { |
||||
keep = true |
||||
} |
||||
} |
||||
|
||||
if !keep { |
||||
logger.Trace("cleaning up unreferenced event id version", "id", id) |
||||
delete(s.eventVersions, id) |
||||
} |
||||
} |
||||
} |
||||
|
||||
// pendingPayload is an ordered batch of events staged for delivery via
// subscriptionState.sendPendingEvents.
type pendingPayload struct {
	Events []pendingEvent
}
||||
|
||||
// pendingEvent is a single event staged for delivery, carrying the payload
// hash (Version) used for duplicate suppression.
type pendingEvent struct {
	// ID is the dedupe key (meshGatewayPayloadID or a prefixed service/chain id).
	ID string
	// CorrelationID tags the outgoing cache.UpdateEvent.
	CorrelationID string
	// Result is the protobuf payload to deliver.
	Result proto.Message
	// Version is the deterministic hash of Result (see hashProtobuf).
	Version string
}
||||
|
||||
// Payload id constants and prefixes used as dedupe keys in
// subscriptionState.eventVersions.
const (
	// meshGatewayPayloadID keys the single mesh-gateway event.
	meshGatewayPayloadID = "mesh-gateway"
	// servicePayloadIDPrefix prefixes per-exported-service event ids.
	servicePayloadIDPrefix = "service:"
	// discoveryChainPayloadIDPrefix prefixes per-discovery-chain event ids.
	discoveryChainPayloadIDPrefix = "chain:"
)
||||
|
||||
func (p *pendingPayload) Add(id string, correlationID string, raw interface{}) error { |
||||
result, ok := raw.(proto.Message) |
||||
if !ok { |
||||
return fmt.Errorf("invalid type for %q event: %T", correlationID, raw) |
||||
} |
||||
|
||||
version, err := hashProtobuf(result) |
||||
if err != nil { |
||||
return fmt.Errorf("error hashing %q event: %w", correlationID, err) |
||||
} |
||||
|
||||
p.Events = append(p.Events, pendingEvent{ |
||||
ID: id, |
||||
CorrelationID: correlationID, |
||||
Result: result, |
||||
Version: version, |
||||
}) |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func hashProtobuf(res proto.Message) (string, error) { |
||||
h := sha256.New() |
||||
buffer := proto.NewBuffer(nil) |
||||
buffer.SetDeterministic(true) |
||||
|
||||
err := buffer.Marshal(res) |
||||
if err != nil { |
||||
return "", err |
||||
} |
||||
h.Write(buffer.Bytes()) |
||||
buffer.Reset() |
||||
|
||||
return hex.EncodeToString(h.Sum(nil)), nil |
||||
} |
@ -0,0 +1,200 @@
|
||||
package peering |
||||
|
||||
import ( |
||||
"context" |
||||
"testing" |
||||
"time" |
||||
|
||||
"github.com/golang/protobuf/proto" |
||||
"github.com/hashicorp/go-hclog" |
||||
"github.com/stretchr/testify/require" |
||||
|
||||
"github.com/hashicorp/consul/acl" |
||||
"github.com/hashicorp/consul/agent/cache" |
||||
"github.com/hashicorp/consul/agent/structs" |
||||
"github.com/hashicorp/consul/proto/pbservice" |
||||
"github.com/hashicorp/consul/sdk/testutil" |
||||
) |
||||
|
||||
// TestSubscriptionState_Events exercises sendPendingEvents' duplicate
// suppression: an event is delivered only when the hash (Version) of its
// payload differs from the last one sent under the same id. All steps share
// one subscriptionState, so suppression carries across steps and the order
// of the steps matters.
func TestSubscriptionState_Events(t *testing.T) {
	logger := hclog.NewNullLogger()

	partition := acl.DefaultEnterpriseMeta().PartitionOrEmpty()

	state := newSubscriptionState(partition)

	// An empty payload should deliver nothing.
	testutil.RunStep(t, "empty", func(t *testing.T) {
		pending := &pendingPayload{}

		ch := make(chan cache.UpdateEvent, 1)
		state.publicUpdateCh = ch
		go func() {
			state.sendPendingEvents(context.Background(), logger, pending)
			close(ch)
		}()

		got := drainEvents(t, ch)
		require.Len(t, got, 0)
	})

	meshNode1 := &pbservice.CheckServiceNode{
		Node:    &pbservice.Node{Node: "foo"},
		Service: &pbservice.NodeService{ID: "mgw-1", Service: "mgw", Kind: "mesh-gateway"},
	}

	// A never-before-seen mesh gateway payload is delivered once.
	testutil.RunStep(t, "one", func(t *testing.T) {
		pending := &pendingPayload{}
		require.NoError(t, pending.Add(
			meshGatewayPayloadID,
			subMeshGateway+partition,
			&pbservice.IndexedCheckServiceNodes{
				Nodes: []*pbservice.CheckServiceNode{
					proto.Clone(meshNode1).(*pbservice.CheckServiceNode),
				},
			},
		))

		ch := make(chan cache.UpdateEvent, 1)
		state.publicUpdateCh = ch
		go func() {
			state.sendPendingEvents(context.Background(), logger, pending)
			close(ch)
		}()

		got := drainEvents(t, ch)
		require.Len(t, got, 1)

		evt := got[0]
		require.Equal(t, subMeshGateway+partition, evt.CorrelationID)
		require.Len(t, evt.Result.(*pbservice.IndexedCheckServiceNodes).Nodes, 1)
	})

	// Re-sending the identical payload (same hash) is suppressed.
	testutil.RunStep(t, "a duplicate is omitted", func(t *testing.T) {
		pending := &pendingPayload{}
		require.NoError(t, pending.Add(
			meshGatewayPayloadID,
			subMeshGateway+partition,
			&pbservice.IndexedCheckServiceNodes{
				Nodes: []*pbservice.CheckServiceNode{
					proto.Clone(meshNode1).(*pbservice.CheckServiceNode),
				},
			},
		))

		ch := make(chan cache.UpdateEvent, 1)
		state.publicUpdateCh = ch
		go func() {
			state.sendPendingEvents(context.Background(), logger, pending)
			close(ch)
		}()

		got := drainEvents(t, ch)
		require.Len(t, got, 0)
	})

	webNode1 := &pbservice.CheckServiceNode{
		Node:    &pbservice.Node{Node: "zim"},
		Service: &pbservice.NodeService{ID: "web-1", Service: "web"},
	}

	webSN := structs.NewServiceName("web", nil)

	// Suppression is per-id: the duplicate mesh gateway event is dropped
	// while the new exported-service event in the same batch goes through.
	testutil.RunStep(t, "a duplicate is omitted even if mixed", func(t *testing.T) {
		pending := &pendingPayload{}
		require.NoError(t, pending.Add(
			meshGatewayPayloadID,
			subMeshGateway+partition,
			&pbservice.IndexedCheckServiceNodes{
				Nodes: []*pbservice.CheckServiceNode{
					proto.Clone(meshNode1).(*pbservice.CheckServiceNode),
				},
			},
		))
		require.NoError(t, pending.Add(
			servicePayloadIDPrefix+webSN.String(),
			subExportedService+webSN.String(),
			&pbservice.IndexedCheckServiceNodes{
				Nodes: []*pbservice.CheckServiceNode{
					proto.Clone(webNode1).(*pbservice.CheckServiceNode),
				},
			},
		))

		ch := make(chan cache.UpdateEvent, 1)
		state.publicUpdateCh = ch
		go func() {
			state.sendPendingEvents(context.Background(), logger, pending)
			close(ch)
		}()

		got := drainEvents(t, ch)
		require.Len(t, got, 1)

		evt := got[0]
		require.Equal(t, subExportedService+webSN.String(), evt.CorrelationID)
		require.Len(t, evt.Result.(*pbservice.IndexedCheckServiceNodes).Nodes, 1)
	})

	meshNode2 := &pbservice.CheckServiceNode{
		Node:    &pbservice.Node{Node: "bar"},
		Service: &pbservice.NodeService{ID: "mgw-2", Service: "mgw", Kind: "mesh-gateway"},
	}

	// A changed payload under a previously seen id hashes differently and is
	// therefore published again.
	testutil.RunStep(t, "an update to an existing item is published", func(t *testing.T) {
		pending := &pendingPayload{}
		require.NoError(t, pending.Add(
			meshGatewayPayloadID,
			subMeshGateway+partition,
			&pbservice.IndexedCheckServiceNodes{
				Nodes: []*pbservice.CheckServiceNode{
					proto.Clone(meshNode1).(*pbservice.CheckServiceNode),
					proto.Clone(meshNode2).(*pbservice.CheckServiceNode),
				},
			},
		))

		ch := make(chan cache.UpdateEvent, 1)
		state.publicUpdateCh = ch
		go func() {
			state.sendPendingEvents(context.Background(), logger, pending)
			close(ch)
		}()

		got := drainEvents(t, ch)
		require.Len(t, got, 1)

		evt := got[0]
		require.Equal(t, subMeshGateway+partition, evt.CorrelationID)
		require.Len(t, evt.Result.(*pbservice.IndexedCheckServiceNodes).Nodes, 2)
	})
}
||||
|
||||
func drainEvents(t *testing.T, ch <-chan cache.UpdateEvent) []cache.UpdateEvent { |
||||
var out []cache.UpdateEvent |
||||
|
||||
for { |
||||
select { |
||||
case evt, ok := <-ch: |
||||
if !ok { |
||||
return out |
||||
} |
||||
out = append(out, evt) |
||||
case <-time.After(100 * time.Millisecond): |
||||
t.Fatalf("channel did not close in time") |
||||
} |
||||
} |
||||
} |
||||
|
||||
func testNewSubscriptionState(partition string) ( |
||||
*subscriptionState, |
||||
chan cache.UpdateEvent, |
||||
) { |
||||
var ( |
||||
publicUpdateCh = make(chan cache.UpdateEvent, 1) |
||||
) |
||||
|
||||
state := newSubscriptionState(partition) |
||||
state.publicUpdateCh = publicUpdateCh |
||||
|
||||
return state, publicUpdateCh |
||||
} |
@ -0,0 +1,12 @@
|
||||
package maps |
||||
|
||||
// SliceOfKeys returns the keys of m as a slice, in unspecified order.
// A nil or empty map yields a nil slice.
func SliceOfKeys[K comparable, V any](m map[K]V) []K {
	if len(m) == 0 {
		return nil
	}
	keys := make([]K, len(m))
	i := 0
	for k := range m {
		keys[i] = k
		i++
	}
	return keys
}
@ -0,0 +1,41 @@
|
||||
package maps |
||||
|
||||
import ( |
||||
"testing" |
||||
|
||||
"github.com/stretchr/testify/require" |
||||
) |
||||
|
||||
func TestSliceOfKeys(t *testing.T) { |
||||
t.Run("string to int", func(t *testing.T) { |
||||
m := make(map[string]int) |
||||
require.Equal(t, []string(nil), SliceOfKeys(m)) |
||||
m["foo"] = 5 |
||||
m["bar"] = 6 |
||||
require.ElementsMatch(t, []string{"foo", "bar"}, SliceOfKeys(m)) |
||||
}) |
||||
|
||||
type blah struct { |
||||
V string |
||||
} |
||||
|
||||
t.Run("int to struct", func(t *testing.T) { |
||||
m := make(map[int]blah) |
||||
require.Equal(t, []int(nil), SliceOfKeys(m)) |
||||
m[5] = blah{V: "foo"} |
||||
m[6] = blah{V: "bar"} |
||||
require.ElementsMatch(t, []int{5, 6}, SliceOfKeys(m)) |
||||
}) |
||||
|
||||
type id struct { |
||||
Name string |
||||
} |
||||
|
||||
t.Run("struct to struct pointer", func(t *testing.T) { |
||||
m := make(map[id]*blah) |
||||
require.Equal(t, []id(nil), SliceOfKeys(m)) |
||||
m[id{Name: "foo"}] = &blah{V: "oof"} |
||||
m[id{Name: "bar"}] = &blah{V: "rab"} |
||||
require.ElementsMatch(t, []id{{Name: "foo"}, {Name: "bar"}}, SliceOfKeys(m)) |
||||
}) |
||||
} |
Loading…
Reference in new issue