From 584f90bbebd58b59bf01547a39cd4ff24ec6a622 Mon Sep 17 00:00:00 2001 From: Chris Piraino Date: Thu, 2 Apr 2020 10:12:13 -0500 Subject: [PATCH] Fix flapping of mesh gateway connect-service watches (#7575) --- agent/proxycfg/state.go | 6 +- agent/proxycfg/state_test.go | 55 +++++++++++++++++++ .../case-gateways-local/primary/verify.bats | 4 ++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/agent/proxycfg/state.go b/agent/proxycfg/state.go index f4ad0c7200..799037970e 100644 --- a/agent/proxycfg/state.go +++ b/agent/proxycfg/state.go @@ -810,6 +810,10 @@ func (s *state) handleUpdateMeshGateway(u cache.UpdateEvent, snap *ConfigSnapsho svcMap := make(map[structs.ServiceID]struct{}) for _, svc := range services.Services { sid := svc.ToServiceID() + // Make sure to add every service to this map, we use it to cancel + // watches below. + svcMap[sid] = struct{}{} + if _, ok := snap.MeshGateway.WatchedServices[sid]; !ok { ctx, cancel := context.WithCancel(s.ctx) err := s.cache.Notify(ctx, cachetype.HealthServicesName, &structs.ServiceSpecificRequest{ @@ -829,12 +833,12 @@ func (s *state) handleUpdateMeshGateway(u cache.UpdateEvent, snap *ConfigSnapsho return err } snap.MeshGateway.WatchedServices[sid] = cancel - svcMap[sid] = struct{}{} } } for sid, cancelFn := range snap.MeshGateway.WatchedServices { if _, ok := svcMap[sid]; !ok { + meshLogger.Debug("canceling watch for service", "service", sid.String()) delete(snap.MeshGateway.WatchedServices, sid) cancelFn() } diff --git a/agent/proxycfg/state_test.go b/agent/proxycfg/state_test.go index f80ff870bf..074842f4cc 100644 --- a/agent/proxycfg/state_test.go +++ b/agent/proxycfg/state_test.go @@ -610,6 +610,61 @@ func TestState_WatchesAndUpdates(t *testing.T) { }, }, }, + "mesh-gateway-do-not-cancel-service-watches": testCase{ + ns: structs.NodeService{ + Kind: structs.ServiceKindMeshGateway, + ID: "mesh-gateway", + Service: "mesh-gateway", + Address: "10.0.1.1", + Port: 443, + }, + sourceDC: "dc1", + stages: []verificationStage{ + verificationStage{ + requiredWatches: map[string]verifyWatchRequest{ + rootsWatchID: genVerifyRootsWatch("dc1"), + serviceListWatchID: genVerifyListServicesWatch("dc1"), + datacentersWatchID: verifyDatacentersWatch, + }, + events: []cache.UpdateEvent{ + rootWatchEvent(), + cache.UpdateEvent{ + CorrelationID: serviceListWatchID, + Result: &structs.IndexedServiceList{ + Services: structs.ServiceList{ + {Name: "web"}, + }, + }, + Err: nil, + }, + }, + verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { + require.True(t, snap.Valid(), "gateway with service list is vaild") + require.Len(t, snap.MeshGateway.WatchedServices, 1) + require.True(t, snap.MeshGateway.WatchedServicesSet) + }, + }, + verificationStage{ + events: []cache.UpdateEvent{ + cache.UpdateEvent{ + CorrelationID: serviceListWatchID, + Result: &structs.IndexedServiceList{ + Services: structs.ServiceList{ + {Name: "web"}, + {Name: "api"}, + }, + }, + Err: nil, + }, + }, + verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { + require.True(t, snap.Valid(), "gateway with service list is vaild") + require.Len(t, snap.MeshGateway.WatchedServices, 2) + require.True(t, snap.MeshGateway.WatchedServicesSet) + }, + }, + }, + }, "connect-proxy": newConnectProxyCase(structs.MeshGatewayModeDefault), "connect-proxy-mesh-gateway-local": newConnectProxyCase(structs.MeshGatewayModeLocal), } diff --git a/test/integration/connect/envoy/case-gateways-local/primary/verify.bats b/test/integration/connect/envoy/case-gateways-local/primary/verify.bats index b7fe8488fd..3cb1bff2f8 100644 --- a/test/integration/connect/envoy/case-gateways-local/primary/verify.bats +++ b/test/integration/connect/envoy/case-gateways-local/primary/verify.bats @@ -27,6 +27,10 @@ load helpers assert_upstream_has_endpoints_in_status 127.0.0.1:19002 secondary HEALTHY 1 } +@test "gateway-secondary should have healthy endpoints for s2" { + assert_upstream_has_endpoints_in_status consul-secondary:19003 s2 HEALTHY 1 +} + @test "s1 upstream should be able to connect to s2" { run retry_default curl -s -f -d hello localhost:5000 [ "$status" -eq 0 ]