Merge branch 'main' into add-docs-for-anno

pull/12162/head
David Yu committed 3 years ago via GitHub
commit f664d13b10

@ -0,0 +1,3 @@
```release-note:enhancement
streaming: Improved performance when the server is handling many concurrent subscriptions and has a high number of CPU cores
```

@ -0,0 +1,3 @@
```release-note:bug
xds: fix for delta xDS reconnect bug in LDS/CDS
```

@ -0,0 +1,3 @@
```release-note:enhancement
systemd: Support starting/stopping the systemd service for linux packages when the optional EnvironmentFile does not exist.
```

@ -690,10 +690,15 @@ jobs:
git config --local user.email "github-team-consul-core@hashicorp.com"
git config --local user.name "hc-github-team-consul-core"
# stash newly built bindata_assetfs.go
git stash push
# checkout the CI branch and merge latest from main
git checkout ci/main-assetfs-build
git merge --no-edit main
git stash pop
short_sha=$(git rev-parse --short HEAD)
git add agent/uiserver/bindata_assetfs.go
git commit -m "auto-updated agent/uiserver/bindata_assetfs.go from commit ${short_sha}"
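
(Editor's note: the stash push/pop pair carries the freshly built bindata_assetfs.go across the branch switch — the asset is built on main, stashed, and re-applied after checking out and merging into ci/main-assetfs-build — so only the generated file is committed on the CI branch.)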

@ -218,7 +218,7 @@ jobs:
GOLDFLAGS: "${{ needs.get-product-version.outputs.shared-ldflags }}"
run: |
mkdir dist out
go build -ldflags="$GOLDFLAGS" -o dist/ .
go build -ldflags="$GOLDFLAGS" -tags netcgo -o dist/ .
zip -r -j out/${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip dist/
- uses: actions/upload-artifact@v2
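
(Editor's note: the added `-tags netcgo` forces Go's net package to use the cgo-based system resolver rather than the pure-Go one, presumably so the release binaries resolve names through the host's libc configuration.)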

@ -6,7 +6,7 @@ After=network-online.target
ConditionFileNotEmpty=/etc/consul.d/consul.hcl
[Service]
EnvironmentFile=/etc/consul.d/consul.env
EnvironmentFile=-/etc/consul.d/consul.env
User=consul
Group=consul
ExecStart=/usr/bin/consul agent -config-dir=/etc/consul.d/
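
(Editor's note: the leading `-` added to `EnvironmentFile=` tells systemd to silently skip the file when it does not exist, which is what lets the packaged service start and stop even though consul.env is optional — matching the systemd changelog entry above.)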

@ -34,34 +34,13 @@ func (_m *MockProvider) ActiveIntermediate() (string, error) {
return r0, r1
}
// ActiveRoot provides a mock function with given fields:
func (_m *MockProvider) ActiveRoot() (string, error) {
ret := _m.Called()
var r0 string
if rf, ok := ret.Get(0).(func() string); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(string)
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Cleanup provides a mock function with given fields: providerTypeChange, config
func (_m *MockProvider) Cleanup(providerTypeChange bool, config map[string]interface{}) error {
ret := _m.Called(providerTypeChange, config)
// Cleanup provides a mock function with given fields: providerTypeChange, otherConfig
func (_m *MockProvider) Cleanup(providerTypeChange bool, otherConfig map[string]interface{}) error {
ret := _m.Called(providerTypeChange, otherConfig)
var r0 error
if rf, ok := ret.Get(0).(func(bool, map[string]interface{}) error); ok {
r0 = rf(providerTypeChange, config)
r0 = rf(providerTypeChange, otherConfig)
} else {
r0 = ret.Error(0)
}
@ -147,17 +126,24 @@ func (_m *MockProvider) GenerateIntermediateCSR() (string, error) {
}
// GenerateRoot provides a mock function with given fields:
func (_m *MockProvider) GenerateRoot() error {
func (_m *MockProvider) GenerateRoot() (RootResult, error) {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
var r0 RootResult
if rf, ok := ret.Get(0).(func() RootResult); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
r0 = ret.Get(0).(RootResult)
}
return r0
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// SetIntermediate provides a mock function with given fields: intermediatePEM, rootPEM
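
A minimal sketch of exercising the regenerated mock under the new signature (testify `mock` semantics; assumes MockProvider embeds mock.Mock as mockery-generated mocks do, and the PEM value and test name are illustrative):

```go
func TestMockProviderGenerateRoot(t *testing.T) {
	m := &MockProvider{}
	// The stubbed RootResult flows back through ret.Get(0).(RootResult) above.
	m.On("GenerateRoot").Return(RootResult{PEM: "fake-pem"}, nil)

	root, err := m.GenerateRoot()
	require.NoError(t, err)
	require.Equal(t, "fake-pem", root.PEM)
}
```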

@ -118,17 +118,18 @@ type Provider interface {
}
type PrimaryProvider interface {
// GenerateRoot causes the creation of a new root certificate for this provider.
// This can also be a no-op if a root certificate already exists for the given
// config. If IsPrimary is false, calling this method is an error.
GenerateRoot() error
// ActiveRoot returns the currently active root CA for this
// provider. This should be a parent of the certificate returned by
// ActiveIntermediate()
// GenerateRoot is called:
// * to initialize the CA system when a server is elected as a raft leader
// * when the CA configuration is updated in a way that might require
// generating a new root certificate.
//
// TODO: currently called from secondaries, but shouldn't be so is on PrimaryProvider
ActiveRoot() (string, error)
// In both cases GenerateRoot is always called on a newly created provider
// after calling Provider.Configure, and before any other calls to the
// provider.
//
// The provider should return an existing root certificate if one exists,
// otherwise it should generate a new root certificate and return it.
GenerateRoot() (RootResult, error)
// GenerateIntermediate returns a new intermediate signing cert and sets it to
// the active intermediate. If multiple intermediates are needed to complete
@ -181,6 +182,14 @@ type SecondaryProvider interface {
SetIntermediate(intermediatePEM, rootPEM string) error
}
// RootResult is the result returned by PrimaryProvider.GenerateRoot.
//
// TODO: rename this struct
type RootResult struct {
// PEM encoded certificate that will be used as the primary CA.
PEM string
}
// NeedsStop is an optional interface that allows a CA to define a function
// to be called when the CA instance is no longer in use. This is different
// from Cleanup(), as only the local provider instance is being shut down
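
A self-contained sketch of the new contract from a caller's perspective (types trimmed, names illustrative; compare primaryInitialize further down, which previously paired GenerateRoot with a separate ActiveRoot call):

```go
package main

import "fmt"

// Trimmed copies of the interface and result type from this diff.
type RootResult struct{ PEM string }

type PrimaryProvider interface {
	GenerateRoot() (RootResult, error)
}

// fakeProvider is illustrative; it returns its existing "root" if it has one.
type fakeProvider struct{ pem string }

func (f fakeProvider) GenerateRoot() (RootResult, error) {
	if f.pem == "" {
		return RootResult{}, fmt.Errorf("provider not initialized")
	}
	return RootResult{PEM: f.pem}, nil
}

func main() {
	var p PrimaryProvider = fakeProvider{pem: "-----BEGIN CERTIFICATE-----..."}
	// One call both ensures a root exists and returns its PEM; the old
	// GenerateRoot + ActiveRoot round-trip is gone.
	root, err := p.GenerateRoot()
	if err != nil {
		panic(err)
	}
	fmt.Println(len(root.PEM))
}
```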

@ -134,12 +134,19 @@ func (a *AWSProvider) State() (map[string]string, error) {
}
// GenerateRoot implements Provider
func (a *AWSProvider) GenerateRoot() error {
func (a *AWSProvider) GenerateRoot() (RootResult, error) {
if !a.isPrimary {
return fmt.Errorf("provider is not the root certificate authority")
return RootResult{}, fmt.Errorf("provider is not the root certificate authority")
}
return a.ensureCA()
if err := a.ensureCA(); err != nil {
return RootResult{}, err
}
if a.rootPEM == "" {
return RootResult{}, fmt.Errorf("AWS CA provider not fully Initialized")
}
return RootResult{PEM: a.rootPEM}, nil
}
// ensureCA loads the CA resource to check it exists if configured by User or in
@ -489,19 +496,6 @@ func (a *AWSProvider) signCSR(csrPEM string, templateARN string, ttl time.Durati
})
}
// ActiveRoot implements Provider
func (a *AWSProvider) ActiveRoot() (string, error) {
err := a.ensureCA()
if err != nil {
return "", err
}
if a.rootPEM == "" {
return "", fmt.Errorf("Secondary AWS CA provider not fully Initialized")
}
return a.rootPEM, nil
}
// GenerateIntermediateCSR implements Provider
func (a *AWSProvider) GenerateIntermediateCSR() (string, error) {
if a.isPrimary {

@ -46,12 +46,9 @@ func TestAWSBootstrapAndSignPrimary(t *testing.T) {
provider := testAWSProvider(t, testProviderConfigPrimary(t, cfg))
defer provider.Cleanup(true, nil)
// Generate the root
require.NoError(t, provider.GenerateRoot())
// Fetch Active Root
rootPEM, err := provider.ActiveRoot()
root, err := provider.GenerateRoot()
require.NoError(t, err)
rootPEM := root.PEM
// Generate Intermediate (not actually needed for this provider for now
// but this simulates the calls in Server.initializeRoot).
@ -81,16 +78,12 @@ func TestAWSBootstrapAndSignPrimary(t *testing.T) {
}
t.Run("Test default root ttl for aws ca provider", func(t *testing.T) {
provider := testAWSProvider(t, testProviderConfigPrimary(t, nil))
defer provider.Cleanup(true, nil)
// Generate the root
require.NoError(t, provider.GenerateRoot())
// Fetch Active Root
rootPEM, err := provider.ActiveRoot()
root, err := provider.GenerateRoot()
require.NoError(t, err)
rootPEM := root.PEM
// Ensure they use the right key type
rootCert, err := connect.ParseCert(rootPEM)
@ -123,8 +116,9 @@ func TestAWSBootstrapAndSignSecondary(t *testing.T) {
p1 := testAWSProvider(t, testProviderConfigPrimary(t, nil))
defer p1.Cleanup(true, nil)
rootPEM, err := p1.ActiveRoot()
root, err := p1.GenerateRoot()
require.NoError(t, err)
rootPEM := root.PEM
p2 := testAWSProvider(t, testProviderConfigSecondary(t, nil))
defer p2.Cleanup(true, nil)
@ -151,8 +145,9 @@ func TestAWSBootstrapAndSignSecondary(t *testing.T) {
cfg1 := testProviderConfigPrimary(t, nil)
cfg1.State = p1State
p1 = testAWSProvider(t, cfg1)
newRootPEM, err := p1.ActiveRoot()
root, err := p1.GenerateRoot()
require.NoError(t, err)
newRootPEM := root.PEM
cfg2 := testProviderConfigPrimary(t, nil)
cfg2.State = p2State
@ -184,8 +179,9 @@ func TestAWSBootstrapAndSignSecondary(t *testing.T) {
"ExistingARN": p1State[AWSStateCAARNKey],
})
p1 = testAWSProvider(t, cfg1)
newRootPEM, err := p1.ActiveRoot()
root, err := p1.GenerateRoot()
require.NoError(t, err)
newRootPEM := root.PEM
cfg2 := testProviderConfigPrimary(t, map[string]interface{}{
"ExistingARN": p2State[AWSStateCAARNKey],
@ -222,8 +218,9 @@ func TestAWSBootstrapAndSignSecondary(t *testing.T) {
p2 = testAWSProvider(t, cfg2)
require.NoError(t, p2.SetIntermediate(newIntPEM, newRootPEM))
newRootPEM, err = p1.ActiveRoot()
root, err = p1.GenerateRoot()
require.NoError(t, err)
newRootPEM = root.PEM
newIntPEM, err = p2.ActiveIntermediate()
require.NoError(t, err)
@ -243,7 +240,8 @@ func TestAWSBootstrapAndSignSecondaryConsul(t *testing.T) {
p1 := TestConsulProvider(t, delegate)
cfg := testProviderConfig(conf)
require.NoError(t, p1.Configure(cfg))
require.NoError(t, p1.GenerateRoot())
_, err := p1.GenerateRoot()
require.NoError(t, err)
p2 := testAWSProvider(t, testProviderConfigSecondary(t, nil))
defer p2.Cleanup(true, nil)
@ -254,7 +252,9 @@ func TestAWSBootstrapAndSignSecondaryConsul(t *testing.T) {
t.Run("pri=aws,sec=consul", func(t *testing.T) {
p1 := testAWSProvider(t, testProviderConfigPrimary(t, nil))
defer p1.Cleanup(true, nil)
require.NoError(t, p1.GenerateRoot())
_, err := p1.GenerateRoot()
require.NoError(t, err)
conf := testConsulCAConfig()
delegate := newMockDelegate(t, conf)
@ -315,11 +315,13 @@ func TestAWSProvider_Cleanup(t *testing.T) {
}
requirePCADeleted := func(t *testing.T, provider *AWSProvider) {
t.Helper()
deleted, err := describeCA(t, provider)
require.True(t, err != nil || deleted, "The AWS PCA instance has not been deleted")
}
requirePCANotDeleted := func(t *testing.T, provider *AWSProvider) {
t.Helper()
deleted, err := describeCA(t, provider)
require.NoError(t, err)
require.False(t, deleted, "The AWS PCA instance should not have been deleted")

@ -149,29 +149,18 @@ func (c *ConsulProvider) State() (map[string]string, error) {
return c.testState, nil
}
// ActiveRoot returns the active root CA certificate.
func (c *ConsulProvider) ActiveRoot() (string, error) {
// GenerateRoot initializes a new root certificate and private key if needed.
func (c *ConsulProvider) GenerateRoot() (RootResult, error) {
providerState, err := c.getState()
if err != nil {
return "", err
}
return providerState.RootCert, nil
}
// GenerateRoot initializes a new root certificate and private key
// if needed.
func (c *ConsulProvider) GenerateRoot() error {
providerState, err := c.getState()
if err != nil {
return err
return RootResult{}, err
}
if !c.isPrimary {
return fmt.Errorf("provider is not the root certificate authority")
return RootResult{}, fmt.Errorf("provider is not the root certificate authority")
}
if providerState.RootCert != "" {
return nil
return RootResult{PEM: providerState.RootCert}, nil
}
// Generate a private key if needed
@ -179,7 +168,7 @@ func (c *ConsulProvider) GenerateRoot() error {
if c.config.PrivateKey == "" {
_, pk, err := connect.GeneratePrivateKeyWithConfig(c.config.PrivateKeyType, c.config.PrivateKeyBits)
if err != nil {
return err
return RootResult{}, err
}
newState.PrivateKey = pk
} else {
@ -190,12 +179,12 @@ func (c *ConsulProvider) GenerateRoot() error {
if c.config.RootCert == "" {
nextSerial, err := c.incrementAndGetNextSerialNumber()
if err != nil {
return fmt.Errorf("error computing next serial number: %v", err)
return RootResult{}, fmt.Errorf("error computing next serial number: %v", err)
}
ca, err := c.generateCA(newState.PrivateKey, nextSerial, c.config.RootCertTTL)
if err != nil {
return fmt.Errorf("error generating CA: %v", err)
return RootResult{}, fmt.Errorf("error generating CA: %v", err)
}
newState.RootCert = ca
} else {
@ -208,10 +197,10 @@ func (c *ConsulProvider) GenerateRoot() error {
ProviderState: &newState,
}
if _, err := c.Delegate.ApplyCARequest(args); err != nil {
return err
return RootResult{}, err
}
return nil
return RootResult{PEM: newState.RootCert}, nil
}
// GenerateIntermediateCSR creates a private key and generates a CSR
@ -288,18 +277,15 @@ func (c *ConsulProvider) SetIntermediate(intermediatePEM, rootPEM string) error
return nil
}
// We aren't maintaining separate root/intermediate CAs for the builtin
// provider, so just return the root.
func (c *ConsulProvider) ActiveIntermediate() (string, error) {
if c.isPrimary {
return c.ActiveRoot()
}
providerState, err := c.getState()
if err != nil {
return "", err
}
if c.isPrimary {
return providerState.RootCert, nil
}
return providerState.IntermediateCert, nil
}

@ -83,18 +83,17 @@ func TestConsulCAProvider_Bootstrap(t *testing.T) {
provider := TestConsulProvider(t, delegate)
require.NoError(t, provider.Configure(testProviderConfig(conf)))
require.NoError(t, provider.GenerateRoot())
root, err := provider.ActiveRoot()
root, err := provider.GenerateRoot()
require.NoError(t, err)
// Intermediate should be the same cert.
inter, err := provider.ActiveIntermediate()
require.NoError(t, err)
require.Equal(t, root, inter)
require.Equal(t, root.PEM, inter)
// Should be a valid cert
parsed, err := connect.ParseCert(root)
parsed, err := connect.ParseCert(root.PEM)
require.NoError(t, err)
require.Equal(t, parsed.URIs[0].String(), fmt.Sprintf("spiffe://%s.consul", conf.ClusterID))
requireNotEncoded(t, parsed.SubjectKeyId)
@ -123,14 +122,13 @@ func TestConsulCAProvider_Bootstrap_WithCert(t *testing.T) {
provider := TestConsulProvider(t, delegate)
require.NoError(t, provider.Configure(testProviderConfig(conf)))
require.NoError(t, provider.GenerateRoot())
root, err := provider.ActiveRoot()
root, err := provider.GenerateRoot()
require.NoError(t, err)
require.Equal(t, root, rootCA.RootCert)
require.Equal(t, root.PEM, rootCA.RootCert)
// Should be a valid cert
parsed, err := connect.ParseCert(root)
parsed, err := connect.ParseCert(root.PEM)
require.NoError(t, err)
// test that the default root cert ttl was not applied to the provided cert
@ -160,7 +158,8 @@ func TestConsulCAProvider_SignLeaf(t *testing.T) {
provider := TestConsulProvider(t, delegate)
require.NoError(t, provider.Configure(testProviderConfig(conf)))
require.NoError(t, provider.GenerateRoot())
_, err := provider.GenerateRoot()
require.NoError(t, err)
spiffeService := &connect.SpiffeIDService{
Host: connect.TestClusterID + ".consul",
@ -272,7 +271,8 @@ func TestConsulCAProvider_CrossSignCA(t *testing.T) {
conf1.Config["PrivateKeyType"] = tc.SigningKeyType
conf1.Config["PrivateKeyBits"] = tc.SigningKeyBits
require.NoError(t, provider1.Configure(testProviderConfig(conf1)))
require.NoError(t, provider1.GenerateRoot())
_, err := provider1.GenerateRoot()
require.NoError(t, err)
conf2 := testConsulCAConfig()
conf2.CreateIndex = 10
@ -281,7 +281,8 @@ func TestConsulCAProvider_CrossSignCA(t *testing.T) {
conf2.Config["PrivateKeyType"] = tc.CSRKeyType
conf2.Config["PrivateKeyBits"] = tc.CSRKeyBits
require.NoError(t, provider2.Configure(testProviderConfig(conf2)))
require.NoError(t, provider2.GenerateRoot())
_, err = provider2.GenerateRoot()
require.NoError(t, err)
testCrossSignProviders(t, provider1, provider2)
})
@ -291,9 +292,10 @@ func TestConsulCAProvider_CrossSignCA(t *testing.T) {
func testCrossSignProviders(t *testing.T, provider1, provider2 Provider) {
// Get the root from the new provider to be cross-signed.
newRootPEM, err := provider2.ActiveRoot()
root, err := provider2.GenerateRoot()
require.NoError(t, err)
newRoot, err := connect.ParseCert(newRootPEM)
newRoot, err := connect.ParseCert(root.PEM)
require.NoError(t, err)
oldSubject := newRoot.Subject.CommonName
requireNotEncoded(t, newRoot.SubjectKeyId)
@ -314,9 +316,9 @@ func testCrossSignProviders(t *testing.T, provider1, provider2 Provider) {
requireNotEncoded(t, xc.SubjectKeyId)
requireNotEncoded(t, xc.AuthorityKeyId)
oldRootPEM, err := provider1.ActiveRoot()
p1Root, err := provider1.GenerateRoot()
require.NoError(t, err)
oldRoot, err := connect.ParseCert(oldRootPEM)
oldRoot, err := connect.ParseCert(p1Root.PEM)
require.NoError(t, err)
requireNotEncoded(t, oldRoot.SubjectKeyId)
requireNotEncoded(t, oldRoot.AuthorityKeyId)
@ -392,7 +394,8 @@ func TestConsulProvider_SignIntermediate(t *testing.T) {
conf1.Config["PrivateKeyType"] = tc.SigningKeyType
conf1.Config["PrivateKeyBits"] = tc.SigningKeyBits
require.NoError(t, provider1.Configure(testProviderConfig(conf1)))
require.NoError(t, provider1.GenerateRoot())
_, err := provider1.GenerateRoot()
require.NoError(t, err)
conf2 := testConsulCAConfig()
conf2.CreateIndex = 10
@ -422,8 +425,9 @@ func testSignIntermediateCrossDC(t *testing.T, provider1, provider2 Provider) {
// Sign the CSR with provider1.
intermediatePEM, err := provider1.SignIntermediate(csr)
require.NoError(t, err)
rootPEM, err := provider1.ActiveRoot()
root, err := provider1.GenerateRoot()
require.NoError(t, err)
rootPEM := root.PEM
// Give the new intermediate to provider2 to use.
require.NoError(t, provider2.SetIntermediate(intermediatePEM, rootPEM))
@ -496,7 +500,8 @@ func TestConsulCAProvider_MigrateOldID(t *testing.T) {
provider := TestConsulProvider(t, delegate)
require.NoError(t, provider.Configure(testProviderConfig(conf)))
require.NoError(t, provider.GenerateRoot())
_, err = provider.GenerateRoot()
require.NoError(t, err)
// After running Configure, the old ID entry should be gone.
_, providerState, err = delegate.state.CAProviderState(tc.oldID)

@ -12,11 +12,12 @@ import (
"strings"
"time"
"github.com/hashicorp/consul/lib/decode"
"github.com/hashicorp/go-hclog"
vaultapi "github.com/hashicorp/vault/api"
"github.com/mitchellh/mapstructure"
"github.com/hashicorp/consul/lib/decode"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/structs"
)
@ -220,19 +221,14 @@ func (v *VaultProvider) State() (map[string]string, error) {
return nil, nil
}
// ActiveRoot returns the active root CA certificate.
func (v *VaultProvider) ActiveRoot() (string, error) {
return v.getCA(v.config.RootPKIPath)
}
// GenerateRoot mounts and initializes a new root PKI backend if needed.
func (v *VaultProvider) GenerateRoot() error {
func (v *VaultProvider) GenerateRoot() (RootResult, error) {
if !v.isPrimary {
return fmt.Errorf("provider is not the root certificate authority")
return RootResult{}, fmt.Errorf("provider is not the root certificate authority")
}
// Set up the root PKI backend if necessary.
rootPEM, err := v.ActiveRoot()
rootPEM, err := v.getCA(v.config.RootPKIPath)
switch err {
case ErrBackendNotMounted:
err := v.client.Sys().Mount(v.config.RootPKIPath, &vaultapi.MountInput{
@ -247,14 +243,14 @@ func (v *VaultProvider) GenerateRoot() error {
},
})
if err != nil {
return err
return RootResult{}, err
}
fallthrough
case ErrBackendNotInitialized:
uid, err := connect.CompactUID()
if err != nil {
return err
return RootResult{}, err
}
_, err = v.client.Logical().Write(v.config.RootPKIPath+"root/generate/internal", map[string]interface{}{
"common_name": connect.CACN("vault", uid, v.clusterID, v.isPrimary),
@ -263,17 +259,25 @@ func (v *VaultProvider) GenerateRoot() error {
"key_bits": v.config.PrivateKeyBits,
})
if err != nil {
return err
return RootResult{}, err
}
// retrieve the newly generated cert so that we can return it
// TODO: is this already available from the Logical().Write() above?
rootPEM, err = v.getCA(v.config.RootPKIPath)
if err != nil {
return RootResult{}, err
}
default:
if err != nil {
return err
return RootResult{}, err
}
if rootPEM != "" {
rootCert, err := connect.ParseCert(rootPEM)
if err != nil {
return err
return RootResult{}, err
}
// Vault PKI doesn't allow in-place cert/key regeneration. That
@ -285,18 +289,18 @@ func (v *VaultProvider) GenerateRoot() error {
// ForceWithoutCrossSigning option when changing key types.
foundKeyType, foundKeyBits, err := connect.KeyInfoFromCert(rootCert)
if err != nil {
return err
return RootResult{}, err
}
if v.config.PrivateKeyType != foundKeyType {
return fmt.Errorf("cannot update the PrivateKeyType field without choosing a new PKI mount for the root CA")
return RootResult{}, fmt.Errorf("cannot update the PrivateKeyType field without choosing a new PKI mount for the root CA")
}
if v.config.PrivateKeyBits != foundKeyBits {
return fmt.Errorf("cannot update the PrivateKeyBits field without choosing a new PKI mount for the root CA")
return RootResult{}, fmt.Errorf("cannot update the PrivateKeyBits field without choosing a new PKI mount for the root CA")
}
}
}
return nil
return RootResult{PEM: rootPEM}, nil
}
// GenerateIntermediateCSR creates a private key and generates a CSR
@ -574,7 +578,7 @@ func (v *VaultProvider) SignIntermediate(csr *x509.CertificateRequest) (string,
// CrossSignCA takes a CA certificate and cross-signs it to form a trust chain
// back to our active root.
func (v *VaultProvider) CrossSignCA(cert *x509.Certificate) (string, error) {
rootPEM, err := v.ActiveRoot()
rootPEM, err := v.getCA(v.config.RootPKIPath)
if err != nil {
return "", err
}

@ -238,7 +238,10 @@ func TestVaultCAProvider_Bootstrap(t *testing.T) {
expectedRootCertTTL string
}{
{
certFunc: providerWDefaultRootCertTtl.ActiveRoot,
certFunc: func() (string, error) {
root, err := providerWDefaultRootCertTtl.GenerateRoot()
return root.PEM, err
},
backendPath: "pki-root/",
rootCaCreation: true,
client: client1,
@ -323,8 +326,9 @@ func TestVaultCAProvider_SignLeaf(t *testing.T) {
Service: "foo",
}
rootPEM, err := provider.ActiveRoot()
root, err := provider.GenerateRoot()
require.NoError(t, err)
rootPEM := root.PEM
assertCorrectKeyType(t, tc.KeyType, rootPEM)
intPEM, err := provider.ActiveIntermediate()
@ -407,9 +411,9 @@ func TestVaultCAProvider_CrossSignCA(t *testing.T) {
defer testVault1.Stop()
{
rootPEM, err := provider1.ActiveRoot()
root, err := provider1.GenerateRoot()
require.NoError(t, err)
assertCorrectKeyType(t, tc.SigningKeyType, rootPEM)
assertCorrectKeyType(t, tc.SigningKeyType, root.PEM)
intPEM, err := provider1.ActiveIntermediate()
require.NoError(t, err)
@ -424,9 +428,9 @@ func TestVaultCAProvider_CrossSignCA(t *testing.T) {
defer testVault2.Stop()
{
rootPEM, err := provider2.ActiveRoot()
root, err := provider2.GenerateRoot()
require.NoError(t, err)
assertCorrectKeyType(t, tc.CSRKeyType, rootPEM)
assertCorrectKeyType(t, tc.CSRKeyType, root.PEM)
intPEM, err := provider2.ActiveIntermediate()
require.NoError(t, err)
@ -492,7 +496,8 @@ func TestVaultProvider_SignIntermediateConsul(t *testing.T) {
delegate := newMockDelegate(t, conf)
provider1 := TestConsulProvider(t, delegate)
require.NoError(t, provider1.Configure(testProviderConfig(conf)))
require.NoError(t, provider1.GenerateRoot())
_, err := provider1.GenerateRoot()
require.NoError(t, err)
// Ensure that we don't configure vault to try to mint leaf certs that
// outlive their CA during the test (which hard fails in vault).
@ -786,8 +791,9 @@ func createVaultProvider(t *testing.T, isPrimary bool, addr, token string, rawCo
t.Cleanup(provider.Stop)
require.NoError(t, provider.Configure(cfg))
if isPrimary {
require.NoError(t, provider.GenerateRoot())
_, err := provider.GenerateIntermediate()
_, err := provider.GenerateRoot()
require.NoError(t, err)
_, err = provider.GenerateIntermediate()
require.NoError(t, err)
}

@ -133,11 +133,12 @@ func tokenSecretCacheID(token string) string {
return "token-secret:" + token
}
type ACLResolverDelegate interface {
type ACLResolverBackend interface {
ACLDatacenter() string
ResolveIdentityFromToken(token string) (bool, structs.ACLIdentity, error)
ResolvePolicyFromID(policyID string) (bool, *structs.ACLPolicy, error)
ResolveRoleFromID(roleID string) (bool, *structs.ACLRole, error)
// TODO: separate methods for each RPC call (there are 4)
RPC(method string, args interface{}, reply interface{}) error
EnterpriseACLResolverDelegate
}
@ -160,8 +161,9 @@ type ACLResolverConfig struct {
// CacheConfig is a pass through configuration for ACL cache limits
CacheConfig *structs.ACLCachesConfig
// Delegate that implements some helper functionality that is server/client specific
Delegate ACLResolverDelegate
// Backend is used to retrieve data from the state store, or perform RPCs
// to fetch data from other Datacenters.
Backend ACLResolverBackend
// DisableDuration is the length of time to leave ACLs disabled when an RPC
// request to a server indicates that the ACL system is disabled. If set to
@ -219,9 +221,9 @@ type ACLResolverSettings struct {
// ACLResolver is the type to handle all your token and policy resolution needs.
//
// Supports:
// - Resolving tokens locally via the ACLResolverDelegate
// - Resolving policies locally via the ACLResolverDelegate
// - Resolving roles locally via the ACLResolverDelegate
// - Resolving tokens locally via the ACLResolverBackend
// - Resolving policies locally via the ACLResolverBackend
// - Resolving roles locally via the ACLResolverBackend
// - Resolving legacy tokens remotely via an ACL.GetPolicy RPC
// - Resolving tokens remotely via an ACL.TokenRead RPC
// - Resolving policies remotely via an ACL.PolicyResolve RPC
@ -245,8 +247,8 @@ type ACLResolver struct {
config ACLResolverSettings
logger hclog.Logger
delegate ACLResolverDelegate
aclConf *acl.Config
backend ACLResolverBackend
aclConf *acl.Config
tokens *token.Store
@ -298,8 +300,8 @@ func NewACLResolver(config *ACLResolverConfig) (*ACLResolver, error) {
if config == nil {
return nil, fmt.Errorf("ACL Resolver must be initialized with a config")
}
if config.Delegate == nil {
return nil, fmt.Errorf("ACL Resolver must be initialized with a valid delegate")
if config.Backend == nil {
return nil, fmt.Errorf("ACL Resolver must be initialized with a valid backend")
}
if config.Logger == nil {
@ -331,7 +333,7 @@ func NewACLResolver(config *ACLResolverConfig) (*ACLResolver, error) {
return &ACLResolver{
config: config.Config,
logger: config.Logger.Named(logging.ACL),
delegate: config.Delegate,
backend: config.Backend,
aclConf: config.ACLConfig,
cache: cache,
disableDuration: config.DisableDuration,
@ -349,7 +351,7 @@ func (r *ACLResolver) fetchAndCacheIdentityFromToken(token string, cached *struc
cacheID := tokenSecretCacheID(token)
req := structs.ACLTokenGetRequest{
Datacenter: r.delegate.ACLDatacenter(),
Datacenter: r.backend.ACLDatacenter(),
TokenID: token,
TokenIDType: structs.ACLTokenSecret,
QueryOptions: structs.QueryOptions{
@ -359,7 +361,7 @@ func (r *ACLResolver) fetchAndCacheIdentityFromToken(token string, cached *struc
}
var resp structs.ACLTokenResponse
err := r.delegate.RPC("ACL.TokenRead", &req, &resp)
err := r.backend.RPC("ACL.TokenRead", &req, &resp)
if err == nil {
if resp.Token == nil {
r.cache.PutIdentity(cacheID, nil)
@ -396,7 +398,7 @@ func (r *ACLResolver) fetchAndCacheIdentityFromToken(token string, cached *struc
// we initiate an RPC for the value.
func (r *ACLResolver) resolveIdentityFromToken(token string) (structs.ACLIdentity, error) {
// Attempt to resolve locally first (local results are not cached)
if done, identity, err := r.delegate.ResolveIdentityFromToken(token); done {
if done, identity, err := r.backend.ResolveIdentityFromToken(token); done {
return identity, err
}
@ -437,7 +439,7 @@ func (r *ACLResolver) resolveIdentityFromToken(token string) (structs.ACLIdentit
func (r *ACLResolver) fetchAndCachePoliciesForIdentity(identity structs.ACLIdentity, policyIDs []string, cached map[string]*structs.PolicyCacheEntry) (map[string]*structs.ACLPolicy, error) {
req := structs.ACLPolicyBatchGetRequest{
Datacenter: r.delegate.ACLDatacenter(),
Datacenter: r.backend.ACLDatacenter(),
PolicyIDs: policyIDs,
QueryOptions: structs.QueryOptions{
Token: identity.SecretToken(),
@ -446,7 +448,7 @@ func (r *ACLResolver) fetchAndCachePoliciesForIdentity(identity structs.ACLIdent
}
var resp structs.ACLPolicyBatchResponse
err := r.delegate.RPC("ACL.PolicyResolve", &req, &resp)
err := r.backend.RPC("ACL.PolicyResolve", &req, &resp)
if err == nil {
out := make(map[string]*structs.ACLPolicy)
for _, policy := range resp.Policies {
@ -492,7 +494,7 @@ func (r *ACLResolver) fetchAndCachePoliciesForIdentity(identity structs.ACLIdent
func (r *ACLResolver) fetchAndCacheRolesForIdentity(identity structs.ACLIdentity, roleIDs []string, cached map[string]*structs.RoleCacheEntry) (map[string]*structs.ACLRole, error) {
req := structs.ACLRoleBatchGetRequest{
Datacenter: r.delegate.ACLDatacenter(),
Datacenter: r.backend.ACLDatacenter(),
RoleIDs: roleIDs,
QueryOptions: structs.QueryOptions{
Token: identity.SecretToken(),
@ -501,7 +503,7 @@ func (r *ACLResolver) fetchAndCacheRolesForIdentity(identity structs.ACLIdentity
}
var resp structs.ACLRoleBatchResponse
err := r.delegate.RPC("ACL.RoleResolve", &req, &resp)
err := r.backend.RPC("ACL.RoleResolve", &req, &resp)
if err == nil {
out := make(map[string]*structs.ACLRole)
for _, role := range resp.Roles {
@ -774,7 +776,7 @@ func (r *ACLResolver) collectPoliciesForIdentity(identity structs.ACLIdentity, p
}
for _, policyID := range policyIDs {
if done, policy, err := r.delegate.ResolvePolicyFromID(policyID); done {
if done, policy, err := r.backend.ResolvePolicyFromID(policyID); done {
if err != nil && !acl.IsErrNotFound(err) {
return nil, err
}
@ -871,7 +873,7 @@ func (r *ACLResolver) collectRolesForIdentity(identity structs.ACLIdentity, role
expCacheMap := make(map[string]*structs.RoleCacheEntry)
for _, roleID := range roleIDs {
if done, role, err := r.delegate.ResolveRoleFromID(roleID); done {
if done, role, err := r.backend.ResolveRoleFromID(roleID); done {
if err != nil && !acl.IsErrNotFound(err) {
return nil, err
}

@ -5,7 +5,7 @@ import (
"github.com/hashicorp/consul/agent/structs"
)
var clientACLCacheConfig *structs.ACLCachesConfig = &structs.ACLCachesConfig{
var clientACLCacheConfig = &structs.ACLCachesConfig{
// The ACL cache configuration on client agents is more conservative than
// on the servers. It is assumed that individual client agents will have
// fewer distinct identities accessing the client than a server would
@ -23,23 +23,28 @@ var clientACLCacheConfig *structs.ACLCachesConfig = &structs.ACLCachesConfig{
Roles: 128,
}
func (c *Client) ACLDatacenter() string {
// For resolution running on clients, servers within the current datacenter
type clientACLResolverBackend struct {
// TODO: un-embed
*Client
}
func (c *clientACLResolverBackend) ACLDatacenter() string {
// For resolution running on clients, servers within the current datacenter
// must be queried first to pick up local tokens.
return c.config.Datacenter
}
func (c *Client) ResolveIdentityFromToken(token string) (bool, structs.ACLIdentity, error) {
func (c *clientACLResolverBackend) ResolveIdentityFromToken(token string) (bool, structs.ACLIdentity, error) {
// clients do no local identity resolution at the moment
return false, nil, nil
}
func (c *Client) ResolvePolicyFromID(policyID string) (bool, *structs.ACLPolicy, error) {
func (c *clientACLResolverBackend) ResolvePolicyFromID(policyID string) (bool, *structs.ACLPolicy, error) {
// clients do no local policy resolution at the moment
return false, nil, nil
}
func (c *Client) ResolveRoleFromID(roleID string) (bool, *structs.ACLRole, error) {
func (c *clientACLResolverBackend) ResolveRoleFromID(roleID string) (bool, *structs.ACLRole, error) {
// clients do no local role resolution at the moment
return false, nil, nil
}

@ -100,9 +100,14 @@ func (s *Server) LocalTokensEnabled() bool {
return true
}
func (s *Server) ACLDatacenter() string {
// For resolution running on servers the only option
// is to contact the configured ACL Datacenter
type serverACLResolverBackend struct {
// TODO: un-embed
*Server
}
func (s *serverACLResolverBackend) ACLDatacenter() string {
// For resolution running on servers the only option is to contact the
// configured ACL Datacenter
if s.config.PrimaryDatacenter != "" {
return s.config.PrimaryDatacenter
}
@ -114,6 +119,7 @@ func (s *Server) ACLDatacenter() string {
}
// ResolveIdentityFromToken retrieves a token's full identity given its secretID.
// TODO: why does some code call this directly instead of using ACLResolver.ResolveTokenToIdentity ?
func (s *Server) ResolveIdentityFromToken(token string) (bool, structs.ACLIdentity, error) {
// only allow remote RPC resolution when token replication is off and
// when not in the ACL datacenter
@ -131,7 +137,7 @@ func (s *Server) ResolveIdentityFromToken(token string) (bool, structs.ACLIdenti
return s.InPrimaryDatacenter() || index > 0, nil, acl.ErrNotFound
}
func (s *Server) ResolvePolicyFromID(policyID string) (bool, *structs.ACLPolicy, error) {
func (s *serverACLResolverBackend) ResolvePolicyFromID(policyID string) (bool, *structs.ACLPolicy, error) {
index, policy, err := s.fsm.State().ACLPolicyGetByID(nil, policyID, nil)
if err != nil {
return true, nil, err
@ -145,7 +151,7 @@ func (s *Server) ResolvePolicyFromID(policyID string) (bool, *structs.ACLPolicy,
return s.InPrimaryDatacenter() || index > 0, policy, acl.ErrNotFound
}
func (s *Server) ResolveRoleFromID(roleID string) (bool, *structs.ACLRole, error) {
func (s *serverACLResolverBackend) ResolveRoleFromID(roleID string) (bool, *structs.ACLRole, error) {
index, role, err := s.fsm.State().ACLRoleGetByID(nil, roleID, nil)
if err != nil {
return true, nil, err
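
The un-embed TODOs above point at the pattern this change introduces: instead of *Client/*Server satisfying the resolver's interface directly, a small named wrapper carries only the backend methods. A self-contained sketch of that shape (names are illustrative, not Consul's real types):

```go
package main

import "fmt"

type Server struct{ primaryDatacenter string }

// ACLResolverBackend is trimmed to one method for illustration.
type ACLResolverBackend interface {
	ACLDatacenter() string
}

// serverACLResolverBackend embeds *Server but is a distinct type, so the
// backend methods no longer pollute Server's own method set.
type serverACLResolverBackend struct{ *Server }

func (s *serverACLResolverBackend) ACLDatacenter() string {
	return s.primaryDatacenter
}

func main() {
	srv := &Server{primaryDatacenter: "dc1"}
	var backend ACLResolverBackend = &serverACLResolverBackend{Server: srv}
	fmt.Println(backend.ACLDatacenter())
}
```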

@ -715,7 +715,7 @@ func newTestACLResolver(t *testing.T, delegate *ACLResolverTestDelegate, cb func
Roles: 4,
},
DisableDuration: aclClientDisabledTTL,
Delegate: delegate,
Backend: delegate,
}
if cb != nil {

@ -119,7 +119,7 @@ func NewClient(config *Config, deps Deps) (*Client, error) {
aclConfig := ACLResolverConfig{
Config: config.ACLResolverSettings,
Delegate: c,
Backend: &clientACLResolverBackend{Client: c},
Logger: c.logger,
DisableDuration: aclClientDisabledTTL,
CacheConfig: clientACLCacheConfig,

@ -197,11 +197,18 @@ func (c *CAManager) secondarySetPrimaryRoots(newRoots structs.IndexedCARoots) {
c.primaryRoots = newRoots
}
func (c *CAManager) secondaryGetPrimaryRoots() structs.IndexedCARoots {
func (c *CAManager) secondaryGetActivePrimaryCARoot() (*structs.CARoot, error) {
// TODO: this could be a different lock, as long as it's the same lock in secondarySetPrimaryRoots
c.stateLock.Lock()
defer c.stateLock.Unlock()
return c.primaryRoots
primaryRoots := c.primaryRoots
c.stateLock.Unlock()
for _, root := range primaryRoots.Roots {
if root.ID == primaryRoots.ActiveRootID && root.Active {
return root, nil
}
}
return nil, fmt.Errorf("primary datacenter does not have an active root CA for Connect")
}
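(Editor's note: the new helper copies primaryRoots while holding stateLock, releases the lock, and only then scans for the root matching ActiveRootID — keeping the lock hold time minimal and returning an error instead of a nil root when the primary has no active CA.)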
// initializeCAConfig is used to initialize the CA config if necessary
@ -475,16 +482,12 @@ func (c *CAManager) primaryInitialize(provider ca.Provider, conf *structs.CAConf
if err := provider.Configure(pCfg); err != nil {
return fmt.Errorf("error configuring provider: %v", err)
}
if err := provider.GenerateRoot(); err != nil {
root, err := provider.GenerateRoot()
if err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
// Get the active root cert from the CA
rootPEM, err := provider.ActiveRoot()
if err != nil {
return fmt.Errorf("error getting root cert: %v", err)
}
rootCA, err := parseCARoot(rootPEM, conf.Provider, conf.ClusterID)
rootCA, err := parseCARoot(root.PEM, conf.Provider, conf.ClusterID)
if err != nil {
return err
}
@ -602,79 +605,45 @@ func (c *CAManager) getLeafSigningCertFromRoot(root *structs.CARoot) string {
return root.IntermediateCerts[len(root.IntermediateCerts)-1]
}
// secondaryInitializeIntermediateCA runs the routine for generating an intermediate CA CSR and getting
// it signed by the primary DC if the root CA of the primary DC has changed since the last
// intermediate. It should only be called while the state lock is held by setting the state
// to non-ready.
// secondaryInitializeIntermediateCA generates a Certificate Signing Request (CSR)
// for the intermediate CA that is used to sign leaf certificates in the secondary.
// The CSR is signed by the primary DC and then persisted in the state store.
//
// This method should only be called while the state lock is held by setting the
// state to non-ready.
func (c *CAManager) secondaryInitializeIntermediateCA(provider ca.Provider, config *structs.CAConfiguration) error {
activeIntermediate, err := provider.ActiveIntermediate()
if err != nil {
return err
}
var (
storedRootID string
expectedSigningKeyID string
currentSigningKeyID string
activeSecondaryRoot *structs.CARoot
)
if activeIntermediate != "" {
// In the event that we already have an intermediate, we must have
// already replicated some primary root information locally, so check
// to see if we're up to date by fetching the rootID and the
// signingKeyID used in the secondary.
//
// Note that for the same rootID the primary representation of the root
// will have a different SigningKeyID field than the secondary
// representation of the same root. This is because it's derived from
// the intermediate which is different in all datacenters.
storedRoot, err := provider.ActiveRoot()
if err != nil {
return err
}
storedRootID, err = connect.CalculateCertFingerprint(storedRoot)
if err != nil {
return fmt.Errorf("error parsing root fingerprint: %v, %#v", err, storedRoot)
}
_, activeRoot, err := c.delegate.State().CARootActive(nil)
if err != nil {
return err
}
var currentSigningKeyID string
if activeRoot != nil {
currentSigningKeyID = activeRoot.SigningKeyID
}
var expectedSigningKeyID string
if activeIntermediate != "" {
intermediateCert, err := connect.ParseCert(activeIntermediate)
if err != nil {
return fmt.Errorf("error parsing active intermediate cert: %v", err)
}
expectedSigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId)
// This will fetch the secondary's exact current representation of the
// active root. Note that this data should only be used if the IDs
// match, otherwise it's out of date and should be regenerated.
_, activeSecondaryRoot, err = c.delegate.State().CARootActive(nil)
if err != nil {
return err
}
if activeSecondaryRoot != nil {
currentSigningKeyID = activeSecondaryRoot.SigningKeyID
}
}
// Determine which of the provided PRIMARY representations of roots is the
// active one. We'll use this as a template to generate any new root
// representations meant for this secondary.
var newActiveRoot *structs.CARoot
primaryRoots := c.secondaryGetPrimaryRoots()
for _, root := range primaryRoots.Roots {
if root.ID == primaryRoots.ActiveRootID && root.Active {
newActiveRoot = root
break
}
}
if newActiveRoot == nil {
return fmt.Errorf("primary datacenter does not have an active root CA for Connect")
newActiveRoot, err := c.secondaryGetActivePrimaryCARoot()
if err != nil {
return err
}
// Get a signed intermediate from the primary DC if the provider
// hasn't been initialized yet or if the primary's root has changed.
needsNewIntermediate := false
if activeIntermediate == "" || storedRootID != primaryRoots.ActiveRootID {
needsNewIntermediate := activeIntermediate == ""
if activeRoot != nil && newActiveRoot.ID != activeRoot.ID {
needsNewIntermediate = true
}
@ -684,28 +653,19 @@ func (c *CAManager) secondaryInitializeIntermediateCA(provider ca.Provider, conf
needsNewIntermediate = true
}
newIntermediate := false
if needsNewIntermediate {
if err := c.secondaryRenewIntermediate(provider, newActiveRoot); err != nil {
return err
}
newIntermediate = true
} else {
// Discard the primary's representation since our local one is
// sufficiently up to date.
newActiveRoot = activeSecondaryRoot
}
// Update the roots list in the state store if there's a new active root.
state := c.delegate.State()
_, activeRoot, err := state.CARootActive(nil)
if err != nil {
return err
newActiveRoot = activeRoot
}
// Determine whether a root update is needed, and persist the roots/config accordingly.
var newRoot *structs.CARoot
if activeRoot == nil || activeRoot.ID != newActiveRoot.ID || newIntermediate {
if activeRoot == nil || needsNewIntermediate {
newRoot = newActiveRoot
}
if err := c.persistNewRootAndConfig(provider, newRoot, config); err != nil {
@ -899,15 +859,12 @@ func (c *CAManager) UpdateConfiguration(args *structs.CARequest) (reterr error)
}
func (c *CAManager) primaryUpdateRootCA(newProvider ca.Provider, args *structs.CARequest, config *structs.CAConfiguration) error {
if err := newProvider.GenerateRoot(); err != nil {
return fmt.Errorf("error generating CA root certificate: %v", err)
}
newRootPEM, err := newProvider.ActiveRoot()
providerRoot, err := newProvider.GenerateRoot()
if err != nil {
return err
return fmt.Errorf("error generating CA root certificate: %v", err)
}
newRootPEM := providerRoot.PEM
newActiveRoot, err := parseCARoot(newRootPEM, args.Config.Provider, args.Config.ClusterID)
if err != nil {
return err
@ -961,6 +918,7 @@ func (c *CAManager) primaryUpdateRootCA(newProvider ca.Provider, args *structs.C
// get a cross-signed certificate.
// 3. Take the active root for the new provider and append the intermediate from step 2
// to its list of intermediates.
// TODO: this cert is already parsed once in parseCARoot, could we remove the second parse?
newRoot, err := connect.ParseCert(newRootPEM)
if err != nil {
return err

@ -227,9 +227,8 @@ type mockCAProvider struct {
func (m *mockCAProvider) Configure(cfg ca.ProviderConfig) error { return nil }
func (m *mockCAProvider) State() (map[string]string, error) { return nil, nil }
func (m *mockCAProvider) GenerateRoot() error { return nil }
func (m *mockCAProvider) ActiveRoot() (string, error) {
return m.rootPEM, nil
func (m *mockCAProvider) GenerateRoot() (ca.RootResult, error) {
return ca.RootResult{PEM: m.rootPEM}, nil
}
func (m *mockCAProvider) GenerateIntermediateCSR() (string, error) {
m.callbackCh <- "provider/GenerateIntermediateCSR"

@ -919,108 +919,74 @@ type queryFn func(memdb.WatchSet, *state.Store) error
// blockingQuery is used to process a potentially blocking query operation.
func (s *Server) blockingQuery(queryOpts structs.QueryOptionsCompat, queryMeta structs.QueryMetaCompat, fn queryFn) error {
var cancel func()
var ctx context.Context = &lib.StopChannelContext{StopCh: s.shutdownCh}
var queriesBlocking uint64
var queryTimeout time.Duration
// Instrument all queries run
metrics.IncrCounter([]string{"rpc", "query"}, 1)
minQueryIndex := queryOpts.GetMinQueryIndex()
// Fast path right to the non-blocking query.
// Perform a non-blocking query
if minQueryIndex == 0 {
goto RUN_QUERY
}
if queryOpts.GetRequireConsistent() {
if err := s.consistentRead(); err != nil {
return err
}
}
queryTimeout = queryOpts.GetMaxQueryTime()
// Restrict the max query time, and ensure there is always one.
if queryTimeout > s.config.MaxQueryTime {
queryTimeout = s.config.MaxQueryTime
} else if queryTimeout <= 0 {
queryTimeout = s.config.DefaultQueryTime
var ws memdb.WatchSet
err := fn(ws, s.fsm.State())
s.setQueryMeta(queryMeta, queryOpts.GetToken())
return err
}
// Apply a small amount of jitter to the request.
queryTimeout += lib.RandomStagger(queryTimeout / structs.JitterFraction)
// wrap the base context with a deadline
ctx, cancel = context.WithDeadline(ctx, time.Now().Add(queryTimeout))
timeout := s.rpcQueryTimeout(queryOpts.GetMaxQueryTime())
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
// instrument blockingQueries
// atomic inc our server's count of in-flight blockingQueries and store the new value
queriesBlocking = atomic.AddUint64(&s.queriesBlocking, 1)
// atomic dec when we return from blockingQuery()
count := atomic.AddUint64(&s.queriesBlocking, 1)
metrics.SetGauge([]string{"rpc", "queries_blocking"}, float32(count))
// decrement the count when the function returns.
defer atomic.AddUint64(&s.queriesBlocking, ^uint64(0))
// set the gauge directly to the new value of s.blockingQueries
metrics.SetGauge([]string{"rpc", "queries_blocking"}, float32(queriesBlocking))
RUN_QUERY:
// Setup blocking loop
// Validate
// If the read must be consistent we verify that we are still the leader.
if queryOpts.GetRequireConsistent() {
if err := s.consistentRead(); err != nil {
return err
for {
if queryOpts.GetRequireConsistent() {
if err := s.consistentRead(); err != nil {
return err
}
}
}
// Run query
// Operate on a consistent set of state. This makes sure that the
// abandon channel goes with the state that the caller is using to
// build watches.
state := s.fsm.State()
// We can skip all watch tracking if this isn't a blocking query.
var ws memdb.WatchSet
if minQueryIndex > 0 {
ws = memdb.NewWatchSet()
// Operate on a consistent set of state. This makes sure that the
// abandon channel goes with the state that the caller is using to
// build watches.
state := s.fsm.State()
ws := memdb.NewWatchSet()
// This channel will be closed if a snapshot is restored and the
// whole state store is abandoned.
ws.Add(state.AbandonCh())
}
// Execute the queryFn
err := fn(ws, state)
// Update the query metadata.
s.setQueryMeta(queryMeta, queryOpts.GetToken())
// Note we check queryOpts.MinQueryIndex is greater than zero to determine if
// blocking was requested by client, NOT meta.Index since the state function
// might return zero if something is not initialized and care wasn't taken to
// handle that special case (in practice this happened a lot so fixing it
// systematically here beats trying to remember to add zero checks in every
// state method). We also need to ensure that unless there is an error, we
// return an index > 0 otherwise the client will never block and burn CPU and
// requests.
if err == nil && queryMeta.GetIndex() < 1 {
queryMeta.SetIndex(1)
}
// block up to the timeout if we don't see anything fresh.
if err == nil && minQueryIndex > 0 && queryMeta.GetIndex() <= minQueryIndex {
if err := ws.WatchCtx(ctx); err == nil {
// a non-nil error only occurs when the context is cancelled
// If a restore may have woken us up then bail out from
// the query immediately. This is slightly race-ey since
// this might have been interrupted for other reasons,
// but it's OK to kick it back to the caller in either
// case.
select {
case <-state.AbandonCh():
default:
// loop back and look for an update again
goto RUN_QUERY
}
err := fn(ws, state)
s.setQueryMeta(queryMeta, queryOpts.GetToken())
if err != nil {
return err
}
if queryMeta.GetIndex() > minQueryIndex {
return nil
}
// block until something changes, or the timeout
if err := ws.WatchCtx(ctx); err != nil {
// exit if we've reached the timeout, or other cancellation
return nil
}
// exit if the state store has been abandoned
select {
case <-state.AbandonCh():
return nil
default:
}
}
return err
}
// setQueryMeta is used to populate the QueryMeta data for an RPC call
@ -1035,6 +1001,17 @@ func (s *Server) setQueryMeta(m structs.QueryMetaCompat, token string) {
m.SetKnownLeader(s.raft.Leader() != "")
}
maskResultsFilteredByACLs(token, m)
// Always set a non-zero QueryMeta.Index. Generally we expect the
// QueryMeta.Index to be set to structs.RaftIndex.ModifyIndex. If the query
// returned no results we expect it to be set to the max index of the table,
// however we can't guarantee this always happens.
// To prevent a client from accidentally performing many non-blocking queries
// (which causes lots of unnecessary load), we always set a default value of 1.
// This is sufficient to prevent the unnecessary load in most cases.
if m.GetIndex() < 1 {
m.SetIndex(1)
}
}
// consistentRead is used to ensure we do not perform a stale
@ -1070,6 +1047,22 @@ func (s *Server) consistentRead() error {
return structs.ErrNotReadyForConsistentReads
}
// rpcQueryTimeout calculates the timeout for the query, ensures it is
// constrained to the configured limit, and adds jitter to prevent multiple
// blocking queries from all timing out at the same time.
func (s *Server) rpcQueryTimeout(queryTimeout time.Duration) time.Duration {
// Restrict the max query time, and ensure there is always one.
if queryTimeout > s.config.MaxQueryTime {
queryTimeout = s.config.MaxQueryTime
} else if queryTimeout <= 0 {
queryTimeout = s.config.DefaultQueryTime
}
// Apply a small amount of jitter to the request.
queryTimeout += lib.RandomStagger(queryTimeout / structs.JitterFraction)
return queryTimeout
}
// maskResultsFilteredByACLs blanks out the ResultsFilteredByACLs flag if the
// request is unauthenticated, to limit information leaking.
//
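
The refactor above replaces the goto-based fast path with a plain loop: run the query, return once the result index passes the client's MinQueryIndex, and otherwise block on the watch set until a change, the jittered and capped timeout from rpcQueryTimeout, or a state-store restore. A minimal self-contained sketch of that loop shape using go-memdb (not Consul's actual code; the run callback is illustrative):

```go
package rpc

import (
	"context"
	"time"

	memdb "github.com/hashicorp/go-memdb"
)

// blockingLoop re-runs the query until its index passes minIndex or ctx expires.
func blockingLoop(ctx context.Context, minIndex uint64, run func(memdb.WatchSet) (uint64, error)) error {
	ctx, cancel := context.WithTimeout(ctx, 5*time.Minute) // stands in for rpcQueryTimeout
	defer cancel()
	for {
		ws := memdb.NewWatchSet()
		index, err := run(ws)
		if err != nil {
			return err
		}
		if index > minIndex {
			return nil
		}
		// WatchCtx returns a non-nil error only on context cancellation,
		// i.e. the timeout: return whatever result we have.
		if err := ws.WatchCtx(ctx); err != nil {
			return nil
		}
	}
}
```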

@ -236,7 +236,7 @@ func TestRPC_blockingQuery(t *testing.T) {
// Perform a non-blocking query. Note that it's significant that the meta has
// a zero index in response - the implied opts.MinQueryIndex is also zero but
// this should still not block.
{
t.Run("non-blocking query", func(t *testing.T) {
var opts structs.QueryOptions
var meta structs.QueryMeta
var calls int
@ -244,16 +244,13 @@ func TestRPC_blockingQuery(t *testing.T) {
calls++
return nil
}
if err := s.blockingQuery(&opts, &meta, fn); err != nil {
t.Fatalf("err: %v", err)
}
if calls != 1 {
t.Fatalf("bad: %d", calls)
}
}
err := s.blockingQuery(&opts, &meta, fn)
require.NoError(t, err)
require.Equal(t, 1, calls)
})
// Perform a blocking query that gets woken up and loops around once.
{
t.Run("blocking query - single loop", func(t *testing.T) {
opts := structs.QueryOptions{
MinQueryIndex: 3,
}
@ -272,13 +269,10 @@ func TestRPC_blockingQuery(t *testing.T) {
calls++
return nil
}
if err := s.blockingQuery(&opts, &meta, fn); err != nil {
t.Fatalf("err: %v", err)
}
if calls != 2 {
t.Fatalf("bad: %d", calls)
}
}
err := s.blockingQuery(&opts, &meta, fn)
require.NoError(t, err)
require.Equal(t, 2, calls)
})
// Perform a blocking query that returns a zero index from blocking func (e.g.
// no state yet). This should still return an empty response immediately, but
@ -289,7 +283,7 @@ func TestRPC_blockingQuery(t *testing.T) {
// covered by tests but eventually when hit in the wild causes blocking
// clients to busy loop and burn CPU. This test ensure that blockingQuery
// systematically does the right thing to prevent future bugs like that.
{
t.Run("blocking query with 0 modifyIndex from state func", func(t *testing.T) {
opts := structs.QueryOptions{
MinQueryIndex: 0,
}
@ -327,11 +321,11 @@ func TestRPC_blockingQuery(t *testing.T) {
assert.True(t, t1.Sub(t0) > 20*time.Millisecond,
"should have actually blocked waiting for timeout")
}
})
// Perform a query that blocks and gets interrupted when the state store
// is abandoned.
{
t.Run("blocking query interrupted by abandonCh", func(t *testing.T) {
opts := structs.QueryOptions{
MinQueryIndex: 3,
}
@ -360,13 +354,10 @@ func TestRPC_blockingQuery(t *testing.T) {
calls++
return nil
}
if err := s.blockingQuery(&opts, &meta, fn); err != nil {
t.Fatalf("err: %v", err)
}
if calls != 1 {
t.Fatalf("bad: %d", calls)
}
}
err := s.blockingQuery(&opts, &meta, fn)
require.NoError(t, err)
require.Equal(t, 1, calls)
})
t.Run("ResultsFilteredByACLs is reset for unauthenticated calls", func(t *testing.T) {
opts := structs.QueryOptions{

@ -450,7 +450,7 @@ func NewServer(config *Config, flat Deps) (*Server, error) {
s.aclConfig = newACLConfig(partitionInfo, logger)
aclConfig := ACLResolverConfig{
Config: config.ACLResolverSettings,
Delegate: s,
Backend: &serverACLResolverBackend{Server: s},
CacheConfig: serverACLCacheConfig,
Logger: logger,
ACLConfig: s.aclConfig,

@ -32,31 +32,26 @@ func (e EventPayloadCheckServiceNode) HasReadPermission(authz acl.Authorizer) bo
return e.Value.CanRead(authz) == acl.Allow
}
func (e EventPayloadCheckServiceNode) MatchesKey(key, namespace, partition string) bool {
if key == "" && namespace == "" && partition == "" {
return true
func (e EventPayloadCheckServiceNode) Subject() stream.Subject {
partition := e.Value.Service.PartitionOrDefault()
if e.overridePartition != "" {
partition = e.overridePartition
}
partition = strings.ToLower(partition)
if e.Value.Service == nil {
return false
namespace := e.Value.Service.NamespaceOrDefault()
if e.overrideNamespace != "" {
namespace = e.overrideNamespace
}
namespace = strings.ToLower(namespace)
name := e.Value.Service.Service
key := e.Value.Service.Service
if e.overrideKey != "" {
name = e.overrideKey
}
ns := e.Value.Service.EnterpriseMeta.NamespaceOrDefault()
if e.overrideNamespace != "" {
ns = e.overrideNamespace
}
ap := e.Value.Service.EnterpriseMeta.PartitionOrDefault()
if e.overridePartition != "" {
ap = e.overridePartition
key = e.overrideKey
}
key = strings.ToLower(key)
return (key == "" || strings.EqualFold(key, name)) &&
(namespace == "" || strings.EqualFold(namespace, ns)) &&
(partition == "" || strings.EqualFold(partition, ap))
return stream.Subject(partition + "/" + namespace + "/" + key)
}
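(Editor's note: replacing MatchesKey with Subject collapses the old three-field comparison into a single normalized "partition/namespace/key" string, so the event publisher can index subscriptions by subject instead of testing every event against every subscriber's key, namespace, and partition.)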
// serviceHealthSnapshot returns a stream.SnapshotFunc that provides a snapshot
@ -67,8 +62,7 @@ func serviceHealthSnapshot(db ReadDB, topic stream.Topic) stream.SnapshotFunc {
defer tx.Abort()
connect := topic == topicServiceHealthConnect
entMeta := structs.NewEnterpriseMetaWithPartition(req.Partition, req.Namespace)
idx, nodes, err := checkServiceNodesTxn(tx, nil, req.Key, connect, &entMeta)
idx, nodes, err := checkServiceNodesTxn(tx, nil, req.Key, connect, &req.EnterpriseMeta)
if err != nil {
return 0, err
}

@ -11,11 +11,106 @@ import (
"github.com/hashicorp/consul/agent/consul/stream"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/proto/pbcommon"
"github.com/hashicorp/consul/proto/pbsubscribe"
"github.com/hashicorp/consul/types"
)
func TestEventPayloadCheckServiceNode_SubjectMatchesRequests(t *testing.T) {
// Matches.
for desc, tc := range map[string]struct {
evt EventPayloadCheckServiceNode
req stream.SubscribeRequest
}{
"default partition and namespace": {
EventPayloadCheckServiceNode{
Value: &structs.CheckServiceNode{
Service: &structs.NodeService{
Service: "foo",
},
},
},
stream.SubscribeRequest{
Key: "foo",
EnterpriseMeta: structs.EnterpriseMeta{},
},
},
"mixed casing": {
EventPayloadCheckServiceNode{
Value: &structs.CheckServiceNode{
Service: &structs.NodeService{
Service: "FoO",
},
},
},
stream.SubscribeRequest{Key: "foo"},
},
"override key": {
EventPayloadCheckServiceNode{
Value: &structs.CheckServiceNode{
Service: &structs.NodeService{
Service: "foo",
},
},
overrideKey: "bar",
},
stream.SubscribeRequest{Key: "bar"},
},
} {
t.Run(desc, func(t *testing.T) {
require.Equal(t, tc.req.Subject(), tc.evt.Subject())
})
}
// Non-matches.
for desc, tc := range map[string]struct {
evt EventPayloadCheckServiceNode
req stream.SubscribeRequest
}{
"different key": {
EventPayloadCheckServiceNode{
Value: &structs.CheckServiceNode{
Service: &structs.NodeService{
Service: "foo",
},
},
},
stream.SubscribeRequest{
Key: "bar",
},
},
"different partition": {
EventPayloadCheckServiceNode{
Value: &structs.CheckServiceNode{
Service: &structs.NodeService{
Service: "foo",
},
},
overridePartition: "bar",
},
stream.SubscribeRequest{
Key: "foo",
},
},
"different namespace": {
EventPayloadCheckServiceNode{
Value: &structs.CheckServiceNode{
Service: &structs.NodeService{
Service: "foo",
},
},
overrideNamespace: "bar",
},
stream.SubscribeRequest{
Key: "foo",
},
},
} {
t.Run(desc, func(t *testing.T) {
require.NotEqual(t, tc.req.Subject(), tc.evt.Subject())
})
}
}
func TestServiceHealthSnapshot(t *testing.T) {
store := NewStateStore(nil)
@ -1771,7 +1866,7 @@ func assertDeepEqual(t *testing.T, x, y interface{}, opts ...cmp.Option) {
// all events for a particular topic are grouped together. The sort is
// stable so events with the same key retain their relative order.
//
// This sort should match the logic in EventPayloadCheckServiceNode.MatchesKey
// This sort should match the logic in EventPayloadCheckServiceNode.Subject
// to avoid masking bugs.
var cmpPartialOrderEvents = cmp.Options{
cmpopts.SortSlices(func(i, j stream.Event) bool {
@ -2418,107 +2513,6 @@ func newTestEventServiceHealthDeregister(index uint64, nodeNum int, svc string)
}
}
func TestEventPayloadCheckServiceNode_FilterByKey(t *testing.T) {
type testCase struct {
name string
payload EventPayloadCheckServiceNode
key string
namespace string
partition string // TODO(partitions): create test cases for this being set
expected bool
}
fn := func(t *testing.T, tc testCase) {
if tc.namespace != "" && pbcommon.DefaultEnterpriseMeta.Namespace == "" {
t.Skip("cant test namespace matching without namespace support")
}
require.Equal(t, tc.expected, tc.payload.MatchesKey(tc.key, tc.namespace, tc.partition))
}
var testCases = []testCase{
{
name: "no key or namespace",
payload: newPayloadCheckServiceNode("srv1", "ns1"),
expected: true,
},
{
name: "no key, with namespace match",
payload: newPayloadCheckServiceNode("srv1", "ns1"),
namespace: "ns1",
expected: true,
},
{
name: "no namespace, with key match",
payload: newPayloadCheckServiceNode("srv1", "ns1"),
key: "srv1",
expected: true,
},
{
name: "key match, namespace mismatch",
payload: newPayloadCheckServiceNode("srv1", "ns1"),
key: "srv1",
namespace: "ns2",
expected: false,
},
{
name: "key mismatch, namespace match",
payload: newPayloadCheckServiceNode("srv1", "ns1"),
key: "srv2",
namespace: "ns1",
expected: false,
},
{
name: "override key match",
payload: newPayloadCheckServiceNodeWithOverride("proxy", "ns1", "srv1", ""),
key: "srv1",
namespace: "ns1",
expected: true,
},
{
name: "override key mismatch",
payload: newPayloadCheckServiceNodeWithOverride("proxy", "ns1", "srv2", ""),
key: "proxy",
namespace: "ns1",
expected: false,
},
{
name: "override namespace match",
payload: newPayloadCheckServiceNodeWithOverride("proxy", "ns1", "", "ns2"),
key: "proxy",
namespace: "ns2",
expected: true,
},
{
name: "override namespace mismatch",
payload: newPayloadCheckServiceNodeWithOverride("proxy", "ns1", "", "ns3"),
key: "proxy",
namespace: "ns1",
expected: false,
},
{
name: "override both key and namespace match",
payload: newPayloadCheckServiceNodeWithOverride("proxy", "ns1", "srv1", "ns2"),
key: "srv1",
namespace: "ns2",
expected: true,
},
{
name: "override both key and namespace mismatch namespace",
payload: newPayloadCheckServiceNodeWithOverride("proxy", "ns1", "srv2", "ns3"),
key: "proxy",
namespace: "ns1",
expected: false,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
fn(t, tc)
})
}
}
func newPayloadCheckServiceNode(service, namespace string) EventPayloadCheckServiceNode {
return EventPayloadCheckServiceNode{
Value: &structs.CheckServiceNode{

@ -419,26 +419,14 @@ type nodePayload struct {
node *structs.ServiceNode
}
func (p nodePayload) MatchesKey(key, _, partition string) bool {
if key == "" && partition == "" {
return true
}
if p.node == nil {
return false
}
if structs.PartitionOrDefault(partition) != p.node.PartitionOrDefault() {
return false
}
return p.key == key
}
func (p nodePayload) HasReadPermission(acl.Authorizer) bool {
return true
}
func (p nodePayload) Subject() stream.Subject {
return stream.Subject(p.node.PartitionOrDefault() + "/" + p.node.NamespaceOrDefault() + "/" + p.key)
}
func createTokenAndWaitForACLEventPublish(t *testing.T, s *Store) *structs.ACLToken {
token := &structs.ACLToken{
AccessorID: "3af117a9-2233-4cf4-8ff8-3c749c9906b4",

@ -14,6 +14,11 @@ import (
// events which match the Topic.
type Topic fmt.Stringer
// Subject identifies a portion of a topic for which a subscriber wishes to
// receive events (e.g. health events for a particular service). It is usually
// the normalized resource name (including partition and namespace if applicable).
type Subject string
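// For example (illustrative only, not part of this change): health events for
// a service named "web" in the default partition and namespace would use
// Subject("default/default/web").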
// Event is a structure with identifiers and a payload. Events are Published to
// EventPublisher and returned to Subscribers.
type Event struct {
@ -26,18 +31,16 @@ type Event struct {
// should not modify the state of the payload if the Event is being submitted to
// EventPublisher.Publish.
type Payload interface {
// MatchesKey must return true if the Payload should be included in a
// subscription requested with the key, namespace, and partition.
//
// Generally this means that the payload matches the key, namespace, and
// partition or the payload is a special framing event that should be
// returned to every subscription.
MatchesKey(key, namespace, partition string) bool
// HasReadPermission uses the acl.Authorizer to determine if the items in the
// Payload are visible to the request. It returns true if the payload is
// authorized for Read, otherwise returns false.
HasReadPermission(authz acl.Authorizer) bool
// Subject is used to identify which subscribers should be notified of this
// event - e.g. those subscribing to health events for a particular service.
// It is usually the normalized resource name (including the partition and
// namespace if applicable).
Subject() Subject
}
// PayloadEvents is a Payload that may be returned by Subscription.Next when
@ -81,14 +84,6 @@ func (p *PayloadEvents) filter(f func(Event) bool) bool {
return true
}
// MatchesKey filters the PayloadEvents to those which match the key,
// namespace, and partition.
func (p *PayloadEvents) MatchesKey(key, namespace, partition string) bool {
return p.filter(func(event Event) bool {
return event.Payload.MatchesKey(key, namespace, partition)
})
}
func (p *PayloadEvents) Len() int {
return len(p.Items)
}
@ -101,6 +96,14 @@ func (p *PayloadEvents) HasReadPermission(authz acl.Authorizer) bool {
})
}
// Subject is required to satisfy the Payload interface but is not implemented
// by PayloadEvents. PayloadEvents structs are constructed by Subscription.Next
// *after* Subject has been used to dispatch the enclosed events to the correct
// buffer.
func (PayloadEvents) Subject() Subject {
panic("PayloadEvents does not implement Subject")
}
// IsEndOfSnapshot returns true if this is a framing event that indicates the
// snapshot has completed. Subsequent events from Subscription.Next will be
// streamed as they occur.
@ -117,12 +120,15 @@ func (e Event) IsNewSnapshotToFollow() bool {
type framingEvent struct{}
func (framingEvent) MatchesKey(string, string, string) bool {
func (framingEvent) HasReadPermission(acl.Authorizer) bool {
return true
}
func (framingEvent) HasReadPermission(acl.Authorizer) bool {
return true
// Subject is required by the Payload interface but is not implemented by
// framing events, as they are typically *manually* appended to the correct
// buffer and do not need to be routed using a Subject.
func (framingEvent) Subject() Subject {
panic("framing events do not implement Subject")
}
type endOfSnapshot struct {
@ -137,12 +143,15 @@ type closeSubscriptionPayload struct {
tokensSecretIDs []string
}
func (closeSubscriptionPayload) MatchesKey(string, string, string) bool {
func (closeSubscriptionPayload) HasReadPermission(acl.Authorizer) bool {
return false
}
func (closeSubscriptionPayload) HasReadPermission(acl.Authorizer) bool {
return false
// Subject is required by the Payload interface but it is not implemented by
// closeSubscriptionPayload, as this event type is handled separately and not
// actually appended to the buffer.
func (closeSubscriptionPayload) Subject() Subject {
panic("closeSubscriptionPayload does not implement Subject")
}
// NewCloseSubscriptionEvent returns a special Event that is handled by the

@ -20,16 +20,16 @@ type EventPublisher struct {
// seconds.
snapCacheTTL time.Duration
// This lock protects the topicBuffers, and snapCache
// This lock protects the snapCache, topicBuffers and topicBuffer.refs.
lock sync.RWMutex
// topicBuffers stores the head of the linked-list buffer to publish events to
// topicBuffers stores the head of the linked-list buffers to publish events to
// for a topic.
topicBuffers map[Topic]*eventBuffer
topicBuffers map[topicSubject]*topicBuffer
// snapCache is a cache of EventSnapshots indexed by topic and key.
// snapCache is a cache of EventSnapshots indexed by topic and subject.
// TODO(streaming): new snapshotCache struct for snapCache and snapCacheTTL
snapCache map[Topic]map[string]*eventSnapshot
snapCache map[topicSubject]*eventSnapshot
subscriptions *subscriptions
@ -41,6 +41,13 @@ type EventPublisher struct {
snapshotHandlers SnapshotHandlers
}
// topicSubject is used as a map key when accessing topic buffers and cached
// snapshots.
type topicSubject struct {
Topic Topic
Subject Subject
}
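// For example (illustrative only): with the fixtures in this package's tests,
// an event published under testTopic with simplePayload{key: "web"} is grouped
// under topicSubject{Topic: testTopic, Subject: "default/default/web"}.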
type subscriptions struct {
// lock for byToken. If both subscription.lock and EventPublisher.lock need
// to be held, EventPublisher.lock MUST always be acquired first.
@ -54,6 +61,14 @@ type subscriptions struct {
byToken map[string]map[*SubscribeRequest]*Subscription
}
// topicBuffer augments the eventBuffer with a reference counter, enabling
// clean up of unused buffers once there are no longer any subscribers for
// the given topic and key.
type topicBuffer struct {
refs int // refs is guarded by EventPublisher.lock.
buf *eventBuffer
}
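// Illustrative lifecycle (not part of this change): Subscribe increments refs,
// Unsubscribe decrements it via freeBuf, and once refs reaches zero the buffer
// and any cached snapshot for the topicSubject are deleted.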
// SnapshotHandlers is a mapping of Topic to a function which produces a snapshot
// of events for the SubscribeRequest. Events are appended to the snapshot using SnapshotAppender.
// The nil Topic is reserved and should not be used.
@ -79,8 +94,8 @@ type SnapshotAppender interface {
func NewEventPublisher(handlers SnapshotHandlers, snapCacheTTL time.Duration) *EventPublisher {
e := &EventPublisher{
snapCacheTTL: snapCacheTTL,
topicBuffers: make(map[Topic]*eventBuffer),
snapCache: make(map[Topic]map[string]*eventSnapshot),
topicBuffers: make(map[topicSubject]*topicBuffer),
snapCache: make(map[topicSubject]*eventSnapshot),
publishCh: make(chan []Event, 64),
subscriptions: &subscriptions{
byToken: make(map[string]map[*SubscribeRequest]*Subscription),
@ -116,36 +131,59 @@ func (e *EventPublisher) Run(ctx context.Context) {
// publishEvent appends the events to any applicable topic buffers. It handles
// any closeSubscriptionPayload events by closing associated subscriptions.
func (e *EventPublisher) publishEvent(events []Event) {
eventsByTopic := make(map[Topic][]Event)
groupedEvents := make(map[topicSubject][]Event)
for _, event := range events {
if unsubEvent, ok := event.Payload.(closeSubscriptionPayload); ok {
e.subscriptions.closeSubscriptionsForTokens(unsubEvent.tokensSecretIDs)
continue
}
eventsByTopic[event.Topic] = append(eventsByTopic[event.Topic], event)
groupKey := topicSubject{event.Topic, event.Payload.Subject()}
groupedEvents[groupKey] = append(groupedEvents[groupKey], event)
}
e.lock.Lock()
defer e.lock.Unlock()
for topic, events := range eventsByTopic {
e.getTopicBuffer(topic).Append(events)
for groupKey, events := range groupedEvents {
// Note: bufferForPublishing returns nil if there are no subscribers for the
// given topic and subject, in which case events will be dropped on the floor and
// future subscribers will catch up by consuming the snapshot.
if buf := e.bufferForPublishing(groupKey); buf != nil {
buf.Append(events)
}
}
}
// getTopicBuffer for the topic. Creates a new event buffer if one does not
// already exist.
// bufferForSubscription returns the topic event buffer to which events for the
// given topic and key will be appended. If no such buffer exists, a new buffer
// will be created.
//
// EventPublisher.lock must be held to call this method.
func (e *EventPublisher) getTopicBuffer(topic Topic) *eventBuffer {
buf, ok := e.topicBuffers[topic]
// Warning: e.lock MUST be held when calling this function.
func (e *EventPublisher) bufferForSubscription(key topicSubject) *topicBuffer {
buf, ok := e.topicBuffers[key]
if !ok {
buf = newEventBuffer()
e.topicBuffers[topic] = buf
buf = &topicBuffer{
buf: newEventBuffer(),
}
e.topicBuffers[key] = buf
}
return buf
}
// bufferForPublishing returns the event buffer to which events for the given
// topic and key should be appended. nil will be returned if there are no
// subscribers for the given topic and key.
//
// Warning: e.lock MUST be held when calling this function.
func (e *EventPublisher) bufferForPublishing(key topicSubject) *eventBuffer {
buf, ok := e.topicBuffers[key]
if !ok {
return nil
}
return buf.buf
}
// Subscribe returns a new Subscription for the given request. Unless
// req.Index points at a recent enough event for the stream to be resumed, the
// subscription will receive an initial snapshot of events matching the request.
// After the snapshot, events will be streamed as they are created.
@ -163,7 +201,34 @@ func (e *EventPublisher) Subscribe(req *SubscribeRequest) (*Subscription, error)
e.lock.Lock()
defer e.lock.Unlock()
topicHead := e.getTopicBuffer(req.Topic).Head()
topicBuf := e.bufferForSubscription(req.topicSubject())
topicBuf.refs++
// freeBuf is used to free the topic buffer once there are no remaining
// subscribers for the given topic and key.
//
// Note: it's called by Subscription.Unsubscribe which has its own side-effects
// that are made without holding e.lock (so there's a moment where the ref
// counter is inconsistent with the subscription map). In practice this is
// fine; we don't need these things to be strongly consistent. The alternative
// would be to hold both locks, which introduces the risk of deadlocks.
freeBuf := func() {
e.lock.Lock()
defer e.lock.Unlock()
topicBuf.refs--
if topicBuf.refs == 0 {
delete(e.topicBuffers, req.topicSubject())
// Evict cached snapshot too because the topic buffer will have been spliced
// onto it. If we don't do this, any new subscribers started before the cache
// TTL is reached will get "stuck" waiting on the old buffer.
delete(e.snapCache, req.topicSubject())
}
}
topicHead := topicBuf.buf.Head()
// If the client view is fresh, resume the stream.
if req.Index > 0 && topicHead.HasEventIndex(req.Index) {
@ -173,7 +238,7 @@ func (e *EventPublisher) Subscribe(req *SubscribeRequest) (*Subscription, error)
// the subscription will receive new events.
next, _ := topicHead.NextNoBlock()
buf.AppendItem(next)
return e.subscriptions.add(req, subscriptionHead), nil
return e.subscriptions.add(req, subscriptionHead, freeBuf), nil
}
snapFromCache := e.getCachedSnapshotLocked(req)
@ -186,7 +251,7 @@ func (e *EventPublisher) Subscribe(req *SubscribeRequest) (*Subscription, error)
// If the request.Index is 0 the client has no view, send a full snapshot.
if req.Index == 0 {
return e.subscriptions.add(req, snapFromCache.First), nil
return e.subscriptions.add(req, snapFromCache.First, freeBuf), nil
}
// otherwise the request has an Index, the client view is stale and must be reset
@ -197,11 +262,17 @@ func (e *EventPublisher) Subscribe(req *SubscribeRequest) (*Subscription, error)
Payload: newSnapshotToFollow{},
}})
result.buffer.AppendItem(snapFromCache.First)
return e.subscriptions.add(req, result.First), nil
return e.subscriptions.add(req, result.First, freeBuf), nil
}
func (s *subscriptions) add(req *SubscribeRequest, head *bufferItem) *Subscription {
sub := newSubscription(*req, head, s.unsubscribe(req))
func (s *subscriptions) add(req *SubscribeRequest, head *bufferItem, freeBuf func()) *Subscription {
// We wrap freeBuf in a sync.Once as it's expected that Subscription.unsub is
// idempotent, but freeBuf decrements the reference counter on every call.
var once sync.Once
sub := newSubscription(*req, head, func() {
s.unsubscribe(req)
once.Do(freeBuf)
})
s.lock.Lock()
defer s.lock.Unlock()
@ -228,24 +299,17 @@ func (s *subscriptions) closeSubscriptionsForTokens(tokenSecretIDs []string) {
}
}
// unsubscribe returns a function that the subscription will call to remove
// itself from the subsByToken.
// This function is returned as a closure so that the caller doesn't need to keep
// track of the SubscribeRequest, and cannot accidentally call unsubscribe with the
// wrong pointer.
func (s *subscriptions) unsubscribe(req *SubscribeRequest) func() {
return func() {
s.lock.Lock()
defer s.lock.Unlock()
subsByToken, ok := s.byToken[req.Token]
if !ok {
return
}
delete(subsByToken, req)
if len(subsByToken) == 0 {
delete(s.byToken, req.Token)
}
func (s *subscriptions) unsubscribe(req *SubscribeRequest) {
s.lock.Lock()
defer s.lock.Unlock()
subsByToken, ok := s.byToken[req.Token]
if !ok {
return
}
delete(subsByToken, req)
if len(subsByToken) == 0 {
delete(s.byToken, req.Token)
}
}
@ -262,13 +326,7 @@ func (s *subscriptions) closeAll() {
// EventPublisher.lock must be held to call this method.
func (e *EventPublisher) getCachedSnapshotLocked(req *SubscribeRequest) *eventSnapshot {
topicSnaps, ok := e.snapCache[req.Topic]
if !ok {
topicSnaps = make(map[string]*eventSnapshot)
e.snapCache[req.Topic] = topicSnaps
}
snap, ok := topicSnaps[snapCacheKey(req)]
snap, ok := e.snapCache[req.topicSubject()]
if ok && snap.err() == nil {
return snap
}
@ -280,16 +338,12 @@ func (e *EventPublisher) setCachedSnapshotLocked(req *SubscribeRequest, snap *ev
if e.snapCacheTTL == 0 {
return
}
e.snapCache[req.Topic][snapCacheKey(req)] = snap
e.snapCache[req.topicSubject()] = snap
// Setup a cache eviction
time.AfterFunc(e.snapCacheTTL, func() {
e.lock.Lock()
defer e.lock.Unlock()
delete(e.snapCache[req.Topic], snapCacheKey(req))
delete(e.snapCache, req.topicSubject())
})
}
func snapCacheKey(req *SubscribeRequest) string {
return req.Partition + "/" + req.Namespace + "/" + req.Key
}

@ -56,6 +56,13 @@ func TestEventPublisher_SubscribeWithIndex0(t *testing.T) {
Payload: simplePayload{key: "sub-key", value: "the-published-event-payload"},
}
require.Equal(t, expected, next)
// Subscriber should not see events for other keys
publisher.Publish([]Event{{
Topic: testTopic,
Payload: simplePayload{key: "other-key", value: "this-should-not-reach-the-subscriber"},
}})
assertNoResult(t, eventCh)
}
var testSnapshotEvent = Event{
@ -70,17 +77,12 @@ type simplePayload struct {
noReadPerm bool
}
func (p simplePayload) MatchesKey(key, _, _ string) bool {
if key == "" {
return true
}
return p.key == key
}
func (p simplePayload) HasReadPermission(acl.Authorizer) bool {
return !p.noReadPerm
}
func (p simplePayload) Subject() Subject { return Subject("default/default/" + p.key) }
func newTestSnapshotHandlers() SnapshotHandlers {
return SnapshotHandlers{
testTopic: func(req SubscribeRequest, buf SnapshotAppender) (uint64, error) {
@ -190,9 +192,10 @@ func TestEventPublisher_SubscribeWithIndex0_FromCache(t *testing.T) {
publisher := NewEventPublisher(newTestSnapshotHandlers(), time.Second)
go publisher.Run(ctx)
sub, err := publisher.Subscribe(req)
require.NoError(t, err)
sub.Unsubscribe()
defer sub.Unsubscribe()
publisher.snapshotHandlers[testTopic] = func(_ SubscribeRequest, _ SnapshotAppender) (uint64, error) {
return 0, fmt.Errorf("error should not be seen, cache should have been used")
@ -200,6 +203,7 @@ func TestEventPublisher_SubscribeWithIndex0_FromCache(t *testing.T) {
sub, err = publisher.Subscribe(req)
require.NoError(t, err)
defer sub.Unsubscribe()
eventCh := runSubscription(ctx, sub)
next := getNextEvent(t, eventCh)
@ -233,7 +237,11 @@ func TestEventPublisher_SubscribeWithIndexNotZero_CanResume(t *testing.T) {
publisher := NewEventPublisher(newTestSnapshotHandlers(), time.Second)
go publisher.Run(ctx)
// Include the same event in the topicBuffer
simulateExistingSubscriber(t, publisher, req)
// Publish the testSnapshotEvent, to ensure that it is skipped over when
// splicing the topic buffer onto the snapshot.
publisher.publishEvent([]Event{testSnapshotEvent})
runStep(t, "start a subscription and unsub", func(t *testing.T) {
@ -338,7 +346,11 @@ func TestEventPublisher_SubscribeWithIndexNotZero_NewSnapshotFromCache(t *testin
publisher := NewEventPublisher(newTestSnapshotHandlers(), time.Second)
go publisher.Run(ctx)
// Include the same event in the topicBuffer
simulateExistingSubscriber(t, publisher, req)
// Publish the testSnapshotEvent, to ensure that it is skipped over when
// splicing the topic buffer onto the snapshot.
publisher.publishEvent([]Event{testSnapshotEvent})
runStep(t, "start a subscription and unsub", func(t *testing.T) {
@ -421,7 +433,11 @@ func TestEventPublisher_SubscribeWithIndexNotZero_NewSnapshot_WithCache(t *testi
publisher := NewEventPublisher(handlers, time.Second)
go publisher.Run(ctx)
// Include the same events in the topicBuffer
simulateExistingSubscriber(t, publisher, req)
// Publish the events, to ensure they are skipped over when splicing the
// topic buffer onto the snapshot.
publisher.publishEvent([]Event{testSnapshotEvent})
publisher.publishEvent([]Event{nextEvent})
@ -495,3 +511,60 @@ func TestEventPublisher_Unsubscribe_ClosesSubscription(t *testing.T) {
require.Error(t, err)
require.Contains(t, err.Error(), "subscription was closed by unsubscribe")
}
func TestEventPublisher_Unsubscribe_FreesResourcesWhenThereAreNoSubscribers(t *testing.T) {
req := &SubscribeRequest{
Topic: testTopic,
Key: "sub-key",
}
publisher := NewEventPublisher(newTestSnapshotHandlers(), time.Second)
sub1, err := publisher.Subscribe(req)
require.NoError(t, err)
// Expect a topic buffer and snapshot to have been created.
publisher.lock.Lock()
require.NotNil(t, publisher.topicBuffers[req.topicSubject()])
require.NotNil(t, publisher.snapCache[req.topicSubject()])
publisher.lock.Unlock()
// Create another subscription and close the old one, to ensure the buffer and
// snapshot stick around as long as there's at least one subscriber.
sub2, err := publisher.Subscribe(req)
require.NoError(t, err)
sub1.Unsubscribe()
publisher.lock.Lock()
require.NotNil(t, publisher.topicBuffers[req.topicSubject()])
require.NotNil(t, publisher.snapCache[req.topicSubject()])
publisher.lock.Unlock()
// Close the other subscription and expect the buffer and snapshot to have
// been cleaned up.
sub2.Unsubscribe()
publisher.lock.Lock()
require.Nil(t, publisher.topicBuffers[req.topicSubject()])
require.Nil(t, publisher.snapCache[req.topicSubject()])
publisher.lock.Unlock()
}
// simulateExistingSubscriber creates a subscription that remains open throughout
// a test to prevent the topic buffer from being garbage-collected.
//
// It evicts the created snapshot from the cache immediately (simulating an
// existing subscription that has been open long enough that the snapshot's TTL
// has been reached) so you can test snapshots getting created afresh.
func simulateExistingSubscriber(t *testing.T, p *EventPublisher, r *SubscribeRequest) {
t.Helper()
sub, err := p.Subscribe(r)
require.NoError(t, err)
t.Cleanup(sub.Unsubscribe)
p.lock.Lock()
delete(p.snapCache, r.topicSubject())
p.lock.Unlock()
}

@ -20,119 +20,6 @@ func newSimpleEvent(key string, index uint64) Event {
return Event{Index: index, Payload: simplePayload{key: key}}
}
func TestPayloadEvents_FilterByKey(t *testing.T) {
type testCase struct {
name string
req SubscribeRequest
events []Event
expectEvent bool
expected *PayloadEvents
expectedCap int
}
fn := func(t *testing.T, tc testCase) {
events := make([]Event, 0, 5)
events = append(events, tc.events...)
pe := &PayloadEvents{Items: events}
ok := pe.MatchesKey(tc.req.Key, tc.req.Namespace, tc.req.Partition)
require.Equal(t, tc.expectEvent, ok)
if !tc.expectEvent {
return
}
require.Equal(t, tc.expected, pe)
// test if there was a new array allocated or not
require.Equal(t, tc.expectedCap, cap(pe.Items))
}
var testCases = []testCase{
{
name: "all events match, no key or namespace",
req: SubscribeRequest{Topic: testTopic},
events: []Event{
newSimpleEvent("One", 102),
newSimpleEvent("Two", 102)},
expectEvent: true,
expected: newPayloadEvents(
newSimpleEvent("One", 102),
newSimpleEvent("Two", 102)),
expectedCap: 5,
},
{
name: "all events match, no namespace",
req: SubscribeRequest{Topic: testTopic, Key: "Same"},
events: []Event{
newSimpleEvent("Same", 103),
newSimpleEvent("Same", 103)},
expectEvent: true,
expected: newPayloadEvents(
newSimpleEvent("Same", 103),
newSimpleEvent("Same", 103)),
expectedCap: 5,
},
{
name: "all events match, no key",
req: SubscribeRequest{Topic: testTopic, Namespace: "apps"},
events: []Event{
newNSEvent("Something", "apps"),
newNSEvent("Other", "apps")},
expectEvent: true,
expected: newPayloadEvents(
newNSEvent("Something", "apps"),
newNSEvent("Other", "apps")),
expectedCap: 5,
},
{
name: "some evens match, no namespace",
req: SubscribeRequest{Topic: testTopic, Key: "Same"},
events: []Event{
newSimpleEvent("Same", 104),
newSimpleEvent("Other", 104),
newSimpleEvent("Same", 104)},
expectEvent: true,
expected: newPayloadEvents(
newSimpleEvent("Same", 104),
newSimpleEvent("Same", 104)),
expectedCap: 2,
},
{
name: "some events match, no key",
req: SubscribeRequest{Topic: testTopic, Namespace: "apps"},
events: []Event{
newNSEvent("app1", "apps"),
newNSEvent("db1", "dbs"),
newNSEvent("app2", "apps")},
expectEvent: true,
expected: newPayloadEvents(
newNSEvent("app1", "apps"),
newNSEvent("app2", "apps")),
expectedCap: 2,
},
{
name: "no events match key",
req: SubscribeRequest{Topic: testTopic, Key: "Other"},
events: []Event{
newSimpleEvent("Same", 0),
newSimpleEvent("Same", 0)},
},
{
name: "no events match namespace",
req: SubscribeRequest{Topic: testTopic, Namespace: "apps"},
events: []Event{
newNSEvent("app1", "group1"),
newNSEvent("app2", "group2")},
expectEvent: false,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
fn(t, tc)
})
}
}
// TODO(partitions)
func newNSEvent(key, namespace string) Event {
return Event{Index: 22, Payload: nsPayload{key: key, namespace: namespace}}
@ -146,12 +33,6 @@ type nsPayload struct {
value string
}
func (p nsPayload) MatchesKey(key, namespace, partition string) bool {
return (key == "" || key == p.key) &&
(namespace == "" || namespace == p.namespace) &&
(partition == "" || partition == p.partition)
}
func TestPayloadEvents_HasReadPermission(t *testing.T) {
t.Run("some events filtered", func(t *testing.T) {
ep := newPayloadEvents(

@ -4,7 +4,10 @@ import (
"context"
"errors"
"fmt"
"strings"
"sync/atomic"
"github.com/hashicorp/consul/agent/structs"
)
const (
@ -59,12 +62,9 @@ type SubscribeRequest struct {
// be returned by the subscription. A blank key will return all events. Key
// is generally the name of the resource.
Key string
// Namespace used to filter events in the topic. Only events matching the
// namespace will be returned by the subscription.
Namespace string
// Partition used to filter events in the topic. Only events matching the
// partition will be returned by the subscription.
Partition string // TODO(partitions): make this work
// EnterpriseMeta is used to filter events in the topic. Only events matching
// the partition and namespace will be returned by the subscription.
EnterpriseMeta structs.EnterpriseMeta
// Token that was used to authenticate the request. If any ACL policy
// changes impact the token the subscription will be forcefully closed.
Token string
@ -74,6 +74,19 @@ type SubscribeRequest struct {
Index uint64
}
func (req SubscribeRequest) Subject() Subject {
var (
partition = req.EnterpriseMeta.PartitionOrDefault()
namespace = req.EnterpriseMeta.NamespaceOrDefault()
key = strings.ToLower(req.Key)
)
return Subject(partition + "/" + namespace + "/" + key)
}
func (req SubscribeRequest) topicSubject() topicSubject {
return topicSubject{req.Topic, req.Subject()}
}
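// Illustrative usage (a sketch, not part of this change): because Subject
// lower-cases the key and fills in default enterprise metadata, two requests
// that differ only in key casing share a topic buffer:
//
//	a := SubscribeRequest{Topic: testTopic, Key: "Web"}
//	b := SubscribeRequest{Topic: testTopic, Key: "web"}
//	// a.topicSubject() == b.topicSubject()
//	// both are topicSubject{testTopic, "default/default/web"}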
// newSubscription returns a new subscription. The caller is responsible for
// calling Unsubscribe when it is done with the subscription, to free resources.
func newSubscription(req SubscribeRequest, item *bufferItem, unsub func()) *Subscription {
@ -104,11 +117,7 @@ func (s *Subscription) Next(ctx context.Context) (Event, error) {
if len(next.Events) == 0 {
continue
}
event := newEventFromBatch(s.req, next.Events)
if !event.Payload.MatchesKey(s.req.Key, s.req.Namespace, s.req.Partition) {
continue
}
return event, nil
return newEventFromBatch(s.req, next.Events), nil
}
}

@ -6,10 +6,32 @@ import (
time "time"
"github.com/stretchr/testify/require"
"github.com/hashicorp/consul/agent/structs"
)
func noopUnSub() {}
func TestSubscription_Subject(t *testing.T) {
for desc, tc := range map[string]struct {
req SubscribeRequest
sub Subject
}{
"default partition and namespace": {
SubscribeRequest{Key: "foo", EnterpriseMeta: structs.EnterpriseMeta{}},
"default/default/foo",
},
"mixed casing": {
SubscribeRequest{Key: "BaZ"},
"default/default/baz",
},
} {
t.Run(desc, func(t *testing.T) {
require.Equal(t, tc.sub, tc.req.Subject())
})
}
}
func TestSubscription(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")
@ -59,10 +81,6 @@ func TestSubscription(t *testing.T) {
"Event should have been delivered after short time, took %s", elapsed)
require.Equal(t, index, got.Index)
// Event with wrong key should not be delivered. Deliver a good message right
// so we don't have to block test thread forever or cancel func yet.
index++
publishTestEvent(index, eb, "nope")
index++
publishTestEvent(index, eb, "test")

@ -817,6 +817,7 @@ func TestTxn_Read(t *testing.T) {
},
QueryMeta: structs.QueryMeta{
KnownLeader: true,
Index: 1,
},
}
require.Equal(t, expected, out)

@ -6,7 +6,6 @@ import (
"io"
"net/http"
"strconv"
"strings"
"time"
"github.com/hashicorp/consul/acl"
@ -21,7 +20,11 @@ func (s *HTTPHandlers) EventFire(resp http.ResponseWriter, req *http.Request) (i
s.parseDC(req, &dc)
event := &UserEvent{}
event.Name = strings.TrimPrefix(req.URL.Path, "/v1/event/fire/")
var err error
event.Name, err = getPathSuffixUnescaped(req.URL.Path, "/v1/event/fire/")
if err != nil {
return nil, err
}
if event.Name == "" {
resp.WriteHeader(http.StatusBadRequest)
fmt.Fprint(resp, "Missing name")

@ -2,14 +2,16 @@ package agent
import (
"net/http"
"strings"
"github.com/hashicorp/consul/agent/structs"
)
// GET /v1/internal/federation-state/<datacenter>
func (s *HTTPHandlers) FederationStateGet(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
datacenterName := strings.TrimPrefix(req.URL.Path, "/v1/internal/federation-state/")
datacenterName, err := getPathSuffixUnescaped(req.URL.Path, "/v1/internal/federation-state/")
if err != nil {
return nil, err
}
if datacenterName == "" {
return nil, BadRequestError{Reason: "Missing datacenter name"}
}

@ -30,7 +30,11 @@ func (s *HTTPHandlers) HealthChecksInState(resp http.ResponseWriter, req *http.R
}
// Pull out the service name
args.State = strings.TrimPrefix(req.URL.Path, "/v1/health/state/")
var err error
args.State, err = getPathSuffixUnescaped(req.URL.Path, "/v1/health/state/")
if err != nil {
return nil, err
}
if args.State == "" {
resp.WriteHeader(http.StatusBadRequest)
fmt.Fprint(resp, "Missing check state")

@ -486,7 +486,10 @@ func parseIntentionStringComponent(input string, entMeta *structs.EnterpriseMeta
// IntentionSpecific handles the endpoint for /v1/connect/intentions/:id.
// Deprecated: use IntentionExact.
func (s *HTTPHandlers) IntentionSpecific(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
id := strings.TrimPrefix(req.URL.Path, "/v1/connect/intentions/")
id, err := getPathSuffixUnescaped(req.URL.Path, "/v1/connect/intentions/")
if err != nil {
return nil, err
}
switch req.Method {
case "GET":

@ -6,7 +6,6 @@ import (
"io"
"net/http"
"strconv"
"strings"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
@ -20,7 +19,11 @@ func (s *HTTPHandlers) KVSEndpoint(resp http.ResponseWriter, req *http.Request)
}
// Pull out the key name, validation left to each sub-handler
args.Key = strings.TrimPrefix(req.URL.Path, "/v1/kv/")
var err error
args.Key, err = getPathSuffixUnescaped(req.URL.Path, "/v1/kv/")
if err != nil {
return nil, err
}
// Check for a key list
keyList := false

@ -319,7 +319,10 @@ func (s *HTTPHandlers) PreparedQuerySpecific(resp http.ResponseWriter, req *http
}
path := req.URL.Path
id := strings.TrimPrefix(path, "/v1/query/")
id, err := getPathSuffixUnescaped(path, "/v1/query/")
if err != nil {
return nil, err
}
switch {
case strings.HasSuffix(path, "/execute"):

@ -57,6 +57,10 @@ func (h *Server) Subscribe(req *pbsubscribe.SubscribeRequest, serverStream pbsub
return err
}
if req.Key == "" {
return status.Error(codes.InvalidArgument, "Key is required")
}
sub, err := h.Backend.Subscribe(toStreamSubscribeRequest(req, entMeta))
if err != nil {
return err
@ -89,12 +93,11 @@ func (h *Server) Subscribe(req *pbsubscribe.SubscribeRequest, serverStream pbsub
func toStreamSubscribeRequest(req *pbsubscribe.SubscribeRequest, entMeta structs.EnterpriseMeta) *stream.SubscribeRequest {
return &stream.SubscribeRequest{
Topic: req.Topic,
Key: req.Key,
Token: req.Token,
Index: req.Index,
Namespace: entMeta.NamespaceOrEmpty(),
Partition: entMeta.PartitionOrEmpty(),
Topic: req.Topic,
Key: req.Key,
EnterpriseMeta: entMeta,
Token: req.Token,
Index: req.Index,
}
}

@ -30,6 +30,33 @@ import (
"github.com/hashicorp/consul/types"
)
func TestServer_Subscribe_KeyIsRequired(t *testing.T) {
backend, err := newTestBackend()
require.NoError(t, err)
addr := runTestServer(t, NewServer(backend, hclog.New(nil)))
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
t.Cleanup(cancel)
conn, err := gogrpc.DialContext(ctx, addr.String(), gogrpc.WithInsecure())
require.NoError(t, err)
t.Cleanup(logError(t, conn.Close))
client := pbsubscribe.NewStateChangeSubscriptionClient(conn)
stream, err := client.Subscribe(ctx, &pbsubscribe.SubscribeRequest{
Topic: pbsubscribe.Topic_ServiceHealth,
Key: "",
})
require.NoError(t, err)
_, err = stream.Recv()
require.Error(t, err)
require.Equal(t, codes.InvalidArgument.String(), status.Code(err).String())
require.Contains(t, err.Error(), "Key is required")
}
func TestServer_Subscribe_IntegrationWithBackend(t *testing.T) {
backend, err := newTestBackend()
require.NoError(t, err)
@ -878,6 +905,8 @@ func assertNoEvents(t *testing.T, chEvents chan eventOrError) {
func logError(t *testing.T, f func() error) func() {
return func() {
t.Helper()
if err := f(); err != nil {
t.Log(err.Error())
}

@ -3,7 +3,6 @@ package agent
import (
"fmt"
"net/http"
"strings"
"time"
"github.com/hashicorp/consul/agent/structs"
@ -72,7 +71,11 @@ func (s *HTTPHandlers) SessionDestroy(resp http.ResponseWriter, req *http.Reques
}
// Pull out the session id
args.Session.ID = strings.TrimPrefix(req.URL.Path, "/v1/session/destroy/")
var err error
args.Session.ID, err = getPathSuffixUnescaped(req.URL.Path, "/v1/session/destroy/")
if err != nil {
return nil, err
}
if args.Session.ID == "" {
resp.WriteHeader(http.StatusBadRequest)
fmt.Fprint(resp, "Missing session")
@ -97,7 +100,11 @@ func (s *HTTPHandlers) SessionRenew(resp http.ResponseWriter, req *http.Request)
}
// Pull out the session id
args.SessionID = strings.TrimPrefix(req.URL.Path, "/v1/session/renew/")
var err error
args.SessionID, err = getPathSuffixUnescaped(req.URL.Path, "/v1/session/renew/")
if err != nil {
return nil, err
}
args.Session = args.SessionID
if args.SessionID == "" {
resp.WriteHeader(http.StatusBadRequest)
@ -128,7 +135,11 @@ func (s *HTTPHandlers) SessionGet(resp http.ResponseWriter, req *http.Request) (
}
// Pull out the session id
args.SessionID = strings.TrimPrefix(req.URL.Path, "/v1/session/info/")
var err error
args.SessionID, err = getPathSuffixUnescaped(req.URL.Path, "/v1/session/info/")
if err != nil {
return nil, err
}
args.Session = args.SessionID
if args.SessionID == "" {
resp.WriteHeader(http.StatusBadRequest)
@ -183,7 +194,11 @@ func (s *HTTPHandlers) SessionsForNode(resp http.ResponseWriter, req *http.Reque
}
// Pull out the node name
args.Node = strings.TrimPrefix(req.URL.Path, "/v1/session/node/")
var err error
args.Node, err = getPathSuffixUnescaped(req.URL.Path, "/v1/session/node/")
if err != nil {
return nil, err
}
if args.Node == "" {
resp.WriteHeader(http.StatusBadRequest)
fmt.Fprint(resp, "Missing node name")

@ -76,9 +76,14 @@ type CARoot struct {
// SerialNumber is the x509 serial number of the certificate.
SerialNumber uint64
// SigningKeyID is the ID of the public key that corresponds to the private
// key used to sign leaf certificates. Is is the HexString format of the
// raw AuthorityKeyID bytes.
// SigningKeyID is the connect.HexString encoded id of the public key that
// corresponds to the private key used to sign leaf certificates in the
// local datacenter.
//
// The value comes from x509.Certificate.SubjectKeyId of the local leaf
// signing cert.
//
// See https://www.rfc-editor.org/rfc/rfc3280#section-4.2.1.1 for more detail.
SigningKeyID string
// ExternalTrustDomain is the trust domain this root was generated under. It
@ -192,10 +197,14 @@ type IssuedCert struct {
// This is encoded in standard hex separated by :.
SerialNumber string
// CertPEM and PrivateKeyPEM are the PEM-encoded certificate and private
// key for that cert, respectively. This should not be stored in the
// state store, but is present in the sign API response.
CertPEM string `json:",omitempty"`
// CertPEM is a PEM encoded bundle of a leaf certificate, optionally followed
// by one or more intermediate certificates that will form a chain of trust
// back to a root CA.
//
// This field is not persisted in the state store, but is present in the
// sign API response.
CertPEM string `json:",omitempty"`
// PrivateKeyPEM is the PEM encoded private key associated with CertPEM.
PrivateKeyPEM string `json:",omitempty"`
// Service is the name of the service for which the cert was issued.

@ -370,7 +370,9 @@ func (q QueryBackend) String() string {
// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
// Index in the raft log of the latest item returned by the query.
// Index in the raft log of the latest item returned by the query. If the
// query did not return any results, the Index will be a value that will
// change when a new item is added.
Index uint64
// If AllowStale is used, this is time elapsed since
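The guarantee above is what keeps blocking queries working even when a query matches nothing: a client can always hand the returned index back as its wait index. A minimal sketch using the Go API client (the key name is illustrative):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/consul/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}
	kv := client.KV()

	var waitIndex uint64
	for {
		// With WaitIndex > 0 this becomes a blocking query: it returns once
		// meta.LastIndex advances past waitIndex (or the wait time elapses).
		pair, meta, err := kv.Get("my/key", &api.QueryOptions{WaitIndex: waitIndex})
		if err != nil {
			panic(err)
		}
		fmt.Println(pair, meta.LastIndex)
		waitIndex = meta.LastIndex
	}
}
```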

@ -10,11 +10,12 @@ import (
"testing"
"time"
"github.com/hashicorp/raft"
"github.com/stretchr/testify/assert"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/testrpc"
"github.com/hashicorp/raft"
"github.com/stretchr/testify/assert"
)
func TestTxnEndpoint_Bad_JSON(t *testing.T) {
@ -385,6 +386,7 @@ func TestTxnEndpoint_KV_Actions(t *testing.T) {
},
QueryMeta: structs.QueryMeta{
KnownLeader: true,
Index: 1,
},
}
assert.Equal(t, expected, txnResp)

@ -133,7 +133,11 @@ func (s *HTTPHandlers) UINodeInfo(resp http.ResponseWriter, req *http.Request) (
}
// Verify we have some DC, or use the default
args.Node = strings.TrimPrefix(req.URL.Path, "/v1/internal/ui/node/")
var err error
args.Node, err = getPathSuffixUnescaped(req.URL.Path, "/v1/internal/ui/node/")
if err != nil {
return nil, err
}
if args.Node == "" {
resp.WriteHeader(http.StatusBadRequest)
fmt.Fprint(resp, "Missing node name")
@ -245,7 +249,11 @@ func (s *HTTPHandlers) UIGatewayServicesNodes(resp http.ResponseWriter, req *htt
}
// Pull out the service name
args.ServiceName = strings.TrimPrefix(req.URL.Path, "/v1/internal/ui/gateway-services-nodes/")
var err error
args.ServiceName, err = getPathSuffixUnescaped(req.URL.Path, "/v1/internal/ui/gateway-services-nodes/")
if err != nil {
return nil, err
}
if args.ServiceName == "" {
resp.WriteHeader(http.StatusBadRequest)
fmt.Fprint(resp, "Missing gateway name")
@ -287,7 +295,11 @@ func (s *HTTPHandlers) UIServiceTopology(resp http.ResponseWriter, req *http.Req
return nil, err
}
args.ServiceName = strings.TrimPrefix(req.URL.Path, "/v1/internal/ui/service-topology/")
var err error
args.ServiceName, err = getPathSuffixUnescaped(req.URL.Path, "/v1/internal/ui/service-topology/")
if err != nil {
return nil, err
}
if args.ServiceName == "" {
resp.WriteHeader(http.StatusBadRequest)
fmt.Fprint(resp, "Missing service name")
@ -566,7 +578,11 @@ func (s *HTTPHandlers) UIGatewayIntentions(resp http.ResponseWriter, req *http.R
}
// Pull out the service name
name := strings.TrimPrefix(req.URL.Path, "/v1/internal/ui/gateway-intentions/")
var err error
name, err := getPathSuffixUnescaped(req.URL.Path, "/v1/internal/ui/gateway-intentions/")
if err != nil {
return nil, err
}
if name == "" {
resp.WriteHeader(http.StatusBadRequest)
fmt.Fprint(resp, "Missing gateway name")
@ -647,7 +663,10 @@ func (s *HTTPHandlers) UIMetricsProxy(resp http.ResponseWriter, req *http.Reques
// here.
// Replace prefix in the path
subPath := strings.TrimPrefix(req.URL.Path, "/v1/internal/ui/metrics-proxy")
subPath, err := getPathSuffixUnescaped(req.URL.Path, "/v1/internal/ui/metrics-proxy")
if err != nil {
return nil, err
}
// Append that to the BaseURL (which might contain a path prefix component)
newURL := cfg.BaseURL + subPath

@ -165,12 +165,6 @@ func (s *Server) processDelta(stream ADSDeltaStream, reqCh <-chan *envoy_discove
return status.Errorf(codes.InvalidArgument, "type URL is required for ADS")
}
if handler, ok := handlers[req.TypeUrl]; ok {
if handler.Recv(req) {
generator.Logger.Trace("subscribing to type", "typeUrl", req.TypeUrl)
}
}
if node == nil && req.Node != nil {
node = req.Node
var err error
@ -180,6 +174,12 @@ func (s *Server) processDelta(stream ADSDeltaStream, reqCh <-chan *envoy_discove
}
}
if handler, ok := handlers[req.TypeUrl]; ok {
if handler.Recv(req, generator.ProxyFeatures) {
generator.Logger.Trace("subscribing to type", "typeUrl", req.TypeUrl)
}
}
case cfgSnap = <-stateCh:
newRes, err := generator.allResourcesFromSnapshot(cfgSnap)
if err != nil {
@ -434,7 +434,7 @@ func newDeltaType(
// Recv handles new discovery requests from envoy.
//
// Returns true the first time a type receives a request.
func (t *xDSDeltaType) Recv(req *envoy_discovery_v3.DeltaDiscoveryRequest) bool {
func (t *xDSDeltaType) Recv(req *envoy_discovery_v3.DeltaDiscoveryRequest, sf supportedProxyFeatures) bool {
if t == nil {
return false // not something we care about
}
@ -447,6 +447,16 @@ func (t *xDSDeltaType) Recv(req *envoy_discovery_v3.DeltaDiscoveryRequest) bool
t.wildcard = len(req.ResourceNamesSubscribe) == 0
t.registered = true
registeredThisTime = true
if sf.ForceLDSandCDSToAlwaysUseWildcardsOnReconnect {
switch t.typeURL {
case ListenerType, ClusterType:
if !t.wildcard {
t.wildcard = true
logger.Trace("fixing Envoy bug fixed in 1.19.0 by inferring wildcard mode for type")
}
}
}
}
/*

@ -554,6 +554,72 @@ func TestServer_DeltaAggregatedResources_v3_SlowEndpointPopulation(t *testing.T)
}
}
func TestServer_DeltaAggregatedResources_v3_GetAllClusterAfterConsulRestarted(t *testing.T) {
// This illustrates a scenario related to https://github.com/hashicorp/consul/issues/11833
aclResolve := func(id string) (acl.Authorizer, error) {
// Allow all
return acl.RootAuthorizer("manage"), nil
}
scenario := newTestServerDeltaScenario(t, aclResolve, "web-sidecar-proxy", "", 0)
_, mgr, errCh, envoy := scenario.server, scenario.mgr, scenario.errCh, scenario.envoy
envoy.EnvoyVersion = "1.18.0"
sid := structs.NewServiceID("web-sidecar-proxy", nil)
// Register the proxy to create state needed to Watch() on
mgr.RegisterProxy(t, sid)
var snap *proxycfg.ConfigSnapshot
runStep(t, "get into state after consul restarted", func(t *testing.T) {
snap = newTestSnapshot(t, nil, "")
// Send initial cluster discover.
// This is to simulate the discovery request call from envoy after being disconnected from the consul ADS stream.
//
// We need to force it to be an older version of envoy so that the wildcard-inference logic applies.
envoy.SendDeltaReq(t, ClusterType, &envoy_discovery_v3.DeltaDiscoveryRequest{
ResourceNamesSubscribe: []string{
"local_app",
"db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul",
},
InitialResourceVersions: map[string]string{
"local_app": "a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447",
"db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul": "5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03",
},
})
// Check no response sent yet
assertDeltaChanBlocked(t, envoy.deltaStream.sendCh)
requireProtocolVersionGauge(t, scenario, "v3", 1)
// Deliver a new snapshot
// The config contains 3 clusters: local_app, db, geo-cache.
// This simulates the fact that one additional (upstream) cluster was added to
// the sidecar service while xDS was disconnected (consul restarted).
mgr.DeliverConfig(t, sid, snap)
assertDeltaResponseSent(t, envoy.deltaStream.sendCh, &envoy_discovery_v3.DeltaDiscoveryResponse{
TypeUrl: ClusterType,
Nonce: hexString(1),
Resources: makeTestResources(t,
makeTestCluster(t, snap, "tcp:local_app"),
makeTestCluster(t, snap, "tcp:db"),
makeTestCluster(t, snap, "tcp:geo-cache"),
),
})
})
envoy.Close()
select {
case err := <-errCh:
require.NoError(t, err)
case <-time.After(50 * time.Millisecond):
t.Fatalf("timed out waiting for handler to finish")
}
}
func TestServer_DeltaAggregatedResources_v3_BasicProtocol_TCP_clusterChangesImpactEndpoints(t *testing.T) {
aclResolve := func(id string) (acl.Authorizer, error) {
// Allow all

@ -13,8 +13,7 @@ var (
// the zero'th point release of the last element of proxysupport.EnvoyVersions.
minSupportedVersion = version.Must(version.NewVersion("1.17.0"))
// add min version constraints for associated feature flags when necessary, for example:
// minVersionAllowingEmptyGatewayClustersWithIncrementalXDS = version.Must(version.NewVersion("1.16.0"))
minVersionToForceLDSandCDSToAlwaysUseWildcardsOnReconnect = version.Must(version.NewVersion("1.19.0"))
specificUnsupportedVersions = []unsupportedVersion{}
)
@ -26,16 +25,19 @@ type unsupportedVersion struct {
}
type supportedProxyFeatures struct {
// add version dependent feature flags here
// Older versions of Envoy incorrectly exploded a wildcard subscription for
// LDS and CDS into specific line items on incremental xDS reconnect. They
// would populate both InitialResourceVersions and ResourceNamesSubscribe
// when they SHOULD have left ResourceNamesSubscribe empty (or used an
// explicit "*" in later Envoy versions) to imply wildcard mode. On
// reconnect, Consul interpreted the lack of the wildcard attribute as
// implying that the Envoy instance should not receive updates for any
// newly created listeners and clusters for the remaining life of that
// Envoy sidecar process.
//
// For example, we previously had flags for Envoy < 1.16 called:
//
// GatewaysNeedStubClusterWhenEmptyWithIncrementalXDS
// IncrementalXDSUpdatesMustBeSerial
//
// Which then manifested in the code for checks with this struct populated.
// By dropping support for 1.15, we no longer have any special flags here
// but leaving this flagging functionality for future one-offs.
// see: https://github.com/envoyproxy/envoy/issues/16063
// see: https://github.com/envoyproxy/envoy/pull/16153
ForceLDSandCDSToAlwaysUseWildcardsOnReconnect bool
}
func determineSupportedProxyFeatures(node *envoy_core_v3.Node) (supportedProxyFeatures, error) {
@ -73,12 +75,9 @@ func determineSupportedProxyFeaturesFromVersion(version *version.Version) (suppo
sf := supportedProxyFeatures{}
// add version constraints to populate feature flags here when necessary, for example:
/*
if version.LessThan(minVersionAllowingEmptyGatewayClustersWithIncrementalXDS) {
sf.GatewaysNeedStubClusterWhenEmptyWithIncrementalXDS = true
}
*/
if version.LessThan(minVersionToForceLDSandCDSToAlwaysUseWildcardsOnReconnect) {
sf.ForceLDSandCDSToAlwaysUseWildcardsOnReconnect = true
}
return sf, nil
}
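// Illustrative only (not part of this change): given the constraint above,
//
//	v := version.Must(version.NewVersion("1.18.4"))
//	v.LessThan(minVersionToForceLDSandCDSToAlwaysUseWildcardsOnReconnect) // true, so the flag is set
//
// while any Envoy version >= 1.19.0 leaves the flag unset.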

@ -125,6 +125,12 @@ func TestDetermineSupportedProxyFeaturesFromString(t *testing.T) {
for _, v := range []string{
"1.17.0", "1.17.1", "1.17.2", "1.17.3", "1.17.4",
"1.18.0", "1.18.1", "1.18.2", "1.18.3", "1.18.4",
} {
cases[v] = testcase{expect: supportedProxyFeatures{
ForceLDSandCDSToAlwaysUseWildcardsOnReconnect: true,
}}
}
for _, v := range []string{
"1.19.0", "1.19.1",
"1.20.0", "1.20.1",
} {

@ -83,6 +83,8 @@ type TestEnvoy struct {
proxyID string
token string
EnvoyVersion string
deltaStream *TestADSDeltaStream // Incremental v3
}
@ -182,9 +184,14 @@ func (e *TestEnvoy) sendDeltaReq(
e.mu.Lock()
defer e.mu.Unlock()
ev, valid := stringToEnvoyVersion(proxysupport.EnvoyVersions[0])
stringVersion := e.EnvoyVersion
if stringVersion == "" {
stringVersion = proxysupport.EnvoyVersions[0]
}
ev, valid := stringToEnvoyVersion(stringVersion)
if !valid {
t.Fatal("envoy version is not valid: %s", proxysupport.EnvoyVersions[0])
t.Fatal("envoy version is not valid: %s", stringVersion)
}
if req == nil {

@ -7,7 +7,6 @@ import (
"fmt"
"io"
"net/http"
"net/url"
)
// ServiceKind is the kind of service being registered.
@ -628,7 +627,7 @@ func (a *Agent) AgentHealthServiceByID(serviceID string) (string, *AgentServiceC
}
func (a *Agent) AgentHealthServiceByIDOpts(serviceID string, q *QueryOptions) (string, *AgentServiceChecksInfo, error) {
path := fmt.Sprintf("/v1/agent/health/service/id/%v", url.PathEscape(serviceID))
path := fmt.Sprintf("/v1/agent/health/service/id/%v", serviceID)
r := a.c.newRequest("GET", path)
r.setQueryOptions(q)
r.params.Add("format", "json")
@ -669,7 +668,7 @@ func (a *Agent) AgentHealthServiceByName(service string) (string, []AgentService
}
func (a *Agent) AgentHealthServiceByNameOpts(service string, q *QueryOptions) (string, []AgentServiceChecksInfo, error) {
path := fmt.Sprintf("/v1/agent/health/service/name/%v", url.PathEscape(service))
path := fmt.Sprintf("/v1/agent/health/service/name/%v", service)
r := a.c.newRequest("GET", path)
r.setQueryOptions(q)
r.params.Add("format", "json")
@ -707,7 +706,7 @@ func (a *Agent) AgentHealthServiceByNameOpts(service string, q *QueryOptions) (s
// agent-local state. That means there is no persistent raft index so we block
// based on object hash instead.
func (a *Agent) Service(serviceID string, q *QueryOptions) (*AgentService, *QueryMeta, error) {
r := a.c.newRequest("GET", "/v1/agent/service/"+url.PathEscape(serviceID))
r := a.c.newRequest("GET", "/v1/agent/service/"+serviceID)
r.setQueryOptions(q)
rtt, resp, err := a.c.doRequest(r)
if err != nil {
@ -812,7 +811,7 @@ func (a *Agent) serviceRegister(service *AgentServiceRegistration, opts ServiceR
// ServiceDeregister is used to deregister a service with
// the local agent
func (a *Agent) ServiceDeregister(serviceID string) error {
r := a.c.newRequest("PUT", "/v1/agent/service/deregister/"+url.PathEscape(serviceID))
r := a.c.newRequest("PUT", "/v1/agent/service/deregister/"+serviceID)
_, resp, err := a.c.doRequest(r)
if err != nil {
return err
@ -827,7 +826,7 @@ func (a *Agent) ServiceDeregister(serviceID string) error {
// ServiceDeregisterOpts is used to deregister a service with
// the local agent with QueryOptions.
func (a *Agent) ServiceDeregisterOpts(serviceID string, q *QueryOptions) error {
r := a.c.newRequest("PUT", "/v1/agent/service/deregister/"+url.PathEscape(serviceID))
r := a.c.newRequest("PUT", "/v1/agent/service/deregister/"+serviceID)
r.setQueryOptions(q)
_, resp, err := a.c.doRequest(r)
if err != nil {
@ -884,7 +883,7 @@ func (a *Agent) updateTTL(checkID, note, status string) error {
default:
return fmt.Errorf("Invalid status: %s", status)
}
endpoint := fmt.Sprintf("/v1/agent/check/%s/%s", url.PathEscape(status), url.PathEscape(checkID))
endpoint := fmt.Sprintf("/v1/agent/check/%s/%s", status, checkID)
r := a.c.newRequest("PUT", endpoint)
r.params.Set("note", note)
_, resp, err := a.c.doRequest(r)
@ -932,7 +931,7 @@ func (a *Agent) UpdateTTLOpts(checkID, output, status string, q *QueryOptions) e
return fmt.Errorf("Invalid status: %s", status)
}
endpoint := fmt.Sprintf("/v1/agent/check/update/%s", url.PathEscape(checkID))
endpoint := fmt.Sprintf("/v1/agent/check/update/%s", checkID)
r := a.c.newRequest("PUT", endpoint)
r.setQueryOptions(q)
r.obj = &checkUpdate{
@ -976,7 +975,7 @@ func (a *Agent) CheckDeregister(checkID string) error {
// CheckDeregisterOpts is used to deregister a check with
// the local agent using query options
func (a *Agent) CheckDeregisterOpts(checkID string, q *QueryOptions) error {
r := a.c.newRequest("PUT", "/v1/agent/check/deregister/"+url.PathEscape(checkID))
r := a.c.newRequest("PUT", "/v1/agent/check/deregister/"+checkID)
r.setQueryOptions(q)
_, resp, err := a.c.doRequest(r)
if err != nil {
@ -992,7 +991,7 @@ func (a *Agent) CheckDeregisterOpts(checkID string, q *QueryOptions) error {
// Join is used to instruct the agent to attempt a join to
// another cluster member
func (a *Agent) Join(addr string, wan bool) error {
r := a.c.newRequest("PUT", "/v1/agent/join/"+url.PathEscape(addr))
r := a.c.newRequest("PUT", "/v1/agent/join/"+addr)
if wan {
r.params.Set("wan", "1")
}
@ -1044,7 +1043,7 @@ func (a *Agent) ForceLeavePrune(node string) error {
// ForceLeaveOpts is used to have the agent eject a failed node or remove it
// completely from the list of members.
func (a *Agent) ForceLeaveOpts(node string, opts ForceLeaveOpts) error {
r := a.c.newRequest("PUT", "/v1/agent/force-leave/"+url.PathEscape(node))
r := a.c.newRequest("PUT", "/v1/agent/force-leave/"+node)
if opts.Prune {
r.params.Set("prune", "1")
}
@ -1108,7 +1107,7 @@ func (a *Agent) ConnectCARoots(q *QueryOptions) (*CARootList, *QueryMeta, error)
// ConnectCALeaf gets the leaf certificate for the given service ID.
func (a *Agent) ConnectCALeaf(serviceID string, q *QueryOptions) (*LeafCert, *QueryMeta, error) {
r := a.c.newRequest("GET", "/v1/agent/connect/ca/leaf/"+url.PathEscape(serviceID))
r := a.c.newRequest("GET", "/v1/agent/connect/ca/leaf/"+serviceID)
r.setQueryOptions(q)
rtt, resp, err := a.c.doRequest(r)
if err != nil {
@ -1136,7 +1135,7 @@ func (a *Agent) EnableServiceMaintenance(serviceID, reason string) error {
}
func (a *Agent) EnableServiceMaintenanceOpts(serviceID, reason string, q *QueryOptions) error {
r := a.c.newRequest("PUT", "/v1/agent/service/maintenance/"+url.PathEscape(serviceID))
r := a.c.newRequest("PUT", "/v1/agent/service/maintenance/"+serviceID)
r.setQueryOptions(q)
r.params.Set("enable", "true")
r.params.Set("reason", reason)
@ -1158,7 +1157,7 @@ func (a *Agent) DisableServiceMaintenance(serviceID string) error {
}
func (a *Agent) DisableServiceMaintenanceOpts(serviceID string, q *QueryOptions) error {
r := a.c.newRequest("PUT", "/v1/agent/service/maintenance/"+url.PathEscape(serviceID))
r := a.c.newRequest("PUT", "/v1/agent/service/maintenance/"+serviceID)
r.setQueryOptions(q)
r.params.Set("enable", "false")
_, resp, err := a.c.doRequest(r)
@ -1355,7 +1354,7 @@ func (a *Agent) updateTokenFallback(token string, q *WriteOptions, targets ...st
}
func (a *Agent) updateTokenOnce(target, token string, q *WriteOptions) (*WriteMeta, int, error) {
r := a.c.newRequest("PUT", fmt.Sprintf("/v1/agent/token/%s", url.PathEscape(target)))
r := a.c.newRequest("PUT", fmt.Sprintf("/v1/agent/token/%s", target))
r.setWriteOptions(q)
r.obj = &AgentToken{Token: token}

@ -1,34 +0,0 @@
version: '3'
services:
consul-agent-1: &consul-agent
image: consul:latest
networks:
- consul-demo
command: "agent -retry-join consul-server-bootstrap -client 0.0.0.0"
consul-agent-2:
<<: *consul-agent
consul-agent-3:
<<: *consul-agent
consul-server-1: &consul-server
<<: *consul-agent
command: "agent -server -retry-join consul-server-bootstrap -client 0.0.0.0"
consul-server-2:
<<: *consul-server
consul-server-bootstrap:
<<: *consul-agent
ports:
- "8400:8400"
- "8500:8500"
- "8600:8600"
- "8600:8600/udp"
command: "agent -server -bootstrap-expect 3 -ui -client 0.0.0.0"
networks:
consul-demo:

@ -1,57 +0,0 @@
# Vagrant Consul Demo
This demo provides a very simple `Vagrantfile` that creates two Consul
server nodes, one at *172.20.20.10* and another at *172.20.20.11*. Both are
running a standard Debian distribution, and *the latest version* of Consul
is pre-installed.
To get started, you can start the nodes by just doing:
```
vagrant up
```
> NOTE: If you prefer a different Vagrant box, you can set the `DEMO_BOX_NAME`
> environment variable before starting `vagrant` like this:
> `DEMO_BOX_NAME="ubuntu/xenial64" vagrant up`
Once it is finished, you should be able to see the following:
```
vagrant status
Current machine states:
n1 running (virtualbox)
n2 running (virtualbox)
```
At this point the two nodes are running and you can SSH in to play with them:
```
vagrant ssh n1
consul version
Consul v0.7.5
Protocol 2 spoken by default, understands 2 to 3 (agent will automatically use protocol >2 when speaking to compatible agents)
exit
```
and
```
vagrant ssh n2
consul version
Consul v0.7.5
Protocol 2 spoken by default, understands 2 to 3 (agent will automatically use protocol >2 when speaking to compatible agents)
exit
```
> NOTE: This demo will query the HashiCorp Checkpoint service to determine
> the latest Consul release version and install that version by default,
> but if you need a different Consul version, set the `CONSUL_DEMO_VERSION`
> environment variable before `vagrant up` like this:
> `CONSUL_DEMO_VERSION=0.6.4 vagrant up`
## Where to Next?
To learn more about starting Consul, joining nodes into a cluster, and
interacting with the agent, check out the [Getting Started guide](https://www.consul.io/intro/getting-started/install.html).

@ -1,56 +0,0 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
$script = <<SCRIPT
echo "Installing dependencies ..."
sudo apt-get update
sudo apt-get install -y unzip curl jq dnsutils
echo "Determining Consul version to install ..."
CHECKPOINT_URL="https://checkpoint-api.hashicorp.com/v1/check"
if [ -z "$CONSUL_DEMO_VERSION" ]; then
CONSUL_DEMO_VERSION=$(curl -s "${CHECKPOINT_URL}"/consul | jq .current_version | tr -d '"')
fi
echo "Fetching Consul version ${CONSUL_DEMO_VERSION} ..."
cd /tmp/
curl -s https://releases.hashicorp.com/consul/${CONSUL_DEMO_VERSION}/consul_${CONSUL_DEMO_VERSION}_linux_amd64.zip -o consul.zip
echo "Installing Consul version ${CONSUL_DEMO_VERSION} ..."
unzip consul.zip
sudo chmod +x consul
sudo mv consul /usr/bin/consul
sudo mkdir /etc/consul.d
sudo chmod a+w /etc/consul.d
SCRIPT
# Specify a Consul version
CONSUL_DEMO_VERSION = ENV['CONSUL_DEMO_VERSION']
# Specify a custom Vagrant box for the demo
DEMO_BOX_NAME = ENV['DEMO_BOX_NAME'] || "debian/stretch64"
# Vagrantfile API/syntax version.
# NB: Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.vm.box = DEMO_BOX_NAME
config.vm.provision "shell",
inline: $script,
env: {'CONSUL_DEMO_VERSION' => CONSUL_DEMO_VERSION}
config.vm.define "n1" do |n1|
n1.vm.hostname = "n1"
n1.vm.network "private_network", ip: "172.20.20.10"
end
config.vm.define "n2" do |n2|
n2.vm.hostname = "n2"
n2.vm.network "private_network", ip: "172.20.20.11"
end
end

@ -3,7 +3,7 @@
- call out: envoy/proxy is the data plane, Consul is the control plane
- [xDS Server] - a gRPC service that implements [xDS] and handles requests from an [envoy proxy].
- [agent/proxycfg]
- CA Manager - certificate authority
- [Certificate Authority](./ca) for issuing TLS certs for services and client agents
- command/connect/envoy - bootstrapping and running envoy
- command/connect/proxy - built-in proxy that is dev-only and not supported
for production.

@ -0,0 +1,114 @@
# Certificate Authority (Connect CA)
The Certificate Authority Subsystem manages a CA trust chain for issuing certificates to
services and client agents (via auto-encrypt and auto-config).
The code for the Certificate Authority is in the following packages:
1. most of the core logic is in [agent/consul/leader_connect_ca.go]
2. the providers are in [agent/connect/ca]
3. the RPC interface is in [agent/consul/connect_ca_endpoint.go]
[agent/consul/leader_connect_ca.go]: https://github.com/hashicorp/consul/blob/main/agent/consul/leader_connect_ca.go
[agent/connect/ca]: https://github.com/hashicorp/consul/blob/main/agent/connect/ca/
[agent/consul/connect_ca_endpoint.go]: https://github.com/hashicorp/consul/blob/main/agent/consul/connect_ca_endpoint.go
## Architecture
### High level overview
In Consul the leader is responsible for handling CA management.
When a leader election happens and the newly elected leader does not have a root CA available, it starts the process of creating a set of CA certificates.
Those certificates will be used to authenticate/encrypt communication between services (service mesh) or between Consul client agents (auto-encrypt/auto-config). This process is described in the following diagram:
![CA creation](./hl-ca-overview.svg)
<sup>[source](./hl-ca-overview.mmd)</sup>
The features that benefit from Consul CA management are:
- [service Mesh/Connect](https://www.consul.io/docs/connect)
- [auto encrypt](https://www.consul.io/docs/agent/options#auto_encrypt)
### CA and Certificate relationship
This diagram shows the relationship between the CA certificates in Consul primary and
secondary.
![CA relationship](./cert-relationship.svg)
<sup>[source](./cert-relationship.mmd)</sup>
In most cases there is an external root CA that provides an intermediate CA that Consul
uses as the Primary Root CA. The only exception to this is when the Consul CA Provider is
used without specifying a `RootCert`. In this one case Consul will generate the Root CA
from the provided private key, and it will be used in the primary as the top of the chain
of trust.
In the primary datacenter, the Consul and AWS providers use the Primary Root CA to sign
leaf certificates. The Vault provider uses an intermediate CA to sign leaf certificates.
Leaf certificates are created for two purposes:
1. the Leaf Cert Service is used by envoy proxies in the mesh to perform mTLS with other
services.
2. the Leaf Cert Client Agent is created by auto-encrypt and auto-config. It is used by
client agents for HTTP API TLS, and for mTLS for RPC requests to servers.
Any secondary datacenters receive an intermediate certificate, signed by the Primary Root
CA, which is used as the CA certificate to sign leaf certificates in the secondary
datacenter.
## Operations
When trying to learn the CA subsystem it can be helpful to understand the operations that
it can perform. The sections below are the complete set of read, write, and periodic
operations that provide the full behaviour of the CA subsystem.
### Periodic Operations
Periodic (or background) operations are started automatically by the Consul leader. They run at some interval (often 1 hour).
- `CAManager.InitializeCA` - attempts to initialize the CA when a leader is elected. If the synchronous InitializeCA fails, `CAManager.backgroundCAInitialization` runs `InitializeCA` periodically in a goroutine until it succeeds.
- `CAManager.RenewIntermediate` - (called by `CAManager.intermediateCertRenewalWatch`) runs in the primary if the provider uses a separate signing cert (the Vault provider). The operation always runs in the secondary. Renews the signing cert once half its lifetime has passed.
- `CAManager.secondaryCARootWatch` - runs in secondary only. Performs a blocking query to the primary to retrieve any updates to the CA roots and stores them locally.
- `Server.runCARootPruning` - removes non-active and expired roots from state.CARoots
### Read Operations
- `RPC.ConnectCA.ConfigurationGet` - returns the CA provider configuration. Only called by user, not by any internal subsystems.
- `RPC.ConnectCA.Roots` - returns all the roots, the trust domain ID, and the ID of the active root. Each "root" also includes the signing key/cert, and any intermediate certs in the chain. It is used (via the cache) by all the connect proxy types.
### Write Operations
- `CAManager.UpdateConfiguration` - (via `RPC.ConnectCA.ConfigurationSet`) called by a user when they want to change the provider or provider configuration (ex: rotate root CA).
- `CAManager.Provider.SignIntermediate` - (via `RPC.ConnectCA.SignIntermediate`) called from the secondary DC:
1. by `CAManager.RenewIntermediate` to sign the new intermediate when the old intermediate is about to expire
2. by `CAManager.initializeSecondary` when setting up a new secondary, when the provider is changed in the secondary
by a user action, or when the primary roots changed and the secondary needs to generate a new intermediate for the new
primary roots.
- `CAManager.SignCertificate` - is used by:
1. (via `RPC.ConnectCA.Sign`) - called by client agents to sign a leaf cert for a connect proxy (via `agent/cache-types/connect_ca_leaf.go`)
2. (via in-process call to `RPC.ConnectCA.Sign`) - called by auto-encrypt to sign a leaf cert for a client agent
3. called by Auto-Config to sign a leaf cert for a client agent
## Detailed call flow
![CA Leader Sequence](./ca-leader-sequence.svg)
<sup>[source](./ca-leader-sequence.mmd)</sup>
#### TODO:
- sequence diagram for leaf signing
- sequence diagram for CA cert rotation
## CAManager states
This section is a work in progress
TODO: style the diagram to match the others, and add some narrative text to describe the
diagram.
![CA Manager states](./state-machine.svg)

@ -0,0 +1,19 @@
sequenceDiagram
participant Provider
participant PL as Primary Leader
participant SL as Secondary Leader
alt Primary doesn't have a valid CA
PL->>Provider:initializeRootCA (fetch root and sign intermediate)
Provider->>PL:root + intermediate
PL->>PL:RPC ConnectCA.Roots (fetch primary root and store it)
end
SL->>PL: RPC ConnectCA.Roots (fetch primary root and store it)
PL->>SL: Root + intermediate
alt Secondary needs a new intermediate (check if current intermediate is signed by primary root)
SL->>Provider: Generate CSR
Provider->>SL: CSR
SL->>PL: ConnectCA.SignIntermediate (CSR)
PL->>SL: Intermediate CA (secondary)
SL->>Provider: Set Intermediate (secondary CA) + root (primary CA)
SL->>SL: Store certs in RAFT (primary root + secondary intermediate)
end

File diff suppressed because one or more lines are too long


@ -0,0 +1,31 @@
graph TD
ExternalRootCA["External RootCA (optional)"]
subgraph "Consul Primary"
PrimaryRootCA["Primary Root CA"]
PrimarySigningCA["Primary Signing CA (conditional)"]
end
subgraph "Consul Secondary"
SecondarySigningCA["Secondary Signing CA"]
end
LeafCertAgentPrimary[Leaf Cert Client Agent]
LeafCertServicePrimary[Leaf Cert Service]
LeafCertAgentSecondary[Leaf Cert Client Agent]
LeafCertServiceSecondary[Leaf Cert Service]
ExternalRootCA -.-> PrimaryRootCA
PrimaryRootCA -.-> PrimarySigningCA
PrimaryRootCA --> SecondarySigningCA
PrimarySigningCA --> LeafCertAgentPrimary
PrimarySigningCA --> LeafCertServicePrimary
SecondarySigningCA --> LeafCertAgentSecondary
SecondarySigningCA --> LeafCertServiceSecondary

File diff suppressed because one or more lines are too long


@ -0,0 +1,43 @@
graph TD
subgraph "Primary DC"
leaderP["Leader"]
rootCAI["Root CA "]
rootCA["Root CA "]
Provider["Consul/AWS providers"]
IntermediateProvider["Vault provider"]
intermediateCAP["Intermediate CA "]
leafP["Leaf certificates"]
end
subgraph "Secondary DC"
leaderS["Leader"]
intermediateCAS["Intermediate CA"]
leafS["Leaf certificates"]
ProviderS["Consul/AWS/Vault providers"]
end
consulCAS["Consul client Agents"]
servicesS["Mesh services"]
consulCAP["Consul client Agents"]
servicesP["Mesh services"]
leaderP -->|use|Provider
leaderP-->|use|IntermediateProvider
Provider--> |fetch/self sign|rootCA
IntermediateProvider --> |fetch/self sign|rootCAI
rootCAI -->|sign| intermediateCAP
intermediateCAP -->|sign| leafP
rootCA -->|sign| leafP
leaderS -->|use| ProviderS
ProviderS --> |generate csr| intermediateCAS
rootCA -->|sign| intermediateCAS
rootCAI -->|sign| intermediateCAS
intermediateCAS --> |sign| leafS
leafS -->|auth/encrypt| servicesS
leafS -->|auth/encrypt| consulCAS
leafP -->|auth/encrypt| servicesP
leafP -->|auth/encrypt| consulCAP

File diff suppressed because one or more lines are too long


@ -0,0 +1,23 @@
stateDiagram-v2
[*] --> Uninitialized
Uninitialized --> Initializing : InitializeCA
Uninitialized --> Reconfig : UpdateConfiguration
Reconfig --> Uninitialized : return
%% Initialized can transition to any state
Initialized --> Renew : RenewIntermediate
Initialized --> Uninitialized : Stop
Initialized --> Reconfig : UpdateConfiguration
Initialized --> Initializing : INVALID
%% Initialized is set using validate=false
Uninitialized --> Initialized : INVALID
Reconfig --> Initialized : return
Initializing --> Initialized : InitializeCA complete
Renew --> Initialized : return
%% Uninitialized is set using validate=false
Renew --> Uninitialized : Stop
Reconfig --> Uninitialized : Stop
Initializing --> Uninitialized : Stop

File diff suppressed because one or more lines are too long


@ -90,9 +90,8 @@ type SubscribeRequest struct {
Topic Topic `protobuf:"varint,1,opt,name=Topic,proto3,enum=subscribe.Topic" json:"Topic,omitempty"`
// Key is a topic-specific identifier that restricts the scope of the
// subscription to only events pertaining to that identifier. For example,
// to receive events for a single service, the service's name is
// specified as the key. An empty key indicates that all events in the topic
// are of interest.
// to receive events for a single service, the service's name is specified
// as the key.
Key string `protobuf:"bytes,2,opt,name=Key,proto3" json:"Key,omitempty"`
// Token is the ACL token to authenticate the request. The token must have
// sufficient privileges to read the requested information otherwise events

@ -51,9 +51,8 @@ message SubscribeRequest {
// Key is a topic-specific identifier that restricts the scope of the
// subscription to only events pertaining to that identifier. For example,
// to receive events for a single service, the service's name is
// specified as the key. An empty key indicates that all events in the topic
// are of interest.
// to receive events for a single service, the service's name is specified
// as the key.
string Key = 2;
// Token is the ACL token to authenticate the request. The token must have

@ -144,6 +144,7 @@ func WaitForTestAgent(t *testing.T, rpc rpcFn, dc string, options ...waitOption)
// raft leadership is gained so WaitForLeader isn't sufficient to be sure that
// the CA is fully initialized.
func WaitForActiveCARoot(t *testing.T, rpc rpcFn, dc string, expect *structs.CARoot) {
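	// Mark this function as a test helper so failures are reported at the
	// caller's line rather than inside this helper.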
t.Helper()
retry.Run(t, func(r *retry.R) {
args := &structs.DCSpecificRequest{
Datacenter: dc,

@ -0,0 +1,17 @@
import { helper } from '@ember/component/helper';
/**
* Conditionally maps classInfos (classes) to a string ready for typical DOM
* usage (i.e. space delimited)
*
* @typedef {([string, boolean] | [string])} classInfo
* @param {(classInfo | string)[]} entries - An array of 'entry-like' arrays of `classInfo`s to map
*/
const classMap = entries => {
const str = entries
.filter(entry => (typeof entry === 'string' ? true : entry[entry.length - 1]))
.map(entry => (typeof entry === 'string' ? entry : entry[0]))
.join(' ');
return str.length > 0 ? str : undefined;
};
export default helper(classMap);

@ -0,0 +1,45 @@
# class-map
`{{class-map}}` is used to easily add a list of classes, conditionally, and
have them all formatted nicely ready to be printed in a DOM `class` attribute.
For convenience, as well as using entries, you can also provide a simple string
without the boolean; that class will always be added.
```hbs preview-template
<figure>
<figcaption>
The correct classes added/omitted
</figcaption>
<div
class={{class-map
'component-name'
(array 'add-this-class' true)
(array 'dont-add-this-class' false)
'simple-string-class'
}}
...attributes
>
<code>
class="{{class-map
(array 'add-this-class' true)
(array 'dont-add-this-class' false)
'simple-string-class'
}}"
</code>
</div>
</figure>
```
## Positional Arguments
| Argument | Type | Default | Description |
| --- | --- | --- | --- |
| `entries` | `(classInfo \| string)[]` | | An array of 'entry-like' arrays of `classInfo`s to map |
## Types
| Type | Default | Description |
| --- | --- | --- |
| `classInfo` | `([string, boolean] \| [string])` | |

@ -0,0 +1,12 @@
import Modifier from 'ember-modifier';
import { inject as service } from '@ember/service';
export default class CSSPropModifier extends Modifier {
@service('-document') doc;
didReceiveArguments() {
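    // Resolve the callback from either the second positional argument or
    // the `returns` named argument.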
const params = this.args.positional;
const options = this.args.named;
const returns = params[1] || options.returns;
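    // Read the computed value of the requested CSS property from the
    // modified element and pass it to the callback.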
returns(this.doc.defaultView.getComputedStyle(this.element).getPropertyValue(params[0]));
}
}

@ -0,0 +1,25 @@
# css-prop
Get the value for a single specific CSS Property from the modified element.
`returns` can be specified either as a second parameter or an option.
```hbs preview-template
<div
{{css-prop '--red-500' returns=(set this 'red')}}
>
<code>--red-500: {{this.red}}</code>
</div>
```
## Positional Arguments
| Argument | Type | Default | Description |
| --- | --- | --- | --- |
| `property` | `string` | | The name of the CSS property to fetch from the element |
| `returns` | `function` | | Usually `set` or `mut` or similar |
## Named Arguments
| Argument | Type | Default | Description |
| --- | --- | --- | --- |
| `returns` | `function` | | See the `returns` positional argument |

@ -34,9 +34,9 @@ are not supported from commands, but may be from the corresponding HTTP endpoint
Usage: `consul join [options] address ...`
You may call join with multiple addresses if you want to try to join
multiple clusters. Consul will attempt to join all addresses, and the join
command will fail only if Consul was unable to join with any.
You may call `join` with multiple addresses if you want to attempt to join the cluster
through multiple nodes. Consul will attempt to join all addresses. The join
command will fail only if Consul was unable to join any of the specified addresses.
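For illustration, a minimal sketch of the same multi-address behaviour using Consul's Go API client (`github.com/hashicorp/consul/api`); the addresses below are hypothetical:

```go
package main

import (
	"log"

	"github.com/hashicorp/consul/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Hypothetical addresses; the CLI equivalent is
	// `consul join 10.0.0.2 10.0.0.3`.
	addrs := []string{"10.0.0.2", "10.0.0.3"}

	joined := 0
	for _, addr := range addrs {
		// Attempt each address; like the CLI, only fail if none of the
		// addresses could be joined.
		if err := client.Agent().Join(addr, false); err != nil {
			log.Printf("failed to join %s: %v", addr, err)
			continue
		}
		joined++
	}
	if joined == 0 {
		log.Fatal("unable to join any of the specified addresses")
	}
}
```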
#### API Options

@ -269,6 +269,60 @@ resources will still work.
This metric should be monitored to ensure that the license doesn't expire to prevent degradation of functionality.
### Bolt DB Performance
| Metric Name | Description | Unit | Type |
| :-------------------------------- | :--------------------------------------------------------------- | :---- | :---- |
| `consul.raft.boltdb.freelistBytes` | Represents the number of bytes necessary to encode the freelist metadata. When [`raft_boltdb.NoFreelistSync`](/docs/agent/options#NoFreelistSync) is set to `false` these metadata bytes must also be written to disk for each committed log. | bytes | gauge |
| `consul.raft.boltdb.logsPerBatch` | Measures the number of logs being written per batch to the db. | logs | sample |
| `consul.raft.boltdb.storeLogs` | Measures the amount of time spent writing logs to the db. | ms | timer |
**Requirements:**
* Consul 1.11.0+
**Why they're important:**
The `consul.raft.boltdb.storeLogs` metric is a direct indicator of disk write performance of a Consul server. If there are issues with the disk or
performance degradations related to Bolt DB, these metrics will show the issue and potentially the cause as well.
**What to look for:**
The primary thing to look for are increases in the `consul.raft.boltdb.storeLogs` times. Its value will directly govern an
upper limit to the throughput of write operations within Consul.
In Consul each write operation will turn into a single Raft log to be committed. Raft will process these
logs and store them within Bolt DB in batches. Each call to store logs within Bolt DB is measured to record how long
it took as well as how many logs were contained in the batch. Writing logs in this fashion is serialized so that
a subsequent log storage operation can only be started after the previous one completed. Therefore the maximum number
of log storage operations that can be performed each second can be calculated with the following equation:
`(1000 ms) / (consul.raft.boltdb.storeLogs ms/op)`. From there we can extrapolate the maximum number of Consul writes
per second by multiplying that value by the `consul.raft.boltdb.logsPerBatch` metric's value. When log storage
operations are becoming slower you may not see an immediate decrease in write throughput to Consul due to increased
batch sizes of each operation. However, the max batch size allowed is 64 logs. Therefore if the `logsPerBatch`
metric is near 64 and the `storeLogs` metric is seeing increased time to write each batch to disk, then it is likely
that increased write latencies and other errors may occur.
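As a concrete sketch of that arithmetic, assuming hypothetical sampled values for the two metrics:

```go
package main

import "fmt"

func main() {
	// Hypothetical samples of the two metrics discussed above.
	storeLogsMs := 5.0   // consul.raft.boltdb.storeLogs: ms per batch write
	logsPerBatch := 32.0 // consul.raft.boltdb.logsPerBatch: logs per batch

	// Log storage is serialized, so the number of batches per second is
	// bounded by (1000 ms) / (storeLogs ms/op).
	maxBatchesPerSec := 1000.0 / storeLogsMs

	// Each Consul write becomes one raft log, so the approximate write
	// ceiling is batches/sec multiplied by logs per batch.
	maxWritesPerSec := maxBatchesPerSec * logsPerBatch

	fmt.Printf("max batches/sec: %.0f, approximate max writes/sec: %.0f\n",
		maxBatchesPerSec, maxWritesPerSec)
}
```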
A number of potential issues can cause this. Oftentimes the performance of the underlying
disks is the issue. Other times it may be caused by Bolt DB behavior. Bolt DB keeps track of free space within
the `raft.db` file. When needing to allocate data it will use existing free space first before further expanding the
file. By default, Bolt DB will write a data structure containing metadata about free pages within the DB to disk for
every log storage operation. Therefore if the free space within the database grows excessively large, such as after
a large spike in writes beyond the normal steady state and a subsequent slow down in the write rate, then Bolt DB
could end up writing a large amount of extra data to disk for each log storage operation. This has the potential
to drastically increase the required disk write throughput, potentially beyond what the underlying disks can keep up with. To
detect this situation you can look at the `consul.raft.boltdb.freelistBytes` metric. This metric is a count of
the extra bytes that are being written for each log storage operation beyond the log data itself. While not a clear
indicator of an actual issue, this metric can be used to diagnose why the `consul.raft.boltdb.storeLogs` metric
is high.
If Bolt DB log storage performance becomes an issue and is caused by free list management then setting
[`raft_boltdb.NoFreelistSync`](/docs/agent/options#NoFreelistSync) to `true` in the server's configuration
may help to reduce disk IO and log storage operation times. Disabling free list syncing will however increase
the startup time for a server as it must scan the raft.db file for free space instead of loading the already
populated free list structure.
## Metrics Reference
This is a full list of metrics emitted by Consul.
@ -344,7 +398,7 @@ These metrics are used to monitor the health of the Consul servers.
| `consul.raft.applied_index` | Represents the raft applied index. | index | gauge |
| `consul.raft.apply` | Counts the number of Raft transactions occurring over the interval, which is a general indicator of the write load on the Consul servers. | raft transactions / interval | counter |
| `consul.raft.barrier` | Counts the number of times the agent has started the barrier i.e the number of times it has issued a blocking call, to ensure that the agent has all the pending operations that were queued, to be applied to the agent's FSM. | blocks / interval | counter |
| `consul.raft.boltdb.freelistBytes` | Represents the number of bytes necessary to encode the freelist metadata. When `raft_boltdb.NoFreelistSync` is set to `false` these metadata bytes must also be written to disk for each committed log. | bytes | gauge |
| `consul.raft.boltdb.freelistBytes` | Represents the number of bytes necessary to encode the freelist metadata. When [`raft_boltdb.NoFreelistSync`](/docs/agent/options#NoFreelistSync) is set to `false` these metadata bytes must also be written to disk for each committed log. | bytes | gauge |
| `consul.raft.boltdb.freePageBytes` | Represents the number of bytes of free space within the raft.db file. | bytes | gauge |
| `consul.raft.boltdb.getLog` | Measures the amount of time spent reading logs from the db. | ms | timer |
| `consul.raft.boltdb.logBatchSize` | Measures the total size in bytes of logs being written to the db in a single batch. | bytes | sample |

@ -0,0 +1,309 @@
---
layout: docs
page_title: API Gateway
description: Using Consul API gateway functionality
---
# Consul API Gateway
This topic describes how to use the Consul API Gateway add-on module, which helps users control access to services running within a Consul service mesh. The API gateway enables external network clients to access applications and services running in a Consul datacenter. This type of network traffic is commonly referred to as "north-south" network traffic as it refers to the flow of data into and out of a specific environment. Requests from clients can also be forwarded based on path or request protocol.
You can learn more about using Consul API Gateway by completing the [Consul API Gateway tutorial](https://learn.hashicorp.com/tutorials/consul/kubernetes-api-gateway).
## Introduction
Consul API Gateway implements the Kubernetes [Gateway API Specification](https://gateway-api.sigs.k8s.io/). This specification defines a set of custom resource definitions (CRD) that can create logical gateways and routes based on the path or protocol of a client request. Consul API Gateway solves two primary use cases:
- **Controlling access at the point of entry**: Consul API Gateway allows users to set the protocols of external connection requests and provide clients with TLS certificates from trusted providers (e.g., Verisign, Let's Encrypt).
- **Simplifying traffic management**: The Consul API Gateway can load balance requests across services and route traffic to the appropriate service by matching one or more criteria, such as hostname, path, header presence or value, and HTTP Method type (e.g., GET, POST, PATCH).
## Requirements
Your datacenter must meet the following requirements prior to configuring the Consul API Gateway:
- A Kubernetes cluster must be running
- `kubectl` 1.21+
- Consul 1.11.2+
- HashiCorp Helm chart 0.40.0+
## Installation
1. Issue the following command to install the Consul API Gateway controller:
<CodeBlockConfig>
```shell-session
$ kubectl apply --kustomize="github.com/hashicorp/consul-api-gateway/config/crd?ref=v0.1.0-beta"
```
</CodeBlockConfig>
1. Create a values file for your Consul server agents that contains the following parameters:
<CodeBlockConfig hideClipboard filename="values.yaml">
```yaml
global:
name: consul
image: 'hashicorp/consul:1.11.2'
tls:
enabled: true
connectInject:
enabled: true
controller:
enabled: true
apiGateway:
enabled: true
image: hashicorp/consul-api-gateway:0.1.0-beta
```
</CodeBlockConfig>
1. Install Consul API Gateway using the standard Consul Helm chart and specify the custom values file.
<CodeBlockConfig>
```shell-session
$ helm install consul hashicorp/consul --version 0.40.0 --values values.yaml
```
</CodeBlockConfig>
## Usage
1. Verify that the [requirements](#requirements) have been met.
1. Verify that the Consul API Gateway software has been installed and applied (see [Installation](#installation)).
1. Configure the artifacts described in [Configuration](#configuration).
1. Issue the `kubectl apply` command to implement the configurations, e.g.:
<CodeBlockConfig>
```shell-session
$ kubectl apply --filename gateway-configuration.yaml
```
</CodeBlockConfig>
<!--- Commented out per https://github.com/hashicorp/consul/pull/11951/files#r791204596
### Using the Consul API Gateway Binary
You can download the Consul API Gateway binary and use it to manually start the control plane server.
1. Download the binary from the [Consul API Gateway repository](https://github.com/hashicorp/consul-api-gateway).
1. Navigate to the `consul-api-gateway-main` directory and build the binary:
```shell-session
$ go build
```
1. (Optional) Copy the binary to the execution path, e.g.:
```shell-session
$ cp consul-api-gateway /usr/bin
```
1. Use the `server` command to interact with the Consul API Gateway binary:
```shell-session
$ ./consul-api-gateway server <options>
```
The following options are supported:
| Option | Description | Required | Default |
| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ----------------------------------------------------------------------- |
| `-ca-file` | String value that specifies the path to the CA for the Consul server. | Required | none |
| `-ca-secret` | String value that specifies the CA secret for the Consul server. | Required | none |
| `-ca-secret-namespace` | String value that specifies the CA secret namespace for the Consul server. | Required | none |
| `-k8-context` | String value that specifies the Kubernetes context to use when starting the Consul server. | Optional | current context |
| `-k8-namespace` | String value that specifies the Kubernetes namespace to use when starting the Consul server. | Optional | `default` |
| `-log-json` | Boolean value that enables or disables JSON format for the log output. | Required | `false` |
| `-log-level` | String value that specifies the logging level. The following values are supported: <br/>- `trace` (highest level of detail) <br/>- `debug` <br/>- `info` <br/>- `warn` <br/>- `error` | Optional | `info` |
| `-metrics-port` | Integer value that specifies the port number for collecting metrics. | Optional | none |
| `-pprof` | Integer value that specifies the Go pprof port number for collecting metrics. | Optional | none |
| `-sds-server-host` | String value that specifies the host server for the secret discovery service (SDS). | Optional | `consul-api-gateway-controller.default.`<br/>`svc.cluster.`<br/>`local` |
| `-sds-server-port`     | Integer value that specifies the port number for the secret discovery service (SDS).                                                                                                    | Optional | `9090`                                                                  |
You can also issue the `version` command to print the Consul API Gateway version to the console:
```shell-session
$ ./consul-api-gateway version
consul-api-gateway 0.1.0
```
--->
## Configuration
Configure the following artifacts to facilitate ingress into your Consul service mesh:
- [GatewayClassConfig](#gatewayclassconfig): Describes additional Consul API Gateway-related configuration parameters for the `GatewayClass` resource.
- [GatewayClass](#gatewayclass): Defines a class of gateway resources that you can use as a template for creating gateways.
- [Gateway](#gateway): Defines the main infrastructure resource that links API gateway components. It specifies the name of the `GatewayClass` and one or more `listeners` (see [Listeners](#listeners)), which specify the logical endpoints bound to the gateway's addresses.
- [Routes](#routes): Specifies the path from the client to the listener.
### GatewayClass
The `GatewayClass` resource is used as a template for creating `Gateway` resources.
The specification includes the name of the controller (`controllerName`) and an API object containing controller-specific configuration resources within the cluster (`parametersRef`).
The value of the `controllerName` field must be set to `hashicorp.com/consul-api-gateway-controller`.
When gateways are created from a `GatewayClass`, they use the parameters specified in the `GatewayClass` at the time of instantiation.
Add the `kind: GatewayClass` option to the gateway values file to declare a gateway class.
The following example creates a gateway class called `test-gateway-class`:
<CodeBlockConfig filename="gateway.yaml">
```yaml
apiVersion: gateway.networking.k8s.io/v1alpha2
kind: GatewayClass
metadata:
name: test-gateway-class
spec:
controllerName: 'hashicorp.com/consul-api-gateway-controller'
parametersRef:
group: api-gateway.consul.hashicorp.com
kind: GatewayClassConfig
name: test-gateway-class-config
```
</CodeBlockConfig>
Refer to the [Kubernetes Gateway API documentation](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.GatewayClass) for details about configuring gateway classes.
### GatewayClassConfig
The `GatewayClassConfig` object describes additional Consul API Gateway-related configuration parameters for the `GatewayClass`.
Add the `kind: GatewayClassConfig` option to the gateway values file to declare a gateway class config.
The following example creates a gateway class config called `test-gateway-class-config`:
<CodeBlockConfig filename="gateway.yaml">
```yaml
apiVersion: api-gateway.consul.hashicorp.com/v1alpha1
kind: GatewayClassConfig
metadata:
name: test-gateway-class-config
spec:
useHostPorts: true
logLevel: 'trace'
consul:
scheme: 'https'
caSecret: 'consul-ca-cert'
ports:
http: 8501
grpc: 8502
```
</CodeBlockConfig>
The following table describes the required parameters for the `spec` array:
| Parameter | Description | Type | Default |
| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | ------------------------------------------------ |
| `consul.address` | Specifies the address of the Consul server to communicate with in the gateway pod. If unspecified, the pod will attempt to use a local agent on the host on which the pod is running. | String | N/A |
| `consul.authentication.account` | Specifies the Kubernetes service account to use for authentication. | String | N/A |
| `consul.authentication.managed` | Set to `true` to enable deployments to run with managed service accounts created by the gateway controller. The `consul.authentication.account` field is ignored when this option is enabled. | Boolean | `false` |
| `consul.authentication.method` | Specifies the Consul auth method used for initial authentication by Consul API Gateway. | String | N/A |
| `consul.authentication.namespace` | Specifies the Consul namespace to use for authentication. | String | N/A |
| `consul.ports.grpc` | Specifies the gRPC port for Consul's xDS server. | Integer | `8502` |
| `consul.ports.http` | Specifies the port for Consul's HTTP server. | Integer | `8500` |
| `consul.scheme` | Specifies the scheme to use for connecting to Consul. The supported values are `"http"` and `"https"`. | String | `"http"` |
| `copyAnnotations.service` | List of annotations to copy to the gateway service. | Array | `["external-dns.alpha.kubernetes.io/hostname"]` |
| `image.consulAPIGateway` | The image to use for consul-api-gateway. | String | `"hashicorp/consul-api-gateway:RELEASE_VERSION"` |
| `image.envoy` | Specifies the container image to use for Envoy. | String | `"envoyproxy/envoy:v1.19-latest"` |
| `logLevel` | Specifies the error reporting level for logs. You can specify the following values: `fatal`, `error`, `warning`, `info`, `debug`, `trace`. | String | `"info"` |
| `nodeSelector` | Specifies a set of parameters that constrain the nodes on which the pod can run. Defining nodes with the `nodeSelector` enables the pod to fit on a node. The selector must match a node's labels for the pod to be scheduled on that node. Refer to the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) for additional information. | Object | N/A |
| `serviceType` | Specifies the ingress methods for a service. The following values are supported: <br/>`ClusterIP` <br/>`NodePort` <br/>`LoadBalancer`. | String | N/A |
| `useHostPorts` | If set to `true`, then the Envoy container ports are mapped to host ports. | Boolean | `false` |
Refer to the [Consul API Gateway repository](https://github.com/hashicorp/consul-api-gateway/blob/main/config/crd/bases/api-gateway.consul.hashicorp.com_gatewayclassconfigs.yaml) for the complete specification.
### Gateway
The gateway configuration is the main infrastructure resource that links API gateway components. It specifies the name of the `GatewayClass` and one or more `listeners`.
Add the `kind: Gateway` option to the configuration file to declare a gateway.
The following example creates a gateway called `example-gateway`.
The gateway is based on the `test-gateway-class` and includes a listener called `https` (see [Listeners](#listeners) for details about the `listener` configuration).
<CodeBlockConfig filename="gateway.yaml">
```yaml
apiVersion: gateway.networking.k8s.io/v1alpha2
kind: Gateway
metadata:
name: example-gateway
annotations:
'external-dns.alpha.kubernetes.io/hostname': DNS_HOSTNAME
spec:
gatewayClassName: test-gateway-class
listeners:
- protocol: HTTPS
hostname: DNS_HOSTNAME
port: 443
name: https
allowedRoutes:
namespaces:
from: Same
tls:
certificateRefs:
- name: gateway-production-certificate
```
</CodeBlockConfig>
Refer to the [Kubernetes Gateway API documentation](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.Gateway) for details about configuring gateways.
#### Listeners
Listeners are the logical endpoints bound to the gateway's addresses.
Add the `listener` object to the `gateway` configuration and specify the following properties to define a listener:
| Parameter | Description | Type | Default |
| ------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | --------------- |
| `hostname` | Specifies the virtual hostname to match for protocol types. | String | none |
| `port` | Specifies the network port number. | Integer | none |
| `protocol` | Specifies the network protocol expected by the listener. | String | `http` |
| `tls` | Collection of parameters that specify TLS options for the listener. Refer to the [`GatewayTLSConfig`](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.GatewayTLSConfig) documentation for additional information about configuring TLS. | Object | N/A |
| `tls.mode` | Specifies a mode for operating Consul API Gateway listeners over TLS. <br/>You can only specify the `Terminate` mode, which configures the TLS session between the downstream client and the gateway to terminate at the gateway. <br/>Refer to the [`TLSModeType` documentation](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.TLSModeType) for additional information. | String | `Terminate` |
| `tls.certificateRefs` | Specifies the name of secret object used for Envoy SDS (Secret Discovery Service) to support terminating TLS. Refer to the [`[]*SecretObjectReference` documentation](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.SecretObjectReference) for additional information. | String | N/A |
| `tls.options` | Specifies key/value pairs to enable extended TLS configuration specific to an implementation. | Object | N/A |
| `tls.options.tls_min_version` | Specifies the minimum TLS version supported for the listener. The following values are supported: `TLS_AUTO`, `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, `TLSv1_3`. | String | `TLS 1.2` |
| `tls.options.tls_max_version` | Specifies the maximum TLS version supported for the listener. The specified version must be greater than or equal to `TLSMinVersion`. The following values are supported: `TLS_AUTO`, `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, `TLSv1_3`. | String | `TLS 1.3` |
| `tls.options.tls_cipher_suites` | Specifies the list of TLS cipher suites to support when negotiating connections using TLS 1.2 or earlier. <br/>If unspecified, a [more secure set of cipher suites](https://github.com/hashicorp/consul-api-gateway/blob/main/internal/common/tls.go#L3-L10) than Envoy's current [default server cipher list](https://www.envoyproxy.io/docs/envoy/latest/api-v3/extensions/transport_sockets/tls/v3/common.proto#envoy-v3-api-field-extensions-transport-sockets-tls-v3-tlsparameters-cipher-suites) will be used. <br/>The full list of supported cipher suites can be seen in [`internal/common/tls.go`](https://github.com/hashicorp/consul-api-gateway/blob/main/internal/common/tls.go) and is dependent on underlying support in Envoy. | String | See description |
Refer to the [Kubernetes Gateway API documentation](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.Listener) for details about configuring listeners.
### Route
Routes are independent configuration objects that are associated with specific listeners.
Declare a route with either `kind: HTTPRoute` or `kind: TCPRoute` and configure the route parameters in the `spec` block.
Refer to the Kubernetes Gateway API documentation for each object type for details:
- [HTTPRoute](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.HTTPRoute)
- [TCPRoute](https://gateway-api.sigs.k8s.io/v1alpha2/references/spec/#gateway.networking.k8s.io/v1alpha2.TCPRoute)
The following example creates a route named `example-route` associated with a listener defined in `example-gateway`.
<CodeBlockConfig filename="routes.yaml">
```yaml
apiVersion: gateway.networking.k8s.io/v1alpha2
kind: HTTPRoute
metadata:
name: example-route
spec:
parentRefs:
- name: example-gateway
rules:
- backendRefs:
- kind: Service
name: echo
port: 8080
```
</CodeBlockConfig>

@ -979,6 +979,36 @@ You can specify the following parameters to configure ingress gateway configurat
"Set this configuration to `true` to enable built-in TLS for every listener on the gateway.<br><br>If TLS is enabled, then each host defined in each service's `hosts` fields will be added as a DNSSAN to the gateway's x509 certificate.",
},
},
{
name: 'TLSMinVersion',
type: 'string: ""',
description: "Set the default minimum TLS version supported for the gateway's listeners. One of `TLS_AUTO`, `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, or `TLSv1_3`. If unspecified, Envoy v1.22.0 and newer [will default to TLS 1.2 as a min version](https://github.com/envoyproxy/envoy/pull/19330), while older releases of Envoy default to TLS 1.0.",
},
{
name: 'TLSMaxVersion',
type: 'string: ""',
description: {
hcl:
"Set the default maximum TLS version supported for the gateway's listeners. Must be greater than or equal to `TLSMinVersion`. One of `TLS_AUTO`, `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, or `TLSv1_3`." ,
yaml:
"Set the default maximum TLS version supported for the gateway's listeners. Must be greater than or equal to `tls_min_version`. One of `TLS_AUTO`, `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, or `TLSv1_3`." ,
},
},
{
name: 'CipherSuites',
type: 'array<string>: <optional>',
description: `Set the default list of TLS cipher suites for the gateway's
listeners to support when negotiating connections using
TLS 1.2 or earlier. If unspecified, Envoy will use a
[default server cipher list](https://www.envoyproxy.io/docs/envoy/latest/api-v3/extensions/transport_sockets/tls/v3/common.proto#envoy-v3-api-field-extensions-transport-sockets-tls-v3-tlsparameters-cipher-suites).
The list of supported cipher suites can be seen in
[\`consul/types/tls.go\`](https://github.com/hashicorp/consul/blob/v1.11.2/types/tls.go#L154-L169)
and is dependent on underlying support in Envoy. Future
releases of Envoy may remove currently-supported but
insecure cipher suites, and future releases of Consul
may add new supported cipher suites if any are added to
Envoy.`
},
{
name: 'SDS',
yaml: false,
@ -1133,11 +1163,40 @@ You can specify the following parameters to configure ingress gateway configurat
type: 'bool: false',
description: {
hcl:
"Set this configuration to `true` to enable built-in TLS for this listener.<br><br>If TLS is enabled, then each host defined in each service's `Hosts` field will be added as a DNSSAN to the gateway's x509 certificate. Note that even hosts from other listeners with TLS disabled will be added.",
"Set this configuration to `true` to enable built-in TLS for this listener.<br><br>If TLS is enabled, then each host defined in each service's `Hosts` field will be added as a DNSSAN to the gateway's x509 certificate. Note that even hosts from other listeners with TLS disabled will be added. TLS can not be disabled for individual listeners if it is enabled on the gateway.",
yaml:
"Set this configuration to `true` to enable built-in TLS for this listener.<br><br>If TLS is enabled, then each host defined in the `hosts` field will be added as a DNSSAN to the gateway's x509 certificate. Note that even hosts from other listeners with TLS disabled will be added.",
"Set this configuration to `true` to enable built-in TLS for this listener.<br><br>If TLS is enabled, then each host defined in the `hosts` field will be added as a DNSSAN to the gateway's x509 certificate. Note that even hosts from other listeners with TLS disabled will be added. TLS can not be disabled for individual listeners if it is enabled on the gateway.",
},
},
{
name: 'TLSMinVersion',
type: 'string: ""',
description: "Set the minimum TLS version supported for this listener. One of `TLS_AUTO`, `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, or `TLSv1_3`. If unspecified, Envoy v1.22.0 and newer [will default to TLS 1.2 as a min version](https://github.com/envoyproxy/envoy/pull/19330), while older releases of Envoy default to TLS 1.0.",
},
{
name: 'TLSMaxVersion',
type: 'string: ""',
description: {
hcl:
"Set the maximum TLS version supported for this listener. Must be greater than or equal to `TLSMinVersion`. One of `TLS_AUTO`, `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, or `TLSv1_3`." ,
yaml:
"Set the maximum TLS version supported for this listener. Must be greater than or equal to `tls_min_version`. One of `TLS_AUTO`, `TLSv1_0`, `TLSv1_1`, `TLSv1_2`, or `TLSv1_3`." ,
},
},
{
name: 'CipherSuites',
type: 'array<string>: <optional>',
description: `Set the list of TLS cipher suites to support when negotiating
connections using TLS 1.2 or earlier. If unspecified,
Envoy will use a
[default server cipher list](https://www.envoyproxy.io/docs/envoy/latest/api-v3/extensions/transport_sockets/tls/v3/common.proto#envoy-v3-api-field-extensions-transport-sockets-tls-v3-tlsparameters-cipher-suites).
The list of supported cipher suites can be seen in
[\`consul/types/tls.go\`](https://github.com/hashicorp/consul/blob/v1.11.2/types/tls.go#L154-L169)
and is dependent on underlying support in Envoy. Future
releases of Envoy may remove currently-supported but
insecure cipher suites, and future releases of Consul
may add new supported cipher suites if any are added to Envoy.`
},
{
name: 'SDS',
type: 'SDSConfig: <optional>',

@ -238,3 +238,25 @@ config to take effect.
Envoy not yet having support.
1. Tracing is only supported with Envoy proxies, not the built-in proxy.
1. When configuring the Zipkin tracer in `envoy_tracing_json`, set [`trace_id_128bit`](https://www.envoyproxy.io/docs/envoy/v1.21.0/api-v3/config/trace/v3/zipkin.proto#envoy-v3-api-field-config-trace-v3-zipkinconfig-trace-id-128bit) to `true` if your application is configured to generate 128-bit trace IDs. For example:
<CodeBlockConfig highlight="10">
```json
{
"http": {
"name": "envoy.tracers.zipkin",
"typedConfig": {
"@type": "type.googleapis.com/envoy.config.trace.v3.ZipkinConfig",
"collector_cluster": "zipkin",
"collector_endpoint_version": "HTTP_JSON",
"collector_endpoint": "/api/v2/spans",
"shared_span_context": false,
"trace_id_128bit": true
}
}
}
```
</CodeBlockConfig>

@ -66,6 +66,8 @@ service named `prometheus-server` so each Consul agent can reach it on
A full configuration to enable Prometheus is given below.
<CodeTabs>
```hcl
ui_config {
enabled = true
@ -76,6 +78,24 @@ ui_config {
}
```
```json
{
"ui_config": [
{
"enabled": true,
"metrics_provider": "prometheus",
"metrics_proxy": [
{
"base_url": "http://prometheus-server"
}
]
}
]
}
```
</CodeTabs>
Similarly, to configure the UI on Kubernetes, use this [reference](/docs/k8s/connect/observability/metrics).
## Configuring Dashboard URLs
@ -95,30 +115,41 @@ to the relevant information.
An example with Grafana is shown below.
<Tabs>
<Tab heading="HCL">
<CodeTabs tabs={[ "HCL", "JSON", "YAML (Kubernetes)" ]}>
<CodeBlockConfig>
```hcl
ui_config {
enabled = true
dashboard_url_templates {
service = "https://grafana.example.com/d/lDlaj-NGz/
service-overview?orgId=1&var-service={{Service.Name}}&
var-namespace={{Service.Namespace}}&
var-partition={{Service.Partition}}&var-dc={{Datacenter}}"
service = "https://grafana.example.com/d/lDlaj-NGz/service-overview?orgId=1&var-service={{Service.Name}}&var-namespace={{Service.Namespace}}&var-partition={{Service.Partition}}&var-dc={{Datacenter}}"
}
}
```
-> **Note**: the URL is wrapped over multiple lines to make it easier to read
without horizontal scrolling in the example above; however, it needs to be a
normal single-line string value in an HCL configuration file.
</CodeBlockConfig>
</Tab>
<Tab heading="Kubernetes YAML">
<CodeBlockConfig>
On Kubernetes, Consul Server configuration is set in your Helm config via the
[`server.extraConfig`](/docs/k8s/helm#v-server-extraconfig) key as JSON:
```json
{
"ui_config": [
{
"dashboard_url_templates": [
{
"service": "https://grafana.example.com/d/lDlaj-NGz/service-overview?orgId=1\u0026var-service={{Service.Name}}\u0026var-namespace={{Service.Namespace}}\u0026var-partition={{Service.Partition}}\u0026var-dc={{Datacenter}}"
}
],
"enabled": true
}
]
}
```
</CodeBlockConfig>
<CodeBlockConfig>
```yaml
# The UI is enabled by default so this stanza is not required.
@ -135,10 +166,14 @@ server:
}
```
-> **Note**: The `{{` characters in the URL must be escaped using `{{ "{{" }}` so that Helm doesn't try to template them.
</CodeBlockConfig>
</Tab>
</Tabs>
</CodeTabs>
~> **Note**: On Kubernetes, Consul Server configuration is set in your Helm
config via the [`server.extraConfig`](/docs/k8s/helm#v-server-extraconfig) key as JSON.
The `{{` characters in the URL must be escaped using `{{ "{{" }}` so that Helm
doesn't try to template them.
![Consul UI Service Dashboard Link](/img/ui-dashboard-url-template.png)
@ -171,6 +206,8 @@ un-authenticated workloads on the network**.
With ACLs enabled, the proxy endpoint requires a valid token with read access
to all nodes and services (across all namespaces in Enterprise):
<CodeTabs>
```hcl
# Consul OSS
service_prefix "" {
@ -191,6 +228,8 @@ namespace_prefix "" {
}
```
</CodeTabs>
It's typical for most authenticated users to have this level of access in Consul
as it's required for viewing the catalog or discovering services. If you use a
[Single Sign-On integration](/docs/security/acl/auth-methods/oidc) (Consul
@ -235,6 +274,8 @@ visible to Consul operators in the configuration file while UI users can query
the metrics they need without separately obtaining a token for that provider or
having a token exposed to them that they might be able to use elsewhere.
<CodeTabs>
```hcl
ui_config {
enabled = true
@ -251,6 +292,30 @@ ui_config {
}
```
```json
{
"ui_config": [
{
"enabled": true,
"metrics_provider": "example-apm",
"metrics_proxy": [
{
"add_headers": [
{
"name": "Authorization",
"value": "Bearer \u003ctoken\u003e"
}
],
"base_url": "https://example-apm.com/api/v1/metrics"
}
]
}
]
}
```
</CodeTabs>
## Custom Metrics Providers
Consul 1.9.0 includes a built-in provider for fetching metrics from
@ -266,6 +331,8 @@ feedback on [GitHub](https://github.com/hashicorp/consul) or
The template for a complete provider JavaScript file is given below.
<CodeTabs>
```JavaScript
(function () {
var provider = {
@ -472,6 +539,8 @@ The template for a complete provider JavaScript file is given below.
}());
```
</CodeTabs>
Additionally, the built in [Prometheus
provider code](https://github.com/hashicorp/consul/blob/main/ui/packages/consul-ui/vendor/metrics-providers/prometheus.js)
can be used as a reference.
@ -484,6 +553,8 @@ named `example-provider`, which is defined in
have been specified in the call to `consul.registerMetricsProvider` as in the
code listing in the last section.
<CodeTabs>
```hcl
ui_config {
enabled = true
@ -497,6 +568,19 @@ ui_config {
}
```
```json
{
"ui_config": {
"enabled": true,
"metrics_provider": "example-provider",
"metrics_provide_files": ["/usr/local/bin/example-metrics-provider.js"],
"metrics_provider_options_json": "{\"foo\":\"bar\"}"
}
}
```
</CodeTabs>
More than one JavaScript file may be specified in
[`metrics_provider_files`](/docs/agent/options#ui_config_metrics_provider_files)
and all will be served, allowing flexibility to include dependencies if needed.

@ -9,6 +9,35 @@ description: |-
# Transparent Proxy
Transparent proxy allows applications to communicate through the mesh without changing their configuration.
Transparent proxy also hardens application security by preventing direct inbound connections that bypass the mesh.
#### Without Transparent Proxy
![Diagram demonstrating that without transparent proxy, applications must "opt in" to connecting to their dependencies through the mesh](/img/consul-connect/without-transparent-proxy.png)
Without transparent proxy, application owners need to:
1. Explicitly configure upstream services, choosing a local port to access them.
1. Change application to access `localhost:<chosen port>`.
1. Configure application to listen only on the loopback interface to prevent unauthorized
traffic from bypassing the mesh.
#### With Transparent Proxy
![Diagram demonstrating that with transparent proxy, connections are automatically routed through the mesh](/img/consul-connect/with-transparent-proxy.png)
With transparent proxy:
1. Upstreams are inferred from service intentions, so no explicit configuration
is needed.
1. Outbound connections pointing to a KubeDNS name "just work" — network rules
redirect them through the proxy.
1. Inbound traffic is forced to go through the proxy to prevent unauthorized
direct access to the application.
#### Overview
Transparent proxy allows users to reach other services in the service mesh while ensuring that inbound and outbound
traffic for services in the mesh is directed through the sidecar proxy. Traffic is secured
and only reaches intended destinations since the proxy can enforce security and policy like TLS and Service Intentions.
@ -33,6 +62,7 @@ the traffic redirection command is automatically set up via an init container.
* To use transparent proxy on Kubernetes, Consul-helm >= `0.32.0` and Consul-k8s >= `0.26.0` are required in addition to Consul >= `1.10.0`.
* If the default policy for ACLs is "deny", then Service Intentions should be set up to allow intended services to connect to each other.
Otherwise, all Connect services can talk to all other services.
* If using Transparent Proxy, all worker nodes within a Kubernetes cluster must have the `ip_tables` kernel module loaded, e.g. via `modprobe ip_tables`.
The Kubernetes integration takes care of registering Kubernetes services with Consul, injecting a sidecar proxy, and
enabling traffic redirection.

@ -19,15 +19,16 @@ The following diagram shows the main components of the Consul architecture when
1. **Sidecar proxy:** The sidecar proxy container runs [Envoy](https://envoyproxy.io/). All requests
to and from the application container(s) run through the sidecar proxy. This communication
is called _data plane_ communication.
1. **Mesh Init:** Each task runs a short-lived container, called `mesh-init`, which sets up initial configuration
for Consul and Envoy.
1. **Health Syncing:** Optionally, an additional `health-sync` container can be included in a task to sync health statuses
from ECS into Consul.
1. **ACL Controller:** Automatically provisions Consul ACL tokens for Consul clients and service mesh services
in an ECS Cluster.
For more information about how Consul works in general, see Consul's [Architecture Overview](/docs/architecture).
In addition to the long-running Consul client and sidecar proxy containers, the `mesh-init` container runs
at startup and sets up initial configuration for Consul and Envoy.
### Task Startup
## Task Startup
This diagram shows the timeline of a task starting up and all its containers:
@ -44,7 +45,7 @@ This diagram shows the timeline of a task starting up and all its containers:
- If applicable, the `health-sync` container syncs health checks from ECS to Consul (see [ECS Health Check Syncing](#ecs-health-check-syncing)).
- **T2:** The `sidecar-proxy` container is marked as healthy by ECS. It uses a health check that detects if its public listener port is open. At this time, your application containers are started since all Consul machinery is ready to service requests. The only running containers are `consul-client`, `sidecar-proxy`, and your application container(s).
### Task Shutdown
## Task Shutdown
This diagram shows an example timeline of a task shutting down:
@ -64,7 +65,7 @@ This diagram shows an example timeline of a task shutting down:
- Updates about this task have reached the rest of the Consul cluster, so downstream proxies have been updated to stop sending traffic to this task.
- **T4**: At this point task shutdown should be complete. Otherwise, ECS will send a KILL signal to any containers still running. The KILL signal cannot be ignored and will forcefully stop containers. This will interrupt in-progress operations and possibly cause errors.
### Automatic ACL Token Provisioning
## Automatic ACL Token Provisioning
Consul ACL tokens secure communication between agents and services.
The following containers in a task require an ACL token:
@ -83,13 +84,13 @@ token does not yet exist.
The ACL controller stores all ACL tokens in AWS Secrets Manager, and tasks are configured to pull these
tokens from AWS Secrets Manager when they start.
### ECS Health Check Syncing
## ECS Health Check Syncing
If the following conditions apply, ECS health checks automatically sync with Consul health checks for all application containers:
* marked as `essential`
* have ECS `healthChecks`
* are not configured with native Consul health checks
* marked as `essential`
* have ECS `healthChecks`
* are not configured with native Consul health checks
The `mesh-init` container creates a TTL health check for
every container that fits these criteria and the `health-sync` container ensures

@ -0,0 +1,147 @@
---
layout: docs
page_title: AWS ECS
description: >-
Configuration Reference for Consul on AWS ECS (Elastic Container Service).
Do not modify by hand! This is automatically generated documentation.
---
# Configuration Reference
This page details the configuration options for the JSON config format used
by the `consul-ecs` binary. This configuration is passed to the `consul-ecs`
binary as a string using the `CONSUL_ECS_CONFIG_JSON` environment variable.
This configuration format follows a [JSON schema](https://github.com/hashicorp/consul-ecs/blob/main/config/schema.json)
that can be used for validation.
## Terraform Mesh Task Module Configuration
The `mesh-task` Terraform module provides input variables for commonly used fields.
The following table shows which Terraform input variables correspond to each field
of the Consul ECS configuration. Refer to the
[Terraform registry documentation](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task?tab=inputs)
for a complete reference of supported input variables for the `mesh-task` module.
| Terraform Input Variable | Consul ECS Config Field |
| ------------------------ | ------------------------------------- |
| `upstreams` | [`proxy.upstreams`](#proxy-upstreams) |
| `checks` | [`service.checks`](#service-checks) |
| `consul_service_name` | [`service.name`](#service) |
| `consul_service_tags` | [`service.tags`](#service) |
| `consul_service_meta` | [`service.meta`](#service) |
| `consul_namespace` | [`service.namespace`](#service) |
| `consul_partition` | [`service.partition`](#service) |
Each of these Terraform input variables follows the Consul ECS config schema.
The remaining fields of the Consul ECS configuration not listed in this table can be passed
using the `consul_ecs_config` input variable.
# Top-level fields
These are the top-level fields for the Consul ECS configuration format.
| Field | Type | Required | Description |
| ----- | ---- | -------- | ----------- |
| `bootstrapDir` | `string` | required | The directory at which to mount the shared volume where Envoy bootstrap configuration is written by `consul-ecs mesh-init`. |
| `healthSyncContainers` | `array` | optional | The names of containers that will have health check status synced from ECS into Consul. Cannot be specified with `service.checks`. |
| [`proxy`](#proxy) | `object` | optional | Configuration for the sidecar proxy registration with Consul. |
| [`service`](#service) | `object` | required | Configuration for Consul service registration. |
# `service`
Configuration for Consul service registration.
| Field | Type | Required | Description |
| ----- | ---- | -------- | ----------- |
| [`checks`](#service-checks) | `array` | optional | The list of Consul checks for the service. Cannot be specified with `healthSyncContainers`. |
| `enableTagOverride` | `boolean` | optional | Determines if the anti-entropy feature for the service is enabled. |
| `meta` | `object` | optional | Key-value pairs of metadata to include for the Consul service. |
| `name` | `string` | optional | The name the service will be registered as in Consul. Defaults to the Task family name if empty or null. |
| `namespace` | `string` | optional | The Consul namespace where the service will be registered [Consul Enterprise]. |
| `partition` | `string` | optional | The Consul admin partition where the service will be registered [Consul Enterprise]. |
| `port` | `integer` | required | Port the application listens on, if any. |
| `tags` | `array` | optional | List of string values that can be used to add service-level labels. |
| [`weights`](#service-weights) | `object` | optional | Configures the weight of the service in terms of its DNS service (SRV) response. |
# `service.checks`
Defines the Consul checks for the service. Each check may contain these fields.
| Field | Type | Required | Description |
| ----- | ---- | -------- | ----------- |
| `aliasNode` | `string` | optional | Specifies the ID of the node for an alias check. |
| `aliasService` | `string` | optional | Specifies the ID of a service for an alias check. |
| `args` | `array` | optional | Command arguments to run to update the status of the check. |
| `body` | `string` | optional | Specifies a body that should be sent with `HTTP` checks. |
| `checkId` | `string` | optional | The unique ID for this check on the node. Defaults to the check `name`. |
| `failuresBeforeCritical` | `integer` | optional | Specifies the number of consecutive unsuccessful results required before check status transitions to critical. |
| `grpc` | `string` | optional | Specifies a `gRPC` check. Must be an endpoint that supports the [standard gRPC health checking protocol](https://github.com/grpc/grpc/blob/master/doc/health-checking.md). The endpoint will be probed every `interval`. |
| `grpcUseTls` | `boolean` | optional | Specifies whether to use TLS for this gRPC health check. |
| `h2ping` | `string` | optional | Specifies this is an h2ping check. Must be an address, which will be pinged every `interval`. |
| `h2pingUseTls` | `boolean` | optional | Specifies whether TLS is used for an h2ping check. |
| `header` | `object` | optional | Specifies a set of headers that should be set for HTTP checks. Each header can have multiple values. |
| `http` | `string` | optional | Specifies this is an HTTP check. Must be a URL against which request is performed every `interval`. |
| `interval` | `string` | optional | Specifies the frequency at which to run this check. Required for HTTP and TCP checks. |
| `method` | `string` | optional | Specifies the HTTP method to be used for an HTTP check. When no value is specified, `GET` is used. |
| `name` | `string` | optional | The name of the check. |
| `notes` | `string` | optional | Specifies arbitrary information for humans. This is not used by Consul internally. |
| `status` | `string` | optional | Specifies the initial status of the health check. Must be one of `passing`, `warning`, `critical`, `maintenance`, or `null`. |
| `successBeforePassing` | `integer` | optional | Specifies the number of consecutive successful results required before check status transitions to passing. |
| `tcp` | `string` | optional | Specifies this is a TCP check. Must be an IP/hostname plus port to which a TCP connection is made every `interval`. |
| `timeout` | `string` | optional | Specifies a timeout for outgoing connections in the case of a Script, HTTP, TCP, or gRPC check. Must be a duration string, such as `10s` or `5m`. |
| `tlsServerName` | `string` | optional | Specifies an optional string used to set the SNI host when connecting via TLS. |
| `tlsSkipVerify` | `boolean` | optional | Specifies if the certificate for an HTTPS check should not be verified. |
| `ttl` | `string` | optional | Specifies this is a TTL check. Must be a duration string, such as `10s` or `5m`. |
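For illustration, the following sketch shows what a single HTTP check might look like inside the `CONSUL_ECS_CONFIG_JSON` environment variable. The service name, port, and `/health` endpoint are assumptions, not values from this reference:

```shell
# Illustrative only: one HTTP check under `service.checks`.
export CONSUL_ECS_CONFIG_JSON='{
  "bootstrapDir": "/consul",
  "service": {
    "name": "example-client-app",
    "port": 9090,
    "checks": [
      {
        "checkId": "app-http",
        "name": "HTTP health check",
        "http": "http://localhost:9090/health",
        "method": "GET",
        "interval": "30s",
        "timeout": "5s"
      }
    ]
  }
}'
```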
# `service.weights`
Configures the weight of the service in terms of its DNS service (SRV) response.
| Field | Type | Required | Description |
| ----- | ---- | -------- | ----------- |
| `passing` | `integer` | required | Weight for the service when its health checks are passing. |
| `warning` | `integer` | required | Weight for the service when it has health checks in `warning` status. |
# `proxy`
Configuration for the sidecar proxy registration with Consul.
| Field | Type | Required | Description |
| ----- | ---- | -------- | ----------- |
| `config` | `object` | optional | Object value that specifies an opaque JSON configuration. The JSON is stored and returned along with the service instance when called from the API. |
| [`meshGateway`](#proxy-meshgateway) | `object` | optional | Specifies the mesh gateway configuration for the proxy. |
| [`upstreams`](#proxy-upstreams) | `array` | optional | The list of the upstream services that the proxy should create listeners for. |
# `proxy.upstreams`
The list of the upstream services that the proxy should create listeners for. Each upstream may contain these fields.
| Field | Type | Required | Description |
| ----- | ---- | -------- | ----------- |
| `config` | `object` | optional | Specifies opaque configuration options that will be provided to the proxy instance for the upstream. |
| `datacenter` | `string` | optional | Specifies the datacenter to issue the discovery query to. |
| `destinationName` | `string` | required | Specifies the name of the upstream service or prepared query to route the service mesh to. |
| `destinationNamespace` | `string` | optional | Specifies the namespace containing the upstream service [Consul Enterprise]. |
| `destinationPartition` | `string` | optional | Specifies the name of the admin partition containing the upstream service [Consul Enterprise]. |
| `destinationType` | `string` | optional | Specifies the type of discovery query the proxy should use for finding service mesh instances. Must be one of `service`, `prepared_query`, or `null`. |
| `localBindAddress` | `string` | optional | Specifies the address to bind a local listener to. |
| `localBindPort` | `integer` | required | Specifies the port to bind a local listener to. The application will make outbound connections to the upstream from the local port. |
| [`meshGateway`](#proxy-upstreams-meshgateway) | `object` | optional | Specifies the mesh gateway configuration for the proxy for this upstream. |
## `proxy.upstreams.meshGateway`
Specifies the mesh gateway configuration for the proxy for this upstream.
| Field | Type | Required | Description |
| ----- | ---- | -------- | ----------- |
| `mode` | `string` | required | Specifies how the upstream with a remote destination datacenter gets resolved. Must be one of `none`, `local`, or `remote`. |
# `proxy.meshGateway`
Specifies the mesh gateway configuration for the proxy.
| Field | Type | Required | Description |
| ----- | ---- | -------- | ----------- |
| `mode` | `string` | required | Specifies how upstreams with a remote destination datacenter get resolved. Must be one of `none`, `local`, or `remote`. |

@ -27,10 +27,10 @@ module "my_task" {
## Licensing
~> **Warning:** Consul Enterprise is currently only fully supported when [ACLs are enabled](/docs/ecs/production-installation#deploy-acl-controller).
Consul Enterprise [requires a license](/docs/enterprise/license/overview). If running
Consul on ECS with [ACLs enabled](/docs/ecs/production-installation#deploy-acl-controller), the license
will be automatically pulled down from Consul servers.
Currently there is no capability for specifying the license when ACLs are disabled, so if you wish to

@ -1,186 +0,0 @@
---
layout: docs
page_title: Installation - AWS ECS
description: >-
Install Consul Service Mesh on AWS ECS (Elastic Container Service).
---
# Installation
Installing Consul on ECS is a multi-part process:
1. [**Task Module:**](#task-module) Define the [`mesh-task` Terraform module](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task)
to create a task definition with the necessary sidecar containers for your application to join the service mesh.
1. [**Routing:**](#routing) With your tasks as part of the mesh, you must specify their upstream
services and change the URLs the tasks are using so that they're making requests through the service mesh.
1. [**Bind Address:**](#bind-address) Now that all communication is flowing through the service mesh,
you should change the address your application is listening on to `127.0.0.1`
so that it only receives requests through the sidecar proxy.
-> **NOTE:** This page assumes you're familiar with ECS. See [What is Amazon Elastic Container Service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/Welcome.html) for more details.
## Task Module
In order to add the necessary sidecar containers for your task to join the mesh,
you must use the [`mesh-task` module](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task):
```hcl
module "my_task" {
source = "hashicorp/consul-ecs/aws//modules/mesh-task"
version = "<latest version>"
family = "my_task"
container_definitions = [
{
name = "example-client-app"
image = "docker.io/org/my_task:v0.0.1"
essential = true
portMappings = [
{
containerPort = 9090
hostPort = 9090
protocol = "tcp"
}
]
cpu = 0
mountPoints = []
volumesFrom = []
}
]
port = "9090"
retry_join = ["<address of the Consul server>"]
}
```
All possible inputs are documented in the [module reference documentation](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task?tab=inputs); however, some important inputs are worth highlighting:
- `family` is used as the [task definition family](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#family)
but it's also used as the name of the service that gets registered in Consul.
- `container_definitions` accepts an array of [container definitions](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#container_definitions).
This is where you include application containers.
- `port` is the port that your application listens on. This should be set to a
string, not an integer, i.e. `port = "9090"`, not `port = 9090`.
- `retry_join` is passed to the [`-retry-join`](/docs/agent/options#_retry_join) option for the Consul agent. This tells
the agent the location of your Consul servers so that it can join the Consul cluster.
-> **NOTE:** If your tasks run in a public subnet, they must have `assign_public_ip = true`
in their [`network_configuration`](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecs_service#network_configuration) block so that ECS can pull the Docker images.
## ECS Service
To define an ECS Service, reference the mesh-task module's `task_definition_arn` output value
in your `aws_ecs_service` resource:
```hcl
resource "aws_ecs_service" "my_task" {
...
task_definition = module.my_task.task_definition_arn
}
```
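To deploy, run Terraform from the directory that contains this configuration. The exact workflow depends on your setup; a minimal sketch:

```shell
terraform init    # download the mesh-task module and providers
terraform plan    # review the task definition and service to be created
terraform apply   # create the resources in AWS
```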
After running `terraform apply`, you should see your tasks registered in
the Consul UI.
## Routing
Now that your tasks are registered in the mesh, you're able to use the service
mesh to route between them.
In order to make calls through the service mesh, you must configure the sidecar
proxy to listen on a different port for each upstream service your application
needs to call. You then must modify your application to make requests to the sidecar
proxy on that port.
For example, if your application `web` makes calls to another application called `backend`, then you would first configure `backend` as an upstream in the `mesh-task` module:
```hcl
module "web" {
family = "web"
upstreams = [
{
destination_name = "backend"
local_bind_port = 8080
}
]
}
```
- Set the `destination_name` to the name of the upstream service (in this case `backend`).
- Set `local_bind_port` to an unused port. This is the port that the sidecar proxy
will listen on. Any requests to this port will be forwarded over to the `destination_name`.
This does not have to be the port that `backend` is listening on because the service mesh
will handle routing the request to the right port.
If you have multiple upstream services, each one must be listed here.
Next, configure your application to make requests to `localhost:8080` when
it wants to call the `backend` service.
For example, if your service allows configuring the URL for `backend` via the
`BACKEND_URL` environment variable, you would set:
```hcl
module "web" {
family = "web"
upstreams = [
{
destination_name = "backend"
local_bind_port = 8080
}
]
container_definitions = [
{
name = "web"
environment = [
{
name = "BACKEND_URL"
value = "http://localhost:8080"
}
]
...
}
]
...
}
```
## Bind Address
To ensure that your application only receives traffic through the service mesh,
you must change the address that your application is listening on to only the loopback address
(also known as `localhost`, `lo`, and `127.0.0.1`)
so that only the sidecar proxy running in the same task can make requests to it.
If your application is listening on all interfaces, e.g. `0.0.0.0`, then other
applications can call it directly, bypassing its sidecar proxy.
Changing the listening address is specific to the language and framework you're
using in your application. Regardless of which language/framework you're using,
it's a good practice to make the address configurable via environment variable.
For example, in Go you would use:
```go
s := &http.Server{
Addr: "127.0.0.1:8080",
  // ... other server options
}
log.Fatal(s.ListenAndServe())
```
In Django you'd use:
```bash
python manage.py runserver "127.0.0.1:8080"
```
## Next Steps
- Configure a secure [Production Installation](/docs/ecs/get-started/production-installation).
- Now that your applications are running in the service mesh, read about
other [Service Mesh features](/docs/connect).
- View the [Architecture](/docs/ecs/architecture) documentation to understand
what's going on under the hood.

@ -8,9 +8,9 @@ description: >-
# AWS ECS
Consul service mesh applications can be deployed on [AWS Elastic Container Service](https://aws.amazon.com/ecs/) (ECS)
using either our official [Terraform modules](/docs/ecs/terraform/install) or without Terraform by [manually configuring
the task definition](/docs/ecs/manual/install).
## Service Mesh
@ -18,6 +18,17 @@ Using Consul on AWS ECS enables you to add your ECS tasks to the service mesh an
take advantage of features such as zero-trust-security, intentions, observability,
traffic policy, and more.
## Architecture
![Consul on ECS Architecture](/img/consul-ecs-arch.png)
Consul on ECS follows an [architecture](/docs/internals/architecture) similar to other platforms, but each ECS task is a
Consul node. An ECS task runs the user application container(s), as well as a Consul client container for control plane
communication and an [Envoy](https://envoyproxy.io/) sidecar proxy container to facilitate data plane communication for
[Consul Connect](/docs/connect).
For a detailed architecture overview, see the [Architecture](/docs/ecs/architecture) page.
## Getting Started
There are several ways to get started with Consul on ECS.
@ -27,4 +38,6 @@ There are several ways to get started with Consul with ECS.
* The [Consul with Dev Server on Fargate](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/examples/dev-server-fargate) example installation deploys a sample application in ECS using the Fargate launch type.
* The [Consul with Dev Server on EC2](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/examples/dev-server-ec2) example installation deploys a sample application in ECS using the EC2 launch type.
Refer to the [Requirements](/docs/ecs/requirements) and use one of the following sets of instructions when you're ready to install Consul on an existing ECS cluster and add tasks to the service mesh:
* [Install with Terraform](/docs/ecs/terraform/install)
* [Install Manually](/docs/ecs/manual/install)

@ -0,0 +1,213 @@
---
layout: docs
page_title: ACL Controller - AWS ECS
description: >-
Manual Deployment of the ACL Controller for Consul Service Mesh on AWS ECS (Elastic Container Service).
---
# Install the ACL Controller
This topic describes how to manually deploy the ACL controller to [automatically provision ACL tokens](/docs/ecs/architecture#automatic-acl-token-provisioning) for Consul on ECS.
If you are using Terraform, refer to the [Terraform Secure Configuration](/docs/ecs/terraform/secure-configuration) page to deploy the ACL controller.
## Prerequisites
* Your application tasks must include certain tags to be compatible with the ACL controller.
Refer to the [Task Tags](/docs/ecs/manual/install#task-tags) section of the installation page.
* You should be familiar with configuring Consul's secure features, including how to create ACL tokens and policies. Refer to the following [Learn Guides](https://learn.hashicorp.com/collections/consul/security) for an introduction and the [ACL system](/docs/security/acl) documentation for more information.
## Set Up Secrets
The ACL controller supports managing secrets in AWS Secrets Manager.
Before deploying the ACL controller for the first time, you must [create the following secrets](https://docs.aws.amazon.com/secretsmanager/latest/userguide/manage_create-basic-secret.html) from Consul in AWS Secrets Manager.
| Secret | Initial Value | Sample Secret Name |
| --------------------- | -------------- | ------------------------------ |
| Consul server CA cert | Set | `my-consul-ca-cert` |
| Bootstrap ACL Token | Set | `my-consul-bootstrap-token` |
| Consul Client ACL Token | Empty | `<PREFIX>-consul-client-token` |
The secret for the client token must be initially empty. The ACL controller creates the client token in Consul
and stores the token in Secrets Manager. In the secret name, `<PREFIX>` should be replaced with the
[secret name prefix](/docs/ecs/manual/acl-controller#secret-name-prefix) of your choice.
### Secret Name Prefix
The ACL controller requires that the secrets it reads and writes are named with a unique prefix. The name prefix is used
in the [Task Role Policy](/docs/ecs/manual/acl-controller#task-role-policy) to limit the ACL controller's access within
AWS Secrets Manager to only those secrets strictly needed by the ACL controller.
The name prefix should be unique among secrets in your AWS account. We recommend using a short (8 character) random
string for the prefix.
-> **NOTE:** If you are using the ACL controller with multiple ECS clusters, each cluster requires
its own instance of the ACL controller, and each instance of the ACL controller should have a unique
name prefix.
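The following sketch shows one way to generate a prefix and create these secrets with the AWS CLI. The secret names mirror the samples above; the CA certificate file name and bootstrap token value are placeholders:

```shell
# Generate a short random prefix (8 hex characters).
PREFIX=$(openssl rand -hex 4)

aws secretsmanager create-secret --name my-consul-ca-cert \
  --secret-string file://consul-agent-ca.pem
aws secretsmanager create-secret --name my-consul-bootstrap-token \
  --secret-string "<bootstrap token>"
# Created without an initial value; the ACL controller fills it in later.
aws secretsmanager create-secret --name "${PREFIX}-consul-client-token"
```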
## Task Definition
You must create a task definition to deploy the ACL controller in your ECS cluster.
The ACL controller must run in the same ECS cluster hosting your service mesh application
tasks.
The following example shows how the task definition should be configured for the ACL controller.
```json
{
"family": "my-consul-acl-controller",
"networkMode": "awsvpc",
"containerDefinitions": [
{
"name": "acl-controller",
"image": "public.ecr.aws/hashicorp/consul-ecs:<CONSUL_ECS_VERSION>",
"essential": true,
"command": [
"acl-controller",
"-consul-client-secret-arn", "arn:aws:secretsmanager:us-west-2:000000000000:secret:<PREFIX>-consul-client-token",
"-secret-name-prefix", "<PREFIX>",
],
"secrets": [
{
"name": "CONSUL_HTTP_TOKEN",
"valueFrom": "arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-bootstrap-token"
},
{
"name": "CONSUL_CACERT_PEM",
"valueFrom": "arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-ca-cert"
}
],
"environment": [
{
"name": "CONSUL_HTTP_ADDR",
"value": "<Consul server HTTP API address>"
}
]
}
]
}
```
You must include the following top-level fields.
| Field name | Type | Description |
| ----------- | ------- | ---------------------------------------------------------------------------- |
| `family` | string | The task family name of your choice. |
| `networkMode` | string | Must be `awsvpc`, which is the only network mode supported by Consul on ECS. |
In the `containerDefinitions` list, include one container with the following fields.
| Field name | Type | Description |
| ----------- | ------- | -------------------------------------------------------------------------------------------------------------------------------- |
| `name` | string | The container name, which should be `acl-controller`. |
| `image` | string | The `consul-ecs` image. Use our public AWS registry, `public.ecr.aws/hashicorp/consul-ecs`, to avoid rate limits. |
| `command` | list | Must be set as shown. The startup command for the ACL controller. |
| `essential` | boolean | Must be `true` to ensure the health of the ACL controller container affects the health status of the task. |
| `secrets` | list | Must have `CONSUL_HTTP_TOKEN` set to the ACL bootstrap token and `CONSUL_CACERT_PEM` set to the Consul server CA certificate. |
| `environment` | list | Must set the `CONSUL_HTTP_ADDR` environment variable to the address of the HTTP API of your Consul servers. |
The following CLI options are required in the `command` field of the container definition.
| Flag | Type | Description |
| --------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------- |
| `-consul-client-secret-arn` | string | The secret where the ACL controller will store the Consul client token. |
| `-secret-name-prefix` | string | The [secret name prefix](/docs/ecs/manual/acl-controller#secret-name-prefix) that you chose for this ACL controller. |
## ECS Service
Once the task definition is created, define an ECS service in order to start an ACL controller task.
The following example contains the recommended settings for the ACL controller. Refer to
the [ECS service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/service_definition_parameters.html) documentation
to complete the remaining details for your use case.
```json
{
"cluster": "<Your ECS cluster ARN>"
"desiredCount": 1,
"launchType": "FARGATE",
"serviceName": "my-acl-controller",
"taskDefinition": "<task definition ARN>",
...
}
```
| Field name | Type | Description |
| ---------------- | ------- | ---------------------------------------------------------------------------------------------------------------- |
| `cluster` | string | Set to your ECS cluster name or ARN. This must be the same ECS cluster where your service mesh applications run. |
| `desiredCount` | integer | Must be `1`. Only one instance of the ACL controller should run per ECS cluster. |
| `launchType` | string | Consul on ECS supports both the `FARGATE` and `EC2` launch types. |
| `serviceName` | string | The service name of your choice. |
| `taskDefinition` | string | Must be set to the ACL controller [task definition](/docs/ecs/manual/acl-controller#task-definition). |
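For example, once the task definition and service configuration are saved to local files (the file names here are placeholders), you could register and start the controller with the AWS CLI:

```shell
# Register the ACL controller task definition from a local JSON file.
aws ecs register-task-definition --cli-input-json file://acl-controller-task.json

# Create the ECS service that runs a single controller instance.
aws ecs create-service --cli-input-json file://acl-controller-service.json
```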
## AWS IAM Roles
The ECS task and execution roles must be configured to allow the ACL controller access
to the ECS API and Secrets Manager API.
### Task Role Policy
The following example shows the policy needed for the ECS task role for the ACL controller.
This grants the ACL controller permission to list tasks, describe tasks, and read and update
secrets.
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"ecs:ListTasks",
"ecs:DescribeTasks"
],
"Resource": ["*"]
},
{
"Effect": "Allow",
"Action": [
"secretsmanager:GetSecretValue",
"secretsmanager:UpdateSecret"
],
"Resource": [
"arn:aws:secretsmanager:us-west-2:000000000000:secret:<PREFIX>-*"
]
}
]
}
```
The following are the required permissions. You will need to substitute `<PREFIX>` with your chosen [name prefix](/docs/ecs/manual/acl-controller#secret-name-prefix).
| Action | Resource | Description |
| ------------------------------- | ----------------------------------------------------------------- | ---------------------------------------------------------------------------------- |
| `ecs:ListTasks` | `*` | Allow the ACL controller to watch for new tasks. |
| `ecs:DescribeTasks` | `*` | Allow the ACL controller to retrieve details for new tasks. |
| `secretsmanager:GetSecretValue` | `arn:aws:secretsmanager:us-west-2:000000000000:secret:<PREFIX>-*` | Allow the ACL controller to read secrets with a name prefix. |
| `secretsmanager:UpdateSecret` | `arn:aws:secretsmanager:us-west-2:000000000000:secret:<PREFIX>-*` | Allow the ACL controller to store Consul ACL tokens in secrets with a name prefix. |
### Execution Role Policy
The execution role must allow ECS to retrieve the secrets needed
to start the ACL controller task from AWS Secrets Manager, including the ACL
bootstrap token.
The following example shows the required execution role policy.
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"secretsmanager:GetSecretValue"
],
"Resource": [
"arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-bootstrap-token",
"arn:aws:secretsmanager:us-west-2:000000000000:secret:<PREFIX>-consul-client-token"
]
}
]
}
```
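As a sketch of how these policy documents might be attached (the role, policy, and file names are placeholders), you could add them as inline role policies with the AWS CLI:

```shell
aws iam put-role-policy --role-name my-acl-controller-task-role \
  --policy-name acl-controller-task-policy \
  --policy-document file://task-role-policy.json

aws iam put-role-policy --role-name my-acl-controller-execution-role \
  --policy-name acl-controller-execution-policy \
  --policy-document file://execution-role-policy.json
```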

@ -0,0 +1,552 @@
---
layout: docs
page_title: Manual Installation - AWS ECS
description: >-
Manually Install Consul Service Mesh on AWS ECS (Elastic Container Service).
---
# Manual Installation
The following instructions describe how to manually create the ECS task definition using the [`consul-ecs` Docker image](https://gallery.ecr.aws/hashicorp/consul-ecs) without Terraform. Refer to the [Consul ECS Terraform module](/docs/ecs/terraform/install) documentation for an alternative method for installing Consul on ECS.
This topic does not include instructions for creating all AWS resources necessary to install Consul, such as a VPC or the ECS cluster. Refer to the linked guides in the [Getting Started](/docs/ecs#getting-started) section for complete, runnable examples.
## Prerequisites
You should have some familiarity with AWS ECS. See [What is Amazon Elastic Container Service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/Welcome.html) for details.
## Task Definition
You must create a task definition, which includes the following containers:
* Your application container
* An Envoy sidecar-proxy container
* A Consul client container
* A `consul-ecs-mesh-init` container for service mesh setup
* Optionally, a `consul-ecs-health-sync` container to sync ECS health checks into Consul
## Top-level fields
Your task definition must include the following top-level fields.
The `volumes` list contains two [bind mounts](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/bind-mounts.html),
named `consul_data` and `consul_binary`. Bind mounts are directories on the host which can be mounted into one or more containers
in order to share files among containers. For Consul on ECS, certain binaries and configuration are shared among containers
during task startup.
```json
{
"family": "my-example-client-app",
"networkMode": "awsvpc",
"volumes": [
{
"name": "consul_data",
},
{
"name": "consul_binary",
}
],
"containerDefinitions": [...]
"tags": [
{
"key": "consul.hashicorp.com/mesh",
"value": "true"
},
{
"key": "consul.hashicorp.com/service-name",
"value": "example-client-app"
}
]
}
```
| Field name | Type | Description |
| ---------------------- | ------ | ------------------------------------------------------------------------------------------------------------------ |
| `family` | string | The task family name. This is used as the Consul service name by default. |
| `networkMode` | string | Must be `awsvpc`, which is the only network mode supported by Consul on ECS. |
| `volumes` | list | Must be defined as shown above. Volumes are used to share configuration between containers for initial task setup. |
| `containerDefinitions` | list | The list of containers to run in this task (see [Application container](#application-container)). |
### Task Tags
The `tags` list must include the following if you are using the ACL controller in a [secure configuration](/docs/ecs/manual/secure-configuration).
Without these tags, the ACL controller will be unable to provision a service token for the task.
| Tag Key | Tag Value | Description |
| ----------------------------------- | ------------------- | -------------------------------------------------------------------------------------------------------------------------- |
| `consul.hashicorp.com/mesh` | `true` (string) | The ACL controller ignores tasks without this tag set to `true`. |
| `consul.hashicorp.com/service-name` | Consul service name | Specifies the Consul service associated with this task. Required if the service name is different from the task `family`. |
## Application container
First, include your application container in the `containerDefinitions` list
in the task definition.
Ensure that the `containerName` and `condition` fields in the `dependsOn` list
are specified as described in the following example. These are container dependencies,
which must be used to enforce a specific [startup order](/docs/ecs/architecture#task-startup).
By using the following settings, your application container will start after `consul-ecs-mesh-init`
has completed task setup and after `sidecar-proxy` is ready to proxy traffic between
this task and the service mesh.
```json
{
"containerDefinitions": [
{
"name": "example-client-app",
"image": "docker.io/org/my_task:v0.0.1",
"essential": true,
"dependsOn": [
{
"containerName": "consul-ecs-mesh-init",
"condition": "SUCCESS"
},
{
"containerName": "sidecar-proxy",
"condition": "HEALTHY"
}
],
...
}
]
}
```
| Field name | Type | Description |
| ----------- | ------- | -------------------------------------------------------------------------------------------------------------------------------- |
| `name` | string | The name of your application container. |
| `image` | string | The container image used to run your application. |
| `essential` | boolean | Must be `true` to ensure the health of your application container affects the health status of the task. |
| `dependsOn` | list | Must be set as shown above. Container dependencies ensure your application container starts after service mesh setup is complete. |
See the [ECS Task Definition](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html) documentation for a complete reference.
## `sidecar-proxy` container
The `sidecar-proxy` container runs [Envoy proxy](/docs/connect/proxies/envoy) for Consul Connect. In most cases, the container should contain the following parameters and values.
The `mountPoints` list must be set as shown in the following example. This will mount the shared `consul_data` volume into the
`sidecar-proxy` container at the path `/consul`. This volume is where the `consul-ecs-mesh-init` container copies the `envoy-bootstrap.json`
file and the `consul-ecs` binary, which are required to start Envoy. The `dependsOn` list must also be defined as follows to ensure the
`sidecar-proxy` container starts after `consul-ecs-mesh-init` has successfully written these files to the shared volume.
<CodeBlockConfig highlight="8-40">
```json
{
"containerDefinitions": [
{
"name": "example-client-app",
"image": "docker.io/org/my_task:v0.0.1",
...
},
{
"name": "sidecar-proxy",
"image": "envoyproxy/envoy-alpine:<VERSION>",
"essential": false,
"dependsOn": [
{
"containerName": "consul-ecs-mesh-init",
"condition": "SUCCESS"
}
],
"healthCheck": {
"retries": 3,
"command": ["nc", "-z", "127.0.0.1", "20000"],
"timeout": 5,
"interval": 30
},
"mountPoints": [
{
"readOnly": true,
"containerPath": "/consul",
"sourceVolume": "consul_data"
}
],
"ulimits": [
{
"name": "nofile",
"softLimit": 1048576,
"hardLimit": 1048576
}
],
"command": ["envoy", "--config-path", "/consul/envoy-bootstrap.json"],
"entryPoint": ["/consul/consul-ecs", "envoy-entrypoint"],
}
]
}
```
</CodeBlockConfig>
The following table describes the necessary configuration settings.
| Field name | Type | Description |
| ------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `name` | string | The container name, which must be `sidecar-proxy`. |
| `image` | string | The Envoy image. This must be a [supported version of Envoy](/docs/connect/proxies/envoy#supported-versions). |
| `dependsOn` | list | Must be set as shown above to ensure Envoy starts after the `consul-ecs-mesh-init` container has written the `envoy-bootstrap.json` config file for Envoy. |
| `healthCheck` | list | Must be set as shown above to monitor the health of Envoy's primary listener port, which ties into container dependencies and startup ordering. |
| `mountPoints` | list | Must be set as shown above to access the files shared in the `/consul` directory, like the Envoy bootstrap configuration file and the `consul-ecs` binary. |
| `ulimits` | list | The `nofile` ulimit must be raised to a sufficiently high value so that Envoy does not fail to open sockets. |
| `entryPoint` | list | Must be set to the custom Envoy entrypoint, `consul-ecs envoy-entrypoint`, to facilitate graceful shutdown. |
| `command` | list | The startup command. This passes the bootstrap configuration to Envoy. |
-> **NOTE**: Envoy and Consul must be compatible versions. See the [supported versions of Envoy](/docs/connect/proxies/envoy#supported-versions) in the Consul documentation.
## `consul-client` container
Each task must include a Consul client container in order for the task to join your Consul cluster.
<CodeBlockConfig highlight="13-31">
```json
{
"containerDefinitions": [
{
"name": "example-client-app",
"image": "docker.io/org/my_task:v0.0.1",
...
},
{
"name": "sidecar-proxy",
"image": "envoyproxy/envoy-alpine:<ENVOY_VERSION>",
...
    },
    {
      "name": "consul-client",
"image": "public.ecr.aws/hashicorp/consul:<CONSUL_VERSION>",
"mountPoints": [
{
"readOnly": false,
"containerPath": "/consul",
"sourceVolume": "consul_data"
},
{
"containerPath": "/bin/consul-inject",
"sourceVolume": "consul_binary"
}
],
"entryPoint": ["/bin/sh", "-ec"],
"command": [
"cp /bin/consul /bin/consul-inject/consul\n\nECS_IPV4=$(curl -s $ECS_CONTAINER_METADATA_URI_V4 | jq -r '.Networks[0].IPv4Addresses[0]')\n\n\ncat << EOF > /consul/agent-defaults.hcl\naddresses = {\n dns = \"127.0.0.1\"\n grpc = \"127.0.0.1\"\n http = \"127.0.0.1\"\n}\nadvertise_addr = \"$ECS_IPV4\"\nadvertise_reconnect_timeout = \"15m\"\nclient_addr = \"0.0.0.0\"\ndatacenter = \"dc1\"\nenable_central_service_config = true\nleave_on_terminate = true\nports {\n grpc = 8502\n}\nretry_join = [\n \"<Consul server location>",\n]\ntelemetry {\n disable_compat_1.9 = true\n}\n\nEOF\n\ncat << EOF > /consul/agent-extra.hcl\naddresses = {\n dns = \"0.0.0.0\"\n}\nlog_level = \"debug\"\n\nEOF\n\nexec consul agent \\\n -data-dir /consul/data \\\n -config-file /consul/agent-defaults.hcl \\\n -config-file /consul/agent-extra.hcl\n"
]
}
]
}
```
</CodeBlockConfig>
| Field name | Type | Description |
| ------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------- |
| `name` | string | The container name, which should always be `consul-client`. |
| `image` | string | The Consul image. Use our public AWS registry, `public.ecr.aws/hashicorp/consul`, to avoid rate limits. |
| `mountPoints` | list | Must be set as shown above. Volumes are mounted to share information with other containers for task setup. |
| `entryPoint` | list | Must be set to a plain shell so that the startup `command` works properly. |
| `command` | list | Specifies the contents of the [startup script](#consul-client-startup-script). Copy the script and format it into a JSON string. |
### Consul client startup script
The following script is used to start the Consul client for Consul on ECS.
```shell
# Copy the consul binary to a shared volume for `consul-ecs-mesh-init` to use to generate Envoy configuration.
cp /bin/consul /bin/consul-inject/consul
# At runtime, determine the IP address assigned to this ECS Task.
ECS_IPV4=$(curl -s $ECS_CONTAINER_METADATA_URI_V4 | jq -r '.Networks[0].IPv4Addresses[0]')
# Write the Consul agent configuration file.
cat << EOF > /consul/agent-defaults.hcl
addresses = {
dns = "127.0.0.1"
grpc = "127.0.0.1"
http = "127.0.0.1"
}
advertise_addr = "$ECS_IPV4"
advertise_reconnect_timeout = "15m"
client_addr = "0.0.0.0"
datacenter = "dc1"
enable_central_service_config = true
leave_on_terminate = true
ports {
grpc = 8502
}
retry_join = ["<consul server location>"]
telemetry {
disable_compat_1.9 = true
}
EOF
# Start the consul agent.
exec consul agent \
-data-dir /consul/data \
-config-file /consul/agent-defaults.hcl
```
The following table describes the values that you should use to configure the `command` script:
| Field name | Type | Description |
| -------------------- | ------- | ------------------------------------------------------------------------------------------------------------ |
| `addresses.*` | strings | Set the DNS, gRPC, and HTTP addresses to `127.0.0.1` to ensure these are not accessible outside of the task. |
| `advertise_addr` | string | Must be set to the task IP address so that other Consul agents know how to reach this agent. |
| `client_addr` | string | Must be set to an interface reachable by other Consul agents. |
| `datacenter` | string | Must be set to the Consul datacenter this task will join. |
| `leave_on_terminate` | boolean | Must be set to `true` so that the Consul agent leaves the cluster gracefully before exiting. |
| `retry_join` | string | Must be set to your Consul server location(s) so this agent can join the Consul cluster. |
-> **NOTE**: Use `exec` to start the Consul agent so that the Consul agent runs as PID 1. This ensures
the Consul agent directly receives signals from ECS, which is important for graceful shutdown of the Consul agent.
Refer to the [Consul Agent documentation](/docs/agent/options#configuration_files) for a complete reference of Consul agent
configuration options.
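Because the task definition requires the script as a single escaped JSON string, you may find it easier to keep the script in a file and let a tool do the encoding. A sketch, assuming `jq` is available and the script is saved as `consul-client-startup.sh` (a hypothetical file name):

```shell
# -R reads raw text, -s slurps the whole file into one string, and `.`
# prints it JSON-encoded, ready to paste into the "command" list.
jq -Rs '.' consul-client-startup.sh
```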
## `consul-ecs-mesh-init` container
The `consul-ecs-mesh-init` container runs at task startup to set up this task for the Consul service mesh.
It registers the service and proxy for this task with Consul and writes Envoy bootstrap
configuration to a shared volume.
<CodeBlockConfig highlight="18-41">
```json
{
"containerDefinitions": [
{
"name": "example-client-app",
"image": "docker.io/org/my_task:v0.0.1",
...
},
{
"name": "sidecar-proxy",
"image": "envoyproxy/envoy-alpine:<ENVOY_VERSION>",
...
},
{
"name": "consul-client"
"image": "public.ecr.aws/hashicorp/consul:<CONSUL_VERSION>",
...
},
{
"name": "consul-ecs-mesh-init",
"image": "public.ecr.aws/hashicorp/consul-ecs:<CONSUL_ECS_VERSION>",
"command": ["mesh-init"],
"essential": false,
"environment": [
{
"name": "CONSUL_ECS_CONFIG_JSON",
"value": "{\"bootstrapDir\":\"/consul\",\"healthSyncContainers\":[],\"proxy\":{\"upstreams\":[{\"destinationName\":\"example-server-app\",\"localBindPort\":1234}]},\"service\":{\"checks\":[],\"meta\":{},\"name\":\"example-client-app\",\"port\":9090,\"tags\":[]}}"
}
],
"mountPoints": [
{
"readOnly": false,
"containerPath": "/consul",
"sourceVolume": "consul_data"
},
{
"readOnly": true,
"containerPath": "/bin/consul-inject",
"sourceVolume": "consul_binary"
}
]
}
]
}
```
</CodeBlockConfig>
| Field name | Type | Description |
| ----------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------- |
| `name` | string | The container name should be `consul-ecs-mesh-init`. |
| `image` | string | The `consul-ecs` image. Use our public AWS registry, `public.ecr.aws/hashicorp/consul-ecs`, to avoid rate limits. |
| `mountPoints` | list | Must be set as shown above, so the `consul` and `consul-ecs` binaries can be shared among containers for task setup. |
| `command` | list | Set to `["mesh-init"]` so that the container runs the `consul-ecs mesh-init` command. |
| `environment` | list | This must include the [`CONSUL_ECS_CONFIG_JSON`](/docs/ecs/manual-installation#consul_ecs_config_json) variable. See below for details. |
### `CONSUL_ECS_CONFIG_JSON`
Configuration is passed to the `consul-ecs` binary in JSON format using the `CONSUL_ECS_CONFIG_JSON` environment variable.
The following is an example of the configuration that might be used for a service named `example-client-app` with one upstream
service named `example-server-app`. The `proxy` and `service` blocks include information used by `consul-ecs-mesh-init` to perform
[service registration](/docs/discovery/services) with Consul during task startup. The same configuration format is used for
the `consul-ecs-health-sync` container.
```json
{
"bootstrapDir": "/consul",
"healthSyncContainers": [],
"proxy": {
"upstreams": [
{
"destinationName": "example-server-app",
"localBindPort": 1234
}
]
},
"service": {
"checks": [],
"meta": {},
"name": "example-client-app",
"port": 9090,
"tags": []
}
}
```
| Field name | Type | Description |
| ---------------------- | ------ | ----------------------------------------------------------------------------------------------------------------------------------------------- |
| `bootstrapDir` | string | This is the path of a shared volume that is mounted to other containers, where `consul-ecs-mesh-init` will write out Envoy configuration. |
| `healthSyncContainers` | list | Used for [health status syncing](/docs/ecs/manual-installation#consul-ecs-health-sync-container) from ECS to Consul. See below for details. |
| `proxy.upstreams` | list | The upstream services that your application calls over the service mesh, if any. The `destinationName` and `localBindPort` fields are required. |
| `service.name` | string | The name used to register this service into the Consul service catalog. |
| `service.port` | integer | The port your application listens on. Set to `0` if your application does not listen on any port. |
| `service.checks` | list | Consul [checks](/docs/discovery/checks) to include so that Consul can run health checks against your application. |
See the [Configuration Reference](/docs/ecs/configuration-reference) for a complete reference of fields.
## `consul-ecs-health-sync` container
Optionally, Consul ECS can sync ECS health checks for this task into Consul.
This allows you to configure a health check for your application in one place and
see a consistent health status in both ECS and Consul.
For example, the following defines an ECS health check command that runs `curl localhost:9090/health`:
<CodeBlockConfig highlight="6-11">
```json
{
"containerDefinitions": [
{
"name": "example-client-app",
"image": "docker.io/org/my_task:v0.0.1",
"healthCheck": {
"retries": 3,
"command": ["CMD-SHELL", "curl localhost:9090/health"],
"timeout": 5,
"interval": 30
},
...
},
...
]
}
```
</CodeBlockConfig>
First, define which containers need their health status synced into Consul. To do this,
add the container name(s) to the `healthSyncContainers` list of the `CONSUL_ECS_CONFIG_JSON` variable,
as shown in the following example. This configuration must be passed to both the `consul-ecs-mesh-init`
and `consul-ecs-health-sync` containers.
<CodeBlockConfig highlight="3-3">
```json
{
"bootstrapDir": "/consul",
"healthSyncContainers": ["example-client-app"],
...
}
```
</CodeBlockConfig>
Next, set the `CONSUL_ECS_CONFIG_JSON` variable for the `consul-ecs-mesh-init` container.
The following example shows how the `CONSUL_ECS_CONFIG_JSON` variable should be formatted.
The JSON configuration is compacted down to a single line and escaped.
<CodeBlockConfig highlight="7-10">
```json
{
"containerDefinitions": [
{
"name": "consul-ecs-mesh-init",
"image": "public.ecr.aws/hashicorp/consul-ecs:<VERSION>",
"environment": [
{
"name": "CONSUL_ECS_CONFIG_JSON",
"value": "{\"bootstrapDir\":\"/consul\",\"healthSyncContainers\":[\"example-client-app\"],\"proxy\":{\"upstreams\":[{\"destinationName\":\"example-server-app\",\"localBindPort\":1234}]},\"service\":{\"checks\":[],\"meta\":{},\"name\":\"example-client-app\",\"port\":9090,\"tags\":[]}}"
}
],
...
},
...
]
}
```
</CodeBlockConfig>
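If you keep the configuration in a readable JSON file, you can produce the compacted, escaped string in one step. A sketch, assuming `jq` is available and a hypothetical `consul-ecs-config.json` file:

```shell
# `-c` compacts the JSON to one line; `-R` then re-reads that line as a
# raw string and prints it JSON-encoded, ready for the "value" field.
jq -c . consul-ecs-config.json | jq -R .
```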
Finally, include the `consul-ecs-health-sync` container in the `containerDefinitions` list.
Pass the same value for `CONSUL_ECS_CONFIG_JSON` for both the `consul-ecs-health-sync`
and `consul-ecs-mesh-init` containers.
<CodeBlockConfig highlight="23-40">
```json
{
"containerDefinitions": [
{
"name": "example-client-app",
"image": "docker.io/org/my_task:v0.0.1",
...
},
{
"name": "sidecar-proxy",
"image": "envoyproxy/envoy-alpine:<ENVOY_VERSION>",
...
},
{
"name": "consul-client"
"image": "public.ecr.aws/hashicorp/consul:<CONSUL_VERSION>",
...
},
{
"name": "consul-ecs-mesh-init",
"image": "public.ecr.aws/hashicorp/consul-ecs:<CONSUL_ECS_VERSION>",
...
},
{
"name": "consul-ecs-health-sync",
"image": "public.ecr.aws/hashicorp/consul-ecs:<CONSUL_ECS_VERSION>",
"command": ["health-sync"],
"essential": false,
"dependsOn": [
{
"containerName": "consul-ecs-mesh-init",
"condition": "SUCCESS"
}
],
"environment": [
{
"name": "CONSUL_ECS_CONFIG_JSON",
"value": "{\"bootstrapDir\":\"/consul\",\"healthSyncContainers\":[\"example-client-app\"],\"proxy\":{\"upstreams\":[{\"destinationName\":\"example-server-app\",\"localBindPort\":1234}]},\"service\":{\"checks\":[],\"meta\":{},\"name\":\"example-client-app\",\"port\":9090,\"tags\":[]}}"
}
]
}
]
}
```
</CodeBlockConfig>
| Field name | Type | Description |
| ------------- | ------ | ----------------------------------------------------------------------------------------------------------------- |
| `name` | string | The container name, which must be `consul-ecs-health-sync`. |
| `image` | string | The `consul-ecs` image. Use our public AWS registry, `public.ecr.aws/hashicorp/consul-ecs`, to avoid rate limits. |
| `command` | list | Must be set to `["health-sync"]` to run the `consul-ecs health-sync` command. |
| `dependsOn` | list | Must be set as shown above to ensure the `health-sync` container starts after service registration has completed. |
| `environment` | list | Must include the `CONSUL_ECS_CONFIG_JSON` variable to pass configuration to the `consul-ecs health-sync` command. |
## Next Steps
* Create the task definition using the [AWS Console](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ecs-taskdefinition.html) or the [AWS CLI](https://docs.aws.amazon.com/cli/latest/reference/ecs/register-task-definition.html), or another method of your choice.
* Create an [ECS Service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html) to start tasks using the task definition.
* Follow the [Secure Configuration](/docs/ecs/manual/secure-configuration) instructions to get production-ready.

@ -0,0 +1,215 @@
---
layout: docs
page_title: Secure Configuration - AWS ECS
description: >-
Manual Secure Configuration of the Consul Service Mesh on AWS ECS (Elastic Container Service).
---
# Secure Configuration
For a production-ready installation of Consul on ECS, you will need to make sure that the cluster is secured.
A secure Consul cluster should include the following:
1. [TLS Encryption](/docs/security/encryption#rpc-encryption-with-tls) for RPC communication between Consul clients and servers.
1. [Gossip Encryption](/docs/security/encryption#gossip-encryption) for encrypting gossip traffic.
1. [Access Control (ACLs)](/docs/security/acl) for authentication and authorization for Consul clients and services on the mesh.
-> **NOTE:** In this topic, we assume that you have already configured your Consul server with the security-related features.
## Prerequisites
* You should already have followed the [installation instructions](/docs/ecs/manual/install) to understand how to define
the necessary components of the task definition for Consul on ECS.
* You should be familiar with [specifying sensitive data](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/specifying-sensitive-data.html) on ECS.
* You should be familiar with configuring Consul's secure features, including how to create ACL tokens and policies. Refer to the following [Learn Guides](https://learn.hashicorp.com/collections/consul/security) for an introduction and the [ACL system](/docs/security/acl) documentation for more information.
## ACL Tokens
You must create two types of ACL tokens for Consul on ECS:
* **Client tokens:** used by the `consul-client` containers to join the Consul cluster
* **Service tokens:** used by sidecar containers for service registration and health syncing
The following sections describe the ACL polices which must be associated with these token types.
-> **NOTE:** This section describes how operators would create ACL tokens by hand. To ease operator
burden, the ACL Controller can automatically create ACL tokens for Consul on ECS. Refer to the
[ACL Controller](/docs/ecs/manual/acl-controller) page for installation details.
### Create Consul client token
You must create a token for the Consul client. This is a shared token used by the `consul-client`
containers to join the Consul cluster.
The following is the ACL policy needed for the Consul client token:
```hcl
node_prefix "" {
policy = "write"
}
service_prefix "" {
policy = "read"
}
```
This policy allows `node:write` for any node name, which is necessary because the Consul node
names on ECS are not known until runtime.
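For example, you could create the policy and token with the Consul CLI. This is a sketch; the policy name is arbitrary and it assumes the rules above are saved as `client-policy.hcl`:

```shell
# Create the policy from the rules file, then a token that uses it.
consul acl policy create -name ecs-client-policy -rules @client-policy.hcl
consul acl token create -description "ECS client token" -policy-name ecs-client-policy
```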
### Create service tokens
Service tokens should be associated with a [service identity](https://www.consul.io/docs/security/acl/acl-system#acl-service-identities).
The service identity includes `service:write` permissions for the service and sidecar proxy.
The following example shows how to use the Consul CLI to create a service token for a service named `example-client-app`:
```shell
consul acl token create -service-identity=example-client-app ...
```
-> **NOTE**: You will need to create one service token for each registered Consul service in ECS,
including when new services are added to the service mesh.
## Secret storage
You should securely store the following secrets in order to make them available to ECS tasks.
1. Consul Server CA certificate
2. Consul gossip encryption key
3. Consul client ACL token
4. Consul service ACL tokens (one per service)
These secrets can be securely stored and passed to ECS tasks using either of the following AWS secret services:
* [AWS Systems Manager Parameter Store](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/specifying-sensitive-data-parameters.html)
* [AWS Secrets Manager](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/specifying-sensitive-data-secrets.html)
Once the secrets are stored they can be referenced using their ARN. The following shows
example secret ARNs when using AWS Secrets Manager:
| Secret | Sample Secret ARN |
| ---------------------- | ---------------------------------------------------------------------------------- |
| Consul Server CA Cert | `arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-ca-cert` |
| Gossip encryption key | `arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-gossip-key` |
| Client token | `arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-client-token` |
| Service token | `arn:aws:secretsmanager:us-west-2:000000000000:secret:my-example-client-app-token` |
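For example, with AWS Secrets Manager you might create these secrets as follows. The names match the samples above; the CA certificate file and token values are placeholders:

```shell
aws secretsmanager create-secret --name my-consul-ca-cert \
  --secret-string file://consul-agent-ca.pem
aws secretsmanager create-secret --name my-consul-gossip-key \
  --secret-string "$(consul keygen)"
aws secretsmanager create-secret --name my-consul-client-token \
  --secret-string "<client token>"
aws secretsmanager create-secret --name my-example-client-app-token \
  --secret-string "<service token>"
```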
## Configure `consul-client`
The following secrets must be passed to the `consul-client` container:
* Consul server CA certificate
* Gossip encryption key
* Consul client ACL token
The following example shows how to include these secrets in the task definition. Each entry in the `secrets`
list sets the environment variable given by `name` to the corresponding secret value for this container.
ECS automatically fetches the secret values specified in the `valueFrom` fields during task provisioning.
```json
{
"containerDefinitions": [
{
"name": "consul-client"
"image": "public.ecr.aws/hashicorp/consul:<CONSUL_VERSION>",
"secrets": [
{
"name": "CONSUL_CACERT",
"valueFrom": "arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-ca-cert"
},
{
"name": "CONSUL_GOSSIP_ENCRYPTION_KEY",
"valueFrom": "arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-gossip-key"
},
{
"name": "AGENT_TOKEN",
"valueFrom": "arn:aws:secretsmanager:us-west-2:000000000000:secret:my-consul-client-token"
}
]
},
...
]
}
```
Next, update Consul configuration options to pass the secrets to the Consul client.
The following is an example of the *additional* content to include in the `consul-client` startup script. Refer to the [install
page](/docs/ecs/manual/install#consul-client-container) for the remainder of the startup script and how to pass this
script to the container.
<CodeBlockConfig highlight="3-4,10-29">
```shell
...
# Write the CA Cert to a file
echo "$CONSUL_CACERT" > /tmp/consul-ca-cert.pem
# Write the Consul agent configuration file.
cat << EOF > /consul/agent-defaults.hcl
...
# Configure gossip encryption key
encrypt = "$CONSUL_GOSSIP_ENCRYPTION_KEY"
# Configure TLS settings
auto_encrypt = {
tls = true
ip_san = ["$ECS_IPV4"]
}
ca_file = "/tmp/consul-ca-cert.pem"
verify_outgoing = true
# Configure ACLs
acl {
enabled = true
default_policy = "deny"
down_policy = "async-cache"
tokens {
agent = "$AGENT_TOKEN"
}
}
EOF
```
</CodeBlockConfig>
The following table describes the additional fields that must be included in the Consul client configuration file.
| Field name | Type | Description |
| --------------------------------------------------------- | ------- | ------------------------------------------------------------------------------------ |
| [`encrypt`](/docs/agent/options#_encrypt) | string | Specifies the gossip encryption key. |
| [`ca_file`](/docs/agent/options#ca_file) | string | Specifies the Consul server CA cert for TLS verification. |
| [`acl.enabled`](/docs/agent/options#acl_enabled) | boolean | Enable ACLs for this agent. |
| [`acl.tokens.agent`](/docs/agent/options#acl_tokens_agent) | string | Specifies the Consul client token which authorizes this agent with Consul servers. |
## Configure `consul-ecs-mesh-init` and `consul-ecs-health-sync`
Both `consul-ecs-mesh-init` and `consul-ecs-health-sync` containers need to be configured with
the service ACL token. This allows these containers to make HTTP API requests to the local
Consul client for service registration and health syncing.
The following shows how to set the `CONSUL_HTTP_TOKEN` variable to the service token for the `example-client-app` service,
if the token is stored in AWS Secrets Manager.
<CodeBlockConfig highlight="5-8">
```json
{
"containerDefinitions": [
{
"secrets": [
{
"name": "CONSUL_HTTP_TOKEN",
"valueFrom": "arn:aws:secretsmanager:us-west-2:000000000000:secret:my-example-client-app-token"
}
]
},
...
],
...
}
```
</CodeBlockConfig>

@ -9,8 +9,18 @@ description: >-
The following requirements must be met in order to install Consul on ECS:
1. **Terraform:** The tasks that you want to add to the service mesh must first be modeled in Terraform.
1. **Launch Type:** Fargate and EC2 launch types are supported.
1. **Subnets:** ECS Tasks can run in private or public subnets. Tasks must have [network access](https://aws.amazon.com/premiumsupport/knowledge-center/ecs-pull-container-api-error-ecr/) to Amazon ECR or other public container registries to pull images.
1. **Consul Servers:** You can use your own Consul servers running on virtual machines or use [HashiCorp Cloud Platform Consul](https://www.hashicorp.com/cloud-platform) to host the servers for you. For development purposes or testing, you may use the `dev-server` [Terraform module](https://github.com/hashicorp/terraform-aws-consul-ecs/tree/main) that runs the Consul server as an ECS task. The `dev-server` does not support persistent storage.
1. **ACL Controller:** If you are running a secure Consul installation with ACLs enabled, configure the ACL controller.
1. **Sidecar containers:** Consul on ECS requires two sidecar containers to run in each ECS task: a
Consul agent container and a sidecar proxy container. These additional sidecar containers must
be included in the ECS task definition. The [Consul ECS Terraform module](/docs/ecs/terraform/install)
will include these sidecar containers for you. If you do not use Terraform, you can construct
the task definition yourself by following [our documentation](/docs/ecs/manual/install).
1. **Routing:** With your application running in tasks as part of the mesh, you must specify the
upstream services that your application calls. You will also need to change the URLs your
application uses to ensure the application is making requests through the service mesh.
1. **Bind Address:** Once all communication is flowing through the service mesh, you should change
the address your application is listening on to `127.0.0.1` so that it only receives requests
through the sidecar proxy.

@ -0,0 +1,276 @@
---
layout: docs
page_title: Installing Consul on AWS ECS using Terraform
description: >-
Install Consul Service Mesh on AWS ECS with Terraform (Elastic Container Service).
---
# Installation with Terraform
This topic describes how to use the [`mesh-task`](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task) Terraform module to launch your application in AWS ECS as part of Consul service mesh. If you do not use Terraform, see the [Manual Installation](/docs/ecs/manual-installation) page to install Consul on ECS without Terraform.
This topic does not include instructions for creating all AWS resources necessary to install Consul, such as a VPC or the ECS cluster. Refer to the linked guides in the [Getting Started](/docs/ecs#getting-started) section for complete, runnable examples.
## Overview
This topic describes the following procedure:
1. Create Terraform configuration files for the necessary components:
* [ECS task definition](#using-the-mesh-task-module): Use the `mesh-task` module to create an ECS task definition for Consul on ECS
* [ECS service](#ecs-service): Use the `aws_ecs_service` resource to create an ECS service that schedules service mesh tasks to run on ECS
2. [Run Terraform](#running-terraform) to deploy the resources in AWS
## Prerequisites
* You should have some familiarity with using Terraform. Refer to the [Terraform documentation](https://www.terraform.io/docs) to learn about infrastructure as code and how to get started with Terraform.
* You should also be familiar with AWS ECS before following these instructions. See [What is Amazon Elastic Container Service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/Welcome.html) for details.
## Using the Mesh Task Module
To run an application in ECS with Consul service mesh, you must create an ECS task definition, which includes your application container(s)
and additional sidecar containers, such as the Consul agent container and the Envoy sidecar proxy container.
The [`mesh-task` module](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task) will automatically include the necessary sidecar containers.
The following example shows a Terraform configuration file that creates a task definition with an application container called `example-client-app` in a file called `mesh-task.tf`:
<CodeBlockConfig filename="mesh-task.tf">
```hcl
module "my_task" {
source = "hashicorp/consul-ecs/aws//modules/mesh-task"
version = "<latest version>"
family = "my_task"
container_definitions = [
{
name = "example-client-app"
image = "docker.io/org/my_task:v0.0.1"
essential = true
portMappings = [
{
containerPort = 9090
hostPort = 9090
protocol = "tcp"
}
]
cpu = 0
mountPoints = []
volumesFrom = []
}
]
port = 9090
retry_join = ["<address of the Consul server>"]
}
```
</CodeBlockConfig>
The following fields are required. Refer to the [module reference documentation](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task?tab=inputs) for a complete reference.
| Input Variable | Type | Description |
| ----------------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `source` | string | Must be set to the source location of the `mesh-task` module, `hashicorp/consul-ecs/aws//modules/mesh-task`. |
| `version` | string | Must be set to the version of the `mesh-task` module. |
| `family` | string | The [ECS task definition family](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#family). The family is also used as the Consul service name by default. |
| `container_definitions` | list | This is the list of [container definitions](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#container_definitions) for the task definition. This is where you include your application containers. |
| `essential` | boolean | Must be `true` to ensure the health of your application container affects the health status of the task. |
| `port` | integer | The port that your application listens on, if any. If your application does not listen on a port, set `outbound_only = true`. |
| `retry_join`            | list     | This is the [`retry_join`](/docs/agent/options#_retry_join) option for the Consul agent, which specifies the locations of your Consul servers. |
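For the `outbound_only` case mentioned above, the following is a minimal sketch of a task that makes only outbound calls; the family name and image are illustrative.

```hcl
module "my_worker" {
  source  = "hashicorp/consul-ecs/aws//modules/mesh-task"
  version = "<latest version>"

  family = "my_worker"
  container_definitions = [
    {
      name      = "my-worker" # illustrative container name
      image     = "docker.io/org/my_worker:v0.0.1"
      essential = true
    }
  ]

  # No inbound port: this task only makes outbound calls through the mesh.
  outbound_only = true
  retry_join    = ["<address of the Consul server>"]
}
```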
### Running Terraform
You will need to run Terraform to create the task definition.
Save the Terraform configuration for the task definition to a file, such as `mesh-task.tf`.
You should place this file in a directory alongside other Terraform configuration files for your project.
The `mesh-task` module requires the AWS Terraform provider. The following example shows how to include
and configure the AWS provider in a file called `provider.tf`. Refer to the [AWS Terraform provider](https://registry.terraform.io/providers/hashicorp/aws/latest/docs)
documentation for complete configuration details.
<CodeBlockConfig filename="provider.tf">
```hcl
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "<latest version>"
}
}
}
provider "aws" {
region = "<AWS region>"
...
}
```
</CodeBlockConfig>
Additional AWS resources for your project can be included in additional Terraform configuration files
in the same directory. The following example shows a basic project directory:
```shell-session
$ ls
mesh-task.tf
provider.tf
...
```
Terraform should be run in your project directory as follows.
* Run `terraform init` first to download dependencies, such as Terraform providers.
* Run `terraform apply` to have Terraform create AWS resources, such as the task definition from the `mesh-task` module.
Terraform automatically reads all files in the current directory that have a `.tf` file extension.
Refer to the [Terraform documentation](https://www.terraform.io/docs) for more information and Terraform best practices.
## ECS Service
[ECS services](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html) are one of the most common
ways to start tasks using a task definition.
To define an ECS service, reference the `mesh-task` module's `task_definition_arn` output value
in your `aws_ecs_service` resource. The following example shows how to include the service in the `mesh-task.tf` file.
<CodeBlockConfig filename="mesh-task.tf" highlight="6-12">
```hcl
module "my_task" {
source = "hashicorp/consul-ecs/aws//modules/mesh-task"
...
}
resource "aws_ecs_service" "my_task" {
name = "my_task_service"
task_definition = module.my_task.task_definition_arn
launch_type = "FARGATE"
propagate_tags = "TASK_DEFINITION"
...
}
```
</CodeBlockConfig>
This is a partial configuration to highlight some important fields.
See the [`aws_ecs_service`](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecs_service) documentation for a complete reference.
| Input Variable | Type | Description |
| ----------------- | ------- | ------------------------------------------------------------------------------------------------------------------- |
| `name` | string | The name of the ECS service. This is required by AWS but is not used by Consul service mesh. |
| `task_definition` | string | The task definition used to start tasks. Set this to the task definition ARN returned by the `mesh-task` module. |
| `launch_type` | string | The launch type. Consul on ECS supports the `FARGATE` and `EC2` launch types. |
| `propagate_tags` | string | This must be set to `TASK_DEFINITION` so that tags added by `mesh-task` to the task definition are copied to tasks. |
After including the ECS service in your Terraform configuration, run `terraform apply`
from your project directory to create the ECS service resource. The ECS service will
soon start your application in a task. The task will automatically register itself
into the Consul service catalog during startup.
-> **NOTE:** If your tasks run in a public subnet, they must have `assign_public_ip = true`
in their [`network_configuration`](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecs_service#network_configuration) block so that ECS can pull the Docker images.
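For example, the following sketch shows an `aws_ecs_service` with a `network_configuration` block for a public subnet; the subnet and security group IDs are placeholders.

```hcl
resource "aws_ecs_service" "my_task" {
  name            = "my_task_service"
  task_definition = module.my_task.task_definition_arn
  launch_type     = "FARGATE"
  propagate_tags  = "TASK_DEFINITION"

  network_configuration {
    subnets          = ["subnet-0123456789abcdef0"] # placeholder public subnet ID
    security_groups  = ["sg-0123456789abcdef0"]     # placeholder security group ID
    assign_public_ip = true                         # required to pull images in a public subnet
  }
}
```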
## Routing
Now that your tasks are registered in the mesh, you're able to use the service
mesh to route between them.
In order to make calls through the service mesh, you must configure the sidecar
proxy to listen on a different port for each upstream service your application
needs to call. You then must modify your application to make requests to the sidecar
proxy on that port.
For example, if your application `web` makes calls to another application called `backend`, then you would first configure the `mesh-task` module's upstream(s):
```hcl
module "web" {
family = "web"
upstreams = [
{
destinationName = "backend"
localBindPort = 8080
}
]
}
```
| Input Variable | Type | Description |
| ----------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `destinationName` | string | The name of the upstream service, as it is registered in the Consul service catalog. |
| `localBindPort` | integer | Requests to this port will be forwarded by the proxy to the upstream service. This must be an unused port, but does not need to match the upstream service port. |
If you have multiple upstream services, each one must be listed in `upstreams`, as in the sketch below.
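The following sketch adds a second, hypothetical `cache` upstream alongside `backend`:

```hcl
module "web" {
  family = "web"
  upstreams = [
    {
      destinationName = "backend"
      localBindPort   = 8080
    },
    {
      destinationName = "cache" # hypothetical second upstream
      localBindPort   = 8081    # each upstream needs its own unused local port
    }
  ]
}
```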
Next, configure your application to make requests to `localhost:8080` when
it wants to call the `backend` service.
For example, if your service allows configuring the URL for `backend` via the
`BACKEND_URL` environment variable, you would set:
```hcl
module "web" {
family = "web"
upstreams = [
{
destinationName = "backend"
localBindPort = 8080
}
]
container_definitions = [
{
name = "web"
environment = [
{
name = "BACKEND_URL"
value = "http://localhost:8080"
}
]
...
}
]
...
}
```
## Bind Address
To ensure that your application only receives traffic through the service mesh,
you must change the address that your application is listening on to only the loopback address
(also known as `localhost`, `lo`, and `127.0.0.1`)
so that only the sidecar proxy running in the same task can make requests to it.
If your application is listening on all interfaces, e.g. `0.0.0.0`, then other
applications can call it directly, bypassing its sidecar proxy.
Changing the listening address is specific to the language and framework of your application. Whatever language or framework you use, it's a good practice to make the address configurable via an environment variable.
For example, in Go you would use:
```go
s := &http.Server{
Addr: "127.0.0.1:8080",
	// ... other server settings ...
}
log.Fatal(s.ListenAndServe())
```
In Django you'd use:
```bash
python manage.py runserver "127.0.0.1:8080"
```
## Next Steps
- Follow the [Secure Configuration](/docs/ecs/secure-configuration) guide to get production-ready.
- Now that your applications are running in the service mesh, read about
other [Service Mesh features](/docs/connect).
- View the [Architecture](/docs/ecs/architecture) documentation to understand
what's going on under the hood.

@ -90,7 +90,7 @@ module "my_task" {
}
]
port = "9090"
port = 9090
retry_join = ["<address of the Consul server>"]
}
```
@ -111,5 +111,5 @@ resource.
Now that your task(s) are migrated to the `mesh-task` module,
- Start at the [ECS Service section](/docs/ecs/get-started/install#ecs-service) of the Installation Guide to continue installing Consul on ECS.
- Start at the [ECS Service section](/docs/ecs/terraform/install#ecs-service) of the Installation Guide to continue installing Consul on ECS.
- Refer to the [`mesh-task` reference documentation](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task?tab=inputs) for all available inputs to your mesh tasks.

@ -1,11 +1,11 @@
---
layout: docs
page_title: Production Installation - AWS ECS
page_title: Secure Configuration - AWS ECS
description: >-
Production Installation of the Consul Service Mesh on AWS ECS (Elastic Container Service).
Secure Configuration of the Consul Service Mesh on AWS ECS (Elastic Container Service) with Terraform.
---
# Production Installation
# Secure Configuration
For a production-ready installation of Consul on ECS, you will need to make sure that the cluster is secured.
A secure Consul cluster should include the following:
@ -68,7 +68,7 @@ deploying this controller.
## Deploy Services
Once the ACL controller is up and running, you will be able to deploy services on the mesh using the [`mesh-task` module](https://registry.terraform.io/modules/hashicorp/consul-ecs/aws/latest/submodules/mesh-task).
Start with the basic configuration for the [Task Module](/docs/ecs/get-started/install#task-module) and specify additional settings to make the configuration production-ready.
Start with the basic configuration for the [Task Module](/docs/ecs/terraform/install#task-module) and specify additional settings to make the configuration production-ready.
First, you will need to create an AWS Secrets Manager secret for the gossip encryption key that the Consul clients
should use.
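As a sketch, the secret could also be created with Terraform; the resource names below are illustrative, and this assumes the `random` provider is available alongside the AWS provider.

```hcl
# Generate a 32-byte gossip key and store it in AWS Secrets Manager.
# All names here are illustrative.
resource "random_id" "gossip_key" {
  byte_length = 32
}

resource "aws_secretsmanager_secret" "gossip_key" {
  name = "my-consul-gossip-key"
}

resource "aws_secretsmanager_secret_version" "gossip_key" {
  secret_id     = aws_secretsmanager_secret.gossip_key.id
  secret_string = random_id.gossip_key.b64_std
}
```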
@ -104,5 +104,5 @@ module "my_task" {
}
```
Now you can deploy your services! Follow the rest of the steps in the [Installation instructions](/docs/ecs/get-started/install#task-module)
Now you can deploy your services! Follow the rest of the steps in the [Installation instructions](/docs/ecs/terraform/install#task-module)
to deploy and connect your services.

@ -82,7 +82,8 @@ Your Consul configuration must meet the following requirements to use admin part
One of the primary use cases for admin partitions is for enabling a service mesh across multiple Kubernetes clusters. The following requirements must be met to create admin partitions on Kubernetes:
* Two or more Kubernetes clusters. Consul servers must be deployed to a single cluster. The other clusters should run Consul clients.
* If you are deploying Consul servers on Kubernetes, then ensure that the Consul servers are deployed within the same Kubernetes cluster. Consul servers may be deployed external to Kubernetes and configured using the `externalServers` stanza.
* Consul clients deployed on the same Kubernetes cluster as the Consul servers must use the `default` partition. If the clients are required to run on a non-default partition, then the clients must be deployed in a separate Kubernetes cluster.
* A Consul Enterprise license must be installed on each Kubernetes cluster.
* The Helm chart for consul-k8s v0.39.0 or greater.
* Consul 1.11.1-ent or greater.

@ -52,8 +52,7 @@ in their local cluster through the gossip mechanism and make RPC requests to
them. LAN Gossip (OSS) is an open intra-cluster networking model, and Network
Segments (Enterprise) creates multiple segments within one cluster.
**Federated Cluster:** A cluster of clusters with a Consul server group per
cluster each set per "datacenter". These Consul servers are federated together
**Federated Cluster:** A set of connected clusters, each representing a unique Consul “datacenter”. These Consul servers are federated together
over the WAN. Consul clients make use of resources in federated clusters by
forwarding RPCs through the Consul servers in their local cluster, but they
never interact with remote Consul servers directly. There are currently two

@ -231,29 +231,29 @@ Pod annotations can be used to configure the injection behavior.
injector's configuration if desired.
- `consul.hashicorp.com/transparent-proxy` - If this is "true", this Pod
will run with transparent proxy enabled. This means you can use Kubernetes
DNS to access upstream services and all inbound and outbound traffic within
the pod is redirected to go through the proxy.
- `consul.hashicorp.com/transparent-proxy-overwrite-probes` - If this is "true"
and transparent proxy is enabled, the Connect injector will overwrite Kubernetes
HTTP probes to point to the Envoy proxy.
and transparent proxy is enabled, the Connect injector will overwrite Kubernetes
HTTP probes to point to the Envoy proxy.
- `consul.hashicorp.com/transparent-proxy-exclude-inbound-ports` - A comma-separated
list of inbound ports to exclude from traffic redirection when running in transparent proxy
mode.
- `consul.hashicorp.com/transparent-proxy-exclude-outbound-cidrs` - A comma-separated
list of outbound CIDRs to exclude from traffic redirection when running in transparent proxy
mode.
- `consul.hashicorp.com/transparent-proxy-exclude-outbound-ports` - A comma-separated
list of outbound ports to exclude from traffic redirection when running in transparent proxy
mode.
- `consul.hashicorp.com/transparent-proxy-exclude-uids` - A comma-separated
list of additional user IDs to exclude from traffic redirection when running in transparent proxy
mode.
- `consul.hashicorp.com/connect-service` - For pods that accept inbound
connections, this specifies the name of the service that is being
@ -338,6 +338,9 @@ Pod annotations can be used to configure the injection behavior.
consul.hashicorp.com/service-tags: foo,bar,baz
```
If you need your tag to have a comma in it, you can escape the comma with `\,`. For example,
`consul.hashicorp.com/service-tags: foo\,bar\,baz` will become the single tag `foo,bar,baz`.
- `consul.hashicorp.com/service-meta-<YOUR_KEY>` - Set Consul meta key/value
pairs that will be applied to the Consul service and its sidecar.
The key will be what comes after `consul.hashicorp.com/service-meta-`, e.g.
@ -359,13 +362,13 @@ Pod annotations can be used to configure the injection behavior.
- `consul.hashicorp.com/sidecar-proxy-memory-request` - Override the default memory request.
- `consul.hashicorp.com/consul-sidecar-` - Override default resource settings for
the consul sidecar container.
The defaults are set in Helm config via the [`connectInject.consulSidecar.resources`](/docs/k8s/helm#v-global-consulsidecarcontainer) key.
the `consul-sidecar` container.
The defaults are set in Helm config via the [`global.consulSidecarContainer.resources`](/docs/k8s/helm#v-global-consulsidecarcontainer) key.
- `consul.hashicorp.com/consul-sidecar-cpu-limit` - Override the default CPU limit.
- `consul.hashicorp.com/consul-sidecar-cpu-request` - Override the default CPU request.
- `consul.hashicorp.com/consul-sidecar-memory-limit` - Override the default memory limit.
- `consul.hashicorp.com/consul-sidecar-memory-request` - Override the default memory request
- `consul.hashicorp.com/consul-sidecar-memory-request` - Override the default memory request.
- `consul.hashicorp.com/enable-metrics` - Override the default Helm value [`connectInject.metrics.defaultEnabled`](/docs/k8s/helm#v-connectinject-metrics-defaultenabled).
- `consul.hashicorp.com/enable-metrics-merging` - Override the default Helm value [`connectInject.metrics.defaultEnableMerging`](/docs/k8s/helm#v-connectinject-metrics-defaultenablemerging).

@ -32,6 +32,8 @@ service discovery, including hosted services like databases.
## Installation and Configuration
~> Enabling both Service Mesh and Service Sync on the same Kubernetes services is not supported, as Service Mesh also registers Kubernetes service instances to Consul. Please ensure that Service Sync is only enabled for namespaces and services that are not injected with the Consul sidecar for Service Mesh as described in [Sync Enable/Disable](/docs/k8s/service-sync#sync-enable-disable).
The service sync is done using an external long-running process in the
[consul-k8s project](https://github.com/hashicorp/consul-k8s). This process
can run either in or out of a Kubernetes cluster. However, running this within

@ -183,7 +183,7 @@ $ helm diff upgrade consul hashicorp/consul --version 0.24.1 --values /path/to/y
grep "has changed"
```
1. Take specific note if `consul, DaemonSet` or `consul-server, StatefulSet` are listed.
1. Take specific note if `consul-client, DaemonSet` or `consul-server, StatefulSet` are listed.
This means that your Consul client daemonset or Consul server statefulset (or both) will be redeployed.
If either is being redeployed, we will follow the same pattern for upgrades as

@ -329,7 +329,7 @@ The following table provides an overview of the resources you can use to create
| `node`<br/>`node_prefix` &nbsp; | Controls access to node-level registration and read access to the [Catalog API](/api/catalog). <br/>See [Node Rules](#node-rules) for details. | Yes |
| `operator` &nbsp; &nbsp; &nbsp; | Controls access to cluster-level operations available in the [Operator API](/api/operator) excluding keyring API endpoints. <br/>See [Operator Rules](#operator-rules) for details. | No |
| `query`<br/>`query_prefix` | Controls access to create, update, and delete prepared queries in the [Prepared Query API](/api/query). Access to the [node](#node-rules) and [service](#service-rules) must also be granted. <br/>See [Prepared Query Rules](#prepared-query-rules) for details. | Yes |
| `service`<br/>`service_prefix` | Controls service-level registration and read access to the [Catalog API](/api/catalog), as well as service discovery with the [Health API](/api/health). <br/>See [Service Rules](#node-rules) for details. | Yes |
| `service`<br/>`service_prefix` | Controls service-level registration and read access to the [Catalog API](/api/catalog), as well as service discovery with the [Health API](/api/health). <br/>See [Service Rules](#service-rules) for details. | Yes |
| `session`<br/>`session_prefix` | Controls access to operations in the [Session API](/api/session). <br/>See [Session Rules](#session-rules) for details. | Yes |
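For example, the following sketch shows a policy that combines several of the resources from the table above; the rule contents are illustrative.

```hcl
# Illustrative ACL policy using several resource types.
service_prefix "web" {
  policy = "write"
}

node_prefix "" {
  policy = "read"
}

session_prefix "" {
  policy = "read"
}
```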
The following topics provide additional details about the available resources.

@ -360,6 +360,10 @@
}
]
},
{
"title": "Consul API Gateway <sup>BETA</sup>",
"path": "api-gateway"
},
{
"title": "Kubernetes",
"routes": [
@ -608,23 +612,40 @@
"path": "ecs"
},
{
"title": "Get Started",
"title": "Requirements",
"path": "ecs/requirements"
},
{
"title": "Install with Terraform",
"routes": [
{
"title": "Requirements",
"path": "ecs/get-started/requirements"
"title": "Installation",
"path": "ecs/terraform/install"
},
{
"title": "Secure Configuration",
"path": "ecs/terraform/secure-configuration"
},
{
"title": "Migrate Existing Tasks",
"path": "ecs/terraform/migrate-existing-tasks"
}
]
},
{
"title": "Install Manually",
"routes": [
{
"title": "Installation",
"path": "ecs/get-started/install"
"path": "ecs/manual/install"
},
{
"title": "Production Installation",
"path": "ecs/get-started/production-installation"
"title": "Secure Configuration",
"path": "ecs/manual/secure-configuration"
},
{
"title": "Migrate Existing Tasks",
"path": "ecs/get-started/migrate-existing-tasks"
"title": "ACL Controller",
"path": "ecs/manual/acl-controller"
}
]
},
@ -635,6 +656,10 @@
{
"title": "Consul Enterprise",
"path": "ecs/enterprise"
},
{
"title": "Configuration Reference",
"path": "ecs/configuration-reference"
}
]
},
