From 2ec94b027e4ee73ad542b6ffcd2b797a96f6ef2e Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Wed, 30 Sep 2020 12:31:21 -0700 Subject: [PATCH 1/5] connect: Enable renewing the intermediate cert in the primary DC --- agent/connect/ca/provider.go | 7 ++ agent/connect/ca/provider_vault.go | 23 ++-- agent/connect/ca/provider_vault_test.go | 131 +--------------------- agent/connect/ca/testing.go | 142 ++++++++++++++++++++++++ agent/consul/leader_connect.go | 59 ++++++++-- agent/consul/leader_connect_test.go | 113 ++++++++++++++++++- agent/consul/server.go | 2 +- 7 files changed, 330 insertions(+), 147 deletions(-) diff --git a/agent/connect/ca/provider.go b/agent/connect/ca/provider.go index f9e637c419..1dd1408cf2 100644 --- a/agent/connect/ca/provider.go +++ b/agent/connect/ca/provider.go @@ -16,6 +16,13 @@ import ( // on servers and CA provider. var ErrRateLimited = errors.New("operation rate limited by CA provider") +// PrimaryIntermediateProviders is a list of CA providers that make use use of an +// intermediate cert in the primary datacenter as well as the secondary. This is used +// when determining whether to run the intermediate renewal routine in the primary. +var PrimaryIntermediateProviders = map[string]struct{}{ + "vault": struct{}{}, +} + // ProviderConfig encapsulates all the data Consul passes to `Configure` on a // new provider instance. The provider must treat this as read-only and make // copies of any map or slice if it might modify them internally. diff --git a/agent/connect/ca/provider_vault.go b/agent/connect/ca/provider_vault.go index d3a9ac7146..7c5fe47c8b 100644 --- a/agent/connect/ca/provider_vault.go +++ b/agent/connect/ca/provider_vault.go @@ -92,7 +92,7 @@ func (v *VaultProvider) Configure(cfg ProviderConfig) error { // Set up a renewer to renew the token automatically, if supported. if token.Renewable { - lifetimeWatcher, err := client.NewLifetimeWatcher(&vaultapi.LifetimeWatcherInput{ + renewer, err := client.NewRenewer(&vaultapi.RenewerInput{ Secret: &vaultapi.Secret{ Auth: &vaultapi.SecretAuth{ ClientToken: config.Token, @@ -100,8 +100,7 @@ func (v *VaultProvider) Configure(cfg ProviderConfig) error { LeaseDuration: secret.LeaseDuration, }, }, - Increment: token.TTL, - RenewBehavior: vaultapi.RenewBehaviorIgnoreErrors, + Increment: token.TTL, }) if err != nil { return fmt.Errorf("Error beginning Vault provider token renewal: %v", err) @@ -109,31 +108,31 @@ func (v *VaultProvider) Configure(cfg ProviderConfig) error { ctx, cancel := context.WithCancel(context.TODO()) v.shutdown = cancel - go v.renewToken(ctx, lifetimeWatcher) + go v.renewToken(ctx, renewer) } return nil } // renewToken uses a vaultapi.Renewer to repeatedly renew our token's lease. -func (v *VaultProvider) renewToken(ctx context.Context, watcher *vaultapi.LifetimeWatcher) { - go watcher.Start() - defer watcher.Stop() +func (v *VaultProvider) renewToken(ctx context.Context, renewer *vaultapi.Renewer) { + go renewer.Renew() + defer renewer.Stop() for { select { case <-ctx.Done(): return - case err := <-watcher.DoneCh(): + case err := <-renewer.DoneCh(): if err != nil { v.logger.Error("Error renewing token for Vault provider", "error", err) } - // Watcher routine has finished, so start it again. - go watcher.Start() + // Renewer routine has finished, so start it again. + go renewer.Renew() - case <-watcher.RenewCh(): + case <-renewer.RenewCh(): v.logger.Error("Successfully renewed token for Vault provider") } } @@ -384,6 +383,7 @@ func (v *VaultProvider) GenerateIntermediate() (string, error) { "csr": csr, "use_csr_values": true, "format": "pem_bundle", + "ttl": v.config.IntermediateCertTTL.String(), }) if err != nil { return "", err @@ -456,6 +456,7 @@ func (v *VaultProvider) SignIntermediate(csr *x509.CertificateRequest) (string, "use_csr_values": true, "format": "pem_bundle", "max_path_length": 0, + "ttl": v.config.IntermediateCertTTL.String(), }) if err != nil { return "", err diff --git a/agent/connect/ca/provider_vault_test.go b/agent/connect/ca/provider_vault_test.go index 3094cb092f..b1890e08eb 100644 --- a/agent/connect/ca/provider_vault_test.go +++ b/agent/connect/ca/provider_vault_test.go @@ -7,13 +7,11 @@ import ( "io/ioutil" "os" "os/exec" - "sync" "testing" "time" "github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/sdk/freeport" "github.com/hashicorp/consul/sdk/testutil/retry" "github.com/hashicorp/go-hclog" vaultapi "github.com/hashicorp/vault/api" @@ -70,7 +68,7 @@ func TestVaultCAProvider_RenewToken(t *testing.T) { require.NoError(t, err) providerToken := secret.Auth.ClientToken - _, err = createVaultProvider(t, true, testVault.addr, providerToken, nil) + _, err = createVaultProvider(t, true, testVault.Addr, providerToken, nil) require.NoError(t, err) // Check the last renewal time. @@ -382,11 +380,11 @@ func getIntermediateCertTTL(t *testing.T, caConf *structs.CAConfiguration) time. return dur } -func testVaultProvider(t *testing.T) (*VaultProvider, *testVaultServer) { +func testVaultProvider(t *testing.T) (*VaultProvider, *TestVaultServer) { return testVaultProviderWithConfig(t, true, nil) } -func testVaultProviderWithConfig(t *testing.T, isPrimary bool, rawConf map[string]interface{}) (*VaultProvider, *testVaultServer) { +func testVaultProviderWithConfig(t *testing.T, isPrimary bool, rawConf map[string]interface{}) (*VaultProvider, *TestVaultServer) { testVault, err := runTestVault(t) if err != nil { t.Fatalf("err: %v", err) @@ -394,7 +392,7 @@ func testVaultProviderWithConfig(t *testing.T, isPrimary bool, rawConf map[strin testVault.WaitUntilReady(t) - provider, err := createVaultProvider(t, isPrimary, testVault.addr, testVault.rootToken, rawConf) + provider, err := createVaultProvider(t, isPrimary, testVault.Addr, testVault.RootToken, rawConf) if err != nil { testVault.Stop() t.Fatalf("err: %v", err) @@ -459,124 +457,3 @@ func skipIfVaultNotPresent(t *testing.T) { t.Skipf("%q not found on $PATH - download and install to run this test", vaultBinaryName) } } - -func runTestVault(t *testing.T) (*testVaultServer, error) { - vaultBinaryName := os.Getenv("VAULT_BINARY_NAME") - if vaultBinaryName == "" { - vaultBinaryName = "vault" - } - - path, err := exec.LookPath(vaultBinaryName) - if err != nil || path == "" { - return nil, fmt.Errorf("%q not found on $PATH", vaultBinaryName) - } - - ports := freeport.MustTake(2) - returnPortsFn := func() { - freeport.Return(ports) - } - - var ( - clientAddr = fmt.Sprintf("127.0.0.1:%d", ports[0]) - clusterAddr = fmt.Sprintf("127.0.0.1:%d", ports[1]) - ) - - const token = "root" - - client, err := vaultapi.NewClient(&vaultapi.Config{ - Address: "http://" + clientAddr, - }) - if err != nil { - returnPortsFn() - return nil, err - } - client.SetToken(token) - - args := []string{ - "server", - "-dev", - "-dev-root-token-id", - token, - "-dev-listen-address", - clientAddr, - "-address", - clusterAddr, - } - - cmd := exec.Command(vaultBinaryName, args...) - cmd.Stdout = ioutil.Discard - cmd.Stderr = ioutil.Discard - if err := cmd.Start(); err != nil { - returnPortsFn() - return nil, err - } - - testVault := &testVaultServer{ - rootToken: token, - addr: "http://" + clientAddr, - cmd: cmd, - client: client, - returnPortsFn: returnPortsFn, - } - t.Cleanup(func() { - testVault.Stop() - }) - return testVault, nil -} - -type testVaultServer struct { - rootToken string - addr string - cmd *exec.Cmd - client *vaultapi.Client - - // returnPortsFn will put the ports claimed for the test back into the - returnPortsFn func() -} - -var printedVaultVersion sync.Once - -func (v *testVaultServer) WaitUntilReady(t *testing.T) { - var version string - retry.Run(t, func(r *retry.R) { - resp, err := v.client.Sys().Health() - if err != nil { - r.Fatalf("err: %v", err) - } - if !resp.Initialized { - r.Fatalf("vault server is not initialized") - } - if resp.Sealed { - r.Fatalf("vault server is sealed") - } - version = resp.Version - }) - printedVaultVersion.Do(func() { - fmt.Fprintf(os.Stderr, "[INFO] agent/connect/ca: testing with vault server version: %s\n", version) - }) -} - -func (v *testVaultServer) Stop() error { - // There was no process - if v.cmd == nil { - return nil - } - - if v.cmd.Process != nil { - if err := v.cmd.Process.Signal(os.Interrupt); err != nil { - return fmt.Errorf("failed to kill vault server: %v", err) - } - } - - // wait for the process to exit to be sure that the data dir can be - // deleted on all platforms. - if err := v.cmd.Wait(); err != nil { - return err - } - - if v.returnPortsFn != nil { - v.returnPortsFn() - } - - return nil -} diff --git a/agent/connect/ca/testing.go b/agent/connect/ca/testing.go index 9a637843e0..81995275fe 100644 --- a/agent/connect/ca/testing.go +++ b/agent/connect/ca/testing.go @@ -3,9 +3,15 @@ package ca import ( "fmt" "io/ioutil" + "os" + "os/exec" + "sync" "github.com/hashicorp/consul/agent/connect" + "github.com/hashicorp/consul/sdk/freeport" + "github.com/hashicorp/consul/sdk/testutil/retry" "github.com/hashicorp/go-hclog" + vaultapi "github.com/hashicorp/vault/api" "github.com/mitchellh/go-testing-interface" ) @@ -76,3 +82,139 @@ func TestConsulProvider(t testing.T, d ConsulProviderStateDelegate) *ConsulProvi provider.SetLogger(logger) return provider } + +func NewTestVaultServer(t testing.T) *TestVaultServer { + testVault, err := runTestVault(t) + if err != nil { + t.Fatalf("err: %v", err) + } + + testVault.WaitUntilReady(t) + + return testVault +} + +func runTestVault(t testing.T) (*TestVaultServer, error) { + vaultBinaryName := os.Getenv("VAULT_BINARY_NAME") + if vaultBinaryName == "" { + vaultBinaryName = "vault" + } + + path, err := exec.LookPath(vaultBinaryName) + if err != nil || path == "" { + return nil, fmt.Errorf("%q not found on $PATH", vaultBinaryName) + } + + ports := freeport.MustTake(2) + returnPortsFn := func() { + freeport.Return(ports) + } + + var ( + clientAddr = fmt.Sprintf("127.0.0.1:%d", ports[0]) + clusterAddr = fmt.Sprintf("127.0.0.1:%d", ports[1]) + ) + + const token = "root" + + client, err := vaultapi.NewClient(&vaultapi.Config{ + Address: "http://" + clientAddr, + }) + if err != nil { + returnPortsFn() + return nil, err + } + client.SetToken(token) + + args := []string{ + "server", + "-dev", + "-dev-root-token-id", + token, + "-dev-listen-address", + clientAddr, + "-address", + clusterAddr, + } + + cmd := exec.Command(vaultBinaryName, args...) + cmd.Stdout = ioutil.Discard + cmd.Stderr = ioutil.Discard + if err := cmd.Start(); err != nil { + returnPortsFn() + return nil, err + } + + testVault := &TestVaultServer{ + RootToken: token, + Addr: "http://" + clientAddr, + cmd: cmd, + client: client, + returnPortsFn: returnPortsFn, + } + t.Cleanup(func() { + testVault.Stop() + }) + return testVault, nil +} + +type TestVaultServer struct { + RootToken string + Addr string + cmd *exec.Cmd + client *vaultapi.Client + + // returnPortsFn will put the ports claimed for the test back into the + returnPortsFn func() +} + +var printedVaultVersion sync.Once + +func (v *TestVaultServer) Client() *vaultapi.Client { + return v.client +} + +func (v *TestVaultServer) WaitUntilReady(t testing.T) { + var version string + retry.Run(t, func(r *retry.R) { + resp, err := v.client.Sys().Health() + if err != nil { + r.Fatalf("err: %v", err) + } + if !resp.Initialized { + r.Fatalf("vault server is not initialized") + } + if resp.Sealed { + r.Fatalf("vault server is sealed") + } + version = resp.Version + }) + printedVaultVersion.Do(func() { + fmt.Fprintf(os.Stderr, "[INFO] agent/connect/ca: testing with vault server version: %s\n", version) + }) +} + +func (v *TestVaultServer) Stop() error { + // There was no process + if v.cmd == nil { + return nil + } + + if v.cmd.Process != nil { + if err := v.cmd.Process.Signal(os.Interrupt); err != nil { + return fmt.Errorf("failed to kill vault server: %v", err) + } + } + + // wait for the process to exit to be sure that the data dir can be + // deleted on all platforms. + if err := v.cmd.Wait(); err != nil { + return err + } + + if v.returnPortsFn != nil { + v.returnPortsFn() + } + + return nil +} diff --git a/agent/consul/leader_connect.go b/agent/consul/leader_connect.go index d512cc2760..6befad04dd 100644 --- a/agent/consul/leader_connect.go +++ b/agent/consul/leader_connect.go @@ -510,6 +510,31 @@ func (s *Server) persistNewRoot(provider ca.Provider, newActiveRoot *structs.CAR return nil } +// getIntermediateCAPrimary regenerates the intermediate cert in the primary datacenter. +// This is only run for CAs that require an intermediary in the primary DC, such as Vault. +// This function is being called while holding caProviderReconfigurationLock +// which means it must never take that lock itself or call anything that does. +func (s *Server) getIntermediateCAPrimary(provider ca.Provider, newActiveRoot *structs.CARoot) error { + connectLogger := s.loggers.Named(logging.Connect) + intermediatePEM, err := provider.GenerateIntermediate() + if err != nil { + return fmt.Errorf("error generating new intermediate cert: %v", err) + } + + intermediateCert, err := connect.ParseCert(intermediatePEM) + if err != nil { + return fmt.Errorf("error parsing intermediate cert: %v", err) + } + + // Append the new intermediate to our local active root entry. This is + // where the root representations start to diverge. + newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) + newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) + + connectLogger.Info("generated new intermediate certificate in primary datacenter") + return nil +} + // getIntermediateCASigned is being called while holding caProviderReconfigurationLock // which means it must never take that lock itself or call anything that does. func (s *Server) getIntermediateCASigned(provider ca.Provider, newActiveRoot *structs.CARoot) error { @@ -558,10 +583,10 @@ func (s *Server) startConnectLeader() { if s.config.ConnectEnabled && s.config.Datacenter != s.config.PrimaryDatacenter { s.leaderRoutineManager.Start(secondaryCARootWatchRoutineName, s.secondaryCARootWatch) s.leaderRoutineManager.Start(intentionReplicationRoutineName, s.replicateIntentions) - s.leaderRoutineManager.Start(secondaryCertRenewWatchRoutineName, s.secondaryIntermediateCertRenewalWatch) s.startConnectLeaderEnterprise() } + s.leaderRoutineManager.Start(intermediateCertRenewWatchRoutineName, s.intermediateCertRenewalWatch) s.leaderRoutineManager.Start(caRootPruningRoutineName, s.runCARootPruning) } @@ -652,11 +677,12 @@ func (s *Server) pruneCARoots() error { return nil } -// secondaryIntermediateCertRenewalWatch checks the intermediate cert for +// intermediateCertRenewalWatch checks the intermediate cert for // expiration. As soon as more than half the time a cert is valid has passed, // it will try to renew it. -func (s *Server) secondaryIntermediateCertRenewalWatch(ctx context.Context) error { +func (s *Server) intermediateCertRenewalWatch(ctx context.Context) error { connectLogger := s.loggers.Named(logging.Connect) + isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter for { select { @@ -672,7 +698,8 @@ func (s *Server) secondaryIntermediateCertRenewalWatch(ctx context.Context) erro // this happens when leadership is being revoked and this go routine will be stopped return nil } - if !s.configuredSecondaryCA() { + // If this isn't the primary, make sure the CA has been initialized. + if !isPrimary && !s.configuredSecondaryCA() { return fmt.Errorf("secondary CA is not yet configured.") } @@ -682,13 +709,26 @@ func (s *Server) secondaryIntermediateCertRenewalWatch(ctx context.Context) erro return err } + // If this is the primary, check if this is a provider that uses an intermediate cert. If + // it isn't, we don't need to check for a renewal. + if isPrimary { + _, config, err := state.CAConfig(nil) + if err != nil { + return err + } + + if _, ok := ca.PrimaryIntermediateProviders[config.Provider]; !ok { + return nil + } + } + activeIntermediate, err := provider.ActiveIntermediate() if err != nil { return err } if activeIntermediate == "" { - return fmt.Errorf("secondary datacenter doesn't have an active intermediate.") + return fmt.Errorf("datacenter doesn't have an active intermediate.") } intermediateCert, err := connect.ParseCert(activeIntermediate) @@ -698,10 +738,15 @@ func (s *Server) secondaryIntermediateCertRenewalWatch(ctx context.Context) erro if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), intermediateCert.NotAfter) { + //connectLogger.Info("checked time passed", intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), intermediateCert.NotAfter) return nil } - if err := s.getIntermediateCASigned(provider, activeRoot); err != nil { + renewalFunc := s.getIntermediateCAPrimary + if !isPrimary { + renewalFunc = s.getIntermediateCASigned + } + if err := renewalFunc(provider, activeRoot); err != nil { return err } @@ -713,7 +758,7 @@ func (s *Server) secondaryIntermediateCertRenewalWatch(ctx context.Context) erro return nil }, func(err error) { connectLogger.Error("error renewing intermediate certs", - "routine", secondaryCertRenewWatchRoutineName, + "routine", intermediateCertRenewWatchRoutineName, "error", err, ) }) diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 1f22f27349..8690f3c994 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -13,7 +13,7 @@ import ( "time" "github.com/hashicorp/consul/agent/connect" - ca "github.com/hashicorp/consul/agent/connect/ca" + "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" tokenStore "github.com/hashicorp/consul/agent/token" @@ -181,6 +181,117 @@ func getCAProviderWithLock(s *Server) (ca.Provider, *structs.CARoot) { return s.getCAProvider() } +func TestLeader_PrimaryCA_IntermediateRenew(t *testing.T) { + // no parallel execution because we change globals + origInterval := structs.IntermediateCertRenewInterval + origMinTTL := structs.MinLeafCertTTL + defer func() { + structs.IntermediateCertRenewInterval = origInterval + structs.MinLeafCertTTL = origMinTTL + }() + + structs.IntermediateCertRenewInterval = time.Millisecond + structs.MinLeafCertTTL = time.Second + require := require.New(t) + + testVault := ca.NewTestVaultServer(t) + defer testVault.Stop() + + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.Build = "1.6.0" + c.PrimaryDatacenter = "dc1" + c.CAConfig = &structs.CAConfiguration{ + Provider: "vault", + Config: map[string]interface{}{ + "Address": testVault.Addr, + "Token": testVault.RootToken, + "RootPKIPath": "pki-root/", + "IntermediatePKIPath": "pki-intermediate/", + "LeafCertTTL": "5s", + // The retry loop only retries for 7sec max and + // the ttl needs to be below so that it + // triggers definitely. + // Since certs are created so that they are + // valid from 1minute in the past, we need to + // account for that, otherwise it will be + // expired immediately. + "IntermediateCertTTL": "15s", + }, + } + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + // Capture the current root + var originalRoot *structs.CARoot + { + rootList, activeRoot, err := getTestRoots(s1, "dc1") + require.NoError(err) + require.Len(rootList.Roots, 1) + originalRoot = activeRoot + } + + // Get the original intermediate + waitForActiveCARoot(t, s1, originalRoot) + provider, _ := getCAProviderWithLock(s1) + intermediatePEM, err := provider.ActiveIntermediate() + require.NoError(err) + cert, err := connect.ParseCert(intermediatePEM) + require.NoError(err) + + // Wait for dc1's intermediate to be refreshed. + // It is possible that test fails when the blocking query doesn't return. + retry.Run(t, func(r *retry.R) { + provider, _ = getCAProviderWithLock(s1) + newIntermediatePEM, err := provider.ActiveIntermediate() + r.Check(err) + _, err = connect.ParseCert(intermediatePEM) + r.Check(err) + if newIntermediatePEM == intermediatePEM { + r.Fatal("not a renewed intermediate") + } + intermediatePEM = newIntermediatePEM + }) + require.NoError(err) + + // Get the root from dc1 and validate a chain of: + // dc1 leaf -> dc1 intermediate -> dc1 root + provider, caRoot := getCAProviderWithLock(s1) + + // Have the new intermediate sign a leaf cert and make sure the chain is correct. + spiffeService := &connect.SpiffeIDService{ + Host: "node1", + Namespace: "default", + Datacenter: "dc1", + Service: "foo", + } + raw, _ := connect.TestCSR(t, spiffeService) + + leafCsr, err := connect.ParseCSR(raw) + require.NoError(err) + + leafPEM, err := provider.Sign(leafCsr) + require.NoError(err) + + cert, err = connect.ParseCert(leafPEM) + require.NoError(err) + + // Check that the leaf signed by the new intermediate can be verified using the + // returned cert chain (signed intermediate + remote root). + intermediatePool := x509.NewCertPool() + intermediatePool.AppendCertsFromPEM([]byte(intermediatePEM)) + rootPool := x509.NewCertPool() + rootPool.AppendCertsFromPEM([]byte(caRoot.RootCert)) + + _, err = cert.Verify(x509.VerifyOptions{ + Intermediates: intermediatePool, + Roots: rootPool, + }) + require.NoError(err) +} + func TestLeader_SecondaryCA_IntermediateRenew(t *testing.T) { // no parallel execution because we change globals origInterval := structs.IntermediateCertRenewInterval diff --git a/agent/consul/server.go b/agent/consul/server.go index a478d0c396..d7c299ad51 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -102,7 +102,7 @@ const ( federationStatePruningRoutineName = "federation state pruning" intentionReplicationRoutineName = "intention replication" secondaryCARootWatchRoutineName = "secondary CA roots watch" - secondaryCertRenewWatchRoutineName = "secondary cert renew watch" + intermediateCertRenewWatchRoutineName = "intermediate cert renew watch" ) var ( From e13f4af06b82858f4dc2b2877d414134091d84ad Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Fri, 9 Oct 2020 04:35:42 -0700 Subject: [PATCH 2/5] connect: Check for expired root cert when cross-signing --- agent/connect/ca/provider_consul.go | 7 +++---- agent/connect/ca/provider_vault.go | 15 ++++++++++++++- agent/consul/leader_connect.go | 4 ++-- agent/consul/leader_connect_test.go | 7 +++++-- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/agent/connect/ca/provider_consul.go b/agent/connect/ca/provider_consul.go index fa13ed7bec..55f9052fb5 100644 --- a/agent/connect/ca/provider_consul.go +++ b/agent/connect/ca/provider_consul.go @@ -21,14 +21,13 @@ import ( "github.com/hashicorp/go-hclog" ) -const ( - +var ( // NotBefore will be CertificateTimeDriftBuffer in the past to account for // time drift between different servers. CertificateTimeDriftBuffer = time.Minute -) -var ErrNotInitialized = errors.New("provider not initialized") + ErrNotInitialized = errors.New("provider not initialized") +) type ConsulProvider struct { Delegate ConsulProviderStateDelegate diff --git a/agent/connect/ca/provider_vault.go b/agent/connect/ca/provider_vault.go index 7c5fe47c8b..0eee7d6d20 100644 --- a/agent/connect/ca/provider_vault.go +++ b/agent/connect/ca/provider_vault.go @@ -9,6 +9,7 @@ import ( "io/ioutil" "net/http" "strings" + "time" "github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/structs" @@ -476,8 +477,20 @@ func (v *VaultProvider) SignIntermediate(csr *x509.CertificateRequest) (string, // CrossSignCA takes a CA certificate and cross-signs it to form a trust chain // back to our active root. func (v *VaultProvider) CrossSignCA(cert *x509.Certificate) (string, error) { + rootPEM, err := v.ActiveRoot() + if err != nil { + return "", err + } + rootCert, err := connect.ParseCert(rootPEM) + if err != nil { + return "", fmt.Errorf("error parsing root cert: %v", err) + } + if rootCert.NotAfter.Before(time.Now()) { + return "", fmt.Errorf("root certificate is expired") + } + var pemBuf bytes.Buffer - err := pem.Encode(&pemBuf, &pem.Block{Type: "CERTIFICATE", Bytes: cert.Raw}) + err = pem.Encode(&pemBuf, &pem.Block{Type: "CERTIFICATE", Bytes: cert.Raw}) if err != nil { return "", err } diff --git a/agent/consul/leader_connect.go b/agent/consul/leader_connect.go index 6befad04dd..c81b2120b3 100644 --- a/agent/consul/leader_connect.go +++ b/agent/consul/leader_connect.go @@ -516,6 +516,7 @@ func (s *Server) persistNewRoot(provider ca.Provider, newActiveRoot *structs.CAR // which means it must never take that lock itself or call anything that does. func (s *Server) getIntermediateCAPrimary(provider ca.Provider, newActiveRoot *structs.CARoot) error { connectLogger := s.loggers.Named(logging.Connect) + // Generate and sign an intermediate cert using the root CA. intermediatePEM, err := provider.GenerateIntermediate() if err != nil { return fmt.Errorf("error generating new intermediate cert: %v", err) @@ -531,7 +532,7 @@ func (s *Server) getIntermediateCAPrimary(provider ca.Provider, newActiveRoot *s newActiveRoot.IntermediateCerts = append(newActiveRoot.IntermediateCerts, intermediatePEM) newActiveRoot.SigningKeyID = connect.EncodeSigningKeyID(intermediateCert.SubjectKeyId) - connectLogger.Info("generated new intermediate certificate in primary datacenter") + connectLogger.Info("generated new intermediate certificate for primary datacenter") return nil } @@ -738,7 +739,6 @@ func (s *Server) intermediateCertRenewalWatch(ctx context.Context) error { if lessThanHalfTimePassed(time.Now(), intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), intermediateCert.NotAfter) { - //connectLogger.Info("checked time passed", intermediateCert.NotBefore.Add(ca.CertificateTimeDriftBuffer), intermediateCert.NotAfter) return nil } diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 8690f3c994..6cad61bf63 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -185,11 +185,14 @@ func TestLeader_PrimaryCA_IntermediateRenew(t *testing.T) { // no parallel execution because we change globals origInterval := structs.IntermediateCertRenewInterval origMinTTL := structs.MinLeafCertTTL + origDriftBuffer := ca.CertificateTimeDriftBuffer defer func() { structs.IntermediateCertRenewInterval = origInterval structs.MinLeafCertTTL = origMinTTL + ca.CertificateTimeDriftBuffer = origDriftBuffer }() + ca.CertificateTimeDriftBuffer = 30 * time.Second structs.IntermediateCertRenewInterval = time.Millisecond structs.MinLeafCertTTL = time.Second require := require.New(t) @@ -207,7 +210,7 @@ func TestLeader_PrimaryCA_IntermediateRenew(t *testing.T) { "Token": testVault.RootToken, "RootPKIPath": "pki-root/", "IntermediatePKIPath": "pki-intermediate/", - "LeafCertTTL": "5s", + "LeafCertTTL": "1s", // The retry loop only retries for 7sec max and // the ttl needs to be below so that it // triggers definitely. @@ -215,7 +218,7 @@ func TestLeader_PrimaryCA_IntermediateRenew(t *testing.T) { // valid from 1minute in the past, we need to // account for that, otherwise it will be // expired immediately. - "IntermediateCertTTL": "15s", + "IntermediateCertTTL": "5s", }, } }) From 01ce9f5b18b23bc86aea0d2a329d08a9a35b539b Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Fri, 9 Oct 2020 05:02:18 -0700 Subject: [PATCH 3/5] Update CI for leader renew CA test using Vault --- GNUmakefile | 2 ++ agent/connect/ca/provider.go | 2 +- agent/connect/ca/provider_vault.go | 21 ++++++++-------- agent/connect/ca/provider_vault_test.go | 32 ++++++------------------- agent/connect/ca/testing.go | 16 +++++++++++++ agent/consul/leader_connect_test.go | 8 ++++--- 6 files changed, 42 insertions(+), 39 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 7c4fe143bf..3253280e2c 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -357,6 +357,8 @@ test-connect-ca-providers: ifeq ("$(CIRCLECI)","true") # Run in CI gotestsum --format=short-verbose --junitfile "$(TEST_RESULTS_DIR)/gotestsum-report.xml" -- -cover -coverprofile=coverage.txt ./agent/connect/ca +# Run leader tests that require Vault + gotestsum --format=short-verbose --junitfile "$(TEST_RESULTS_DIR)/gotestsum-report-leader.xml" -- -cover -coverprofile=coverage-leader.txt -run TestLeader_Vault_ ./agent/consul else # Run locally @echo "Running /agent/connect/ca tests in verbose mode" diff --git a/agent/connect/ca/provider.go b/agent/connect/ca/provider.go index 1dd1408cf2..17538ee6f4 100644 --- a/agent/connect/ca/provider.go +++ b/agent/connect/ca/provider.go @@ -20,7 +20,7 @@ var ErrRateLimited = errors.New("operation rate limited by CA provider") // intermediate cert in the primary datacenter as well as the secondary. This is used // when determining whether to run the intermediate renewal routine in the primary. var PrimaryIntermediateProviders = map[string]struct{}{ - "vault": struct{}{}, + "vault": {}, } // ProviderConfig encapsulates all the data Consul passes to `Configure` on a diff --git a/agent/connect/ca/provider_vault.go b/agent/connect/ca/provider_vault.go index 0eee7d6d20..c74a2e4609 100644 --- a/agent/connect/ca/provider_vault.go +++ b/agent/connect/ca/provider_vault.go @@ -93,7 +93,7 @@ func (v *VaultProvider) Configure(cfg ProviderConfig) error { // Set up a renewer to renew the token automatically, if supported. if token.Renewable { - renewer, err := client.NewRenewer(&vaultapi.RenewerInput{ + lifetimeWatcher, err := client.NewLifetimeWatcher(&vaultapi.LifetimeWatcherInput{ Secret: &vaultapi.Secret{ Auth: &vaultapi.SecretAuth{ ClientToken: config.Token, @@ -101,7 +101,8 @@ func (v *VaultProvider) Configure(cfg ProviderConfig) error { LeaseDuration: secret.LeaseDuration, }, }, - Increment: token.TTL, + Increment: token.TTL, + RenewBehavior: vaultapi.RenewBehaviorIgnoreErrors, }) if err != nil { return fmt.Errorf("Error beginning Vault provider token renewal: %v", err) @@ -109,31 +110,31 @@ func (v *VaultProvider) Configure(cfg ProviderConfig) error { ctx, cancel := context.WithCancel(context.TODO()) v.shutdown = cancel - go v.renewToken(ctx, renewer) + go v.renewToken(ctx, lifetimeWatcher) } return nil } // renewToken uses a vaultapi.Renewer to repeatedly renew our token's lease. -func (v *VaultProvider) renewToken(ctx context.Context, renewer *vaultapi.Renewer) { - go renewer.Renew() - defer renewer.Stop() +func (v *VaultProvider) renewToken(ctx context.Context, watcher *vaultapi.LifetimeWatcher) { + go watcher.Start() + defer watcher.Stop() for { select { case <-ctx.Done(): return - case err := <-renewer.DoneCh(): + case err := <-watcher.DoneCh(): if err != nil { v.logger.Error("Error renewing token for Vault provider", "error", err) } - // Renewer routine has finished, so start it again. - go renewer.Renew() + // Watcher routine has finished, so start it again. + go watcher.Start() - case <-renewer.RenewCh(): + case <-watcher.RenewCh(): v.logger.Error("Successfully renewed token for Vault provider") } } diff --git a/agent/connect/ca/provider_vault_test.go b/agent/connect/ca/provider_vault_test.go index b1890e08eb..d14ceeefc4 100644 --- a/agent/connect/ca/provider_vault_test.go +++ b/agent/connect/ca/provider_vault_test.go @@ -5,8 +5,6 @@ import ( "encoding/json" "fmt" "io/ioutil" - "os" - "os/exec" "testing" "time" @@ -40,7 +38,7 @@ func TestVaultCAProvider_VaultTLSConfig(t *testing.T) { func TestVaultCAProvider_SecondaryActiveIntermediate(t *testing.T) { t.Parallel() - skipIfVaultNotPresent(t) + SkipIfVaultNotPresent(t) provider, testVault := testVaultProviderWithConfig(t, false, nil) defer testVault.Stop() @@ -53,7 +51,7 @@ func TestVaultCAProvider_SecondaryActiveIntermediate(t *testing.T) { func TestVaultCAProvider_RenewToken(t *testing.T) { t.Parallel() - skipIfVaultNotPresent(t) + SkipIfVaultNotPresent(t) testVault, err := runTestVault(t) require.NoError(t, err) @@ -90,7 +88,7 @@ func TestVaultCAProvider_RenewToken(t *testing.T) { func TestVaultCAProvider_Bootstrap(t *testing.T) { t.Parallel() - skipIfVaultNotPresent(t) + SkipIfVaultNotPresent(t) provider, testVault := testVaultProvider(t) defer testVault.Stop() @@ -151,7 +149,7 @@ func assertCorrectKeyType(t *testing.T, want, certPEM string) { func TestVaultCAProvider_SignLeaf(t *testing.T) { t.Parallel() - skipIfVaultNotPresent(t) + SkipIfVaultNotPresent(t) for _, tc := range KeyTestCases { tc := tc @@ -235,7 +233,7 @@ func TestVaultCAProvider_SignLeaf(t *testing.T) { func TestVaultCAProvider_CrossSignCA(t *testing.T) { t.Parallel() - skipIfVaultNotPresent(t) + SkipIfVaultNotPresent(t) tests := CASigningKeyTypeCases() @@ -290,7 +288,7 @@ func TestVaultCAProvider_CrossSignCA(t *testing.T) { func TestVaultProvider_SignIntermediate(t *testing.T) { t.Parallel() - skipIfVaultNotPresent(t) + SkipIfVaultNotPresent(t) tests := CASigningKeyTypeCases() @@ -319,7 +317,7 @@ func TestVaultProvider_SignIntermediate(t *testing.T) { func TestVaultProvider_SignIntermediateConsul(t *testing.T) { t.Parallel() - skipIfVaultNotPresent(t) + SkipIfVaultNotPresent(t) // primary = Vault, secondary = Consul t.Run("pri=vault,sec=consul", func(t *testing.T) { @@ -441,19 +439,3 @@ func createVaultProvider(t *testing.T, isPrimary bool, addr, token string, rawCo return provider, nil } - -// skipIfVaultNotPresent skips the test if the vault binary is not in PATH. -// -// These tests may be skipped in CI. They are run as part of a separate -// integration test suite. -func skipIfVaultNotPresent(t *testing.T) { - vaultBinaryName := os.Getenv("VAULT_BINARY_NAME") - if vaultBinaryName == "" { - vaultBinaryName = "vault" - } - - path, err := exec.LookPath(vaultBinaryName) - if err != nil || path == "" { - t.Skipf("%q not found on $PATH - download and install to run this test", vaultBinaryName) - } -} diff --git a/agent/connect/ca/testing.go b/agent/connect/ca/testing.go index 81995275fe..25533f8dd4 100644 --- a/agent/connect/ca/testing.go +++ b/agent/connect/ca/testing.go @@ -83,6 +83,22 @@ func TestConsulProvider(t testing.T, d ConsulProviderStateDelegate) *ConsulProvi return provider } +// SkipIfVaultNotPresent skips the test if the vault binary is not in PATH. +// +// These tests may be skipped in CI. They are run as part of a separate +// integration test suite. +func SkipIfVaultNotPresent(t testing.T) { + vaultBinaryName := os.Getenv("VAULT_BINARY_NAME") + if vaultBinaryName == "" { + vaultBinaryName = "vault" + } + + path, err := exec.LookPath(vaultBinaryName) + if err != nil || path == "" { + t.Skipf("%q not found on $PATH - download and install to run this test", vaultBinaryName) + } +} + func NewTestVaultServer(t testing.T) *TestVaultServer { testVault, err := runTestVault(t) if err != nil { diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index bca6790b2e..960981e28f 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -180,7 +180,9 @@ func getCAProviderWithLock(s *Server) (ca.Provider, *structs.CARoot) { return s.getCAProvider() } -func TestLeader_PrimaryCA_IntermediateRenew(t *testing.T) { +func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) { + ca.SkipIfVaultNotPresent(t) + // no parallel execution because we change globals origInterval := structs.IntermediateCertRenewInterval origMinTTL := structs.MinLeafCertTTL @@ -240,7 +242,7 @@ func TestLeader_PrimaryCA_IntermediateRenew(t *testing.T) { provider, _ := getCAProviderWithLock(s1) intermediatePEM, err := provider.ActiveIntermediate() require.NoError(err) - cert, err := connect.ParseCert(intermediatePEM) + _, err = connect.ParseCert(intermediatePEM) require.NoError(err) // Wait for dc1's intermediate to be refreshed. @@ -277,7 +279,7 @@ func TestLeader_PrimaryCA_IntermediateRenew(t *testing.T) { leafPEM, err := provider.Sign(leafCsr) require.NoError(err) - cert, err = connect.ParseCert(leafPEM) + cert, err := connect.ParseCert(leafPEM) require.NoError(err) // Check that the leaf signed by the new intermediate can be verified using the From cc901dfd470d2461a5751f15770f33ba30c7a1a6 Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Fri, 9 Oct 2020 08:01:55 -0700 Subject: [PATCH 4/5] Add changelog note --- .changelog/8784.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/8784.txt diff --git a/.changelog/8784.txt b/.changelog/8784.txt new file mode 100644 index 0000000000..1809cfe72b --- /dev/null +++ b/.changelog/8784.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: Fixed an issue where the Vault intermediate was not renewed in the primary datacenter. +``` \ No newline at end of file From 876500e0dc89386452e63e04f48a367fe068928c Mon Sep 17 00:00:00 2001 From: Kyle Havlovitz Date: Fri, 9 Oct 2020 08:53:33 -0700 Subject: [PATCH 5/5] Fix intermediate refresh test comments --- agent/consul/leader_connect_test.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 960981e28f..6f861c17e5 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -193,6 +193,7 @@ func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) { ca.CertificateTimeDriftBuffer = origDriftBuffer }() + // Vault backdates certs by 30s by default. ca.CertificateTimeDriftBuffer = 30 * time.Second structs.IntermediateCertRenewInterval = time.Millisecond structs.MinLeafCertTTL = time.Second @@ -215,10 +216,6 @@ func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) { // The retry loop only retries for 7sec max and // the ttl needs to be below so that it // triggers definitely. - // Since certs are created so that they are - // valid from 1minute in the past, we need to - // account for that, otherwise it will be - // expired immediately. "IntermediateCertTTL": "5s", }, } @@ -228,7 +225,7 @@ func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) { testrpc.WaitForLeader(t, s1.RPC, "dc1") - // Capture the current root + // Capture the current root. var originalRoot *structs.CARoot { rootList, activeRoot, err := getTestRoots(s1, "dc1") @@ -237,7 +234,7 @@ func TestLeader_Vault_PrimaryCA_IntermediateRenew(t *testing.T) { originalRoot = activeRoot } - // Get the original intermediate + // Get the original intermediate. waitForActiveCARoot(t, s1, originalRoot) provider, _ := getCAProviderWithLock(s1) intermediatePEM, err := provider.ActiveIntermediate()