diff --git a/agent/consul/fsm/snapshot_oss.go b/agent/consul/fsm/snapshot_oss.go index b042c78315..f9eb18cc89 100644 --- a/agent/consul/fsm/snapshot_oss.go +++ b/agent/consul/fsm/snapshot_oss.go @@ -22,6 +22,7 @@ func init() { registerRestorer(structs.AutopilotRequestType, restoreAutopilot) registerRestorer(structs.IntentionRequestType, restoreIntention) registerRestorer(structs.ConnectCARequestType, restoreConnectCA) + registerRestorer(structs.ConnectCAProviderStateType, restoreConnectCAProviderState) } func persistOSS(s *snapshot, sink raft.SnapshotSink, encoder *codec.Encoder) error { @@ -52,6 +53,9 @@ func persistOSS(s *snapshot, sink raft.SnapshotSink, encoder *codec.Encoder) err if err := s.persistConnectCA(sink, encoder); err != nil { return err } + if err := s.persistConnectCAProviderState(sink, encoder); err != nil { + return err + } return nil } @@ -284,6 +288,24 @@ func (s *snapshot) persistConnectCA(sink raft.SnapshotSink, return nil } +func (s *snapshot) persistConnectCAProviderState(sink raft.SnapshotSink, + encoder *codec.Encoder) error { + state, err := s.state.CAProviderState() + if err != nil { + return err + } + + for _, r := range state { + if _, err := sink.Write([]byte{byte(structs.ConnectCAProviderStateType)}); err != nil { + return err + } + if err := encoder.Encode(r); err != nil { + return err + } + } + return nil +} + func (s *snapshot) persistIntentions(sink raft.SnapshotSink, encoder *codec.Encoder) error { ixns, err := s.state.Intentions() @@ -430,3 +452,14 @@ func restoreConnectCA(header *snapshotHeader, restore *state.Restore, decoder *c } return nil } + +func restoreConnectCAProviderState(header *snapshotHeader, restore *state.Restore, decoder *codec.Decoder) error { + var req structs.CAConsulProviderState + if err := decoder.Decode(&req); err != nil { + return err + } + if err := restore.CAProviderState(&req); err != nil { + return err + } + return nil +} diff --git a/agent/consul/fsm/snapshot_oss_test.go b/agent/consul/fsm/snapshot_oss_test.go index 971e6bbf5a..9a6f3a3555 100644 --- a/agent/consul/fsm/snapshot_oss_test.go +++ b/agent/consul/fsm/snapshot_oss_test.go @@ -123,6 +123,14 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) { assert.Nil(err) assert.True(ok) + ok, err = fsm.state.CASetProviderState(16, &structs.CAConsulProviderState{ + ID: "asdf", + PrivateKey: "foo", + RootCert: "bar", + }) + assert.Nil(err) + assert.True(ok) + // Snapshot snap, err := fsm.Snapshot() if err != nil { @@ -296,6 +304,12 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) { assert.Nil(err) assert.Len(roots, 2) + // Verify provider state is restored. + _, state, err := fsm2.state.CAProviderState("asdf") + assert.Nil(err) + assert.Equal("foo", state.PrivateKey) + assert.Equal("bar", state.RootCert) + // Snapshot snap, err = fsm2.Snapshot() if err != nil { diff --git a/agent/consul/leader.go b/agent/consul/leader.go index 8b32226dcb..bab926997c 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -448,15 +448,9 @@ func (s *Server) initializeCA() error { return err } - // TODO(banks): in the case that we've just gained leadership in an already - // configured cluster. We really need to fetch RootCA from state to provide it - // in setCAProvider. This matters because if the current active root has - // intermediates, parsing the rootCA from only the root cert PEM above will - // not include them and so leafs we sign will not bundle the intermediates. - - s.setCAProvider(provider, rootCA) - - // Check if the CA root is already initialized and exit if it is. + // Check if the CA root is already initialized and exit if it is, + // adding on any existing intermediate certs since they aren't directly + // tied to the provider. // Every change to the CA after this initial bootstrapping should // be done through the rotation process. state := s.fsm.State() @@ -465,12 +459,15 @@ func (s *Server) initializeCA() error { return err } if activeRoot != nil { + // This state shouldn't be possible to get into because we update the root and + // CA config in the same FSM operation. if activeRoot.ID != rootCA.ID { - // TODO(banks): this seems like a pretty catastrophic state to get into. - // Shouldn't we do something stronger than warn and continue signing with - // a key that's not the active CA according to the state? - s.logger.Printf("[WARN] connect: CA root %q is not the active root (%q)", rootCA.ID, activeRoot.ID) + return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID) } + + rootCA.IntermediateCerts = activeRoot.IntermediateCerts + s.setCAProvider(provider, rootCA) + return nil } @@ -494,6 +491,8 @@ func (s *Server) initializeCA() error { return respErr } + s.setCAProvider(provider, rootCA) + s.logger.Printf("[INFO] connect: initialized CA with provider %q", conf.Provider) return nil diff --git a/agent/consul/leader_test.go b/agent/consul/leader_test.go index a685223cb3..18769ff876 100644 --- a/agent/consul/leader_test.go +++ b/agent/consul/leader_test.go @@ -2,6 +2,7 @@ package consul import ( "os" + "reflect" "testing" "time" @@ -1064,3 +1065,83 @@ func TestLeader_CARootPruning(t *testing.T) { require.True(roots[0].Active) require.NotEqual(roots[0].ID, oldRoot.ID) } + +func TestLeader_PersistIntermediateCAs(t *testing.T) { + t.Parallel() + + require := require.New(t) + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + codec := rpcClient(t, s1) + defer codec.Close() + + dir2, s2 := testServerDCBootstrap(t, "dc1", false) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + dir3, s3 := testServerDCBootstrap(t, "dc1", false) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + joinLAN(t, s2, s1) + joinLAN(t, s3, s1) + + testrpc.WaitForLeader(t, s1.RPC, "dc1") + + // Get the current root + rootReq := &structs.DCSpecificRequest{ + Datacenter: "dc1", + } + var rootList structs.IndexedCARoots + require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", rootReq, &rootList)) + require.Len(rootList.Roots, 1) + + // Update the provider config to use a new private key, which should + // cause a rotation. + _, newKey, err := connect.GeneratePrivateKey() + require.NoError(err) + newConfig := &structs.CAConfiguration{ + Provider: "consul", + Config: map[string]interface{}{ + "PrivateKey": newKey, + "RootCert": "", + "RotationPeriod": 90 * 24 * time.Hour, + }, + } + { + args := &structs.CARequest{ + Datacenter: "dc1", + Config: newConfig, + } + var reply interface{} + + require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply)) + } + + // Get the active root before leader change. + _, root := s1.getCAProvider() + require.Len(root.IntermediateCerts, 1) + + // Force a leader change and make sure the root CA values are preserved. + s1.Leave() + s1.Shutdown() + + retry.Run(t, func(r *retry.R) { + var leader *Server + for _, s := range []*Server{s2, s3} { + if s.IsLeader() { + leader = s + break + } + } + if leader == nil { + r.Fatal("no leader") + } + + _, newLeaderRoot := leader.getCAProvider() + if !reflect.DeepEqual(newLeaderRoot, root) { + r.Fatalf("got %v, want %v", newLeaderRoot, root) + } + }) +} diff --git a/agent/structs/structs.go b/agent/structs/structs.go index 7b719405a4..f5308b351e 100644 --- a/agent/structs/structs.go +++ b/agent/structs/structs.go @@ -31,20 +31,21 @@ type RaftIndex struct { // These are serialized between Consul servers and stored in Consul snapshots, // so entries must only ever be added. const ( - RegisterRequestType MessageType = 0 - DeregisterRequestType = 1 - KVSRequestType = 2 - SessionRequestType = 3 - ACLRequestType = 4 - TombstoneRequestType = 5 - CoordinateBatchUpdateType = 6 - PreparedQueryRequestType = 7 - TxnRequestType = 8 - AutopilotRequestType = 9 - AreaRequestType = 10 - ACLBootstrapRequestType = 11 // FSM snapshots only. - IntentionRequestType = 12 - ConnectCARequestType = 13 + RegisterRequestType MessageType = 0 + DeregisterRequestType = 1 + KVSRequestType = 2 + SessionRequestType = 3 + ACLRequestType = 4 + TombstoneRequestType = 5 + CoordinateBatchUpdateType = 6 + PreparedQueryRequestType = 7 + TxnRequestType = 8 + AutopilotRequestType = 9 + AreaRequestType = 10 + ACLBootstrapRequestType = 11 // FSM snapshots only. + IntentionRequestType = 12 + ConnectCARequestType = 13 + ConnectCAProviderStateType = 14 ) const (