diff --git a/.changelog/8537.txt b/.changelog/8537.txt new file mode 100644 index 0000000000..f2b92b26bf --- /dev/null +++ b/.changelog/8537.txt @@ -0,0 +1,3 @@ +```release-note:bug +api: Fixed a panic caused by an api request with Connect=null +``` diff --git a/.changelog/8552.txt b/.changelog/8552.txt new file mode 100644 index 0000000000..aa4dc69aa0 --- /dev/null +++ b/.changelog/8552.txt @@ -0,0 +1,3 @@ +```release-note:feature +cache: Config parameters for cache throttling are now reloaded automatically on agent reload. Restarting the agent is not needed anymore. +``` diff --git a/.changelog/8588.txt b/.changelog/8588.txt new file mode 100644 index 0000000000..4d29ea25a9 --- /dev/null +++ b/.changelog/8588.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: fix renewing secondary intermediate certificates +``` diff --git a/.changelog/8596.txt b/.changelog/8596.txt new file mode 100644 index 0000000000..0c96f0c7d9 --- /dev/null +++ b/.changelog/8596.txt @@ -0,0 +1,3 @@ +```release-note:feature +connect: all config entries pick up a meta field +``` diff --git a/.changelog/8601.txt b/.changelog/8601.txt new file mode 100644 index 0000000000..f791fe2efe --- /dev/null +++ b/.changelog/8601.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: fix bug in preventing some namespaced config entry modifications +``` diff --git a/.changelog/8602.txt b/.changelog/8602.txt new file mode 100644 index 0000000000..49f2c74ce8 --- /dev/null +++ b/.changelog/8602.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: Allow for the client to use TLS over a Unix domain socket. 
+``` diff --git a/.changelog/8603.txt b/.changelog/8603.txt new file mode 100644 index 0000000000..ffe9a9401f --- /dev/null +++ b/.changelog/8603.txt @@ -0,0 +1,3 @@ +```release-note:feature +telemetry: track node and service counts and emit them as metrics +``` diff --git a/.changelog/8606.txt b/.changelog/8606.txt new file mode 100644 index 0000000000..a899232f22 --- /dev/null +++ b/.changelog/8606.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: `connect envoy` command now respects the `-ca-path` flag +``` diff --git a/.changelog/_8621.txt b/.changelog/_8621.txt new file mode 100644 index 0000000000..194633f37f --- /dev/null +++ b/.changelog/_8621.txt @@ -0,0 +1,3 @@ +```release-note:improvement +snapshot agent: Deregister critical snapshotting TTL check if leadership is transferred. +``` \ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml index adb1cf6cd0..06bcda1f4f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -19,7 +19,7 @@ references: EMAIL: noreply@hashicorp.com GIT_AUTHOR_NAME: circleci-consul GIT_COMMITTER_NAME: circleci-consul - S3_ARTIFACT_BUCKET: consul-dev-artifacts + S3_ARTIFACT_BUCKET: consul-dev-artifacts-v2 BASH_ENV: .circleci/bash_env.sh VAULT_BINARY_VERSION: 1.2.2 @@ -33,6 +33,27 @@ steps: curl -sSL "${url}/v${GOTESTSUM_RELEASE}/gotestsum_${GOTESTSUM_RELEASE}_linux_amd64.tar.gz" | \ sudo tar -xz --overwrite -C /usr/local/bin gotestsum + get-aws-cli: &get-aws-cli + run: + name: download and install AWS CLI + command: | + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" + echo -e "${AWS_CLI_GPG_KEY}" | gpg --import + curl -o awscliv2.sig https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip.sig + gpg --verify awscliv2.sig awscliv2.zip + unzip awscliv2.zip + sudo ./aws/install + + aws-assume-role: &aws-assume-role + run: + name: assume-role aws creds + command: | + # assume role has duration of 15 min (the minimum allowed) + CREDENTIALS="$(aws sts assume-role 
--duration-seconds 900 --role-arn ${ROLE_ARN} --role-session-name build-${CIRCLE_SHA1} | jq '.Credentials')" + echo "export AWS_ACCESS_KEY_ID=$(echo $CREDENTIALS | jq -r '.AccessKeyId')" >> $BASH_ENV + echo "export AWS_SECRET_ACCESS_KEY=$(echo $CREDENTIALS | jq -r '.SecretAccessKey')" >> $BASH_ENV + echo "export AWS_SESSION_TOKEN=$(echo $CREDENTIALS | jq -r '.SessionToken')" >> $BASH_ENV + # This step MUST be at the end of any set of steps due to the 'when' condition notify-slack-failure: &notify-slack-failure name: notify-slack-failure @@ -389,13 +410,13 @@ jobs: # upload development build to s3 dev-upload-s3: docker: - - image: circleci/python:stretch + - image: *GOLANG_IMAGE environment: <<: *ENVIRONMENT steps: - - run: - name: Install awscli - command: sudo pip install awscli + - checkout + - *get-aws-cli + - *aws-assume-role # get consul binary - attach_workspace: at: bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e5451a8dc..f5432731f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ ## UNRELEASED +## 1.8.4 (September 11, 2020) + +FEATURES: + +* agent: expose the list of supported envoy versions on /v1/agent/self [[GH-8545](https://github.com/hashicorp/consul/issues/8545)] +* cache: Config parameters for cache throttling are now reloaded automatically on agent reload. Restarting the agent is not needed anymore. [[GH-8552](https://github.com/hashicorp/consul/issues/8552)] +* connect: all config entries pick up a meta field [[GH-8596](https://github.com/hashicorp/consul/issues/8596)] + +IMPROVEMENTS: + +* api: Added `ACLMode` method to the `AgentMember` type to determine what ACL mode the agent is operating in. [[GH-8575](https://github.com/hashicorp/consul/issues/8575)] +* api: Added `IsConsulServer` method to the `AgentMember` type to easily determine whether the agent is a server. [[GH-8575](https://github.com/hashicorp/consul/issues/8575)] +* api: Added constants for common tag keys and values in the `Tags` field of the `AgentMember` struct.
[[GH-8575](https://github.com/hashicorp/consul/issues/8575)] +* api: Allow for the client to use TLS over a Unix domain socket. [[GH-8602](https://github.com/hashicorp/consul/issues/8602)] +* api: `GET v1/operator/keyring` also lists primary keys. [[GH-8522](https://github.com/hashicorp/consul/issues/8522)] +* connect: Add support for http2 and grpc to ingress gateways [[GH-8458](https://github.com/hashicorp/consul/issues/8458)] +* serf: update to `v0.9.4` which supports primary keys in the ListKeys operation. [[GH-8522](https://github.com/hashicorp/consul/issues/8522)] + +BUGFIXES: + +* connect: use stronger validation that ingress gateways have compatible protocols defined for their upstreams [[GH-8494](https://github.com/hashicorp/consul/issues/8494)] +* agent: ensure that we normalize bootstrapped config entries [[GH-8547](https://github.com/hashicorp/consul/issues/8547)] +* api: Fixed a panic caused by an api request with Connect=null [[GH-8537](https://github.com/hashicorp/consul/issues/8537)] +* connect: `connect envoy` command now respects the `-ca-path` flag [[GH-8606](https://github.com/hashicorp/consul/issues/8606)] +* connect: fix bug in preventing some namespaced config entry modifications [[GH-8601](https://github.com/hashicorp/consul/issues/8601)] +* connect: fix renewing secondary intermediate certificates [[GH-8588](https://github.com/hashicorp/consul/issues/8588)] +* ui: fixed a bug related to in-folder KV creation [[GH-8613](https://github.com/hashicorp/consul/pull/8613)] + ## 1.8.3 (August 12, 2020) BUGFIXES: @@ -116,6 +144,17 @@ BUGFIXES: * ui: Miscellaneous amends for Safari and Firefox [[GH-7904](https://github.com/hashicorp/consul/issues/7904)] [[GH-7907](https://github.com/hashicorp/consul/pull/7907)] * ui: Ensure a value is always passed to CONSUL_SSO_ENABLED [[GH-7913](https://github.com/hashicorp/consul/pull/7913)] +## 1.7.8 (September 11, 2020) + +FEATURES: + +* agent: expose the list of supported envoy versions on /v1/agent/self
[[GH-8545](https://github.com/hashicorp/consul/issues/8545)] + +BUG FIXES: + +* connect: fix bug in preventing some namespaced config entry modifications [[GH-8601](https://github.com/hashicorp/consul/issues/8601)] +* api: fixed a panic caused by an api request with Connect=null [[GH-8537](https://github.com/hashicorp/consul/pull/8537)] + ## 1.7.7 (August 12, 2020) BUGFIXES: @@ -127,7 +166,7 @@ BUGFIXES: BUG FIXES: -* [backport/1.7.x] xds: revert setting set_node_on_first_message_only to true when generating envoy bootstrap config [[GH-8441](https://github.com/hashicorp/consul/issues/8441)] +* xds: revert setting set_node_on_first_message_only to true when generating envoy bootstrap config [[GH-8441](https://github.com/hashicorp/consul/issues/8441)] ## 1.7.5 (July 30, 2020) @@ -340,6 +379,12 @@ BUGFIXES: * ui: Discovery-Chain: Improve parsing of redirects [[GH-7174](https://github.com/hashicorp/consul/pull/7174)] * ui: Fix styling of ‘duplicate intention’ error message [[GH6936]](https://github.com/hashicorp/consul/pull/6936) +## 1.6.9 (September 11, 2020) + +BUG FIXES: + +* api: fixed a panic caused by an api request with Connect=null [[GH-8537](https://github.com/hashicorp/consul/pull/8537)] + ## 1.6.8 (August 12, 2020) BUG FIXES: diff --git a/README.md b/README.md index 05945f70dd..d70061d541 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Consul [![CircleCI](https://circleci.com/gh/hashicorp/consul/tree/master.svg?style=svg)](https://circleci.com/gh/hashicorp/consul/tree/master) [![Discuss](https://img.shields.io/badge/discuss-consul-ca2171.svg?style=flat)](https://discuss.hashicorp.com/c/consul) * Website: https://www.consul.io -* Tutorials: [https://learn.hashicorp.com](https://learn.hashicorp.com/consul) +* Tutorials: [HashiCorp Learn](https://learn.hashicorp.com/consul) * Forum: [Discuss](https://discuss.hashicorp.com/c/consul) Consul is a distributed, highly available, and data center aware solution to connect and configure applications across 
dynamic, distributed infrastructure. @@ -10,12 +10,12 @@ Consul provides several key features: * **Multi-Datacenter** - Consul is built to be datacenter aware, and can support any number of regions without complex configuration. - + * **Service Mesh/Service Segmentation** - Consul Connect enables secure service-to-service - communication with automatic TLS encryption and identity-based authorization. Applications - can use sidecar proxies in a service mesh configuration to establish TLS - connections for inbound and outbound connections without being aware of Connect at all. - + communication with automatic TLS encryption and identity-based authorization. Applications + can use sidecar proxies in a service mesh configuration to establish TLS + connections for inbound and outbound connections without being aware of Connect at all. + * **Service Discovery** - Consul makes it simple for services to register themselves and to discover other services via a DNS or HTTP interface. External services such as SaaS providers can be registered as well. @@ -41,9 +41,10 @@ contacting us at security@hashicorp.com. 
A few quick start guides are available on the Consul website: -* **Standalone binary install:** https://learn.hashicorp.com/consul/getting-started/install -* **Minikube install:** https://learn.hashicorp.com/consul/kubernetes/minikube -* **Kubernetes install:** https://learn.hashicorp.com/consul/kubernetes/kubernetes-deployment-guide +* **Standalone binary install:** https://learn.hashicorp.com/tutorials/consul/get-started-install +* **Minikube install:** https://learn.hashicorp.com/tutorials/consul/kubernetes-minikube +* **Kind install:** https://learn.hashicorp.com/tutorials/consul/kubernetes-kind +* **Kubernetes install:** https://learn.hashicorp.com/tutorials/consul/kubernetes-deployment-guide ## Documentation diff --git a/agent/acl_test.go b/agent/acl_test.go index a88d89273f..79ade86b20 100644 --- a/agent/acl_test.go +++ b/agent/acl_test.go @@ -184,7 +184,9 @@ func TestACL_AgentMasterToken(t *testing.T) { t.Parallel() a := NewTestACLAgent(t, t.Name(), TestACLConfig(), nil, nil) - a.loadTokens(a.config) + err := a.tokens.Load(a.config.ACLTokens, a.logger) + require.NoError(t, err) + authz, err := a.resolveToken("towel") require.NotNil(t, authz) require.Nil(t, err) diff --git a/agent/agent.go b/agent/agent.go index 71f011a51b..9c63eb06b3 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -19,6 +19,7 @@ import ( "github.com/hashicorp/consul/agent/dns" "github.com/hashicorp/consul/agent/router" + "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/go-connlimit" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-memdb" @@ -31,7 +32,6 @@ import ( autoconf "github.com/hashicorp/consul/agent/auto-config" "github.com/hashicorp/consul/agent/cache" cachetype "github.com/hashicorp/consul/agent/cache-types" - certmon "github.com/hashicorp/consul/agent/cert-monitor" "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/consul" @@ -40,7 +40,6 @@ import ( 
"github.com/hashicorp/consul/agent/proxycfg" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/systemd" - "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/agent/xds" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/api/watch" @@ -68,9 +67,6 @@ const ( checksDir = "checks" checkStateDir = "checks/state" - // Name of the file tokens will be persisted within - tokensPath = "acl-tokens.json" - // Default reasons for node/service maintenance mode defaultNodeMaintReason = "Maintenance mode is enabled for this node, " + "but no reason was provided. This is a default message." @@ -162,8 +158,6 @@ type notifier interface { type Agent struct { autoConf *autoconf.AutoConfig - certMonitor *certmon.CertMonitor - // config is the agent configuration. config *config.RuntimeConfig @@ -262,10 +256,12 @@ type Agent struct { // dnsServer provides the DNS API dnsServers []*DNSServer - // httpServers provides the HTTP API on various endpoints - httpServers []*HTTPServer + // apiServers listening for connections. If any of these server goroutines + // fail, the agent will be shutdown. + apiServers *apiServers // wgServers is the wait group for all HTTP and DNS servers + // TODO: remove once dnsServers are handled by apiServers wgServers sync.WaitGroup // watchPlans tracks all the currently-running watch plans for the @@ -295,11 +291,6 @@ type Agent struct { // based on the current consul configuration. tlsConfigurator *tlsutil.Configurator - // persistedTokensLock is used to synchronize access to the persisted token - // store within the data directory. This will prevent loading while writing as - // well as multiple concurrent writes. - persistedTokensLock sync.RWMutex - // httpConnLimiter is used to limit connections to the HTTP server by client // IP. httpConnLimiter connlimit.Limiter @@ -373,6 +364,12 @@ func New(bd BaseDeps) (*Agent, error) { // pass the agent itself so its safe to move here. 
a.registerCache() + // TODO: why do we ignore failure to load persisted tokens? + _ = a.tokens.Load(bd.RuntimeConfig.ACLTokens, a.logger) + + // TODO: pass in a fully populated apiServers into Agent.New + a.apiServers = NewAPIServers(a.logger) + return &a, nil } @@ -426,11 +423,6 @@ func (a *Agent) Start(ctx context.Context) error { return fmt.Errorf("Failed to load TLS configurations after applying auto-config settings: %w", err) } - // TODO: move to newBaseDeps - // TODO: handle error - a.loadTokens(a.config) - a.loadEnterpriseTokens(a.config) - // create the local state a.State = local.NewState(LocalConfig(c), a.logger, a.tokens) @@ -495,43 +487,6 @@ func (a *Agent) Start(ctx context.Context) error { a.State.Delegate = a.delegate a.State.TriggerSyncChanges = a.sync.SyncChanges.Trigger - if a.config.AutoEncryptTLS && !a.config.ServerMode { - reply, err := a.autoEncryptInitialCertificate(ctx) - if err != nil { - return fmt.Errorf("AutoEncrypt failed: %s", err) - } - - cmConfig := new(certmon.Config). - WithCache(a.cache). - WithLogger(a.logger.Named(logging.AutoEncrypt)). - WithTLSConfigurator(a.tlsConfigurator). - WithTokens(a.tokens). - WithFallback(a.autoEncryptInitialCertificate). - WithDNSSANs(a.config.AutoEncryptDNSSAN). - WithIPSANs(a.config.AutoEncryptIPSAN). - WithDatacenter(a.config.Datacenter). - WithNodeName(a.config.NodeName) - - monitor, err := certmon.New(cmConfig) - if err != nil { - return fmt.Errorf("AutoEncrypt failed to setup certificate monitor: %w", err) - } - if err := monitor.Update(reply); err != nil { - return fmt.Errorf("AutoEncrypt failed to setup certificate monitor: %w", err) - } - a.certMonitor = monitor - - // we don't need to worry about ever calling Stop as we have tied the go routines - // to the agents lifetime by using the StopCh. Also the agent itself doesn't have - // a need of ensuring that the go routine was stopped before performing any action - // so we can ignore the chan in the return. 
- if _, err := a.certMonitor.Start(&lib.StopChannelContext{StopCh: a.shutdownCh}); err != nil { - return fmt.Errorf("AutoEncrypt failed to start certificate monitor: %w", err) - } - - a.logger.Info("automatically upgraded to TLS") - } - if err := a.autoConf.Start(&lib.StopChannelContext{StopCh: a.shutdownCh}); err != nil { return fmt.Errorf("AutoConf failed to start certificate monitor: %w", err) } @@ -620,10 +575,7 @@ func (a *Agent) Start(ctx context.Context) error { // Start HTTP and HTTPS servers. for _, srv := range servers { - if err := a.serveHTTP(srv); err != nil { - return err - } - a.httpServers = append(a.httpServers, srv) + a.apiServers.Start(srv) } // Start gRPC server. @@ -645,17 +597,10 @@ func (a *Agent) Start(ctx context.Context) error { return nil } -func (a *Agent) autoEncryptInitialCertificate(ctx context.Context) (*structs.SignedResponse, error) { - client := a.delegate.(*consul.Client) - - addrs := a.config.StartJoinAddrsLAN - disco, err := newDiscover() - if err != nil && len(addrs) == 0 { - return nil, err - } - addrs = append(addrs, retryJoinAddrs(disco, retryJoinSerfVariant, "LAN", a.config.RetryJoinLAN, a.logger)...) - - return client.RequestAutoEncryptCerts(ctx, addrs, a.config.ServerPort, a.tokens.AgentToken(), a.config.AutoEncryptDNSSAN, a.config.AutoEncryptIPSAN) +// Failed returns a channel which is closed when the first server goroutine exits +// with a non-nil error. +func (a *Agent) Failed() <-chan struct{} { + return a.apiServers.failed } func (a *Agent) listenAndServeGRPC() error { @@ -790,14 +735,16 @@ func (a *Agent) startListeners(addrs []net.Addr) ([]net.Listener, error) { // // This approach should ultimately be refactored to the point where we just // start the server and any error should trigger a proper shutdown of the agent. 
-func (a *Agent) listenHTTP() ([]*HTTPServer, error) { +func (a *Agent) listenHTTP() ([]apiServer, error) { var ln []net.Listener - var servers []*HTTPServer + var servers []apiServer + start := func(proto string, addrs []net.Addr) error { listeners, err := a.startListeners(addrs) if err != nil { return err } + ln = append(ln, listeners...) for _, l := range listeners { var tlscfg *tls.Config @@ -807,18 +754,15 @@ func (a *Agent) listenHTTP() ([]*HTTPServer, error) { l = tls.NewListener(l, tlscfg) } + srv := &HTTPServer{ + agent: a, + denylist: NewDenylist(a.config.HTTPBlockEndpoints), + } httpServer := &http.Server{ Addr: l.Addr().String(), TLSConfig: tlscfg, + Handler: srv.handler(a.config.EnableDebug), } - srv := &HTTPServer{ - Server: httpServer, - ln: l, - agent: a, - denylist: NewDenylist(a.config.HTTPBlockEndpoints), - proto: proto, - } - httpServer.Handler = srv.handler(a.config.EnableDebug) // Load the connlimit helper into the server connLimitFn := a.httpConnLimiter.HTTPConnStateFuncWithDefault429Handler(10 * time.Millisecond) @@ -831,27 +775,39 @@ func (a *Agent) listenHTTP() ([]*HTTPServer, error) { httpServer.ConnState = connLimitFn } - ln = append(ln, l) - servers = append(servers, srv) + servers = append(servers, apiServer{ + Protocol: proto, + Addr: l.Addr(), + Shutdown: httpServer.Shutdown, + Run: func() error { + err := httpServer.Serve(l) + if err == nil || err == http.ErrServerClosed { + return nil + } + return fmt.Errorf("%s server %s failed: %w", proto, l.Addr(), err) + }, + }) } return nil } if err := start("http", a.config.HTTPAddrs); err != nil { - for _, l := range ln { - l.Close() - } + closeListeners(ln) return nil, err } if err := start("https", a.config.HTTPSAddrs); err != nil { - for _, l := range ln { - l.Close() - } + closeListeners(ln) return nil, err } return servers, nil } +func closeListeners(lns []net.Listener) { + for _, l := range lns { + l.Close() + } +} + // setupHTTPS adds HTTP/2 support, ConnState, and a connection 
handshake timeout // to the http.Server. func setupHTTPS(server *http.Server, connState func(net.Conn, http.ConnState), timeout time.Duration) error { @@ -913,43 +869,6 @@ func (a *Agent) listenSocket(path string) (net.Listener, error) { return l, nil } -func (a *Agent) serveHTTP(srv *HTTPServer) error { - // https://github.com/golang/go/issues/20239 - // - // In go.8.1 there is a race between Serve and Shutdown. If - // Shutdown is called before the Serve go routine was scheduled then - // the Serve go routine never returns. This deadlocks the agent - // shutdown for some tests since it will wait forever. - notif := make(chan net.Addr) - a.wgServers.Add(1) - go func() { - defer a.wgServers.Done() - notif <- srv.ln.Addr() - err := srv.Server.Serve(srv.ln) - if err != nil && err != http.ErrServerClosed { - a.logger.Error("error closing server", "error", err) - } - }() - - select { - case addr := <-notif: - if srv.proto == "https" { - a.logger.Info("Started HTTPS server", - "address", addr.String(), - "network", addr.Network(), - ) - } else { - a.logger.Info("Started HTTP server", - "address", addr.String(), - "network", addr.Network(), - ) - } - return nil - case <-time.After(time.Second): - return fmt.Errorf("agent: timeout starting HTTP servers") - } -} - // stopAllWatches stops all the currently running watches func (a *Agent) stopAllWatches() { for _, wp := range a.watchPlans { @@ -1380,12 +1299,6 @@ func (a *Agent) ShutdownAgent() error { // this should help them to be stopped more quickly a.autoConf.Stop() - if a.certMonitor != nil { - // this would be cancelled anyways (by the closing of the shutdown ch) - // but this should help them to be stopped more quickly - a.certMonitor.Stop() - } - // Stop the service manager (must happen before we take the stateLock to avoid deadlock) if a.serviceManager != nil { a.serviceManager.Stop() @@ -1454,13 +1367,12 @@ func (a *Agent) ShutdownAgent() error { // ShutdownEndpoints terminates the HTTP and DNS servers. 
Should be // preceded by ShutdownAgent. +// TODO: remove this method, move to ShutdownAgent func (a *Agent) ShutdownEndpoints() { a.shutdownLock.Lock() defer a.shutdownLock.Unlock() - if len(a.dnsServers) == 0 && len(a.httpServers) == 0 { - return - } + ctx := context.TODO() for _, srv := range a.dnsServers { if srv.Server != nil { @@ -1474,27 +1386,11 @@ func (a *Agent) ShutdownEndpoints() { } a.dnsServers = nil - for _, srv := range a.httpServers { - a.logger.Info("Stopping server", - "protocol", strings.ToUpper(srv.proto), - "address", srv.ln.Addr().String(), - "network", srv.ln.Addr().Network(), - ) - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - defer cancel() - srv.Server.Shutdown(ctx) - if ctx.Err() == context.DeadlineExceeded { - a.logger.Warn("Timeout stopping server", - "protocol", strings.ToUpper(srv.proto), - "address", srv.ln.Addr().String(), - "network", srv.ln.Addr().Network(), - ) - } - } - a.httpServers = nil - + a.apiServers.Shutdown(ctx) a.logger.Info("Waiting for endpoints to shut down") - a.wgServers.Wait() + if err := a.apiServers.WaitForShutdown(); err != nil { + a.logger.Error(err.Error()) + } a.logger.Info("Endpoints down") } @@ -3446,90 +3342,6 @@ func (a *Agent) unloadChecks() error { return nil } -type persistedTokens struct { - Replication string `json:"replication,omitempty"` - AgentMaster string `json:"agent_master,omitempty"` - Default string `json:"default,omitempty"` - Agent string `json:"agent,omitempty"` -} - -func (a *Agent) getPersistedTokens() (*persistedTokens, error) { - persistedTokens := &persistedTokens{} - if !a.config.ACLEnableTokenPersistence { - return persistedTokens, nil - } - - a.persistedTokensLock.RLock() - defer a.persistedTokensLock.RUnlock() - - tokensFullPath := filepath.Join(a.config.DataDir, tokensPath) - - buf, err := ioutil.ReadFile(tokensFullPath) - if err != nil { - if os.IsNotExist(err) { - // non-existence is not an error we care about - return persistedTokens, nil - } - 
return persistedTokens, fmt.Errorf("failed reading tokens file %q: %s", tokensFullPath, err) - } - - if err := json.Unmarshal(buf, persistedTokens); err != nil { - return persistedTokens, fmt.Errorf("failed to decode tokens file %q: %s", tokensFullPath, err) - } - - return persistedTokens, nil -} - -func (a *Agent) loadTokens(conf *config.RuntimeConfig) error { - persistedTokens, persistenceErr := a.getPersistedTokens() - - if persistenceErr != nil { - a.logger.Warn("unable to load persisted tokens", "error", persistenceErr) - } - - if persistedTokens.Default != "" { - a.tokens.UpdateUserToken(persistedTokens.Default, token.TokenSourceAPI) - - if conf.ACLToken != "" { - a.logger.Warn("\"default\" token present in both the configuration and persisted token store, using the persisted token") - } - } else { - a.tokens.UpdateUserToken(conf.ACLToken, token.TokenSourceConfig) - } - - if persistedTokens.Agent != "" { - a.tokens.UpdateAgentToken(persistedTokens.Agent, token.TokenSourceAPI) - - if conf.ACLAgentToken != "" { - a.logger.Warn("\"agent\" token present in both the configuration and persisted token store, using the persisted token") - } - } else { - a.tokens.UpdateAgentToken(conf.ACLAgentToken, token.TokenSourceConfig) - } - - if persistedTokens.AgentMaster != "" { - a.tokens.UpdateAgentMasterToken(persistedTokens.AgentMaster, token.TokenSourceAPI) - - if conf.ACLAgentMasterToken != "" { - a.logger.Warn("\"agent_master\" token present in both the configuration and persisted token store, using the persisted token") - } - } else { - a.tokens.UpdateAgentMasterToken(conf.ACLAgentMasterToken, token.TokenSourceConfig) - } - - if persistedTokens.Replication != "" { - a.tokens.UpdateReplicationToken(persistedTokens.Replication, token.TokenSourceAPI) - - if conf.ACLReplicationToken != "" { - a.logger.Warn("\"replication\" token present in both the configuration and persisted token store, using the persisted token") - } - } else { - 
a.tokens.UpdateReplicationToken(conf.ACLReplicationToken, token.TokenSourceConfig) - } - - return persistenceErr -} - // snapshotCheckState is used to snapshot the current state of the health // checks. This is done before we reload our checks, so that we can properly // restore into the same state. @@ -3709,8 +3521,7 @@ func (a *Agent) reloadConfigInternal(newCfg *config.RuntimeConfig) error { // Reload tokens - should be done before all the other loading // to ensure the correct tokens are available for attaching to // the checks and service registrations. - a.loadTokens(newCfg) - a.loadEnterpriseTokens(newCfg) + a.tokens.Load(newCfg.ACLTokens, a.logger) if err := a.tlsConfigurator.Update(newCfg.ToTLSUtilConfig()); err != nil { return fmt.Errorf("Failed reloading tls configuration: %s", err) @@ -3764,6 +3575,12 @@ func (a *Agent) reloadConfigInternal(newCfg *config.RuntimeConfig) error { return err } + if a.cache.ReloadOptions(newCfg.Cache) { + a.logger.Info("Cache options have been updated") + } else { + a.logger.Debug("Cache options have not been modified") + } + // Update filtered metrics metrics.UpdateFilter(newCfg.Telemetry.AllowedPrefixes, newCfg.Telemetry.BlockedPrefixes) diff --git a/agent/agent_endpoint.go b/agent/agent_endpoint.go index 32f26b02eb..1457d5093a 100644 --- a/agent/agent_endpoint.go +++ b/agent/agent_endpoint.go @@ -1,10 +1,8 @@ package agent import ( - "encoding/json" "fmt" "net/http" - "path/filepath" "strconv" "strings" @@ -21,7 +19,6 @@ import ( "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/ipaddr" "github.com/hashicorp/consul/lib" - "github.com/hashicorp/consul/lib/file" "github.com/hashicorp/consul/logging" "github.com/hashicorp/consul/logging/monitor" "github.com/hashicorp/consul/types" @@ -1233,79 +1230,42 @@ func (s *HTTPServer) AgentToken(resp http.ResponseWriter, req *http.Request) (in return nil, nil } - if s.agent.config.ACLEnableTokenPersistence { - // we hold the lock around updating the internal token store 
- // as well as persisting the tokens because we don't want to write - // into the store to have something else wipe it out before we can - // persist everything (like an agent config reload). The token store - // lock is only held for those operations so other go routines that - // just need to read some token out of the store will not be impacted - // any more than they would be without token persistence. - s.agent.persistedTokensLock.Lock() - defer s.agent.persistedTokensLock.Unlock() - } - // Figure out the target token. target := strings.TrimPrefix(req.URL.Path, "/v1/agent/token/") - triggerAntiEntropySync := false - switch target { - case "acl_token", "default": - changed := s.agent.tokens.UpdateUserToken(args.Token, token_store.TokenSourceAPI) - if changed { - triggerAntiEntropySync = true + + err = s.agent.tokens.WithPersistenceLock(func() error { + triggerAntiEntropySync := false + switch target { + case "acl_token", "default": + changed := s.agent.tokens.UpdateUserToken(args.Token, token_store.TokenSourceAPI) + if changed { + triggerAntiEntropySync = true + } + + case "acl_agent_token", "agent": + changed := s.agent.tokens.UpdateAgentToken(args.Token, token_store.TokenSourceAPI) + if changed { + triggerAntiEntropySync = true + } + + case "acl_agent_master_token", "agent_master": + s.agent.tokens.UpdateAgentMasterToken(args.Token, token_store.TokenSourceAPI) + + case "acl_replication_token", "replication": + s.agent.tokens.UpdateReplicationToken(args.Token, token_store.TokenSourceAPI) + + default: + return NotFoundError{Reason: fmt.Sprintf("Token %q is unknown", target)} } - case "acl_agent_token", "agent": - changed := s.agent.tokens.UpdateAgentToken(args.Token, token_store.TokenSourceAPI) - if changed { - triggerAntiEntropySync = true - } - - case "acl_agent_master_token", "agent_master": - s.agent.tokens.UpdateAgentMasterToken(args.Token, token_store.TokenSourceAPI) - - case "acl_replication_token", "replication": - 
s.agent.tokens.UpdateReplicationToken(args.Token, token_store.TokenSourceAPI) - - default: - resp.WriteHeader(http.StatusNotFound) - fmt.Fprintf(resp, "Token %q is unknown", target) - return nil, nil - } - - if triggerAntiEntropySync { - s.agent.sync.SyncFull.Trigger() - } - - if s.agent.config.ACLEnableTokenPersistence { - tokens := persistedTokens{} - - if tok, source := s.agent.tokens.UserTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI { - tokens.Default = tok - } - - if tok, source := s.agent.tokens.AgentTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI { - tokens.Agent = tok - } - - if tok, source := s.agent.tokens.AgentMasterTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI { - tokens.AgentMaster = tok - } - - if tok, source := s.agent.tokens.ReplicationTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI { - tokens.Replication = tok - } - - data, err := json.Marshal(tokens) - if err != nil { - s.agent.logger.Warn("failed to persist tokens", "error", err) - return nil, fmt.Errorf("Failed to marshal tokens for persistence: %v", err) - } - - if err := file.WriteAtomicWithPerms(filepath.Join(s.agent.config.DataDir, tokensPath), data, 0700, 0600); err != nil { - s.agent.logger.Warn("failed to persist tokens", "error", err) - return nil, fmt.Errorf("Failed to persist tokens - %v", err) + // TODO: is it safe to move this out of WithPersistenceLock? 
+ if triggerAntiEntropySync { + s.agent.sync.SyncFull.Trigger() } + return nil + }) + if err != nil { + return nil, err } s.agent.logger.Info("Updated agent's ACL token", "token", target) diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 505835ce84..5958c8b8c5 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -4774,13 +4774,14 @@ func TestAgent_Token(t *testing.T) { init tokens raw tokens effective tokens + expectedErr error }{ { - name: "bad token name", - method: "PUT", - url: "nope?token=root", - body: body("X"), - code: http.StatusNotFound, + name: "bad token name", + method: "PUT", + url: "nope?token=root", + body: body("X"), + expectedErr: NotFoundError{Reason: `Token "nope" is unknown`}, }, { name: "bad JSON", @@ -4942,7 +4943,12 @@ func TestAgent_Token(t *testing.T) { url := fmt.Sprintf("/v1/agent/token/%s", tt.url) resp := httptest.NewRecorder() req, _ := http.NewRequest(tt.method, url, tt.body) + _, err := a.srv.AgentToken(resp, req) + if tt.expectedErr != nil { + require.Equal(t, tt.expectedErr, err) + return + } require.NoError(t, err) require.Equal(t, tt.code, resp.Code) require.Equal(t, tt.effective.user, a.tokens.UserToken()) diff --git a/agent/agent_oss.go b/agent/agent_oss.go index 03b2f7ef52..705205fb30 100644 --- a/agent/agent_oss.go +++ b/agent/agent_oss.go @@ -23,10 +23,6 @@ func (a *Agent) initEnterprise(consulCfg *consul.Config) error { return nil } -// loadEnterpriseTokens is a noop stub for the func defined agent_ent.go -func (a *Agent) loadEnterpriseTokens(conf *config.RuntimeConfig) { -} - // reloadEnterprise is a noop stub for the func defined agent_ent.go func (a *Agent) reloadEnterprise(conf *config.RuntimeConfig) error { return nil diff --git a/agent/agent_test.go b/agent/agent_test.go index 4a3a672c20..dded499ee2 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -43,6 +43,7 @@ import ( "github.com/hashicorp/serf/serf" "github.com/stretchr/testify/assert" 
"github.com/stretchr/testify/require" + "golang.org/x/time/rate" "gopkg.in/square/go-jose.v2/jwt" ) @@ -765,10 +766,18 @@ func TestCacheRateLimit(test *testing.T) { test.Run(fmt.Sprintf("rate_limit_at_%v", currentTest.rateLimit), func(t *testing.T) { tt := currentTest t.Parallel() - a := NewTestAgent(t, fmt.Sprintf("cache = { entry_fetch_rate = %v, entry_fetch_max_burst = 1 }", tt.rateLimit)) + a := NewTestAgent(t, "cache = { entry_fetch_rate = 1, entry_fetch_max_burst = 100 }") defer a.Shutdown() testrpc.WaitForTestAgent(t, a.RPC, "dc1") + cfg := a.config + require.Equal(t, rate.Limit(1), a.config.Cache.EntryFetchRate) + require.Equal(t, 100, a.config.Cache.EntryFetchMaxBurst) + cfg.Cache.EntryFetchRate = rate.Limit(tt.rateLimit) + cfg.Cache.EntryFetchMaxBurst = 1 + a.reloadConfigInternal(cfg) + require.Equal(t, rate.Limit(tt.rateLimit), a.config.Cache.EntryFetchRate) + require.Equal(t, 1, a.config.Cache.EntryFetchMaxBurst) var wg sync.WaitGroup stillProcessing := true @@ -1908,7 +1917,7 @@ func TestAgent_HTTPCheck_EnableAgentTLSForChecks(t *testing.T) { Status: api.HealthCritical, } - url := fmt.Sprintf("https://%s/v1/agent/self", a.srv.ln.Addr().String()) + url := fmt.Sprintf("https://%s/v1/agent/self", a.HTTPAddr()) chk := &structs.CheckType{ HTTP: url, Interval: 20 * time.Millisecond, @@ -3336,163 +3345,6 @@ func TestAgent_reloadWatchesHTTPS(t *testing.T) { } } -func TestAgent_loadTokens(t *testing.T) { - t.Parallel() - a := NewTestAgent(t, ` - acl = { - enabled = true - tokens = { - agent = "alfa" - agent_master = "bravo", - default = "charlie" - replication = "delta" - } - } - - `) - defer a.Shutdown() - require := require.New(t) - - tokensFullPath := filepath.Join(a.config.DataDir, tokensPath) - - t.Run("original-configuration", func(t *testing.T) { - require.Equal("alfa", a.tokens.AgentToken()) - require.Equal("bravo", a.tokens.AgentMasterToken()) - require.Equal("charlie", a.tokens.UserToken()) - require.Equal("delta", a.tokens.ReplicationToken()) - }) - 
- t.Run("updated-configuration", func(t *testing.T) { - cfg := &config.RuntimeConfig{ - ACLToken: "echo", - ACLAgentToken: "foxtrot", - ACLAgentMasterToken: "golf", - ACLReplicationToken: "hotel", - } - // ensures no error for missing persisted tokens file - require.NoError(a.loadTokens(cfg)) - require.Equal("echo", a.tokens.UserToken()) - require.Equal("foxtrot", a.tokens.AgentToken()) - require.Equal("golf", a.tokens.AgentMasterToken()) - require.Equal("hotel", a.tokens.ReplicationToken()) - }) - - t.Run("persisted-tokens", func(t *testing.T) { - cfg := &config.RuntimeConfig{ - ACLToken: "echo", - ACLAgentToken: "foxtrot", - ACLAgentMasterToken: "golf", - ACLReplicationToken: "hotel", - } - - tokens := `{ - "agent" : "india", - "agent_master" : "juliett", - "default": "kilo", - "replication" : "lima" - }` - - require.NoError(ioutil.WriteFile(tokensFullPath, []byte(tokens), 0600)) - require.NoError(a.loadTokens(cfg)) - - // no updates since token persistence is not enabled - require.Equal("echo", a.tokens.UserToken()) - require.Equal("foxtrot", a.tokens.AgentToken()) - require.Equal("golf", a.tokens.AgentMasterToken()) - require.Equal("hotel", a.tokens.ReplicationToken()) - - a.config.ACLEnableTokenPersistence = true - require.NoError(a.loadTokens(cfg)) - - require.Equal("india", a.tokens.AgentToken()) - require.Equal("juliett", a.tokens.AgentMasterToken()) - require.Equal("kilo", a.tokens.UserToken()) - require.Equal("lima", a.tokens.ReplicationToken()) - }) - - t.Run("persisted-tokens-override", func(t *testing.T) { - tokens := `{ - "agent" : "mike", - "agent_master" : "november", - "default": "oscar", - "replication" : "papa" - }` - - cfg := &config.RuntimeConfig{ - ACLToken: "quebec", - ACLAgentToken: "romeo", - ACLAgentMasterToken: "sierra", - ACLReplicationToken: "tango", - } - - require.NoError(ioutil.WriteFile(tokensFullPath, []byte(tokens), 0600)) - require.NoError(a.loadTokens(cfg)) - - require.Equal("mike", a.tokens.AgentToken()) - 
require.Equal("november", a.tokens.AgentMasterToken()) - require.Equal("oscar", a.tokens.UserToken()) - require.Equal("papa", a.tokens.ReplicationToken()) - }) - - t.Run("partial-persisted", func(t *testing.T) { - tokens := `{ - "agent" : "uniform", - "agent_master" : "victor" - }` - - cfg := &config.RuntimeConfig{ - ACLToken: "whiskey", - ACLAgentToken: "xray", - ACLAgentMasterToken: "yankee", - ACLReplicationToken: "zulu", - } - - require.NoError(ioutil.WriteFile(tokensFullPath, []byte(tokens), 0600)) - require.NoError(a.loadTokens(cfg)) - - require.Equal("uniform", a.tokens.AgentToken()) - require.Equal("victor", a.tokens.AgentMasterToken()) - require.Equal("whiskey", a.tokens.UserToken()) - require.Equal("zulu", a.tokens.ReplicationToken()) - }) - - t.Run("persistence-error-not-json", func(t *testing.T) { - cfg := &config.RuntimeConfig{ - ACLToken: "one", - ACLAgentToken: "two", - ACLAgentMasterToken: "three", - ACLReplicationToken: "four", - } - - require.NoError(ioutil.WriteFile(tokensFullPath, []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, 0600)) - err := a.loadTokens(cfg) - require.Error(err) - - require.Equal("one", a.tokens.UserToken()) - require.Equal("two", a.tokens.AgentToken()) - require.Equal("three", a.tokens.AgentMasterToken()) - require.Equal("four", a.tokens.ReplicationToken()) - }) - - t.Run("persistence-error-wrong-top-level", func(t *testing.T) { - cfg := &config.RuntimeConfig{ - ACLToken: "alfa", - ACLAgentToken: "bravo", - ACLAgentMasterToken: "charlie", - ACLReplicationToken: "foxtrot", - } - - require.NoError(ioutil.WriteFile(tokensFullPath, []byte("[1,2,3]"), 0600)) - err := a.loadTokens(cfg) - require.Error(err) - - require.Equal("alfa", a.tokens.UserToken()) - require.Equal("bravo", a.tokens.AgentToken()) - require.Equal("charlie", a.tokens.AgentMasterToken()) - require.Equal("foxtrot", a.tokens.ReplicationToken()) - }) -} - func TestAgent_SecurityChecks(t *testing.T) { t.Parallel() hcl := ` diff --git a/agent/apiserver.go 
b/agent/apiserver.go new file mode 100644 index 0000000000..27087829a6 --- /dev/null +++ b/agent/apiserver.go @@ -0,0 +1,94 @@ +package agent + +import ( + "context" + "net" + "sync" + "time" + + "github.com/hashicorp/go-hclog" + "golang.org/x/sync/errgroup" +) + +// apiServers is a wrapper around errgroup.Group for managing go routines for +// long running agent components (ex: http server, dns server). If any of the +// servers fail, the failed channel will be closed, which will cause the agent +// to be shutdown instead of running in a degraded state. +// +// This struct exists as a shim for using errgroup.Group without making major +// changes to Agent. In the future it may be removed and replaced with more +// direct usage of errgroup.Group. +type apiServers struct { + logger hclog.Logger + group *errgroup.Group + servers []apiServer + // failed channel is closed when the first server goroutine exits with a + // non-nil error. + failed <-chan struct{} +} + +type apiServer struct { + // Protocol supported by this server. One of: dns, http, https + Protocol string + // Addr the server is listening on + Addr net.Addr + // Run will be called in a goroutine to run the server. When any Run exits + // with a non-nil error, the failed channel will be closed. + Run func() error + // Shutdown function used to stop the server + Shutdown func(context.Context) error +} + +// NewAPIServers returns an empty apiServers that is ready to Start servers.
+func NewAPIServers(logger hclog.Logger) *apiServers { + group, ctx := errgroup.WithContext(context.TODO()) + return &apiServers{ + logger: logger, + group: group, + failed: ctx.Done(), + } +} + +func (s *apiServers) Start(srv apiServer) { + srv.logger(s.logger).Info("Starting server") + s.servers = append(s.servers, srv) + s.group.Go(srv.Run) +} + +func (s apiServer) logger(base hclog.Logger) hclog.Logger { + return base.With( + "protocol", s.Protocol, + "address", s.Addr.String(), + "network", s.Addr.Network()) +} + +// Shutdown all the servers and log any errors as warning. Each server is given +// 1 second, or until ctx is cancelled, to shutdown gracefully. +func (s *apiServers) Shutdown(ctx context.Context) { + shutdownGroup := new(sync.WaitGroup) + + for i := range s.servers { + server := s.servers[i] + shutdownGroup.Add(1) + + go func() { + defer shutdownGroup.Done() + logger := server.logger(s.logger) + logger.Info("Stopping server") + + ctx, cancel := context.WithTimeout(ctx, time.Second) + defer cancel() + if err := server.Shutdown(ctx); err != nil { + logger.Warn("Failed to stop server") + } + }() + } + s.servers = nil + shutdownGroup.Wait() +} + +// WaitForShutdown waits until all server goroutines have exited. Shutdown +// must be called before WaitForShutdown, otherwise it will block forever. 
+func (s *apiServers) WaitForShutdown() error { + return s.group.Wait() +} diff --git a/agent/apiserver_test.go b/agent/apiserver_test.go new file mode 100644 index 0000000000..72f8c6d651 --- /dev/null +++ b/agent/apiserver_test.go @@ -0,0 +1,65 @@ +package agent + +import ( + "context" + "fmt" + "net" + "testing" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/stretchr/testify/require" +) + +func TestAPIServers_WithServiceRunError(t *testing.T) { + servers := NewAPIServers(hclog.New(nil)) + + server1, chErr1 := newAPIServerStub() + server2, _ := newAPIServerStub() + + t.Run("Start", func(t *testing.T) { + servers.Start(server1) + servers.Start(server2) + + select { + case <-servers.failed: + t.Fatalf("expected servers to still be running") + case <-time.After(5 * time.Millisecond): + } + }) + + err := fmt.Errorf("oops, I broke") + + t.Run("server exit non-nil error", func(t *testing.T) { + chErr1 <- err + + select { + case <-servers.failed: + case <-time.After(time.Second): + t.Fatalf("expected failed channel to be closed") + } + }) + + t.Run("shutdown remaining services", func(t *testing.T) { + servers.Shutdown(context.Background()) + require.Equal(t, err, servers.WaitForShutdown()) + }) +} + +func newAPIServerStub() (apiServer, chan error) { + chErr := make(chan error) + return apiServer{ + Protocol: "http", + Addr: &net.TCPAddr{ + IP: net.ParseIP("127.0.0.11"), + Port: 5505, + }, + Run: func() error { + return <-chErr + }, + Shutdown: func(ctx context.Context) error { + close(chErr) + return nil + }, + }, chErr +} diff --git a/agent/auto-config/auto_config.go b/agent/auto-config/auto_config.go index 939879a767..335f0f9872 100644 --- a/agent/auto-config/auto_config.go +++ b/agent/auto-config/auto_config.go @@ -4,62 +4,54 @@ import ( "context" "fmt" "io/ioutil" - "net" - "os" - "path/filepath" - "strconv" - "strings" + "sync" "time" + "github.com/hashicorp/consul/agent/cache" "github.com/hashicorp/consul/agent/config" - 
"github.com/hashicorp/consul/agent/connect" - "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/logging" "github.com/hashicorp/consul/proto/pbautoconf" - "github.com/hashicorp/go-discover" - discoverk8s "github.com/hashicorp/go-discover/provider/k8s" "github.com/hashicorp/go-hclog" - - "github.com/golang/protobuf/jsonpb" -) - -const ( - // autoConfigFileName is the name of the file that the agent auto-config settings are - // stored in within the data directory - autoConfigFileName = "auto-config.json" - - dummyTrustDomain = "dummytrustdomain" -) - -var ( - pbMarshaler = &jsonpb.Marshaler{ - OrigName: false, - EnumsAsInts: false, - Indent: " ", - EmitDefaults: true, - } - - pbUnmarshaler = &jsonpb.Unmarshaler{ - AllowUnknownFields: false, - } ) // AutoConfig is all the state necessary for being able to parse a configuration // as well as perform the necessary RPCs to perform Agent Auto Configuration. -// -// NOTE: This struct and methods on it are not currently thread/goroutine safe. -// However it doesn't spawn any of its own go routines yet and is used in a -// synchronous fashion. In the future if either of those two conditions change -// then we will need to add some locking here. I am deferring that for now -// to help ease the review of this already large PR. type AutoConfig struct { + sync.Mutex + acConfig Config logger hclog.Logger - certMonitor CertMonitor + cache Cache + waiter *lib.RetryWaiter config *config.RuntimeConfig autoConfigResponse *pbautoconf.AutoConfigResponse autoConfigSource config.Source + + running bool + done chan struct{} + // cancel is used to cancel the entire AutoConfig + // go routine. This is the main field protected + // by the mutex as it being non-nil indicates that + // the go routine has been started and is stoppable. + // note that it doesn't indcate that the go routine + // is currently running. 
+ cancel context.CancelFunc + + // cancelWatches is used to cancel the existing + // cache watches regarding the agents certificate. This is + // mainly only necessary when the Agent token changes. + cancelWatches context.CancelFunc + + // cacheUpdates is the chan used to have the cache + // send us back events + cacheUpdates chan cache.UpdateEvent + + // tokenUpdates is the struct used to receive + // events from the token store when the Agent + // token is updated. + tokenUpdates token.Notifier } // New creates a new AutoConfig object for providing automatic Consul configuration. @@ -69,6 +61,19 @@ func New(config Config) (*AutoConfig, error) { return nil, fmt.Errorf("must provide a config loader") case config.DirectRPC == nil: return nil, fmt.Errorf("must provide a direct RPC delegate") + case config.Cache == nil: + return nil, fmt.Errorf("must provide a cache") + case config.TLSConfigurator == nil: + return nil, fmt.Errorf("must provide a TLS configurator") + case config.Tokens == nil: + return nil, fmt.Errorf("must provide a token store") + } + + if config.FallbackLeeway == 0 { + config.FallbackLeeway = 10 * time.Second + } + if config.FallbackRetry == 0 { + config.FallbackRetry = time.Minute } logger := config.Logger @@ -83,15 +88,16 @@ func New(config Config) (*AutoConfig, error) { } return &AutoConfig{ - acConfig: config, - logger: logger, - certMonitor: config.CertMonitor, + acConfig: config, + logger: logger, }, nil } // ReadConfig will parse the current configuration and inject any // auto-config sources if present into the correct place in the parsing chain. 
func (ac *AutoConfig) ReadConfig() (*config.RuntimeConfig, error) { + ac.Lock() + defer ac.Unlock() cfg, warnings, err := ac.acConfig.Loader(ac.autoConfigSource) if err != nil { return cfg, err @@ -105,46 +111,6 @@ func (ac *AutoConfig) ReadConfig() (*config.RuntimeConfig, error) { return cfg, nil } -// restorePersistedAutoConfig will attempt to load the persisted auto-config -// settings from the data directory. It returns true either when there was an -// unrecoverable error or when the configuration was successfully loaded from -// disk. Recoverable errors, such as "file not found" are suppressed and this -// method will return false for the first boolean. -func (ac *AutoConfig) restorePersistedAutoConfig() (bool, error) { - if ac.config.DataDir == "" { - // no data directory means we don't have anything to potentially load - return false, nil - } - - path := filepath.Join(ac.config.DataDir, autoConfigFileName) - ac.logger.Debug("attempting to restore any persisted configuration", "path", path) - - content, err := ioutil.ReadFile(path) - if err == nil { - rdr := strings.NewReader(string(content)) - - var resp pbautoconf.AutoConfigResponse - if err := pbUnmarshaler.Unmarshal(rdr, &resp); err != nil { - return false, fmt.Errorf("failed to decode persisted auto-config data: %w", err) - } - - if err := ac.update(&resp); err != nil { - return false, fmt.Errorf("error restoring persisted auto-config response: %w", err) - } - - ac.logger.Info("restored persisted configuration", "path", path) - return true, nil - } - - if !os.IsNotExist(err) { - return true, fmt.Errorf("failed to load %s: %w", path, err) - } - - // ignore non-existence errors as that is an indicator that we haven't - // performed the auto configuration before - return false, nil -} - // InitialConfiguration will perform a one-time RPC request to the configured servers // to retrieve various cluster wide configurations. 
See the proto/pbautoconf/auto_config.proto // file for a complete reference of what configurations can be applied in this manner. @@ -164,30 +130,49 @@ func (ac *AutoConfig) InitialConfiguration(ctx context.Context) (*config.Runtime ac.config = config } - if !ac.config.AutoConfig.Enabled { - return ac.config, nil - } - - ready, err := ac.restorePersistedAutoConfig() - if err != nil { - return nil, err - } - - if !ready { - ac.logger.Info("retrieving initial agent auto configuration remotely") - if err := ac.getInitialConfiguration(ctx); err != nil { + switch { + case ac.config.AutoConfig.Enabled: + resp, err := ac.readPersistedAutoConfig() + if err != nil { return nil, err } - } - // re-read the configuration now that we have our initial auto-config - config, err := ac.ReadConfig() - if err != nil { - return nil, err - } + if resp == nil { + ac.logger.Info("retrieving initial agent auto configuration remotely") + resp, err = ac.getInitialConfiguration(ctx) + if err != nil { + return nil, err + } + } - ac.config = config - return ac.config, nil + ac.logger.Debug("updating auto-config settings") + if err = ac.recordInitialConfiguration(resp); err != nil { + return nil, err + } + + // re-read the configuration now that we have our initial auto-config + config, err := ac.ReadConfig() + if err != nil { + return nil, err + } + + ac.config = config + return ac.config, nil + case ac.config.AutoEncryptTLS: + certs, err := ac.autoEncryptInitialCerts(ctx) + if err != nil { + return nil, err + } + + if err := ac.setInitialTLSCertificates(certs); err != nil { + return nil, err + } + + ac.logger.Info("automatically upgraded to TLS") + return ac.config, nil + default: + return ac.config, nil + } } // introToken is responsible for determining the correct intro token to use @@ -217,118 +202,45 @@ func (ac *AutoConfig) introToken() (string, error) { return token, nil } -// serverHosts is responsible for taking the list of server addresses and -// resolving any go-discover provider 
invocations. It will then return a list -// of hosts. These might be hostnames and is expected that DNS resolution may -// be performed after this function runs. Additionally these may contain ports -// so SplitHostPort could also be necessary. -func (ac *AutoConfig) serverHosts() ([]string, error) { - servers := ac.config.AutoConfig.ServerAddresses +// recordInitialConfiguration is responsible for recording the AutoConfigResponse from +// the AutoConfig.InitialConfiguration RPC. It is an all-in-one function to do the following +// * update the Agent token in the token store +func (ac *AutoConfig) recordInitialConfiguration(resp *pbautoconf.AutoConfigResponse) error { + ac.autoConfigResponse = resp - providers := make(map[string]discover.Provider) - for k, v := range discover.Providers { - providers[k] = v + ac.autoConfigSource = config.LiteralSource{ + Name: autoConfigFileName, + Config: translateConfig(resp.Config), } - providers["k8s"] = &discoverk8s.Provider{} - - disco, err := discover.New( - discover.WithUserAgent(lib.UserAgent()), - discover.WithProviders(providers), - ) + // we need to re-read the configuration to determine what the correct ACL + // token to push into the token store is. Any user provided token will override + // any AutoConfig generated token. 
+ config, err := ac.ReadConfig() if err != nil { - return nil, fmt.Errorf("Failed to create go-discover resolver: %w", err) + return fmt.Errorf("failed to fully resolve configuration: %w", err) } - var addrs []string - for _, addr := range servers { - switch { - case strings.Contains(addr, "provider="): - resolved, err := disco.Addrs(addr, ac.logger.StandardLogger(&hclog.StandardLoggerOptions{InferLevels: true})) - if err != nil { - ac.logger.Error("failed to resolve go-discover auto-config servers", "configuration", addr, "err", err) - continue - } + // ignoring the return value which would indicate a change in the token + _ = ac.acConfig.Tokens.UpdateAgentToken(config.ACLTokens.ACLAgentToken, token.TokenSourceConfig) - addrs = append(addrs, resolved...) - ac.logger.Debug("discovered auto-config servers", "servers", resolved) - default: - addrs = append(addrs, addr) - } - } - - if len(addrs) == 0 { - return nil, fmt.Errorf("no auto-config server addresses available for use") - } - - return addrs, nil -} - -// resolveHost will take a single host string and convert it to a list of TCPAddrs -// This will process any port in the input as well as looking up the hostname using -// normal DNS resolution. 
-func (ac *AutoConfig) resolveHost(hostPort string) []net.TCPAddr { - port := ac.config.ServerPort - host, portStr, err := net.SplitHostPort(hostPort) + // extract a structs.SignedResponse from the AutoConfigResponse for use in cache prepopulation + signed, err := extractSignedResponse(resp) if err != nil { - if strings.Contains(err.Error(), "missing port in address") { - host = hostPort - } else { - ac.logger.Warn("error splitting host address into IP and port", "address", hostPort, "error", err) - return nil - } - } else { - port, err = strconv.Atoi(portStr) - if err != nil { - ac.logger.Warn("Parsed port is not an integer", "port", portStr, "error", err) - return nil - } + return fmt.Errorf("failed to extract certificates from the auto-config response: %w", err) } - // resolve the host to a list of IPs - ips, err := net.LookupIP(host) - if err != nil { - ac.logger.Warn("IP resolution failed", "host", host, "error", err) - return nil + // prepopulate the cache + if err = ac.populateCertificateCache(signed); err != nil { + return fmt.Errorf("failed to populate the cache with certificate responses: %w", err) } - var addrs []net.TCPAddr - for _, ip := range ips { - addrs = append(addrs, net.TCPAddr{IP: ip, Port: port}) - } - - return addrs -} - -// recordResponse takes an AutoConfig RPC response records it with the agent -// This will persist the configuration to disk (unless in dev mode running without -// a data dir) and will reload the configuration. -func (ac *AutoConfig) recordResponse(resp *pbautoconf.AutoConfigResponse) error { - serialized, err := pbMarshaler.MarshalToString(resp) - if err != nil { - return fmt.Errorf("failed to encode auto-config response as JSON: %w", err) - } - - if err := ac.update(resp); err != nil { + // update the TLS configurator with the latest certificates + if err := ac.updateTLSFromResponse(resp); err != nil { return err } - // now that we know the configuration is generally fine including TLS certs go ahead and persist it to disk.
- if ac.config.DataDir == "" { - ac.logger.Debug("not persisting auto-config settings because there is no data directory") - return nil - } - - path := filepath.Join(ac.config.DataDir, autoConfigFileName) - - err = ioutil.WriteFile(path, []byte(serialized), 0660) - if err != nil { - return fmt.Errorf("failed to write auto-config configurations: %w", err) - } - - ac.logger.Debug("auto-config settings were persisted to disk") - - return nil + return ac.persistAutoConfig(resp) } // getInitialConfigurationOnce will perform full server to TCPAddr resolution and @@ -352,7 +264,7 @@ func (ac *AutoConfig) getInitialConfigurationOnce(ctx context.Context, csr strin var resp pbautoconf.AutoConfigResponse - servers, err := ac.serverHosts() + servers, err := ac.autoConfigHosts() if err != nil { return nil, err } @@ -369,6 +281,7 @@ func (ac *AutoConfig) getInitialConfigurationOnce(ctx context.Context, csr strin ac.logger.Error("AutoConfig.InitialConfiguration RPC failed", "addr", addr.String(), "error", err) continue } + ac.logger.Debug("AutoConfig.InitialConfiguration RPC was successful") // update the Certificate with the private key we generated locally if resp.Certificate != nil { @@ -379,17 +292,17 @@ func (ac *AutoConfig) getInitialConfigurationOnce(ctx context.Context, csr strin } } - return nil, ctx.Err() + return nil, fmt.Errorf("No server successfully responded to the auto-config request") } // getInitialConfiguration implements a loop to retry calls to getInitialConfigurationOnce. // It uses the RetryWaiter on the AutoConfig object to control how often to attempt // the initial configuration process. It is also canceallable by cancelling the provided context. 
-func (ac *AutoConfig) getInitialConfiguration(ctx context.Context) error { +func (ac *AutoConfig) getInitialConfiguration(ctx context.Context) (*pbautoconf.AutoConfigResponse, error) { // generate a CSR csr, key, err := ac.generateCSR() if err != nil { - return err + return nil, err } // this resets the failures so that we will perform immediate request @@ -397,183 +310,95 @@ func (ac *AutoConfig) getInitialConfiguration(ctx context.Context) error { for { select { case <-wait: - resp, err := ac.getInitialConfigurationOnce(ctx, csr, key) - if resp != nil { - return ac.recordResponse(resp) + if resp, err := ac.getInitialConfigurationOnce(ctx, csr, key); err == nil && resp != nil { + return resp, nil } else if err != nil { ac.logger.Error(err.Error()) } else { - ac.logger.Error("No error returned when fetching the initial auto-configuration but no response was either") + ac.logger.Error("No error returned when fetching configuration from the servers but no response was either") } + wait = ac.acConfig.Waiter.Failed() case <-ctx.Done(): ac.logger.Info("interrupted during initial auto configuration", "err", ctx.Err()) - return ctx.Err() + return nil, ctx.Err() } } } -// generateCSR will generate a CSR for an Agent certificate. This should -// be sent along with the AutoConfig.InitialConfiguration RPC. The generated -// CSR does NOT have a real trust domain as when generating this we do -// not yet have the CA roots. The server will update the trust domain -// for us though. -func (ac *AutoConfig) generateCSR() (csr string, key string, err error) { - // We don't provide the correct host here, because we don't know any - // better at this point. Apart from the domain, we would need the - // ClusterID, which we don't have. This is why we go with - // dummyTrustDomain the first time. Subsequent CSRs will have the - // correct TrustDomain. 
- id := &connect.SpiffeIDAgent{ - // will be replaced - Host: dummyTrustDomain, - Datacenter: ac.config.Datacenter, - Agent: ac.config.NodeName, - } - - caConfig, err := ac.config.ConnectCAConfiguration() - if err != nil { - return "", "", fmt.Errorf("Cannot generate CSR: %w", err) - } - - conf, err := caConfig.GetCommonConfig() - if err != nil { - return "", "", fmt.Errorf("Failed to load common CA configuration: %w", err) - } - - if conf.PrivateKeyType == "" { - conf.PrivateKeyType = connect.DefaultPrivateKeyType - } - if conf.PrivateKeyBits == 0 { - conf.PrivateKeyBits = connect.DefaultPrivateKeyBits - } - - // Create a new private key - pk, pkPEM, err := connect.GeneratePrivateKeyWithConfig(conf.PrivateKeyType, conf.PrivateKeyBits) - if err != nil { - return "", "", fmt.Errorf("Failed to generate private key: %w", err) - } - - dnsNames := append([]string{"localhost"}, ac.config.AutoConfig.DNSSANs...) - ipAddresses := append([]net.IP{net.ParseIP("127.0.0.1"), net.ParseIP("::")}, ac.config.AutoConfig.IPSANs...) - - // Create a CSR. - // - // The Common Name includes the dummy trust domain for now but Server will - // override this when it is signed anyway so it's OK. - cn := connect.AgentCN(ac.config.NodeName, dummyTrustDomain) - csr, err = connect.CreateCSR(id, cn, pk, dnsNames, ipAddresses) - if err != nil { - return "", "", err - } - - return csr, pkPEM, nil -} - -// update will take an AutoConfigResponse and do all things necessary -// to restore those settings. This currently involves updating the -// config data to be used during a call to ReadConfig, updating the -// tls Configurator and prepopulating the cache. 
-func (ac *AutoConfig) update(resp *pbautoconf.AutoConfigResponse) error { - ac.autoConfigResponse = resp - - ac.autoConfigSource = config.LiteralSource{ - Name: autoConfigFileName, - Config: translateConfig(resp.Config), - } - - if err := ac.updateTLSFromResponse(resp); err != nil { - return err - } - - return nil -} - -// updateTLSFromResponse will update the TLS certificate and roots in the shared -// TLS configurator. -func (ac *AutoConfig) updateTLSFromResponse(resp *pbautoconf.AutoConfigResponse) error { - if ac.certMonitor == nil { - return nil - } - - roots, err := translateCARootsToStructs(resp.CARoots) - if err != nil { - return err - } - - cert, err := translateIssuedCertToStructs(resp.Certificate) - if err != nil { - return err - } - - update := &structs.SignedResponse{ - IssuedCert: *cert, - ConnectCARoots: *roots, - ManualCARoots: resp.ExtraCACertificates, - } - - if resp.Config != nil && resp.Config.TLS != nil { - update.VerifyServerHostname = resp.Config.TLS.VerifyServerHostname - } - - if err := ac.certMonitor.Update(update); err != nil { - return fmt.Errorf("failed to update the certificate monitor: %w", err) - } - - return nil -} - func (ac *AutoConfig) Start(ctx context.Context) error { - if ac.certMonitor == nil { + ac.Lock() + defer ac.Unlock() + + if !ac.config.AutoConfig.Enabled && !ac.config.AutoEncryptTLS { return nil } - if !ac.config.AutoConfig.Enabled { - return nil + if ac.running || ac.cancel != nil { + return fmt.Errorf("AutoConfig is already running") } - _, err := ac.certMonitor.Start(ctx) - return err + // create the top level context to control the go + // routine executing the `run` method + ctx, cancel := context.WithCancel(ctx) + + // create the channel to get cache update events through + // really we should only ever get 10 updates + ac.cacheUpdates = make(chan cache.UpdateEvent, 10) + + // setup the cache watches + cancelCertWatches, err := ac.setupCertificateCacheWatches(ctx) + if err != nil { + cancel() + return 
fmt.Errorf("error setting up cache watches: %w", err) + } + + // start the token update notifier + ac.tokenUpdates = ac.acConfig.Tokens.Notify(token.TokenKindAgent) + + // store the cancel funcs + ac.cancel = cancel + ac.cancelWatches = cancelCertWatches + + ac.running = true + ac.done = make(chan struct{}) + go ac.run(ctx, ac.done) + + ac.logger.Info("auto-config started") + return nil +} + +func (ac *AutoConfig) Done() <-chan struct{} { + ac.Lock() + defer ac.Unlock() + + if ac.done != nil { + return ac.done + } + + // return a closed channel to indicate that we are already done + done := make(chan struct{}) + close(done) + return done +} + +func (ac *AutoConfig) IsRunning() bool { + ac.Lock() + defer ac.Unlock() + return ac.running } func (ac *AutoConfig) Stop() bool { - if ac.certMonitor == nil { + ac.Lock() + defer ac.Unlock() + + if !ac.running { return false } - if !ac.config.AutoConfig.Enabled { - return false + if ac.cancel != nil { + ac.cancel() } - return ac.certMonitor.Stop() -} - -func (ac *AutoConfig) FallbackTLS(ctx context.Context) (*structs.SignedResponse, error) { - // generate a CSR - csr, key, err := ac.generateCSR() - if err != nil { - return nil, err - } - - resp, err := ac.getInitialConfigurationOnce(ctx, csr, key) - if err != nil { - return nil, err - } - - return extractSignedResponse(resp) -} - -func (ac *AutoConfig) RecordUpdatedCerts(resp *structs.SignedResponse) error { - var err error - ac.autoConfigResponse.ExtraCACertificates = resp.ManualCARoots - ac.autoConfigResponse.CARoots, err = translateCARootsToProtobuf(&resp.ConnectCARoots) - if err != nil { - return err - } - ac.autoConfigResponse.Certificate, err = translateIssuedCertToProtobuf(&resp.IssuedCert) - if err != nil { - return err - } - - return ac.recordResponse(ac.autoConfigResponse) + return true } diff --git a/agent/auto-config/auto_config_test.go b/agent/auto-config/auto_config_test.go index a421a45b75..e3469862a4 100644 --- a/agent/auto-config/auto_config_test.go +++ 
b/agent/auto-config/auto_config_test.go @@ -5,115 +5,146 @@ import ( "fmt" "io/ioutil" "net" + "os" "path/filepath" - "strings" + "sync" "testing" "time" - "github.com/gogo/protobuf/types" + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" "github.com/hashicorp/consul/agent/config" + "github.com/hashicorp/consul/agent/connect" + "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/proto/pbautoconf" "github.com/hashicorp/consul/proto/pbconfig" - "github.com/hashicorp/consul/proto/pbconnect" "github.com/hashicorp/consul/sdk/testutil" + "github.com/hashicorp/consul/sdk/testutil/retry" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) -type mockDirectRPC struct { - mock.Mock +type configLoader struct { + opts config.BuilderOpts } -func (m *mockDirectRPC) RPC(dc string, node string, addr net.Addr, method string, args interface{}, reply interface{}) error { - var retValues mock.Arguments - if method == "AutoConfig.InitialConfiguration" { - req := args.(*pbautoconf.AutoConfigRequest) - csr := req.CSR - req.CSR = "" - retValues = m.Called(dc, node, addr, method, args, reply) - req.CSR = csr - } else { - retValues = m.Called(dc, node, addr, method, args, reply) - } +func (c *configLoader) Load(source config.Source) (*config.RuntimeConfig, []string, error) { + return config.Load(c.opts, source) +} - switch ret := retValues.Get(0).(type) { - case error: - return ret - case func(interface{}): - ret(reply) - return nil +func (c *configLoader) addConfigHCL(cfg string) { + c.opts.HCL = append(c.opts.HCL, cfg) +} + +func requireChanNotReady(t *testing.T, ch <-chan struct{}) { + select { + case <-ch: + require.Fail(t, "chan is ready when it shouldn't be") default: - return fmt.Errorf("This should not happen, update mock direct rpc expectations") + return } } -type 
mockCertMonitor struct { - mock.Mock -} - -func (m *mockCertMonitor) Start(_ context.Context) (<-chan struct{}, error) { - ret := m.Called() - ch := ret.Get(0).(<-chan struct{}) - return ch, ret.Error(1) -} - -func (m *mockCertMonitor) Stop() bool { - return m.Called().Bool(0) -} - -func (m *mockCertMonitor) Update(resp *structs.SignedResponse) error { - var privKey string - // filter out real certificates as we cannot predict their values - if resp != nil && strings.HasPrefix(resp.IssuedCert.PrivateKeyPEM, "-----BEGIN") { - privKey = resp.IssuedCert.PrivateKeyPEM - resp.IssuedCert.PrivateKeyPEM = "" +func requireChanReady(t *testing.T, ch <-chan struct{}) { + select { + case <-ch: + return + default: + require.Fail(t, "chan is not ready when it should be") } - err := m.Called(resp).Error(0) - if privKey != "" { - resp.IssuedCert.PrivateKeyPEM = privKey +} + +func waitForChan(timer *time.Timer, ch <-chan struct{}) bool { + select { + case <-timer.C: + return false + case <-ch: + return true } - return err +} + +func waitForChans(timeout time.Duration, chans ...<-chan struct{}) bool { + timer := time.NewTimer(timeout) + defer timer.Stop() + + for _, ch := range chans { + if !waitForChan(timer, ch) { + return false + } + } + return true } func TestNew(t *testing.T) { type testCase struct { - config Config + modify func(*Config) err string validate func(t *testing.T, ac *AutoConfig) } cases := map[string]testCase{ "no-direct-rpc": { - config: Config{ - Loader: func(source config.Source) (cfg *config.RuntimeConfig, warnings []string, err error) { - return nil, nil, nil - }, + modify: func(c *Config) { + c.DirectRPC = nil }, err: "must provide a direct RPC delegate", }, - "no-config-loader": { + modify: func(c *Config) { + c.Loader = nil + }, err: "must provide a config loader", }, - "ok": { - config: Config{ - DirectRPC: &mockDirectRPC{}, - Loader: func(source config.Source) (cfg *config.RuntimeConfig, warnings []string, err error) { - return nil, nil, nil - }, + 
"no-cache": { + modify: func(c *Config) { + c.Cache = nil }, + err: "must provide a cache", + }, + "no-tls-configurator": { + modify: func(c *Config) { + c.TLSConfigurator = nil + }, + err: "must provide a TLS configurator", + }, + "no-tokens": { + modify: func(c *Config) { + c.Tokens = nil + }, + err: "must provide a token store", + }, + "ok": { validate: func(t *testing.T, ac *AutoConfig) { t.Helper() require.NotNil(t, ac.logger) + require.NotNil(t, ac.acConfig.Waiter) + require.Equal(t, time.Minute, ac.acConfig.FallbackRetry) + require.Equal(t, 10*time.Second, ac.acConfig.FallbackLeeway) }, }, } for name, tcase := range cases { t.Run(name, func(t *testing.T) { - ac, err := New(tcase.config) + cfg := Config{ + Loader: func(source config.Source) (cfg *config.RuntimeConfig, warnings []string, err error) { + return nil, nil, nil + }, + DirectRPC: newMockDirectRPC(t), + Tokens: newMockTokenStore(t), + Cache: newMockCache(t), + TLSConfigurator: newMockTLSConfigurator(t), + ServerProvider: newMockServerProvider(t), + } + + if tcase.modify != nil { + tcase.modify(&cfg) + } + + ac, err := New(cfg) if tcase.err != "" { testutil.RequireErrorContains(t, err, tcase.err) } else { @@ -157,32 +188,34 @@ func TestReadConfig(t *testing.T) { require.Same(t, ac.config, cfg) } -func setupRuntimeConfig(t *testing.T) *config.RuntimeConfig { +func setupRuntimeConfig(t *testing.T) *configLoader { t.Helper() dataDir := testutil.TempDir(t, "auto-config") - rtConfig := &config.RuntimeConfig{ - DataDir: dataDir, - Datacenter: "dc1", - NodeName: "autoconf", - BindAddr: &net.IPAddr{IP: net.ParseIP("127.0.0.1")}, + + opts := config.BuilderOpts{ + Config: config.Config{ + DataDir: &dataDir, + Datacenter: stringPointer("dc1"), + NodeName: stringPointer("autoconf"), + BindAddr: stringPointer("127.0.0.1"), + }, } - return rtConfig + return &configLoader{opts: opts} } func TestInitialConfiguration_disabled(t *testing.T) { - rtConfig := setupRuntimeConfig(t) + loader := setupRuntimeConfig(t) + 
loader.addConfigHCL(` + primary_datacenter = "primary" + auto_config = { + enabled = false + } + `) + + conf := newMockedConfig(t).Config + conf.Loader = loader.Load - directRPC := new(mockDirectRPC) - directRPC.Test(t) - conf := Config{ - DirectRPC: directRPC, - Loader: func(source config.Source) (*config.RuntimeConfig, []string, error) { - rtConfig.PrimaryDatacenter = "primary" - rtConfig.AutoConfig.Enabled = false - return rtConfig, nil, nil - }, - } ac, err := New(conf) require.NoError(t, err) require.NotNil(t, ac) @@ -191,38 +224,34 @@ func TestInitialConfiguration_disabled(t *testing.T) { require.NoError(t, err) require.NotNil(t, cfg) require.Equal(t, "primary", cfg.PrimaryDatacenter) - require.NoFileExists(t, filepath.Join(rtConfig.DataDir, autoConfigFileName)) - - // ensure no RPC was made - directRPC.AssertExpectations(t) + require.NoFileExists(t, filepath.Join(*loader.opts.Config.DataDir, autoConfigFileName)) } func TestInitialConfiguration_cancelled(t *testing.T) { - rtConfig := setupRuntimeConfig(t) + mcfg := newMockedConfig(t) + + loader := setupRuntimeConfig(t) + loader.addConfigHCL(` + primary_datacenter = "primary" + auto_config = { + enabled = true + intro_token = "blarg" + server_addresses = ["127.0.0.1:8300"] + } + verify_outgoing = true + `) + mcfg.Config.Loader = loader.Load - directRPC := new(mockDirectRPC) - directRPC.Test(t) expectedRequest := pbautoconf.AutoConfigRequest{ Datacenter: "dc1", Node: "autoconf", JWT: "blarg", } - directRPC.On("RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, mock.Anything).Return(fmt.Errorf("injected error")).Times(0) - conf := Config{ - DirectRPC: directRPC, - Loader: func(source config.Source) (*config.RuntimeConfig, []string, error) { - rtConfig.PrimaryDatacenter = "primary" - rtConfig.AutoConfig = config.AutoConfig{ - Enabled: true, - IntroToken: "blarg", - ServerAddresses: []string{"127.0.0.1:8300"}, - } - 
rtConfig.VerifyOutgoing = true - return rtConfig, nil, nil - }, - } - ac, err := New(conf) + mcfg.directRPC.On("RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, mock.Anything).Return(fmt.Errorf("injected error")).Times(0) + mcfg.serverProvider.On("FindLANServer").Return(nil).Times(0) + + ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) @@ -232,110 +261,55 @@ func TestInitialConfiguration_cancelled(t *testing.T) { cfg, err := ac.InitialConfiguration(ctx) testutil.RequireErrorContains(t, err, context.DeadlineExceeded.Error()) require.Nil(t, cfg) - - // ensure no RPC was made - directRPC.AssertExpectations(t) } func TestInitialConfiguration_restored(t *testing.T) { - rtConfig := setupRuntimeConfig(t) + mcfg := newMockedConfig(t) + + loader := setupRuntimeConfig(t) + loader.addConfigHCL(` + auto_config = { + enabled = true + intro_token ="blarg" + server_addresses = ["127.0.0.1:8300"] + } + verify_outgoing = true + `) + + mcfg.Config.Loader = loader.Load + + indexedRoots, cert, extraCACerts := mcfg.setupInitialTLS(t, "autoconf", "dc1", "secret") // persist an auto config response to the data dir where it is expected - persistedFile := filepath.Join(rtConfig.DataDir, autoConfigFileName) + persistedFile := filepath.Join(*loader.opts.Config.DataDir, autoConfigFileName) response := &pbautoconf.AutoConfigResponse{ Config: &pbconfig.Config{ PrimaryDatacenter: "primary", TLS: &pbconfig.TLS{ VerifyServerHostname: true, }, - }, - CARoots: &pbconnect.CARoots{ - ActiveRootID: "active", - TrustDomain: "trust", - Roots: []*pbconnect.CARoot{ - { - ID: "active", - Name: "foo", - SerialNumber: 42, - SigningKeyID: "blarg", - NotBefore: &types.Timestamp{Seconds: 5000, Nanos: 100}, - NotAfter: &types.Timestamp{Seconds: 10000, Nanos: 9009}, - RootCert: "not an actual cert", - Active: true, + ACL: &pbconfig.ACL{ + Tokens: &pbconfig.ACLTokens{ + Agent: "secret", }, }, }, - 
Certificate: &pbconnect.IssuedCert{ - SerialNumber: "1234", - CertPEM: "not a cert", - PrivateKeyPEM: "private", - Agent: "foo", - AgentURI: "spiffe://blarg/agent/client/dc/foo/id/foo", - ValidAfter: &types.Timestamp{Seconds: 6000}, - ValidBefore: &types.Timestamp{Seconds: 7000}, - }, - ExtraCACertificates: []string{"blarg"}, + CARoots: mustTranslateCARootsToProtobuf(t, indexedRoots), + Certificate: mustTranslateIssuedCertToProtobuf(t, cert), + ExtraCACertificates: extraCACerts, } data, err := pbMarshaler.MarshalToString(response) require.NoError(t, err) require.NoError(t, ioutil.WriteFile(persistedFile, []byte(data), 0600)) - directRPC := new(mockDirectRPC) - directRPC.Test(t) + // recording the initial configuration even when restoring is going to update + // the agent token in the token store + mcfg.tokens.On("UpdateAgentToken", "secret", token.TokenSourceConfig).Return(true).Once() - // setup the mock certificate monitor to ensure that the initial state gets - // updated appropriately during config restoration. 
- certMon := new(mockCertMonitor) - certMon.Test(t) - certMon.On("Update", &structs.SignedResponse{ - IssuedCert: structs.IssuedCert{ - SerialNumber: "1234", - CertPEM: "not a cert", - PrivateKeyPEM: "private", - Agent: "foo", - AgentURI: "spiffe://blarg/agent/client/dc/foo/id/foo", - ValidAfter: time.Unix(6000, 0), - ValidBefore: time.Unix(7000, 0), - }, - ConnectCARoots: structs.IndexedCARoots{ - ActiveRootID: "active", - TrustDomain: "trust", - Roots: []*structs.CARoot{ - { - ID: "active", - Name: "foo", - SerialNumber: 42, - SigningKeyID: "blarg", - NotBefore: time.Unix(5000, 100), - NotAfter: time.Unix(10000, 9009), - RootCert: "not an actual cert", - Active: true, - // the decoding process doesn't leave this nil - IntermediateCerts: []string{}, - }, - }, - }, - ManualCARoots: []string{"blarg"}, - VerifyServerHostname: true, - }).Return(nil).Once() + // prepopulation is going to grab the token to populate the correct cache key + mcfg.tokens.On("AgentToken").Return("secret").Times(0) - conf := Config{ - DirectRPC: directRPC, - Loader: func(source config.Source) (*config.RuntimeConfig, []string, error) { - if err := setPrimaryDatacenterFromSource(rtConfig, source); err != nil { - return nil, nil, err - } - rtConfig.AutoConfig = config.AutoConfig{ - Enabled: true, - IntroToken: "blarg", - ServerAddresses: []string{"127.0.0.1:8300"}, - } - rtConfig.VerifyOutgoing = true - return rtConfig, nil, nil - }, - CertMonitor: certMon, - } - ac, err := New(conf) + ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) @@ -343,64 +317,51 @@ func TestInitialConfiguration_restored(t *testing.T) { require.NoError(t, err, data) require.NotNil(t, cfg) require.Equal(t, "primary", cfg.PrimaryDatacenter) - - // ensure no RPC was made - directRPC.AssertExpectations(t) - certMon.AssertExpectations(t) -} - -func setPrimaryDatacenterFromSource(rtConfig *config.RuntimeConfig, source config.Source) error { - if source != nil { - cfg, _, err := source.Parse() - if err != 
nil { - return err - } - rtConfig.PrimaryDatacenter = *cfg.PrimaryDatacenter - } - return nil } func TestInitialConfiguration_success(t *testing.T) { - rtConfig := setupRuntimeConfig(t) + mcfg := newMockedConfig(t) + loader := setupRuntimeConfig(t) + loader.addConfigHCL(` + auto_config = { + enabled = true + intro_token ="blarg" + server_addresses = ["127.0.0.1:8300"] + } + verify_outgoing = true + `) + mcfg.Config.Loader = loader.Load - directRPC := new(mockDirectRPC) - directRPC.Test(t) + indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, "autoconf", "dc1", "secret") - populateResponse := func(val interface{}) { - resp, ok := val.(*pbautoconf.AutoConfigResponse) + // this gets called when InitialConfiguration is invoked to record the token from the + // auto-config response + mcfg.tokens.On("UpdateAgentToken", "secret", token.TokenSourceConfig).Return(true).Once() + + // prepopulation is going to grab the token to populate the correct cache key + mcfg.tokens.On("AgentToken").Return("secret").Times(0) + + // no server provider + mcfg.serverProvider.On("FindLANServer").Return(nil).Times(0) + + populateResponse := func(args mock.Arguments) { + resp, ok := args.Get(5).(*pbautoconf.AutoConfigResponse) require.True(t, ok) resp.Config = &pbconfig.Config{ PrimaryDatacenter: "primary", TLS: &pbconfig.TLS{ VerifyServerHostname: true, }, - } - - resp.CARoots = &pbconnect.CARoots{ - ActiveRootID: "active", - TrustDomain: "trust", - Roots: []*pbconnect.CARoot{ - { - ID: "active", - Name: "foo", - SerialNumber: 42, - SigningKeyID: "blarg", - NotBefore: &types.Timestamp{Seconds: 5000, Nanos: 100}, - NotAfter: &types.Timestamp{Seconds: 10000, Nanos: 9009}, - RootCert: "not an actual cert", - Active: true, + ACL: &pbconfig.ACL{ + Tokens: &pbconfig.ACLTokens{ + Agent: "secret", }, }, } - resp.Certificate = &pbconnect.IssuedCert{ - SerialNumber: "1234", - CertPEM: "not a cert", - Agent: "foo", - AgentURI: "spiffe://blarg/agent/client/dc/foo/id/foo", - ValidAfter: 
&types.Timestamp{Seconds: 6000}, - ValidBefore: &types.Timestamp{Seconds: 7000}, - } - resp.ExtraCACertificates = []string{"blarg"} + + resp.CARoots = mustTranslateCARootsToProtobuf(t, indexedRoots) + resp.Certificate = mustTranslateIssuedCertToProtobuf(t, cert) + resp.ExtraCACertificates = extraCerts } expectedRequest := pbautoconf.AutoConfigRequest{ @@ -409,66 +370,16 @@ func TestInitialConfiguration_success(t *testing.T) { JWT: "blarg", } - directRPC.On( + mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, - &pbautoconf.AutoConfigResponse{}).Return(populateResponse) + &pbautoconf.AutoConfigResponse{}).Return(nil).Run(populateResponse) - // setup the mock certificate monitor to ensure that the initial state gets - // updated appropriately during config restoration. - certMon := new(mockCertMonitor) - certMon.Test(t) - certMon.On("Update", &structs.SignedResponse{ - IssuedCert: structs.IssuedCert{ - SerialNumber: "1234", - CertPEM: "not a cert", - PrivateKeyPEM: "", // the mock - Agent: "foo", - AgentURI: "spiffe://blarg/agent/client/dc/foo/id/foo", - ValidAfter: time.Unix(6000, 0), - ValidBefore: time.Unix(7000, 0), - }, - ConnectCARoots: structs.IndexedCARoots{ - ActiveRootID: "active", - TrustDomain: "trust", - Roots: []*structs.CARoot{ - { - ID: "active", - Name: "foo", - SerialNumber: 42, - SigningKeyID: "blarg", - NotBefore: time.Unix(5000, 100), - NotAfter: time.Unix(10000, 9009), - RootCert: "not an actual cert", - Active: true, - }, - }, - }, - ManualCARoots: []string{"blarg"}, - VerifyServerHostname: true, - }).Return(nil).Once() - - conf := Config{ - DirectRPC: directRPC, - Loader: func(source config.Source) (*config.RuntimeConfig, []string, error) { - if err := setPrimaryDatacenterFromSource(rtConfig, source); err != nil { - return nil, nil, err - } - rtConfig.AutoConfig = config.AutoConfig{ - Enabled: true, - IntroToken: "blarg", - ServerAddresses: 
[]string{"127.0.0.1:8300"}, - } - rtConfig.VerifyOutgoing = true - return rtConfig, nil, nil - }, - CertMonitor: certMon, - } - ac, err := New(conf) + ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) @@ -478,26 +389,61 @@ func TestInitialConfiguration_success(t *testing.T) { require.Equal(t, "primary", cfg.PrimaryDatacenter) // the file was written to. - persistedFile := filepath.Join(rtConfig.DataDir, autoConfigFileName) + persistedFile := filepath.Join(*loader.opts.Config.DataDir, autoConfigFileName) require.FileExists(t, persistedFile) - - // ensure no RPC was made - directRPC.AssertExpectations(t) - certMon.AssertExpectations(t) } func TestInitialConfiguration_retries(t *testing.T) { - rtConfig := setupRuntimeConfig(t) + mcfg := newMockedConfig(t) + loader := setupRuntimeConfig(t) + loader.addConfigHCL(` + auto_config = { + enabled = true + intro_token ="blarg" + server_addresses = [ + "198.18.0.1:8300", + "198.18.0.2:8398", + "198.18.0.3:8399", + "127.0.0.1:1234" + ] + } + verify_outgoing = true + `) + mcfg.Config.Loader = loader.Load - directRPC := new(mockDirectRPC) - directRPC.Test(t) + // reduce the retry wait times to make this test run faster + mcfg.Config.Waiter = lib.NewRetryWaiter(2, 0, 1*time.Millisecond, nil) - populateResponse := func(val interface{}) { - resp, ok := val.(*pbautoconf.AutoConfigResponse) + indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, "autoconf", "dc1", "secret") + + // this gets called when InitialConfiguration is invoked to record the token from the + // auto-config response + mcfg.tokens.On("UpdateAgentToken", "secret", token.TokenSourceConfig).Return(true).Once() + + // prepopulation is going to grab the token to populate the correct cache key + mcfg.tokens.On("AgentToken").Return("secret").Times(0) + + // no server provider + mcfg.serverProvider.On("FindLANServer").Return(nil).Times(0) + + populateResponse := func(args mock.Arguments) { + resp, ok := 
args.Get(5).(*pbautoconf.AutoConfigResponse) require.True(t, ok) resp.Config = &pbconfig.Config{ PrimaryDatacenter: "primary", + TLS: &pbconfig.TLS{ + VerifyServerHostname: true, + }, + ACL: &pbconfig.ACL{ + Tokens: &pbconfig.ACLTokens{ + Agent: "secret", + }, + }, } + + resp.CARoots = mustTranslateCARootsToProtobuf(t, indexedRoots) + resp.Certificate = mustTranslateIssuedCertToProtobuf(t, cert) + resp.ExtraCACertificates = extraCerts } expectedRequest := pbautoconf.AutoConfigRequest{ @@ -509,7 +455,7 @@ func TestInitialConfiguration_retries(t *testing.T) { // basically the 198.18.0.* addresses should fail indefinitely. the first time through the // outer loop we inject a failure for the DNS resolution of localhost to 127.0.0.1. Then // the second time through the outer loop we allow the localhost one to work. - directRPC.On( + mcfg.directRPC.On( "RPC", "dc1", "autoconf", @@ -517,7 +463,7 @@ func TestInitialConfiguration_retries(t *testing.T) { "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(fmt.Errorf("injected failure")).Times(0) - directRPC.On( + mcfg.directRPC.On( "RPC", "dc1", "autoconf", @@ -525,7 +471,7 @@ func TestInitialConfiguration_retries(t *testing.T) { "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(fmt.Errorf("injected failure")).Times(0) - directRPC.On( + mcfg.directRPC.On( "RPC", "dc1", "autoconf", @@ -533,7 +479,7 @@ func TestInitialConfiguration_retries(t *testing.T) { "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(fmt.Errorf("injected failure")).Times(0) - directRPC.On( + mcfg.directRPC.On( "RPC", "dc1", "autoconf", @@ -541,37 +487,16 @@ func TestInitialConfiguration_retries(t *testing.T) { "AutoConfig.InitialConfiguration", &expectedRequest, &pbautoconf.AutoConfigResponse{}).Return(fmt.Errorf("injected failure")).Once() - directRPC.On( + mcfg.directRPC.On( "RPC", "dc1", "autoconf", &net.TCPAddr{IP: 
net.IPv4(127, 0, 0, 1), Port: 1234}, "AutoConfig.InitialConfiguration", &expectedRequest, - &pbautoconf.AutoConfigResponse{}).Return(populateResponse) + &pbautoconf.AutoConfigResponse{}).Return(nil).Run(populateResponse).Once() - conf := Config{ - DirectRPC: directRPC, - Loader: func(source config.Source) (*config.RuntimeConfig, []string, error) { - if err := setPrimaryDatacenterFromSource(rtConfig, source); err != nil { - return nil, nil, err - } - rtConfig.AutoConfig = config.AutoConfig{ - Enabled: true, - IntroToken: "blarg", - ServerAddresses: []string{ - "198.18.0.1:8300", - "198.18.0.2:8398", - "198.18.0.3:8399", - "127.0.0.1:1234", - }, - } - rtConfig.VerifyOutgoing = true - return rtConfig, nil, nil - }, - Waiter: lib.NewRetryWaiter(2, 0, 1*time.Millisecond, nil), - } - ac, err := New(conf) + ac, err := New(mcfg.Config) require.NoError(t, err) require.NotNil(t, ac) @@ -581,102 +506,548 @@ func TestInitialConfiguration_retries(t *testing.T) { require.Equal(t, "primary", cfg.PrimaryDatacenter) // the file was written to. - persistedFile := filepath.Join(rtConfig.DataDir, autoConfigFileName) + persistedFile := filepath.Join(*loader.opts.Config.DataDir, autoConfigFileName) require.FileExists(t, persistedFile) - - // ensure no RPC was made - directRPC.AssertExpectations(t) } -func TestAutoConfig_StartStop(t *testing.T) { - // currently the only thing running for autoconf is just the cert monitor - // so this test only needs to ensure that the cert monitor is started and - // stopped and not that anything with regards to running the cert monitor - // actually work. Those are tested in the cert-monitor package. 
+func TestGoRoutineManagement(t *testing.T) { + mcfg := newMockedConfig(t) + loader := setupRuntimeConfig(t) + loader.addConfigHCL(` + auto_config = { + enabled = true + intro_token ="blarg" + server_addresses = ["127.0.0.1:8300"] + } + verify_outgoing = true + `) + mcfg.Config.Loader = loader.Load - rtConfig := setupRuntimeConfig(t) + // prepopulation is going to grab the token to populate the correct cache key + mcfg.tokens.On("AgentToken").Return("secret").Times(0) - directRPC := &mockDirectRPC{} - directRPC.Test(t) - certMon := &mockCertMonitor{} - certMon.Test(t) + ac, err := New(mcfg.Config) + require.NoError(t, err) - certMon.On("Start").Return((<-chan struct{})(nil), nil).Once() - certMon.On("Stop").Return(true).Once() + // priming the config so some other requests will work properly that need to + // read from the configuration. We are going to avoid doing InitialConfiguration + // for this test as we only are really concerned with the go routine management + _, err = ac.ReadConfig() + require.NoError(t, err) - conf := Config{ - DirectRPC: directRPC, - Loader: func(source config.Source) (*config.RuntimeConfig, []string, error) { - rtConfig.AutoConfig = config.AutoConfig{ - Enabled: true, - IntroToken: "blarg", - ServerAddresses: []string{ - "198.18.0.1", - "198.18.0.2:8398", - "198.18.0.3:8399", - "127.0.0.1:1234", - }, - } - rtConfig.VerifyOutgoing = true - return rtConfig, nil, nil - }, - CertMonitor: certMon, + var rootsCtx context.Context + var leafCtx context.Context + var ctxLock sync.Mutex + + rootsReq := ac.caRootsRequest() + mcfg.cache.On("Notify", + mock.Anything, + cachetype.ConnectCARootName, + &rootsReq, + rootsWatchID, + mock.Anything, + ).Return(nil).Times(2).Run(func(args mock.Arguments) { + ctxLock.Lock() + rootsCtx = args.Get(0).(context.Context) + ctxLock.Unlock() + }) + + leafReq := ac.leafCertRequest() + mcfg.cache.On("Notify", + mock.Anything, + cachetype.ConnectCALeafName, + &leafReq, + leafWatchID, + mock.Anything, + 
).Return(nil).Times(2).Run(func(args mock.Arguments) { + ctxLock.Lock() + leafCtx = args.Get(0).(context.Context) + ctxLock.Unlock() + }) + + // we will start/stop things twice + mcfg.tokens.On("Notify", token.TokenKindAgent).Return(token.Notifier{}).Times(2) + mcfg.tokens.On("StopNotify", token.Notifier{}).Times(2) + + mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Times(0) + + // ensure that auto-config isn't running + require.False(t, ac.IsRunning()) + + // ensure that nothing bad happens and that it reports as stopped + require.False(t, ac.Stop()) + + // ensure that the Done chan also reports that things are not running + // in other words the chan is immediately selectable + requireChanReady(t, ac.Done()) + + // start auto-config + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + require.NoError(t, ac.Start(ctx)) + + waitForContexts := func() bool { + ctxLock.Lock() + defer ctxLock.Unlock() + return !(rootsCtx == nil || leafCtx == nil) } - ac, err := New(conf) - require.NoError(t, err) - require.NotNil(t, ac) - cfg, err := ac.ReadConfig() - require.NoError(t, err) - ac.config = cfg - require.NoError(t, ac.Start(context.Background())) + // wait for the cache notifications to get started + require.Eventually(t, waitForContexts, 100*time.Millisecond, 10*time.Millisecond) + + // hold onto the Done chan to test for the go routine exiting + done := ac.Done() + + // ensure we report as running + require.True(t, ac.IsRunning()) + + // ensure the done chan is not selectable yet + requireChanNotReady(t, done) + + // ensure we error if we attempt to start again + err = ac.Start(ctx) + testutil.RequireErrorContains(t, err, "AutoConfig is already running") + + // now stop things - it should return true indicating that it was running + // when we attempted to stop it. 
require.True(t, ac.Stop()) - certMon.AssertExpectations(t) - directRPC.AssertExpectations(t) + // ensure that the go routine shuts down - it will close the done chan. Also it should cancel + // the cache watches by cancelling the context it passed into the Notify call. + require.True(t, waitForChans(100*time.Millisecond, done, leafCtx.Done(), rootsCtx.Done()), "AutoConfig didn't shut down") + require.False(t, ac.IsRunning()) + + // restart it + require.NoError(t, ac.Start(ctx)) + + // get the new Done chan + done = ac.Done() + + // ensure that context cancellation causes us to stop as well + cancel() + require.True(t, waitForChans(100*time.Millisecond, done)) } -func TestFallBackTLS(t *testing.T) { - rtConfig := setupRuntimeConfig(t) +type testAutoConfig struct { + mcfg *mockedConfig + ac *AutoConfig + tokenUpdates chan struct{} + originalToken string - directRPC := new(mockDirectRPC) - directRPC.Test(t) + initialRoots *structs.IndexedCARoots + initialCert *structs.IssuedCert + extraCerts []string +} - populateResponse := func(val interface{}) { - resp, ok := val.(*pbautoconf.AutoConfigResponse) +func startedAutoConfig(t *testing.T, autoEncrypt bool) testAutoConfig { + t.Helper() + mcfg := newMockedConfig(t) + loader := setupRuntimeConfig(t) + if !autoEncrypt { + loader.addConfigHCL(` + auto_config = { + enabled = true + intro_token ="blarg" + server_addresses = ["127.0.0.1:8300"] + } + verify_outgoing = true + `) + } else { + loader.addConfigHCL(` + auto_encrypt { + tls = true + } + verify_outgoing = true + `) + } + mcfg.Config.Loader = loader.Load + mcfg.Config.FallbackLeeway = time.Nanosecond + + originalToken := "a5deaa25-11ca-48bf-a979-4c3a7aa4b9a9" + + if !autoEncrypt { + // this gets called when InitialConfiguration is invoked to record the token from the + // auto-config response + mcfg.tokens.On("UpdateAgentToken", originalToken, token.TokenSourceConfig).Return(true).Once() + } + + // we expect this to be retrieved twice: first during cache prepopulation + 
// and then again when setting up the cache watch for the leaf cert. + // However one of those expectations is setup in the expectInitialTLS + // method so we only need one more here + mcfg.tokens.On("AgentToken").Return(originalToken).Once() + + if autoEncrypt { + // when using AutoEncrypt we also have to grab the token once more + // when setting up the initial RPC as the ACL token is what is used + // to authorize the request. + mcfg.tokens.On("AgentToken").Return(originalToken).Once() + } + + // this is called once during Start to initialze the token watches + tokenUpdateCh := make(chan struct{}) + tokenNotifier := token.Notifier{ + Ch: tokenUpdateCh, + } + mcfg.tokens.On("Notify", token.TokenKindAgent).Once().Return(tokenNotifier) + mcfg.tokens.On("StopNotify", tokenNotifier).Once() + + // expect the roots watch on the cache + mcfg.cache.On("Notify", + mock.Anything, + cachetype.ConnectCARootName, + &structs.DCSpecificRequest{Datacenter: "dc1"}, + rootsWatchID, + mock.Anything, + ).Return(nil).Once() + + mcfg.cache.On("Notify", + mock.Anything, + cachetype.ConnectCALeafName, + &cachetype.ConnectCALeafRequest{ + Datacenter: "dc1", + Agent: "autoconf", + Token: originalToken, + DNSSAN: defaultDNSSANs, + IPSAN: defaultIPSANs, + }, + leafWatchID, + mock.Anything, + ).Return(nil).Once() + + // override the server provider - most of the other tests set it up so that this + // always returns no server (simulating a state where we haven't joined gossip). 
+ // this seems like a good place to ensure this other way of finding server information + // works + mcfg.serverProvider.On("FindLANServer").Once().Return(&metadata.Server{ + Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, + }) + + indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, "autoconf", "dc1", originalToken) + + mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(cert.ValidBefore).Once() + + populateResponse := func(args mock.Arguments) { + method := args.String(3) + + switch method { + case "AutoConfig.InitialConfiguration": + resp, ok := args.Get(5).(*pbautoconf.AutoConfigResponse) + require.True(t, ok) + resp.Config = &pbconfig.Config{ + PrimaryDatacenter: "primary", + TLS: &pbconfig.TLS{ + VerifyServerHostname: true, + }, + ACL: &pbconfig.ACL{ + Tokens: &pbconfig.ACLTokens{ + Agent: originalToken, + }, + }, + } + + resp.CARoots = mustTranslateCARootsToProtobuf(t, indexedRoots) + resp.Certificate = mustTranslateIssuedCertToProtobuf(t, cert) + resp.ExtraCACertificates = extraCerts + case "AutoEncrypt.Sign": + resp, ok := args.Get(5).(*structs.SignedResponse) + require.True(t, ok) + *resp = structs.SignedResponse{ + VerifyServerHostname: true, + ConnectCARoots: *indexedRoots, + IssuedCert: *cert, + ManualCARoots: extraCerts, + } + } + } + + if !autoEncrypt { + expectedRequest := pbautoconf.AutoConfigRequest{ + Datacenter: "dc1", + Node: "autoconf", + JWT: "blarg", + } + + mcfg.directRPC.On( + "RPC", + "dc1", + "autoconf", + &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, + "AutoConfig.InitialConfiguration", + &expectedRequest, + &pbautoconf.AutoConfigResponse{}).Return(nil).Run(populateResponse).Once() + } else { + expectedRequest := structs.CASignRequest{ + WriteRequest: structs.WriteRequest{Token: originalToken}, + Datacenter: "dc1", + // TODO (autoconf) Maybe in the future we should populate a CSR + // and do some manual parsing/verification of the contents. 
The + // bits not having to do with the signing key such as the requested + // SANs and CN. For now though the mockDirectRPC type will empty + // the CSR so we have to pass in an empty string to the expectation. + CSR: "", + } + + mcfg.directRPC.On( + "RPC", + "dc1", + "autoconf", // reusing the same name to prevent needing more configurability + &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, + "AutoEncrypt.Sign", + &expectedRequest, + &structs.SignedResponse{}).Return(nil).Run(populateResponse) + } + + ac, err := New(mcfg.Config) + require.NoError(t, err) + require.NotNil(t, ac) + + cfg, err := ac.InitialConfiguration(context.Background()) + require.NoError(t, err) + require.NotNil(t, cfg) + if !autoEncrypt { + // auto-encrypt doesn't modify the config but rather sets the value + // in the TLS configurator + require.True(t, cfg.VerifyServerHostname) + } + + ctx, cancel := context.WithCancel(context.Background()) + require.NoError(t, ac.Start(ctx)) + t.Cleanup(func() { + done := ac.Done() + cancel() + timer := time.NewTimer(1 * time.Second) + defer timer.Stop() + select { + case <-done: + // do nothing + case <-timer.C: + t.Fatalf("AutoConfig wasn't stopped within 1 second after test completion") + } + }) + + return testAutoConfig{ + mcfg: mcfg, + ac: ac, + tokenUpdates: tokenUpdateCh, + originalToken: originalToken, + initialRoots: indexedRoots, + initialCert: cert, + extraCerts: extraCerts, + } +} + +// this test ensures that the cache watches are restarted with +// the updated token after receiving a token update +func TestTokenUpdate(t *testing.T) { + testAC := startedAutoConfig(t, false) + + newToken := "1a4cc445-86ed-46b4-a355-bbf5a11dddb0" + + rootsCtx, rootsCancel := context.WithCancel(context.Background()) + testAC.mcfg.cache.On("Notify", + mock.Anything, + cachetype.ConnectCARootName, + &structs.DCSpecificRequest{Datacenter: testAC.ac.config.Datacenter}, + rootsWatchID, + mock.Anything, + ).Return(nil).Once().Run(func(args mock.Arguments) { + 
rootsCancel() + }) + + leafCtx, leafCancel := context.WithCancel(context.Background()) + testAC.mcfg.cache.On("Notify", + mock.Anything, + cachetype.ConnectCALeafName, + &cachetype.ConnectCALeafRequest{ + Datacenter: "dc1", + Agent: "autoconf", + Token: newToken, + DNSSAN: defaultDNSSANs, + IPSAN: defaultIPSANs, + }, + leafWatchID, + mock.Anything, + ).Return(nil).Once().Run(func(args mock.Arguments) { + leafCancel() + }) + + // this will be retrieved once when resetting the leaf cert watch + testAC.mcfg.tokens.On("AgentToken").Return(newToken).Once() + + // send the notification about the token update + testAC.tokenUpdates <- struct{}{} + + // wait for the leaf cert watches + require.True(t, waitForChans(100*time.Millisecond, leafCtx.Done(), rootsCtx.Done()), "New cache watches were not started within 100ms") +} + +func TestRootsUpdate(t *testing.T) { + testAC := startedAutoConfig(t, false) + + secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0]) + secondRoots := structs.IndexedCARoots{ + ActiveRootID: secondCA.ID, + TrustDomain: connect.TestClusterID, + Roots: []*structs.CARoot{ + secondCA, + testAC.initialRoots.Roots[0], + }, + QueryMeta: structs.QueryMeta{ + Index: 99, + }, + } + + updatedCtx, cancel := context.WithCancel(context.Background()) + testAC.mcfg.tlsCfg.On("UpdateAutoTLS", + testAC.extraCerts, + []string{secondCA.RootCert, testAC.initialRoots.Roots[0].RootCert}, + testAC.initialCert.CertPEM, + "redacted", + true, + ).Return(nil).Once().Run(func(args mock.Arguments) { + cancel() + }) + + // when a cache event comes in we end up recalculating the fallback timer which requires this call + testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Once() + + req := structs.DCSpecificRequest{Datacenter: "dc1"} + require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{ + CorrelationID: rootsWatchID, + Result: &secondRoots, + Meta: cache.ResultMeta{ + Index: 
secondRoots.Index, + }, + })) + + require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") + + // persisting these to disk happens right after the chan we are waiting for will have fired above + // however there is no deterministic way to know once its been written outside of maybe a filesystem + // event notifier. That seems a little heavy handed just for this and especially to do in any sort + // of cross platform way. + retry.Run(t, func(r *retry.R) { + resp, err := testAC.ac.readPersistedAutoConfig() + require.NoError(r, err) + require.Equal(r, secondRoots.ActiveRootID, resp.CARoots.GetActiveRootID()) + }) +} + +func TestCertUpdate(t *testing.T) { + testAC := startedAutoConfig(t, false) + secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 99, 10*time.Minute) + + updatedCtx, cancel := context.WithCancel(context.Background()) + testAC.mcfg.tlsCfg.On("UpdateAutoTLS", + testAC.extraCerts, + []string{testAC.initialRoots.Roots[0].RootCert}, + secondCert.CertPEM, + "redacted", + true, + ).Return(nil).Once().Run(func(args mock.Arguments) { + cancel() + }) + + // when a cache event comes in we end up recalculating the fallback timer which requires this call + testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(secondCert.ValidBefore).Once() + + req := cachetype.ConnectCALeafRequest{ + Datacenter: "dc1", + Agent: "autoconf", + Token: testAC.originalToken, + DNSSAN: defaultDNSSANs, + IPSAN: defaultIPSANs, + } + require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{ + CorrelationID: leafWatchID, + Result: secondCert, + Meta: cache.ResultMeta{ + Index: secondCert.ModifyIndex, + }, + })) + + require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") + + // persisting these to disk happens after all the things we would wait for in 
assertCertUpdated + // will have fired. There is no deterministic way to know once it's been written so we wrap + // this in a retry. + retry.Run(t, func(r *retry.R) { + resp, err := testAC.ac.readPersistedAutoConfig() + require.NoError(r, err) + + // ensure the updated leaf certificate got persisted to disk + require.Equal(r, secondCert.CertPEM, resp.Certificate.GetCertPEM()) + }) +} + +func TestFallback(t *testing.T) { + testAC := startedAutoConfig(t, false) + + // at this point everything is operating normally and we are just + // waiting for events. We are going to send a new cert that is basically + // already expired and then allow the fallback routine to kick in. + secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 100, time.Nanosecond) + secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0]) + secondRoots := structs.IndexedCARoots{ + ActiveRootID: secondCA.ID, + TrustDomain: connect.TestClusterID, + Roots: []*structs.CARoot{ + secondCA, + testAC.initialRoots.Roots[0], + }, + QueryMeta: structs.QueryMeta{ + Index: 101, + }, + } + thirdCert := newLeaf(t, "autoconf", "dc1", secondCA, 102, 10*time.Minute) + + // setup the expectation for when the certs got updated initially + updatedCtx, updateCancel := context.WithCancel(context.Background()) + testAC.mcfg.tlsCfg.On("UpdateAutoTLS", + testAC.extraCerts, + []string{testAC.initialRoots.Roots[0].RootCert}, + secondCert.CertPEM, + "redacted", + true, + ).Return(nil).Once().Run(func(args mock.Arguments) { + updateCancel() + }) + + // when a cache event comes in we end up recalculating the fallback timer which requires this call + testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(secondCert.ValidBefore).Once() + testAC.mcfg.tlsCfg.On("AutoEncryptCertExpired").Return(true).Once() + + fallbackCtx, fallbackCancel := context.WithCancel(context.Background()) + + // also testing here that we can change server IPs for ongoing operations +
testAC.mcfg.serverProvider.On("FindLANServer").Once().Return(&metadata.Server{ + Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300}, + }) + + // after sending the notification for the cert update another InitialConfiguration RPC + // will be made to pull down the latest configuration. So we need to set up the response + // for the second RPC + populateResponse := func(args mock.Arguments) { + resp, ok := args.Get(5).(*pbautoconf.AutoConfigResponse) require.True(t, ok) resp.Config = &pbconfig.Config{ PrimaryDatacenter: "primary", TLS: &pbconfig.TLS{ VerifyServerHostname: true, }, - } - - resp.CARoots = &pbconnect.CARoots{ - ActiveRootID: "active", - TrustDomain: "trust", - Roots: []*pbconnect.CARoot{ - { - ID: "active", - Name: "foo", - SerialNumber: 42, - SigningKeyID: "blarg", - NotBefore: &types.Timestamp{Seconds: 5000, Nanos: 100}, - NotAfter: &types.Timestamp{Seconds: 10000, Nanos: 9009}, - RootCert: "not an actual cert", - Active: true, + ACL: &pbconfig.ACL{ + Tokens: &pbconfig.ACLTokens{ + Agent: testAC.originalToken, }, }, } - resp.Certificate = &pbconnect.IssuedCert{ - SerialNumber: "1234", - CertPEM: "not a cert", - Agent: "foo", - AgentURI: "spiffe://blarg/agent/client/dc/foo/id/foo", - ValidAfter: &types.Timestamp{Seconds: 6000}, - ValidBefore: &types.Timestamp{Seconds: 7000}, - } - resp.ExtraCACertificates = []string{"blarg"} + + resp.CARoots = mustTranslateCARootsToProtobuf(t, &secondRoots) + resp.Certificate = mustTranslateIssuedCertToProtobuf(t, thirdCert) + resp.ExtraCACertificates = testAC.extraCerts + + fallbackCancel() } expectedRequest := pbautoconf.AutoConfigRequest{ @@ -685,76 +1056,118 @@ func TestFallBackTLS(t *testing.T) { JWT: "blarg", } - directRPC.On( + testAC.mcfg.directRPC.On( "RPC", "dc1", "autoconf", - &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}, + &net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300}, "AutoConfig.InitialConfiguration", &expectedRequest, - &pbautoconf.AutoConfigResponse{}).Return(populateResponse) 
+ &pbautoconf.AutoConfigResponse{}).Return(nil).Run(populateResponse).Once() - // setup the mock certificate monitor we don't expect it to be used - // as the FallbackTLS method is mainly used by the certificate monitor - // if for some reason it fails to renew the TLS certificate in time. - certMon := new(mockCertMonitor) + // this gets called when InitialConfiguration is invoked to record the token from the + // auto-config response which is how the Fallback for auto-config works + testAC.mcfg.tokens.On("UpdateAgentToken", testAC.originalToken, token.TokenSourceConfig).Return(true).Once() - conf := Config{ - DirectRPC: directRPC, - Loader: func(source config.Source) (*config.RuntimeConfig, []string, error) { - rtConfig.AutoConfig = config.AutoConfig{ - Enabled: true, - IntroToken: "blarg", - ServerAddresses: []string{"127.0.0.1:8300"}, - } - rtConfig.VerifyOutgoing = true - return rtConfig, nil, nil - }, - CertMonitor: certMon, + testAC.mcfg.expectInitialTLS(t, "autoconf", "dc1", testAC.originalToken, secondCA, &secondRoots, thirdCert, testAC.extraCerts) + + // after the second RPC we now will use the new certs validity period in the next run loop iteration + testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Once() + + // now that all the mocks are set up we can trigger the whole thing by sending the second expired cert + // as a cache update event. 
+ req := cachetype.ConnectCALeafRequest{ + Datacenter: "dc1", + Agent: "autoconf", + Token: testAC.originalToken, + DNSSAN: defaultDNSSANs, + IPSAN: defaultIPSANs, } - ac, err := New(conf) - require.NoError(t, err) - require.NotNil(t, ac) - ac.config, err = ac.ReadConfig() - require.NoError(t, err) + require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{ + CorrelationID: leafWatchID, + Result: secondCert, + Meta: cache.ResultMeta{ + Index: secondCert.ModifyIndex, + }, + })) - actual, err := ac.FallbackTLS(context.Background()) - require.NoError(t, err) - expected := &structs.SignedResponse{ - ConnectCARoots: structs.IndexedCARoots{ - ActiveRootID: "active", - TrustDomain: "trust", - Roots: []*structs.CARoot{ - { - ID: "active", - Name: "foo", - SerialNumber: 42, - SigningKeyID: "blarg", - NotBefore: time.Unix(5000, 100), - NotAfter: time.Unix(10000, 9009), - RootCert: "not an actual cert", - Active: true, + // wait for the TLS certificates to get updated + require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") + + // now wait for the fallback routine to be invoked + require.True(t, waitForChans(100*time.Millisecond, fallbackCtx.Done()), "fallback routines did not get invoked within the alotted time") + + // persisting these to disk happens after the RPC we waited on above will have fired + // There is no deterministic way to know once its been written so we wrap this in a retry. 
+ retry.Run(t, func(r *retry.R) { + resp, err := testAC.ac.readPersistedAutoConfig() + require.NoError(r, err) + + // ensure the roots got persisted to disk + require.Equal(r, thirdCert.CertPEM, resp.Certificate.GetCertPEM()) + require.Equal(r, secondRoots.ActiveRootID, resp.CARoots.GetActiveRootID()) + }) +} + +func TestIntroToken(t *testing.T) { + tokenFile := testutil.TempFile(t, "intro-token") + t.Cleanup(func() { os.Remove(tokenFile.Name()) }) + + tokenFileEmpty := testutil.TempFile(t, "intro-token-empty") + t.Cleanup(func() { os.Remove(tokenFileEmpty.Name()) }) + + tokenFromFile := "8ae34d3a-8adf-446a-b236-69874597cb5b" + tokenFromConfig := "3ad9b572-ea42-4e47-9cd0-53a398a98abf" + require.NoError(t, ioutil.WriteFile(tokenFile.Name(), []byte(tokenFromFile), 0600)) + + type testCase struct { + config *config.RuntimeConfig + err string + token string + } + + cases := map[string]testCase{ + "config": { + config: &config.RuntimeConfig{ + AutoConfig: config.AutoConfig{ + IntroToken: tokenFromConfig, + IntroTokenFile: tokenFile.Name(), }, }, + token: tokenFromConfig, }, - IssuedCert: structs.IssuedCert{ - SerialNumber: "1234", - CertPEM: "not a cert", - Agent: "foo", - AgentURI: "spiffe://blarg/agent/client/dc/foo/id/foo", - ValidAfter: time.Unix(6000, 0), - ValidBefore: time.Unix(7000, 0), + "file": { + config: &config.RuntimeConfig{ + AutoConfig: config.AutoConfig{ + IntroTokenFile: tokenFile.Name(), + }, + }, + token: tokenFromFile, + }, + "file-empty": { + config: &config.RuntimeConfig{ + AutoConfig: config.AutoConfig{ + IntroTokenFile: tokenFileEmpty.Name(), + }, + }, + err: "intro_token_file did not contain any token", }, - ManualCARoots: []string{"blarg"}, - VerifyServerHostname: true, } - // have to just verify that the private key was put in here but we then - // must zero it out so that the remaining equality check will pass - require.NotEmpty(t, actual.IssuedCert.PrivateKeyPEM) - actual.IssuedCert.PrivateKeyPEM = "" - require.Equal(t, expected, actual) - 
// ensure no RPC was made - directRPC.AssertExpectations(t) - certMon.AssertExpectations(t) + for name, tcase := range cases { + t.Run(name, func(t *testing.T) { + ac := AutoConfig{ + config: tcase.config, + } + + token, err := ac.introToken() + if tcase.err != "" { + testutil.RequireErrorContains(t, err, tcase.err) + } else { + require.NoError(t, err) + require.Equal(t, tcase.token, token) + } + }) + } + } diff --git a/agent/auto-config/auto_encrypt.go b/agent/auto-config/auto_encrypt.go new file mode 100644 index 0000000000..2290bb332b --- /dev/null +++ b/agent/auto-config/auto_encrypt.go @@ -0,0 +1,111 @@ +package autoconf + +import ( + "context" + "fmt" + "net" + "strings" + + "github.com/hashicorp/consul/agent/structs" +) + +func (ac *AutoConfig) autoEncryptInitialCerts(ctx context.Context) (*structs.SignedResponse, error) { + // generate a CSR + csr, key, err := ac.generateCSR() + if err != nil { + return nil, err + } + + // this resets the failures so that we will perform immediate request + wait := ac.acConfig.Waiter.Success() + for { + select { + case <-wait: + if resp, err := ac.autoEncryptInitialCertsOnce(ctx, csr, key); err == nil && resp != nil { + return resp, nil + } else if err != nil { + ac.logger.Error(err.Error()) + } else { + ac.logger.Error("No error returned when fetching certificates from the servers but no response was either") + } + + wait = ac.acConfig.Waiter.Failed() + case <-ctx.Done(): + ac.logger.Info("interrupted during retrieval of auto-encrypt certificates", "err", ctx.Err()) + return nil, ctx.Err() + } + } +} + +func (ac *AutoConfig) autoEncryptInitialCertsOnce(ctx context.Context, csr, key string) (*structs.SignedResponse, error) { + request := structs.CASignRequest{ + WriteRequest: structs.WriteRequest{Token: ac.acConfig.Tokens.AgentToken()}, + Datacenter: ac.config.Datacenter, + CSR: csr, + } + var resp structs.SignedResponse + + servers, err := ac.autoEncryptHosts() + if err != nil { + return nil, err + } + + for _, s := range 
servers { + // try each IP to see if we can successfully make the request + for _, addr := range ac.resolveHost(s) { + if ctx.Err() != nil { + return nil, ctx.Err() + } + + ac.logger.Debug("making AutoEncrypt.Sign RPC", "addr", addr.String()) + err = ac.acConfig.DirectRPC.RPC(ac.config.Datacenter, ac.config.NodeName, &addr, "AutoEncrypt.Sign", &request, &resp) + if err != nil { + ac.logger.Error("AutoEncrypt.Sign RPC failed", "addr", addr.String(), "error", err) + continue + } + + resp.IssuedCert.PrivateKeyPEM = key + return &resp, nil + } + } + return nil, fmt.Errorf("No servers successfully responded to the auto-encrypt request") +} + +func (ac *AutoConfig) autoEncryptHosts() ([]string, error) { + // use servers known to gossip if there are any + if ac.acConfig.ServerProvider != nil { + if srv := ac.acConfig.ServerProvider.FindLANServer(); srv != nil { + return []string{srv.Addr.String()}, nil + } + } + + hosts, err := ac.discoverServers(ac.config.RetryJoinLAN) + if err != nil { + return nil, err + } + + var addrs []string + + // The addresses we use for auto-encrypt are the retry join and start join + // addresses. These are for joining serf and therefore we cannot rely on the + // ports for these. This loop strips any port that may have been specified and + // will let subsequent resolveAddr calls add on the default RPC port. + for _, addr := range append(ac.config.StartJoinAddrsLAN, hosts...) 
{ + host, _, err := net.SplitHostPort(addr) + if err != nil { + if strings.Contains(err.Error(), "missing port in address") { + host = addr + } else { + ac.logger.Warn("error splitting host address into IP and port", "address", addr, "error", err) + continue + } + } + addrs = append(addrs, host) + } + + if len(addrs) == 0 { + return nil, fmt.Errorf("no auto-encrypt server addresses available for use") + } + + return addrs, nil +} diff --git a/agent/auto-config/auto_encrypt_test.go b/agent/auto-config/auto_encrypt_test.go new file mode 100644 index 0000000000..867db9441f --- /dev/null +++ b/agent/auto-config/auto_encrypt_test.go @@ -0,0 +1,562 @@ +package autoconf + +import ( + "context" + "crypto/x509" + "crypto/x509/pkix" + "encoding/asn1" + "fmt" + "net" + "net/url" + "testing" + "time" + + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/config" + "github.com/hashicorp/consul/agent/connect" + "github.com/hashicorp/consul/agent/metadata" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/lib" + "github.com/hashicorp/consul/sdk/testutil" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestAutoEncrypt_generateCSR(t *testing.T) { + type testCase struct { + conf *config.RuntimeConfig + + // to validate the csr + expectedSubject pkix.Name + expectedSigAlg x509.SignatureAlgorithm + expectedPubAlg x509.PublicKeyAlgorithm + expectedDNSNames []string + expectedIPs []net.IP + expectedURIs []*url.URL + } + + cases := map[string]testCase{ + "ip-sans": { + conf: &config.RuntimeConfig{ + Datacenter: "dc1", + NodeName: "test-node", + AutoEncryptTLS: true, + AutoEncryptIPSAN: []net.IP{net.IPv4(198, 18, 0, 1), net.IPv4(198, 18, 0, 2)}, + }, + expectedSubject: pkix.Name{ + CommonName: connect.AgentCN("test-node", unknownTrustDomain), + Names: []pkix.AttributeTypeAndValue{ + { + // 2,5,4,3 is the CommonName type ASN1 identifier 
+ Type: asn1.ObjectIdentifier{2, 5, 4, 3}, + Value: "testnode.agnt.unknown.consul", + }, + }, + }, + expectedSigAlg: x509.ECDSAWithSHA256, + expectedPubAlg: x509.ECDSA, + expectedDNSNames: defaultDNSSANs, + expectedIPs: append(defaultIPSANs, + net.IP{198, 18, 0, 1}, + net.IP{198, 18, 0, 2}, + ), + expectedURIs: []*url.URL{ + { + Scheme: "spiffe", + Host: unknownTrustDomain, + Path: "/agent/client/dc/dc1/id/test-node", + }, + }, + }, + "dns-sans": { + conf: &config.RuntimeConfig{ + Datacenter: "dc1", + NodeName: "test-node", + AutoEncryptTLS: true, + AutoEncryptDNSSAN: []string{"foo.local", "bar.local"}, + }, + expectedSubject: pkix.Name{ + CommonName: connect.AgentCN("test-node", unknownTrustDomain), + Names: []pkix.AttributeTypeAndValue{ + { + // 2,5,4,3 is the CommonName type ASN1 identifier + Type: asn1.ObjectIdentifier{2, 5, 4, 3}, + Value: "testnode.agnt.unknown.consul", + }, + }, + }, + expectedSigAlg: x509.ECDSAWithSHA256, + expectedPubAlg: x509.ECDSA, + expectedDNSNames: append(defaultDNSSANs, "foo.local", "bar.local"), + expectedIPs: defaultIPSANs, + expectedURIs: []*url.URL{ + { + Scheme: "spiffe", + Host: unknownTrustDomain, + Path: "/agent/client/dc/dc1/id/test-node", + }, + }, + }, + } + + for name, tcase := range cases { + t.Run(name, func(t *testing.T) { + ac := AutoConfig{config: tcase.conf} + + csr, _, err := ac.generateCSR() + require.NoError(t, err) + + request, err := connect.ParseCSR(csr) + require.NoError(t, err) + require.NotNil(t, request) + + require.Equal(t, tcase.expectedSubject, request.Subject) + require.Equal(t, tcase.expectedSigAlg, request.SignatureAlgorithm) + require.Equal(t, tcase.expectedPubAlg, request.PublicKeyAlgorithm) + require.Equal(t, tcase.expectedDNSNames, request.DNSNames) + require.Equal(t, tcase.expectedIPs, request.IPAddresses) + require.Equal(t, tcase.expectedURIs, request.URIs) + }) + } +} + +func TestAutoEncrypt_hosts(t *testing.T) { + type testCase struct { + serverProvider ServerProvider + config 
*config.RuntimeConfig + + hosts []string + err string + } + + providerNone := newMockServerProvider(t) + providerNone.On("FindLANServer").Return(nil).Times(0) + + providerWithServer := newMockServerProvider(t) + providerWithServer.On("FindLANServer").Return(&metadata.Server{Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 1234}}).Times(0) + + cases := map[string]testCase{ + "router-override": { + serverProvider: providerWithServer, + config: &config.RuntimeConfig{ + RetryJoinLAN: []string{"127.0.0.1:9876"}, + StartJoinAddrsLAN: []string{"192.168.1.2:4321"}, + }, + hosts: []string{"198.18.0.1:1234"}, + }, + "various-addresses": { + serverProvider: providerNone, + config: &config.RuntimeConfig{ + RetryJoinLAN: []string{"198.18.0.1", "foo.com", "[2001:db8::1234]:1234", "abc.local:9876"}, + StartJoinAddrsLAN: []string{"192.168.1.1:5432", "start.local", "[::ffff:172.16.5.4]", "main.dev:6789"}, + }, + hosts: []string{ + "192.168.1.1", + "start.local", + "[::ffff:172.16.5.4]", + "main.dev", + "198.18.0.1", + "foo.com", + "2001:db8::1234", + "abc.local", + }, + }, + "split-host-port-error": { + serverProvider: providerNone, + config: &config.RuntimeConfig{ + StartJoinAddrsLAN: []string{"this-is-not:a:ip:and_port"}, + }, + err: "no auto-encrypt server addresses available for use", + }, + } + + for name, tcase := range cases { + t.Run(name, func(t *testing.T) { + ac := AutoConfig{ + config: tcase.config, + logger: testutil.Logger(t), + acConfig: Config{ + ServerProvider: tcase.serverProvider, + }, + } + + hosts, err := ac.autoEncryptHosts() + if tcase.err != "" { + testutil.RequireErrorContains(t, err, tcase.err) + } else { + require.NoError(t, err) + require.Equal(t, tcase.hosts, hosts) + } + }) + } +} + +func TestAutoEncrypt_InitialCerts(t *testing.T) { + token := "1a148388-3dd7-4db4-9eea-520424b4a86a" + datacenter := "foo" + nodeName := "bar" + + mcfg := newMockedConfig(t) + + _, indexedRoots, cert := testCerts(t, nodeName, datacenter) + + // The following are called 
once for each round through the auto-encrypt initial certs outer loop + // (not the per-host direct rpc attempts but the one involving the RetryWaiter) + mcfg.tokens.On("AgentToken").Return(token).Times(2) + mcfg.serverProvider.On("FindLANServer").Return(nil).Times(2) + + request := structs.CASignRequest{ + WriteRequest: structs.WriteRequest{Token: token}, + Datacenter: datacenter, + // this gets removed by the mock code as it's non-deterministic what it will be + CSR: "", + } + + // first failure + mcfg.directRPC.On("RPC", + datacenter, + nodeName, + &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, + "AutoEncrypt.Sign", + &request, + &structs.SignedResponse{}, + ).Once().Return(fmt.Errorf("injected error")) + // second failure + mcfg.directRPC.On("RPC", + datacenter, + nodeName, + &net.TCPAddr{IP: net.IPv4(198, 18, 0, 2), Port: 8300}, + "AutoEncrypt.Sign", + &request, + &structs.SignedResponse{}, + ).Once().Return(fmt.Errorf("injected error")) + // third time is successful (second attempt to the first server) + mcfg.directRPC.On("RPC", + datacenter, + nodeName, + &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300}, + "AutoEncrypt.Sign", + &request, + &structs.SignedResponse{}, + ).Once().Return(nil).Run(func(args mock.Arguments) { + resp, ok := args.Get(5).(*structs.SignedResponse) + require.True(t, ok) + resp.ConnectCARoots = *indexedRoots + resp.IssuedCert = *cert + resp.VerifyServerHostname = true + }) + + mcfg.Config.Waiter = lib.NewRetryWaiter(2, 0, 1*time.Millisecond, nil) + + ac := AutoConfig{ + config: &config.RuntimeConfig{ + Datacenter: datacenter, + NodeName: nodeName, + RetryJoinLAN: []string{"198.18.0.1:1234", "198.18.0.2:3456"}, + ServerPort: 8300, + }, + acConfig: mcfg.Config, + logger: testutil.Logger(t), + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + resp, err := ac.autoEncryptInitialCerts(ctx) + require.NoError(t, err) + require.NotNil(t, resp) + require.True(t, resp.VerifyServerHostname)
+ require.NotEmpty(t, resp.IssuedCert.PrivateKeyPEM) + resp.IssuedCert.PrivateKeyPEM = "" + cert.PrivateKeyPEM = "" + require.Equal(t, cert, &resp.IssuedCert) + require.Equal(t, indexedRoots, &resp.ConnectCARoots) + require.Empty(t, resp.ManualCARoots) +} + +func TestAutoEncrypt_InitialConfiguration(t *testing.T) { + token := "010494ae-ee45-4433-903c-a58c91297714" + nodeName := "auto-encrypt" + datacenter := "dc1" + + mcfg := newMockedConfig(t) + loader := setupRuntimeConfig(t) + loader.addConfigHCL(` + auto_encrypt { + tls = true + } + `) + loader.opts.Config.NodeName = &nodeName + mcfg.Config.Loader = loader.Load + + indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, nodeName, datacenter, token) + + // prepopulation is going to grab the token to populate the correct cache key + mcfg.tokens.On("AgentToken").Return(token).Times(0) + + // the server provider returns a single known server at 127.0.0.1:8300 + mcfg.serverProvider.On("FindLANServer").Return(&metadata.Server{Addr: &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}}).Times(1) + + populateResponse := func(args mock.Arguments) { + resp, ok := args.Get(5).(*structs.SignedResponse) + require.True(t, ok) + *resp = structs.SignedResponse{ + VerifyServerHostname: true, + ConnectCARoots: *indexedRoots, + IssuedCert: *cert, + ManualCARoots: extraCerts, + } + } + + expectedRequest := structs.CASignRequest{ + WriteRequest: structs.WriteRequest{Token: token}, + Datacenter: datacenter, + // TODO (autoconf) Maybe in the future we should populate a CSR + // and do some manual parsing/verification of the contents. The + // bits not having to do with the signing key such as the requested + // SANs and CN. For now though the mockDirectRPC type will empty + // the CSR so we have to pass in an empty string to the expectation.
+ CSR: "", + } + + mcfg.directRPC.On( + "RPC", + datacenter, + nodeName, + &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}, + "AutoEncrypt.Sign", + &expectedRequest, + &structs.SignedResponse{}).Return(nil).Run(populateResponse) + + ac, err := New(mcfg.Config) + require.NoError(t, err) + require.NotNil(t, ac) + + cfg, err := ac.InitialConfiguration(context.Background()) + require.NoError(t, err) + require.NotNil(t, cfg) + +} + +func TestAutoEncrypt_TokenUpdate(t *testing.T) { + testAC := startedAutoConfig(t, true) + + newToken := "1a4cc445-86ed-46b4-a355-bbf5a11dddb0" + + rootsCtx, rootsCancel := context.WithCancel(context.Background()) + testAC.mcfg.cache.On("Notify", + mock.Anything, + cachetype.ConnectCARootName, + &structs.DCSpecificRequest{Datacenter: testAC.ac.config.Datacenter}, + rootsWatchID, + mock.Anything, + ).Return(nil).Once().Run(func(args mock.Arguments) { + rootsCancel() + }) + + leafCtx, leafCancel := context.WithCancel(context.Background()) + testAC.mcfg.cache.On("Notify", + mock.Anything, + cachetype.ConnectCALeafName, + &cachetype.ConnectCALeafRequest{ + Datacenter: "dc1", + Agent: "autoconf", + Token: newToken, + DNSSAN: defaultDNSSANs, + IPSAN: defaultIPSANs, + }, + leafWatchID, + mock.Anything, + ).Return(nil).Once().Run(func(args mock.Arguments) { + leafCancel() + }) + + // this will be retrieved once when resetting the leaf cert watch + testAC.mcfg.tokens.On("AgentToken").Return(newToken).Once() + + // send the notification about the token update + testAC.tokenUpdates <- struct{}{} + + // wait for the leaf cert watches + require.True(t, waitForChans(100*time.Millisecond, leafCtx.Done(), rootsCtx.Done()), "New cache watches were not started within 100ms") +} + +func TestAutoEncrypt_RootsUpdate(t *testing.T) { + testAC := startedAutoConfig(t, true) + + secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0]) + secondRoots := structs.IndexedCARoots{ + ActiveRootID: secondCA.ID, + TrustDomain: connect.TestClusterID, + Roots: 
[]*structs.CARoot{ + secondCA, + testAC.initialRoots.Roots[0], + }, + QueryMeta: structs.QueryMeta{ + Index: 99, + }, + } + + updatedCtx, cancel := context.WithCancel(context.Background()) + testAC.mcfg.tlsCfg.On("UpdateAutoTLSCA", + []string{secondCA.RootCert, testAC.initialRoots.Roots[0].RootCert}, + ).Return(nil).Once().Run(func(args mock.Arguments) { + cancel() + }) + + // when a cache event comes in we end up recalculating the fallback timer which requires this call + testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Once() + + req := structs.DCSpecificRequest{Datacenter: "dc1"} + require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{ + CorrelationID: rootsWatchID, + Result: &secondRoots, + Meta: cache.ResultMeta{ + Index: secondRoots.Index, + }, + })) + + require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") +} + +func TestAutoEncrypt_CertUpdate(t *testing.T) { + testAC := startedAutoConfig(t, true) + secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 99, 10*time.Minute) + + updatedCtx, cancel := context.WithCancel(context.Background()) + testAC.mcfg.tlsCfg.On("UpdateAutoTLSCert", + secondCert.CertPEM, + "redacted", + ).Return(nil).Once().Run(func(args mock.Arguments) { + cancel() + }) + + // when a cache event comes in we end up recalculating the fallback timer which requires this call + testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(secondCert.ValidBefore).Once() + + req := cachetype.ConnectCALeafRequest{ + Datacenter: "dc1", + Agent: "autoconf", + Token: testAC.originalToken, + DNSSAN: defaultDNSSANs, + IPSAN: defaultIPSANs, + } + require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{ + CorrelationID: leafWatchID, + Result: secondCert, + Meta: cache.ResultMeta{ + Index: secondCert.ModifyIndex, + 
}, + })) + + require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") +} + +func TestAutoEncrypt_Fallback(t *testing.T) { + testAC := startedAutoConfig(t, true) + + // at this point everything is operating normally and we are just + // waiting for events. We are going to send a new cert that is basically + // already expired and then allow the fallback routine to kick in. + secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 100, time.Nanosecond) + secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0]) + secondRoots := structs.IndexedCARoots{ + ActiveRootID: secondCA.ID, + TrustDomain: connect.TestClusterID, + Roots: []*structs.CARoot{ + secondCA, + testAC.initialRoots.Roots[0], + }, + QueryMeta: structs.QueryMeta{ + Index: 101, + }, + } + thirdCert := newLeaf(t, "autoconf", "dc1", secondCA, 102, 10*time.Minute) + + // setup the expectation for when the certs get updated initially + updatedCtx, updateCancel := context.WithCancel(context.Background()) + testAC.mcfg.tlsCfg.On("UpdateAutoTLSCert", + secondCert.CertPEM, + "redacted", + ).Return(nil).Once().Run(func(args mock.Arguments) { + updateCancel() + }) + + // when a cache event comes in we end up recalculating the fallback timer which requires this call + testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(secondCert.ValidBefore).Once() + testAC.mcfg.tlsCfg.On("AutoEncryptCertExpired").Return(true).Once() + + fallbackCtx, fallbackCancel := context.WithCancel(context.Background()) + + // also testing here that we can change server IPs for ongoing operations + testAC.mcfg.serverProvider.On("FindLANServer").Once().Return(&metadata.Server{ + Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300}, + }) + + // after sending the notification for the cert update another InitialConfiguration RPC + // will be made to pull down the latest configuration. 
So we need to set up the response + // for the second RPC + populateResponse := func(args mock.Arguments) { + resp, ok := args.Get(5).(*structs.SignedResponse) + require.True(t, ok) + *resp = structs.SignedResponse{ + VerifyServerHostname: true, + ConnectCARoots: secondRoots, + IssuedCert: *thirdCert, + ManualCARoots: testAC.extraCerts, + } + + fallbackCancel() + } + + expectedRequest := structs.CASignRequest{ + WriteRequest: structs.WriteRequest{Token: testAC.originalToken}, + Datacenter: "dc1", + // TODO (autoconf) Maybe in the future we should populate a CSR + // and do some manual parsing/verification of the contents. The + // bits not having to do with the signing key such as the requested + // SANs and CN. For now though the mockDirectRPC type will empty + // the CSR so we have to pass in an empty string to the expectation. + CSR: "", + } + + // the fallback routine to perform auto-encrypt again will need to grab this + testAC.mcfg.tokens.On("AgentToken").Return(testAC.originalToken).Once() + + testAC.mcfg.directRPC.On( + "RPC", + "dc1", + "autoconf", + &net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300}, + "AutoEncrypt.Sign", + &expectedRequest, + &structs.SignedResponse{}).Return(nil).Run(populateResponse).Once() + + testAC.mcfg.expectInitialTLS(t, "autoconf", "dc1", testAC.originalToken, secondCA, &secondRoots, thirdCert, testAC.extraCerts) + + // after the second RPC we now will use the new certs validity period in the next run loop iteration + testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Once() + + // now that all the mocks are set up we can trigger the whole thing by sending the second expired cert + // as a cache update event. 
+ req := cachetype.ConnectCALeafRequest{ + Datacenter: "dc1", + Agent: "autoconf", + Token: testAC.originalToken, + DNSSAN: defaultDNSSANs, + IPSAN: defaultIPSANs, + } + require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{ + CorrelationID: leafWatchID, + Result: secondCert, + Meta: cache.ResultMeta{ + Index: secondCert.ModifyIndex, + }, + })) + + // wait for the TLS certificates to get updated + require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time") + + // now wait for the fallback routine to be invoked + require.True(t, waitForChans(100*time.Millisecond, fallbackCtx.Done()), "fallback routines did not get invoked within the alotted time") +} diff --git a/agent/auto-config/config.go b/agent/auto-config/config.go index e6d729f4d4..c812cae6a4 100644 --- a/agent/auto-config/config.go +++ b/agent/auto-config/config.go @@ -3,9 +3,12 @@ package autoconf import ( "context" "net" + "time" + "github.com/hashicorp/consul/agent/cache" "github.com/hashicorp/consul/agent/config" - "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/metadata" + "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/lib" "github.com/hashicorp/go-hclog" ) @@ -18,12 +21,35 @@ type DirectRPC interface { RPC(dc string, node string, addr net.Addr, method string, args interface{}, reply interface{}) error } -// CertMonitor is the interface that needs to be satisfied for AutoConfig to be able to -// setup monitoring of the Connect TLS certificate after we first get it. 
-type CertMonitor interface { - Update(*structs.SignedResponse) error - Start(context.Context) (<-chan struct{}, error) - Stop() bool +// Cache is an interface to represent the methods of the +// agent/cache.Cache struct that we care about +type Cache interface { + Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error + Prepopulate(t string, result cache.FetchResult, dc string, token string, key string) error +} + +// ServerProvider is an interface that can be used to find one server in the local DC known to +// the agent via Gossip +type ServerProvider interface { + FindLANServer() *metadata.Server +} + +// TLSConfigurator is an interface of the methods on the tlsutil.Configurator that we will require at +// runtime. +type TLSConfigurator interface { + UpdateAutoTLS(manualCAPEMs, connectCAPEMs []string, pub, priv string, verifyServerHostname bool) error + UpdateAutoTLSCA([]string) error + UpdateAutoTLSCert(pub, priv string) error + AutoEncryptCertNotAfter() time.Time + AutoEncryptCertExpired() bool +} + +// TokenStore is an interface of the methods we will need to use from the token.Store. +type TokenStore interface { + AgentToken() string + UpdateAgentToken(secret string, source token.TokenSource) bool + Notify(kind token.TokenKind) token.Notifier + StopNotify(notifier token.Notifier) } // Config contains all the tunables for AutoConfig @@ -37,6 +63,10 @@ type Config struct { // configuration. Setting this field is required. DirectRPC DirectRPC + // ServerProvider is the interface to be used by AutoConfig to find any + // known servers during fallback operations. + ServerProvider ServerProvider + // Waiter is a RetryWaiter to be used during retrieval of the // initial configuration. When a round of requests fails we will // wait and eventually make another round of requests (1 round @@ -49,14 +79,28 @@ type Config struct { // having the test take minutes/hours to complete. 
Waiter *lib.RetryWaiter - // CertMonitor is the Connect TLS Certificate Monitor to be used for ongoing - // certificate renewals and connect CA roots updates. This field is not - // strictly required but if not provided the TLS certificates retrieved - // through by the AutoConfig.InitialConfiguration RPC will not be used - // or renewed. - CertMonitor CertMonitor - // Loader merges source with the existing FileSources and returns the complete // RuntimeConfig. Loader func(source config.Source) (cfg *config.RuntimeConfig, warnings []string, err error) + + // TLSConfigurator is the shared TLS Configurator. AutoConfig will update the + // auto encrypt/auto config certs as they are renewed. + TLSConfigurator TLSConfigurator + + // Cache is an object implementing our Cache interface. The Cache + // used at runtime must be able to handle Roots and Leaf Cert watches + Cache Cache + + // FallbackLeeway is the amount of time after certificate expiration before + // invoking the fallback routine. If not set this will default to 10s. + FallbackLeeway time.Duration + + // FallbackRetry is the duration between Fallback invocations when the configured + // fallback routine returns an error. If not set this will default to 1m. + FallbackRetry time.Duration + + // Tokens is the shared token store. It is used to retrieve the current + // agent token as well as getting notifications when that token is updated. + // This field is required. + Tokens TokenStore } diff --git a/agent/auto-config/config_translate.go b/agent/auto-config/config_translate.go index b7d04d5f61..ba8f940d1c 100644 --- a/agent/auto-config/config_translate.go +++ b/agent/auto-config/config_translate.go @@ -22,9 +22,9 @@ import ( // package cannot import the agent/config package without running into import cycles. 
func translateConfig(c *pbconfig.Config) config.Config { result := config.Config{ - Datacenter: &c.Datacenter, - PrimaryDatacenter: &c.PrimaryDatacenter, - NodeName: &c.NodeName, + Datacenter: stringPtrOrNil(c.Datacenter), + PrimaryDatacenter: stringPtrOrNil(c.PrimaryDatacenter), + NodeName: stringPtrOrNil(c.NodeName), // only output the SegmentName in the configuration if its non-empty // this will avoid a warning later when parsing the persisted configuration SegmentName: stringPtrOrNil(c.SegmentName), @@ -42,13 +42,13 @@ func translateConfig(c *pbconfig.Config) config.Config { if a := c.ACL; a != nil { result.ACL = config.ACL{ Enabled: &a.Enabled, - PolicyTTL: &a.PolicyTTL, - RoleTTL: &a.RoleTTL, - TokenTTL: &a.TokenTTL, - DownPolicy: &a.DownPolicy, - DefaultPolicy: &a.DefaultPolicy, + PolicyTTL: stringPtrOrNil(a.PolicyTTL), + RoleTTL: stringPtrOrNil(a.RoleTTL), + TokenTTL: stringPtrOrNil(a.TokenTTL), + DownPolicy: stringPtrOrNil(a.DownPolicy), + DefaultPolicy: stringPtrOrNil(a.DefaultPolicy), EnableKeyListPolicy: &a.EnableKeyListPolicy, - DisabledTTL: &a.DisabledTTL, + DisabledTTL: stringPtrOrNil(a.DisabledTTL), EnableTokenPersistence: &a.EnableTokenPersistence, } @@ -76,7 +76,7 @@ func translateConfig(c *pbconfig.Config) config.Config { result.RetryJoinLAN = g.RetryJoinLAN if e := c.Gossip.Encryption; e != nil { - result.EncryptKey = &e.Key + result.EncryptKey = stringPtrOrNil(e.Key) result.EncryptVerifyIncoming = &e.VerifyIncoming result.EncryptVerifyOutgoing = &e.VerifyOutgoing } diff --git a/agent/auto-config/config_translate_test.go b/agent/auto-config/config_translate_test.go index 18a4270a88..fa6d8febf0 100644 --- a/agent/auto-config/config_translate_test.go +++ b/agent/auto-config/config_translate_test.go @@ -1,10 +1,13 @@ package autoconf import ( + "fmt" "testing" "github.com/hashicorp/consul/agent/config" + "github.com/hashicorp/consul/agent/structs" pbconfig "github.com/hashicorp/consul/proto/pbconfig" + "github.com/hashicorp/consul/proto/pbconnect" 
"github.com/stretchr/testify/require" ) @@ -16,6 +19,38 @@ func boolPointer(b bool) *bool { return &b } +func translateCARootToProtobuf(in *structs.CARoot) (*pbconnect.CARoot, error) { + var out pbconnect.CARoot + if err := mapstructureTranslateToProtobuf(in, &out); err != nil { + return nil, fmt.Errorf("Failed to re-encode CA Roots: %w", err) + } + return &out, nil +} + +func mustTranslateCARootToProtobuf(t *testing.T, in *structs.CARoot) *pbconnect.CARoot { + out, err := translateCARootToProtobuf(in) + require.NoError(t, err) + return out +} + +func mustTranslateCARootsToStructs(t *testing.T, in *pbconnect.CARoots) *structs.IndexedCARoots { + out, err := translateCARootsToStructs(in) + require.NoError(t, err) + return out +} + +func mustTranslateCARootsToProtobuf(t *testing.T, in *structs.IndexedCARoots) *pbconnect.CARoots { + out, err := translateCARootsToProtobuf(in) + require.NoError(t, err) + return out +} + +func mustTranslateIssuedCertToProtobuf(t *testing.T, in *structs.IssuedCert) *pbconnect.IssuedCert { + out, err := translateIssuedCertToProtobuf(in) + require.NoError(t, err) + return out +} + func TestTranslateConfig(t *testing.T) { original := pbconfig.Config{ Datacenter: "abc", @@ -119,3 +154,9 @@ func TestTranslateConfig(t *testing.T) { translated := translateConfig(&original) require.Equal(t, expected, translated) } + +func TestCArootsTranslation(t *testing.T) { + _, indexedRoots, _ := testCerts(t, "autoconf", "dc1") + protoRoots := mustTranslateCARootsToProtobuf(t, indexedRoots) + require.Equal(t, indexedRoots, mustTranslateCARootsToStructs(t, protoRoots)) +} diff --git a/agent/auto-config/mock_test.go b/agent/auto-config/mock_test.go new file mode 100644 index 0000000000..d828e6a84e --- /dev/null +++ b/agent/auto-config/mock_test.go @@ -0,0 +1,337 @@ +package autoconf + +import ( + "context" + "net" + "sync" + "testing" + "time" + + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + 
"github.com/hashicorp/consul/agent/connect" + "github.com/hashicorp/consul/agent/metadata" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/token" + "github.com/hashicorp/consul/proto/pbautoconf" + "github.com/hashicorp/consul/sdk/testutil" + "github.com/stretchr/testify/mock" +) + +type mockDirectRPC struct { + mock.Mock +} + +func newMockDirectRPC(t *testing.T) *mockDirectRPC { + m := mockDirectRPC{} + m.Test(t) + return &m +} + +func (m *mockDirectRPC) RPC(dc string, node string, addr net.Addr, method string, args interface{}, reply interface{}) error { + var retValues mock.Arguments + if method == "AutoConfig.InitialConfiguration" { + req := args.(*pbautoconf.AutoConfigRequest) + csr := req.CSR + req.CSR = "" + retValues = m.Called(dc, node, addr, method, args, reply) + req.CSR = csr + } else if method == "AutoEncrypt.Sign" { + req := args.(*structs.CASignRequest) + csr := req.CSR + req.CSR = "" + retValues = m.Called(dc, node, addr, method, args, reply) + req.CSR = csr + } else { + retValues = m.Called(dc, node, addr, method, args, reply) + } + + return retValues.Error(0) +} + +type mockTLSConfigurator struct { + mock.Mock +} + +func newMockTLSConfigurator(t *testing.T) *mockTLSConfigurator { + m := mockTLSConfigurator{} + m.Test(t) + return &m +} + +func (m *mockTLSConfigurator) UpdateAutoTLS(manualCAPEMs, connectCAPEMs []string, pub, priv string, verifyServerHostname bool) error { + if priv != "" { + priv = "redacted" + } + + ret := m.Called(manualCAPEMs, connectCAPEMs, pub, priv, verifyServerHostname) + return ret.Error(0) +} + +func (m *mockTLSConfigurator) UpdateAutoTLSCA(pems []string) error { + ret := m.Called(pems) + return ret.Error(0) +} +func (m *mockTLSConfigurator) UpdateAutoTLSCert(pub, priv string) error { + if priv != "" { + priv = "redacted" + } + ret := m.Called(pub, priv) + return ret.Error(0) +} +func (m *mockTLSConfigurator) AutoEncryptCertNotAfter() time.Time { + ret := m.Called() + ts, _ := 
ret.Get(0).(time.Time) + + return ts +} +func (m *mockTLSConfigurator) AutoEncryptCertExpired() bool { + ret := m.Called() + return ret.Bool(0) +} + +type mockServerProvider struct { + mock.Mock +} + +func newMockServerProvider(t *testing.T) *mockServerProvider { + m := mockServerProvider{} + m.Test(t) + return &m +} + +func (m *mockServerProvider) FindLANServer() *metadata.Server { + ret := m.Called() + srv, _ := ret.Get(0).(*metadata.Server) + return srv +} + +type mockWatcher struct { + ch chan<- cache.UpdateEvent + done <-chan struct{} +} + +type mockCache struct { + mock.Mock + + lock sync.Mutex + watchers map[string][]mockWatcher +} + +func newMockCache(t *testing.T) *mockCache { + m := mockCache{ + watchers: make(map[string][]mockWatcher), + } + m.Test(t) + return &m +} + +func (m *mockCache) Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error { + ret := m.Called(ctx, t, r, correlationID, ch) + + err := ret.Error(0) + if err == nil { + m.lock.Lock() + key := r.CacheInfo().Key + m.watchers[key] = append(m.watchers[key], mockWatcher{ch: ch, done: ctx.Done()}) + m.lock.Unlock() + } + return err +} + +func (m *mockCache) Prepopulate(t string, result cache.FetchResult, dc string, token string, key string) error { + var restore string + cert, ok := result.Value.(*structs.IssuedCert) + if ok { + // we cannot know what the private key is prior to it being injected into the cache. 
+ // therefore redact it here and all mock expectations should take that into account + restore = cert.PrivateKeyPEM + cert.PrivateKeyPEM = "redacted" + } + + ret := m.Called(t, result, dc, token, key) + + if ok && restore != "" { + cert.PrivateKeyPEM = restore + } + return ret.Error(0) +} + +func (m *mockCache) sendNotification(ctx context.Context, key string, u cache.UpdateEvent) bool { + m.lock.Lock() + defer m.lock.Unlock() + + watchers, ok := m.watchers[key] + if !ok || len(m.watchers) < 1 { + return false + } + + var newWatchers []mockWatcher + + for _, watcher := range watchers { + select { + case watcher.ch <- u: + newWatchers = append(newWatchers, watcher) + case <-watcher.done: + // do nothing, this watcher will be removed from the list + case <-ctx.Done(): + // return doesn't matter here really, the test is being cancelled + return true + } + } + + // this removes any already cancelled watches from being sent to + m.watchers[key] = newWatchers + + return true +} + +type mockTokenStore struct { + mock.Mock +} + +func newMockTokenStore(t *testing.T) *mockTokenStore { + m := mockTokenStore{} + m.Test(t) + return &m +} + +func (m *mockTokenStore) AgentToken() string { + ret := m.Called() + return ret.String(0) +} + +func (m *mockTokenStore) UpdateAgentToken(secret string, source token.TokenSource) bool { + return m.Called(secret, source).Bool(0) +} + +func (m *mockTokenStore) Notify(kind token.TokenKind) token.Notifier { + ret := m.Called(kind) + n, _ := ret.Get(0).(token.Notifier) + return n +} + +func (m *mockTokenStore) StopNotify(notifier token.Notifier) { + m.Called(notifier) +} + +type mockedConfig struct { + Config + + directRPC *mockDirectRPC + serverProvider *mockServerProvider + cache *mockCache + tokens *mockTokenStore + tlsCfg *mockTLSConfigurator +} + +func newMockedConfig(t *testing.T) *mockedConfig { + directRPC := newMockDirectRPC(t) + serverProvider := newMockServerProvider(t) + mcache := newMockCache(t) + tokens := newMockTokenStore(t) + 
tlsCfg := newMockTLSConfigurator(t) + + // I am not sure it is well defined behavior but in testing it + // out it does appear like Cleanup functions can fail tests + // Adding in the mock expectations assertions here saves us + // a bunch of code in the other test functions. + t.Cleanup(func() { + if !t.Failed() { + directRPC.AssertExpectations(t) + serverProvider.AssertExpectations(t) + mcache.AssertExpectations(t) + tokens.AssertExpectations(t) + tlsCfg.AssertExpectations(t) + } + }) + + return &mockedConfig{ + Config: Config{ + DirectRPC: directRPC, + ServerProvider: serverProvider, + Cache: mcache, + Tokens: tokens, + TLSConfigurator: tlsCfg, + Logger: testutil.Logger(t), + }, + directRPC: directRPC, + serverProvider: serverProvider, + cache: mcache, + tokens: tokens, + tlsCfg: tlsCfg, + } +} + +func (m *mockedConfig) expectInitialTLS(t *testing.T, agentName, datacenter, token string, ca *structs.CARoot, indexedRoots *structs.IndexedCARoots, cert *structs.IssuedCert, extraCerts []string) { + var pems []string + for _, root := range indexedRoots.Roots { + pems = append(pems, root.RootCert) + } + + // we should update the TLS configurator with the proper certs + m.tlsCfg.On("UpdateAutoTLS", + extraCerts, + pems, + cert.CertPEM, + // auto-config handles the CSR and Key so our tests don't have + // a way to know that the key is correct or not. 
We do replace + // a non empty PEM with "redacted" so we can ensure that some + // certificate is being sent + "redacted", + true, + ).Return(nil).Once() + + rootRes := cache.FetchResult{Value: indexedRoots, Index: indexedRoots.QueryMeta.Index} + rootsReq := structs.DCSpecificRequest{Datacenter: datacenter} + + // we should prepopulate the cache with the CA roots + m.cache.On("Prepopulate", + cachetype.ConnectCARootName, + rootRes, + datacenter, + "", + rootsReq.CacheInfo().Key, + ).Return(nil).Once() + + leafReq := cachetype.ConnectCALeafRequest{ + Token: token, + Agent: agentName, + Datacenter: datacenter, + } + + // copy the cert and redact the private key for the mock expectation + // the actual private key will not correspond to the cert but that's + // because AutoConfig is generating a key/csr internally and sending that + // on up with the request. + copy := *cert + copy.PrivateKeyPEM = "redacted" + leafRes := cache.FetchResult{ + Value: &copy, + Index: copy.RaftIndex.ModifyIndex, + State: cachetype.ConnectCALeafSuccess(ca.SigningKeyID), + } + + // we should prepopulate the cache with the agent's cert + m.cache.On("Prepopulate", + cachetype.ConnectCALeafName, + leafRes, + datacenter, + token, + leafReq.Key(), + ).Return(nil).Once() + + // when prepopulating the cert in the cache we grab the token so + // we should expect that here + m.tokens.On("AgentToken").Return(token).Once() +} + +func (m *mockedConfig) setupInitialTLS(t *testing.T, agentName, datacenter, token string) (*structs.IndexedCARoots, *structs.IssuedCert, []string) { + ca, indexedRoots, cert := testCerts(t, agentName, datacenter) + + ca2 := connect.TestCA(t, nil) + extraCerts := []string{ca2.RootCert} + + m.expectInitialTLS(t, agentName, datacenter, token, ca, indexedRoots, cert, extraCerts) + return indexedRoots, cert, extraCerts +} diff --git a/agent/auto-config/persist.go b/agent/auto-config/persist.go new file mode 100644 index 0000000000..9f94f445c7 --- /dev/null +++ 
b/agent/auto-config/persist.go @@ -0,0 +1,86 @@ +package autoconf + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strings" + + "github.com/golang/protobuf/jsonpb" + "github.com/hashicorp/consul/proto/pbautoconf" +) + +const ( + // autoConfigFileName is the name of the file that the agent auto-config settings are + // stored in within the data directory + autoConfigFileName = "auto-config.json" +) + +var ( + pbMarshaler = &jsonpb.Marshaler{ + OrigName: false, + EnumsAsInts: false, + Indent: " ", + EmitDefaults: true, + } + + pbUnmarshaler = &jsonpb.Unmarshaler{ + AllowUnknownFields: false, + } +) + +func (ac *AutoConfig) readPersistedAutoConfig() (*pbautoconf.AutoConfigResponse, error) { + if ac.config.DataDir == "" { + // no data directory means we don't have anything to potentially load + return nil, nil + } + + path := filepath.Join(ac.config.DataDir, autoConfigFileName) + ac.logger.Debug("attempting to restore any persisted configuration", "path", path) + + content, err := ioutil.ReadFile(path) + if err == nil { + rdr := strings.NewReader(string(content)) + + var resp pbautoconf.AutoConfigResponse + if err := pbUnmarshaler.Unmarshal(rdr, &resp); err != nil { + return nil, fmt.Errorf("failed to decode persisted auto-config data: %w", err) + } + + ac.logger.Info("read persisted configuration", "path", path) + return &resp, nil + } + + if !os.IsNotExist(err) { + return nil, fmt.Errorf("failed to load %s: %w", path, err) + } + + // ignore non-existence errors as that is an indicator that we haven't + // performed the auto configuration before + return nil, nil +} + +func (ac *AutoConfig) persistAutoConfig(resp *pbautoconf.AutoConfigResponse) error { + // now that we know the configuration is generally fine including TLS certs go ahead and persist it to disk. 
+ if ac.config.DataDir == "" { + ac.logger.Debug("not persisting auto-config settings because there is no data directory") + return nil + } + + serialized, err := pbMarshaler.MarshalToString(resp) + if err != nil { + return fmt.Errorf("failed to encode auto-config response as JSON: %w", err) + } + + path := filepath.Join(ac.config.DataDir, autoConfigFileName) + + err = ioutil.WriteFile(path, []byte(serialized), 0660) + if err != nil { + return fmt.Errorf("failed to write auto-config configurations: %w", err) + } + + ac.logger.Debug("auto-config settings were persisted to disk") + + return nil +} diff --git a/agent/auto-config/run.go b/agent/auto-config/run.go new file mode 100644 index 0000000000..6155dc6bed --- /dev/null +++ b/agent/auto-config/run.go @@ -0,0 +1,192 @@ +package autoconf + +import ( + "context" + "fmt" + "time" + + "github.com/hashicorp/consul/agent/cache" + "github.com/hashicorp/consul/agent/structs" +) + +// handleCacheEvent is used to handle event notifications from the cache for the roots +// or leaf cert watches. 
+func (ac *AutoConfig) handleCacheEvent(u cache.UpdateEvent) error { + switch u.CorrelationID { + case rootsWatchID: + ac.logger.Debug("roots watch fired - updating CA certificates") + if u.Err != nil { + return fmt.Errorf("root watch returned an error: %w", u.Err) + } + + roots, ok := u.Result.(*structs.IndexedCARoots) + if !ok { + return fmt.Errorf("invalid type for roots watch response: %T", u.Result) + } + + return ac.updateCARoots(roots) + case leafWatchID: + ac.logger.Debug("leaf certificate watch fired - updating TLS certificate") + if u.Err != nil { + return fmt.Errorf("leaf watch returned an error: %w", u.Err) + } + + leaf, ok := u.Result.(*structs.IssuedCert) + if !ok { + return fmt.Errorf("invalid type for agent leaf cert watch response: %T", u.Result) + } + + return ac.updateLeafCert(leaf) + } + + return nil +} + +// handleTokenUpdate is used when a notification about the agent token being updated +// is received and various watches need cancelling/restarting to use the new token. +func (ac *AutoConfig) handleTokenUpdate(ctx context.Context) error { + ac.logger.Debug("Agent token updated - resetting watches") + + // TODO (autoencrypt) Prepopulate the cache with the new token with + // the existing cache entry with the old token. The certificate doesn't + // need to change just because the token has. However there isn't a + // good way to make that happen and this behavior is benign enough + // that I am going to push off implementing it. + + // the agent token has been updated so we must update our leaf cert watch. + // this cancels the current watches before setting up new ones + ac.cancelWatches() + + // recreate the chan for cache updates. This is a precautionary measure to ensure + // that we don't accidentally get notified for the new watches being setup before + // a blocking query in the cache returns and sends data to the old chan. 
In theory + // the code in agent/cache/watch.go should prevent this where we specifically check + // for context cancellation prior to sending the event. However we could cancel + // it after that check and finish setting up the new watches before getting the old + // events. Both the go routine scheduler and the OS thread scheduler would have to + // be acting up for this to happen. Regardless the way to ensure we don't get events + // for the old watches is to simply replace the chan we are expecting them from. + close(ac.cacheUpdates) + ac.cacheUpdates = make(chan cache.UpdateEvent, 10) + + // restart watches - this will be done with the correct token + cancelWatches, err := ac.setupCertificateCacheWatches(ctx) + if err != nil { + return fmt.Errorf("failed to restart watches after agent token update: %w", err) + } + ac.cancelWatches = cancelWatches + return nil +} + +// handleFallback is used when the current TLS certificate has expired and the normal +// updating mechanisms have failed to renew it quickly enough. This function will +// use the configured fallback mechanism to retrieve a new cert and start monitoring +// that one. +func (ac *AutoConfig) handleFallback(ctx context.Context) error { + ac.logger.Warn("agent's client certificate has expired") + // Background because the context is mainly useful when the agent is first starting up. 
+ switch { + case ac.config.AutoConfig.Enabled: + resp, err := ac.getInitialConfiguration(ctx) + if err != nil { + return fmt.Errorf("error while retrieving new agent certificates via auto-config: %w", err) + } + + return ac.recordInitialConfiguration(resp) + case ac.config.AutoEncryptTLS: + reply, err := ac.autoEncryptInitialCerts(ctx) + if err != nil { + return fmt.Errorf("error while retrieving new agent certificate via auto-encrypt: %w", err) + } + return ac.setInitialTLSCertificates(reply) + default: + return fmt.Errorf("logic error: either auto-encrypt or auto-config must be enabled") + } +} + +// run is the private method to be spawn by the Start method for +// executing the main monitoring loop. +func (ac *AutoConfig) run(ctx context.Context, exit chan struct{}) { + // The fallbackTimer is used to notify AFTER the agents + // leaf certificate has expired and where we need + // to fall back to the less secure RPC endpoint just like + // if the agent was starting up new. + // + // Check 10sec (fallback leeway duration) after cert + // expires. The agent cache should be handling the expiration + // and renew it before then. + // + // If there is no cert, AutoEncryptCertNotAfter returns + // a value in the past which immediately triggers the + // renew, but this case shouldn't happen because at + // this point, auto_encrypt was just being setup + // successfully. 
+ calcFallbackInterval := func() time.Duration { + certExpiry := ac.acConfig.TLSConfigurator.AutoEncryptCertNotAfter() + return certExpiry.Add(ac.acConfig.FallbackLeeway).Sub(time.Now()) + } + fallbackTimer := time.NewTimer(calcFallbackInterval()) + + // cleanup for once we are stopped + defer func() { + // cancel the go routines performing the cache watches + ac.cancelWatches() + // ensure we don't leak the timers go routine + fallbackTimer.Stop() + // stop receiving notifications for token updates + ac.acConfig.Tokens.StopNotify(ac.tokenUpdates) + + ac.logger.Debug("auto-config has been stopped") + + ac.Lock() + ac.cancel = nil + ac.running = false + // this should be the final cleanup task as its what notifies + // the rest of the world that this go routine has exited. + close(exit) + ac.Unlock() + }() + + for { + select { + case <-ctx.Done(): + ac.logger.Debug("stopping auto-config") + return + case <-ac.tokenUpdates.Ch: + ac.logger.Debug("handling a token update event") + + if err := ac.handleTokenUpdate(ctx); err != nil { + ac.logger.Error("error in handling token update event", "error", err) + } + case u := <-ac.cacheUpdates: + ac.logger.Debug("handling a cache update event", "correlation_id", u.CorrelationID) + + if err := ac.handleCacheEvent(u); err != nil { + ac.logger.Error("error in handling cache update event", "error", err) + } + + // reset the fallback timer as the certificate may have been updated + fallbackTimer.Stop() + fallbackTimer = time.NewTimer(calcFallbackInterval()) + case <-fallbackTimer.C: + // This is a safety net in case the cert doesn't get renewed + // in time. The agent would be stuck in that case because the watches + // never use the AutoEncrypt.Sign endpoint. 
+ + // check auto encrypt client cert expiration + if ac.acConfig.TLSConfigurator.AutoEncryptCertExpired() { + if err := ac.handleFallback(ctx); err != nil { + ac.logger.Error("error when handling a certificate expiry event", "error", err) + fallbackTimer = time.NewTimer(ac.acConfig.FallbackRetry) + } else { + fallbackTimer = time.NewTimer(calcFallbackInterval()) + } + } else { + // this shouldn't be possible. We calculate the timer duration to be the certificate + // expiration time + some leeway (10s default). So whenever we get here the certificate + // should be expired. Regardless its probably worth resetting the timer. + fallbackTimer = time.NewTimer(calcFallbackInterval()) + } + } + } +} diff --git a/agent/auto-config/server_addr.go b/agent/auto-config/server_addr.go new file mode 100644 index 0000000000..98af4ae55a --- /dev/null +++ b/agent/auto-config/server_addr.go @@ -0,0 +1,111 @@ +package autoconf + +import ( + "fmt" + "net" + "strconv" + "strings" + + "github.com/hashicorp/consul/lib" + "github.com/hashicorp/go-discover" + discoverk8s "github.com/hashicorp/go-discover/provider/k8s" + + "github.com/hashicorp/go-hclog" +) + +func (ac *AutoConfig) discoverServers(servers []string) ([]string, error) { + providers := make(map[string]discover.Provider) + for k, v := range discover.Providers { + providers[k] = v + } + providers["k8s"] = &discoverk8s.Provider{} + + disco, err := discover.New( + discover.WithUserAgent(lib.UserAgent()), + discover.WithProviders(providers), + ) + + if err != nil { + return nil, fmt.Errorf("Failed to create go-discover resolver: %w", err) + } + + var addrs []string + for _, addr := range servers { + switch { + case strings.Contains(addr, "provider="): + resolved, err := disco.Addrs(addr, ac.logger.StandardLogger(&hclog.StandardLoggerOptions{InferLevels: true})) + if err != nil { + ac.logger.Error("failed to resolve go-discover auto-config servers", "configuration", addr, "err", err) + continue + } + + addrs = append(addrs, 
resolved...) + ac.logger.Debug("discovered auto-config servers", "servers", resolved) + default: + addrs = append(addrs, addr) + } + } + + return addrs, nil +} + +// autoConfigHosts is responsible for taking the list of server addresses +// and resolving any go-discover provider invocations. It will then return +// a list of hosts. These might be hostnames and is expected that DNS resolution +// may be performed after this function runs. Additionally these may contain +// ports so SplitHostPort could also be necessary. +func (ac *AutoConfig) autoConfigHosts() ([]string, error) { + // use servers known to gossip if there are any + if ac.acConfig.ServerProvider != nil { + if srv := ac.acConfig.ServerProvider.FindLANServer(); srv != nil { + return []string{srv.Addr.String()}, nil + } + } + + addrs, err := ac.discoverServers(ac.config.AutoConfig.ServerAddresses) + if err != nil { + return nil, err + } + + if len(addrs) == 0 { + return nil, fmt.Errorf("no auto-config server addresses available for use") + } + + return addrs, nil +} + +// resolveHost will take a single host string and convert it to a list of TCPAddrs +// This will process any port in the input as well as looking up the hostname using +// normal DNS resolution. 
+func (ac *AutoConfig) resolveHost(hostPort string) []net.TCPAddr { + port := ac.config.ServerPort + host, portStr, err := net.SplitHostPort(hostPort) + if err != nil { + if strings.Contains(err.Error(), "missing port in address") { + host = hostPort + } else { + ac.logger.Warn("error splitting host address into IP and port", "address", hostPort, "error", err) + return nil + } + } else { + port, err = strconv.Atoi(portStr) + if err != nil { + ac.logger.Warn("Parsed port is not an integer", "port", portStr, "error", err) + return nil + } + } + + // resolve the host to a list of IPs + ips, err := net.LookupIP(host) + if err != nil { + ac.logger.Warn("IP resolution failed", "host", host, "error", err) + return nil + } + + var addrs []net.TCPAddr + for _, ip := range ips { + addrs = append(addrs, net.TCPAddr{IP: ip, Port: port}) + } + + return addrs +} diff --git a/agent/auto-config/tls.go b/agent/auto-config/tls.go new file mode 100644 index 0000000000..380c9f9f8d --- /dev/null +++ b/agent/auto-config/tls.go @@ -0,0 +1,280 @@ +package autoconf + +import ( + "context" + "fmt" + "net" + + "github.com/hashicorp/consul/agent/cache" + cachetype "github.com/hashicorp/consul/agent/cache-types" + "github.com/hashicorp/consul/agent/connect" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/proto/pbautoconf" +) + +const ( + // ID of the roots watch + rootsWatchID = "roots" + + // ID of the leaf watch + leafWatchID = "leaf" + + unknownTrustDomain = "unknown" +) + +var ( + defaultDNSSANs = []string{"localhost"} + + defaultIPSANs = []net.IP{{127, 0, 0, 1}, net.ParseIP("::1")} +) + +func extractPEMs(roots *structs.IndexedCARoots) []string { + var pems []string + for _, root := range roots.Roots { + pems = append(pems, root.RootCert) + } + return pems +} + +// updateTLSFromResponse will update the TLS certificate and roots in the shared +// TLS configurator. 
+func (ac *AutoConfig) updateTLSFromResponse(resp *pbautoconf.AutoConfigResponse) error { + var pems []string + for _, root := range resp.GetCARoots().GetRoots() { + pems = append(pems, root.RootCert) + } + + err := ac.acConfig.TLSConfigurator.UpdateAutoTLS( + resp.ExtraCACertificates, + pems, + resp.Certificate.GetCertPEM(), + resp.Certificate.GetPrivateKeyPEM(), + resp.Config.GetTLS().GetVerifyServerHostname(), + ) + + if err != nil { + return fmt.Errorf("Failed to update the TLS configurator with new certificates: %w", err) + } + + return nil +} + +func (ac *AutoConfig) setInitialTLSCertificates(certs *structs.SignedResponse) error { + if certs == nil { + return nil + } + + if err := ac.populateCertificateCache(certs); err != nil { + return fmt.Errorf("error populating cache with certificates: %w", err) + } + + connectCAPems := extractPEMs(&certs.ConnectCARoots) + + err := ac.acConfig.TLSConfigurator.UpdateAutoTLS( + certs.ManualCARoots, + connectCAPems, + certs.IssuedCert.CertPEM, + certs.IssuedCert.PrivateKeyPEM, + certs.VerifyServerHostname, + ) + + if err != nil { + return fmt.Errorf("error updating TLS configurator with certificates: %w", err) + } + + return nil +} + +func (ac *AutoConfig) populateCertificateCache(certs *structs.SignedResponse) error { + cert, err := connect.ParseCert(certs.IssuedCert.CertPEM) + if err != nil { + return fmt.Errorf("Failed to parse certificate: %w", err) + } + + // prepopulate roots cache + rootRes := cache.FetchResult{Value: &certs.ConnectCARoots, Index: certs.ConnectCARoots.QueryMeta.Index} + rootsReq := ac.caRootsRequest() + // getting the roots doesn't require a token, so an empty token is used in order to potentially share the cache entry with other users + if err := ac.acConfig.Cache.Prepopulate(cachetype.ConnectCARootName, rootRes, ac.config.Datacenter, "", rootsReq.CacheInfo().Key); err != nil { + return err + } + + leafReq := ac.leafCertRequest() + + // prepopulate leaf cache + certRes := cache.FetchResult{ + Value: &certs.IssuedCert, + Index: 
certs.IssuedCert.RaftIndex.ModifyIndex, + State: cachetype.ConnectCALeafSuccess(connect.EncodeSigningKeyID(cert.AuthorityKeyId)), + } + if err := ac.acConfig.Cache.Prepopulate(cachetype.ConnectCALeafName, certRes, leafReq.Datacenter, leafReq.Token, leafReq.Key()); err != nil { + return err + } + + return nil +} + +func (ac *AutoConfig) setupCertificateCacheWatches(ctx context.Context) (context.CancelFunc, error) { + notificationCtx, cancel := context.WithCancel(ctx) + + rootsReq := ac.caRootsRequest() + err := ac.acConfig.Cache.Notify(notificationCtx, cachetype.ConnectCARootName, &rootsReq, rootsWatchID, ac.cacheUpdates) + if err != nil { + cancel() + return nil, err + } + + leafReq := ac.leafCertRequest() + err = ac.acConfig.Cache.Notify(notificationCtx, cachetype.ConnectCALeafName, &leafReq, leafWatchID, ac.cacheUpdates) + if err != nil { + cancel() + return nil, err + } + + return cancel, nil +} + +func (ac *AutoConfig) updateCARoots(roots *structs.IndexedCARoots) error { + switch { + case ac.config.AutoConfig.Enabled: + ac.Lock() + defer ac.Unlock() + var err error + ac.autoConfigResponse.CARoots, err = translateCARootsToProtobuf(roots) + if err != nil { + return err + } + + if err := ac.updateTLSFromResponse(ac.autoConfigResponse); err != nil { + return err + } + return ac.persistAutoConfig(ac.autoConfigResponse) + case ac.config.AutoEncryptTLS: + pems := extractPEMs(roots) + + if err := ac.acConfig.TLSConfigurator.UpdateAutoTLSCA(pems); err != nil { + return fmt.Errorf("failed to update Connect CA certificates: %w", err) + } + return nil + default: + return nil + } +} + +func (ac *AutoConfig) updateLeafCert(cert *structs.IssuedCert) error { + switch { + case ac.config.AutoConfig.Enabled: + ac.Lock() + defer ac.Unlock() + var err error + ac.autoConfigResponse.Certificate, err = translateIssuedCertToProtobuf(cert) + if err != nil { + return err + } + + if err := ac.updateTLSFromResponse(ac.autoConfigResponse); err != nil { + return err + } + return 
ac.persistAutoConfig(ac.autoConfigResponse) + case ac.config.AutoEncryptTLS: + if err := ac.acConfig.TLSConfigurator.UpdateAutoTLSCert(cert.CertPEM, cert.PrivateKeyPEM); err != nil { + return fmt.Errorf("failed to update the agent leaf cert: %w", err) + } + return nil + default: + return nil + } +} + +func (ac *AutoConfig) caRootsRequest() structs.DCSpecificRequest { + return structs.DCSpecificRequest{Datacenter: ac.config.Datacenter} +} + +func (ac *AutoConfig) leafCertRequest() cachetype.ConnectCALeafRequest { + return cachetype.ConnectCALeafRequest{ + Datacenter: ac.config.Datacenter, + Agent: ac.config.NodeName, + DNSSAN: ac.getDNSSANs(), + IPSAN: ac.getIPSANs(), + Token: ac.acConfig.Tokens.AgentToken(), + } +} + +// generateCSR will generate a CSR for an Agent certificate. This should +// be sent along with the AutoConfig.InitialConfiguration RPC or the +// AutoEncrypt.Sign RPC. The generated CSR does NOT have a real trust domain +// as when generating this we do not yet have the CA roots. The server will +// update the trust domain for us though. +func (ac *AutoConfig) generateCSR() (csr string, key string, err error) { + // We don't provide the correct host here, because we don't know any + // better at this point. Apart from the domain, we would need the + // ClusterID, which we don't have. This is why we go with + // unknownTrustDomain the first time. Subsequent CSRs will have the + // correct TrustDomain. 
+ id := &connect.SpiffeIDAgent{ + // will be replaced + Host: unknownTrustDomain, + Datacenter: ac.config.Datacenter, + Agent: ac.config.NodeName, + } + + caConfig, err := ac.config.ConnectCAConfiguration() + if err != nil { + return "", "", fmt.Errorf("Cannot generate CSR: %w", err) + } + + conf, err := caConfig.GetCommonConfig() + if err != nil { + return "", "", fmt.Errorf("Failed to load common CA configuration: %w", err) + } + + if conf.PrivateKeyType == "" { + conf.PrivateKeyType = connect.DefaultPrivateKeyType + } + if conf.PrivateKeyBits == 0 { + conf.PrivateKeyBits = connect.DefaultPrivateKeyBits + } + + // Create a new private key + pk, pkPEM, err := connect.GeneratePrivateKeyWithConfig(conf.PrivateKeyType, conf.PrivateKeyBits) + if err != nil { + return "", "", fmt.Errorf("Failed to generate private key: %w", err) + } + + dnsNames := ac.getDNSSANs() + ipAddresses := ac.getIPSANs() + + // Create a CSR. + // + // The Common Name includes the dummy trust domain for now but Server will + // override this when it is signed anyway so it's OK. + cn := connect.AgentCN(ac.config.NodeName, unknownTrustDomain) + csr, err = connect.CreateCSR(id, cn, pk, dnsNames, ipAddresses) + if err != nil { + return "", "", err + } + + return csr, pkPEM, nil +} + +func (ac *AutoConfig) getDNSSANs() []string { + sans := defaultDNSSANs + switch { + case ac.config.AutoConfig.Enabled: + sans = append(sans, ac.config.AutoConfig.DNSSANs...) + case ac.config.AutoEncryptTLS: + sans = append(sans, ac.config.AutoEncryptDNSSAN...) + } + return sans +} + +func (ac *AutoConfig) getIPSANs() []net.IP { + sans := defaultIPSANs + switch { + case ac.config.AutoConfig.Enabled: + sans = append(sans, ac.config.AutoConfig.IPSANs...) + case ac.config.AutoEncryptTLS: + sans = append(sans, ac.config.AutoEncryptIPSAN...) 
+ } + return sans +} diff --git a/agent/auto-config/tls_test.go b/agent/auto-config/tls_test.go new file mode 100644 index 0000000000..400d7be0df --- /dev/null +++ b/agent/auto-config/tls_test.go @@ -0,0 +1,56 @@ +package autoconf + +import ( + "testing" + "time" + + "github.com/hashicorp/consul/agent/connect" + "github.com/hashicorp/consul/agent/structs" + "github.com/stretchr/testify/require" +) + +func newLeaf(t *testing.T, agentName, datacenter string, ca *structs.CARoot, idx uint64, expiration time.Duration) *structs.IssuedCert { + t.Helper() + + pub, priv, err := connect.TestAgentLeaf(t, agentName, datacenter, ca, expiration) + require.NoError(t, err) + cert, err := connect.ParseCert(pub) + require.NoError(t, err) + + spiffeID, err := connect.ParseCertURI(cert.URIs[0]) + require.NoError(t, err) + + agentID, ok := spiffeID.(*connect.SpiffeIDAgent) + require.True(t, ok, "certificate doesn't have an agent leaf cert URI") + + return &structs.IssuedCert{ + SerialNumber: cert.SerialNumber.String(), + CertPEM: pub, + PrivateKeyPEM: priv, + ValidAfter: cert.NotBefore, + ValidBefore: cert.NotAfter, + Agent: agentID.Agent, + AgentURI: agentID.URI().String(), + EnterpriseMeta: *structs.DefaultEnterpriseMeta(), + RaftIndex: structs.RaftIndex{ + CreateIndex: idx, + ModifyIndex: idx, + }, + } +} + +func testCerts(t *testing.T, agentName, datacenter string) (*structs.CARoot, *structs.IndexedCARoots, *structs.IssuedCert) { + ca := connect.TestCA(t, nil) + ca.IntermediateCerts = make([]string, 0) + cert := newLeaf(t, agentName, datacenter, ca, 1, 10*time.Minute) + indexedRoots := structs.IndexedCARoots{ + ActiveRootID: ca.ID, + TrustDomain: connect.TestClusterID, + Roots: []*structs.CARoot{ + ca, + }, + QueryMeta: structs.QueryMeta{Index: 1}, + } + + return ca, &indexedRoots, cert +} diff --git a/agent/cache/cache.go b/agent/cache/cache.go index 44aeb3a046..b92feb5a50 100644 --- a/agent/cache/cache.go +++ b/agent/cache/cache.go @@ -144,16 +144,26 @@ type Options struct { 
EntryFetchRate rate.Limit } -// New creates a new cache with the given RPC client and reasonable defaults. -// Further settings can be tweaked on the returned value. -func New(options Options) *Cache { +// Equal returns true if both options are equivalent +func (o Options) Equal(other Options) bool { + return o.EntryFetchMaxBurst == other.EntryFetchMaxBurst && o.EntryFetchRate == other.EntryFetchRate +} + +// applyDefaultValuesOnOptions sets default values on unset options and returns the updated value +func applyDefaultValuesOnOptions(options Options) Options { if options.EntryFetchRate == 0.0 { options.EntryFetchRate = DefaultEntryFetchRate } if options.EntryFetchMaxBurst == 0 { options.EntryFetchMaxBurst = DefaultEntryFetchMaxBurst } + return options +} +// New creates a new cache with the given RPC client and reasonable defaults. +// Further settings can be tweaked on the returned value. +func New(options Options) *Cache { + options = applyDefaultValuesOnOptions(options) // Initialize the heap. The buffer of 1 is really important because // its possible for the expiry loop to trigger the heap to update // itself and it'd block forever otherwise. 
@@ -234,6 +244,28 @@ func (c *Cache) RegisterType(n string, typ Type) { c.types[n] = typeEntry{Name: n, Type: typ, Opts: &opts} } +// ReloadOptions updates the cache with the new options. +// It returns true if the Cache was updated, false if it was already up to date +func (c *Cache) ReloadOptions(options Options) bool { + options = applyDefaultValuesOnOptions(options) + modified := !options.Equal(c.options) + if modified { + c.entriesLock.RLock() + defer c.entriesLock.RUnlock() + for _, entry := range c.entries { + if c.options.EntryFetchRate != options.EntryFetchRate { + entry.FetchRateLimiter.SetLimit(options.EntryFetchRate) + } + if c.options.EntryFetchMaxBurst != options.EntryFetchMaxBurst { + entry.FetchRateLimiter.SetBurst(options.EntryFetchMaxBurst) + } + } + c.options.EntryFetchRate = options.EntryFetchRate + c.options.EntryFetchMaxBurst = options.EntryFetchMaxBurst + } + return modified +} + // Get loads the data for the given type and request. If data satisfying the // minimum index is present in the cache, it is returned immediately. Otherwise, // this will block until the data is available or the request timeout is diff --git a/agent/cache/cache_test.go b/agent/cache/cache_test.go index 54794f4c3d..c2442ea7c1 100644 --- a/agent/cache/cache_test.go +++ b/agent/cache/cache_test.go @@ -14,6 +14,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "golang.org/x/time/rate" ) // Test a basic Get with no indexes (and therefore no blocking queries). @@ -1220,6 +1221,64 @@ func TestCacheGet_nonBlockingType(t *testing.T) { typ.AssertExpectations(t) } +// TestCacheReload checks that ReloadOptions reports whether the options changed +// and applies the new rate limit and burst to existing cache entries. 
+func TestCacheReload(t *testing.T) { + t.Parallel() + + typ1 := TestType(t) + defer typ1.AssertExpectations(t) + + c := New(Options{EntryFetchRate: rate.Limit(1), EntryFetchMaxBurst: 1}) + c.RegisterType("t1", typ1) + typ1.Mock.On("Fetch", mock.Anything, mock.Anything).Return(FetchResult{Value: 42, Index: 42}, nil).Maybe() + + require.False(t, c.ReloadOptions(Options{EntryFetchRate: rate.Limit(1), EntryFetchMaxBurst: 1}), "Value should not be reloaded") + + _, meta, err := c.Get(context.Background(), "t1", TestRequest(t, RequestInfo{Key: "hello1", MinIndex: uint64(1)})) + require.NoError(t, err) + require.Equal(t, meta.Index, uint64(42)) + + testEntry := func(t *testing.T, doTest func(t *testing.T, entry cacheEntry)) { + c.entriesLock.Lock() + tEntry, ok := c.types["t1"] + require.True(t, ok) + keyName := makeEntryKey("t1", "", "", "hello1") + ok, entryValid, entry := c.getEntryLocked(tEntry, keyName, RequestInfo{}) + require.True(t, ok) + require.True(t, entryValid) + doTest(t, entry) + c.entriesLock.Unlock() + + } + testEntry(t, func(t *testing.T, entry cacheEntry) { + require.Equal(t, entry.FetchRateLimiter.Limit(), rate.Limit(1)) + require.Equal(t, entry.FetchRateLimiter.Burst(), 1) + }) + + // Modify only rateLimit + require.True(t, c.ReloadOptions(Options{EntryFetchRate: rate.Limit(100), EntryFetchMaxBurst: 1})) + testEntry(t, func(t *testing.T, entry cacheEntry) { + require.Equal(t, entry.FetchRateLimiter.Limit(), rate.Limit(100)) + require.Equal(t, entry.FetchRateLimiter.Burst(), 1) + }) + + // Modify only Burst + require.True(t, c.ReloadOptions(Options{EntryFetchRate: rate.Limit(100), EntryFetchMaxBurst: 5})) + testEntry(t, func(t *testing.T, entry cacheEntry) { + require.Equal(t, entry.FetchRateLimiter.Limit(), rate.Limit(100)) + require.Equal(t, entry.FetchRateLimiter.Burst(), 5) + }) + + // Modify both Burst and Limit at the same time + require.True(t, c.ReloadOptions(Options{EntryFetchRate: rate.Limit(1000), EntryFetchMaxBurst: 42})) + + testEntry(t, 
func(t *testing.T, entry cacheEntry) { + require.Equal(t, entry.FetchRateLimiter.Limit(), rate.Limit(1000)) + require.Equal(t, entry.FetchRateLimiter.Burst(), 42) + }) +} + // TestCacheThrottle checks the assumptions for the cache throttling. It sets // up a cache with Options{EntryFetchRate: 10.0, EntryFetchMaxBurst: 1}, which // allows for 10req/s, or one request every 100ms. diff --git a/agent/cache/testing.go b/agent/cache/testing.go index c96612d949..90a0d7bef5 100644 --- a/agent/cache/testing.go +++ b/agent/cache/testing.go @@ -60,7 +60,7 @@ func TestCacheNotifyChResult(t testing.T, ch <-chan UpdateEvent, expected ...Upd } got := make([]UpdateEvent, 0, expectLen) - timeoutCh := time.After(50 * time.Millisecond) + timeoutCh := time.After(75 * time.Millisecond) OUT: for { @@ -74,7 +74,7 @@ OUT: } case <-timeoutCh: - t.Fatalf("got %d results on chan in 50ms, want %d", len(got), expectLen) + t.Fatalf("timeout while waiting for result: got %d results on chan, want %d", len(got), expectLen) } } diff --git a/agent/cache/watch_test.go b/agent/cache/watch_test.go index 771a783593..a0bc7be753 100644 --- a/agent/cache/watch_test.go +++ b/agent/cache/watch_test.go @@ -258,7 +258,7 @@ func TestCacheNotifyPolling(t *testing.T) { } require.Equal(events[0].Result, 42) - require.Equal(events[0].Meta.Hit, false) + require.Equal(events[0].Meta.Hit && events[1].Meta.Hit, false) require.Equal(events[0].Meta.Index, uint64(1)) require.True(events[0].Meta.Age < 50*time.Millisecond) require.NoError(events[0].Err) diff --git a/agent/cert-monitor/cert_monitor.go b/agent/cert-monitor/cert_monitor.go deleted file mode 100644 index 0ad50e8a11..0000000000 --- a/agent/cert-monitor/cert_monitor.go +++ /dev/null @@ -1,505 +0,0 @@ -package certmon - -import ( - "context" - "fmt" - "io/ioutil" - "sync" - "time" - - "github.com/hashicorp/consul/agent/cache" - cachetype "github.com/hashicorp/consul/agent/cache-types" - "github.com/hashicorp/consul/agent/connect" - 
"github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/agent/token" - "github.com/hashicorp/consul/tlsutil" - "github.com/hashicorp/go-hclog" -) - -const ( - // ID of the roots watch - rootsWatchID = "roots" - - // ID of the leaf watch - leafWatchID = "leaf" -) - -// Cache is an interface to represent the methods of the -// agent/cache.Cache struct that we care about -type Cache interface { - Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error - Prepopulate(t string, result cache.FetchResult, dc string, token string, key string) error -} - -// CertMonitor will setup the proper watches to ensure that -// the Agent's Connect TLS certificate remains up to date -type CertMonitor struct { - logger hclog.Logger - cache Cache - tlsConfigurator *tlsutil.Configurator - tokens *token.Store - leafReq cachetype.ConnectCALeafRequest - rootsReq structs.DCSpecificRequest - persist PersistFunc - fallback FallbackFunc - fallbackLeeway time.Duration - fallbackRetry time.Duration - - l sync.Mutex - running bool - // cancel is used to cancel the entire CertMonitor - // go routine. This is the main field protected - // by the mutex as it being non-nil indicates that - // the go routine has been started and is stoppable. - // note that it doesn't indcate that the go routine - // is currently running. - cancel context.CancelFunc - - // cancelWatches is used to cancel the existing - // cache watches. This is mainly only necessary - // when the Agent token changes - cancelWatches context.CancelFunc - - // cacheUpdates is the chan used to have the cache - // send us back events - cacheUpdates chan cache.UpdateEvent - // tokenUpdates is the struct used to receive - // events from the token store when the Agent - // token is updated. - tokenUpdates token.Notifier - - // this is used to keep a local copy of the certs - // keys and ca certs. It will be used to persist - // all of the local state at once. 
- certs structs.SignedResponse -} - -// New creates a new CertMonitor for automatically rotating -// an Agent's Connect Certificate -func New(config *Config) (*CertMonitor, error) { - logger := config.Logger - if logger == nil { - logger = hclog.New(&hclog.LoggerOptions{ - Level: 0, - Output: ioutil.Discard, - }) - } - - if config.FallbackLeeway == 0 { - config.FallbackLeeway = 10 * time.Second - } - if config.FallbackRetry == 0 { - config.FallbackRetry = time.Minute - } - - if config.Cache == nil { - return nil, fmt.Errorf("CertMonitor creation requires a Cache") - } - - if config.TLSConfigurator == nil { - return nil, fmt.Errorf("CertMonitor creation requires a TLS Configurator") - } - - if config.Fallback == nil { - return nil, fmt.Errorf("CertMonitor creation requires specifying a FallbackFunc") - } - - if config.Datacenter == "" { - return nil, fmt.Errorf("CertMonitor creation requires specifying the datacenter") - } - - if config.NodeName == "" { - return nil, fmt.Errorf("CertMonitor creation requires specifying the agent's node name") - } - - if config.Tokens == nil { - return nil, fmt.Errorf("CertMonitor creation requires specifying a token store") - } - - return &CertMonitor{ - logger: logger, - cache: config.Cache, - tokens: config.Tokens, - tlsConfigurator: config.TLSConfigurator, - persist: config.Persist, - fallback: config.Fallback, - fallbackLeeway: config.FallbackLeeway, - fallbackRetry: config.FallbackRetry, - rootsReq: structs.DCSpecificRequest{Datacenter: config.Datacenter}, - leafReq: cachetype.ConnectCALeafRequest{ - Datacenter: config.Datacenter, - Agent: config.NodeName, - DNSSAN: config.DNSSANs, - IPSAN: config.IPSANs, - }, - }, nil -} - -// Update is responsible for priming the cache with the certificates -// as well as injecting them into the TLS configurator -func (m *CertMonitor) Update(certs *structs.SignedResponse) error { - if certs == nil { - return nil - } - - m.certs = *certs - - if err := m.populateCache(certs); err != nil { - 
return fmt.Errorf("error populating cache with certificates: %w", err) - } - - connectCAPems := []string{} - for _, ca := range certs.ConnectCARoots.Roots { - connectCAPems = append(connectCAPems, ca.RootCert) - } - - // Note that its expected that the private key be within the IssuedCert in the - // SignedResponse. This isn't how a server would send back the response and requires - // that the recipient of the response who also has access to the private key will - // have filled it in. The Cache definitely does this but auto-encrypt/auto-config - // will need to ensure the original response is setup this way too. - err := m.tlsConfigurator.UpdateAutoTLS( - certs.ManualCARoots, - connectCAPems, - certs.IssuedCert.CertPEM, - certs.IssuedCert.PrivateKeyPEM, - certs.VerifyServerHostname) - - if err != nil { - return fmt.Errorf("error updating TLS configurator with certificates: %w", err) - } - - return nil -} - -// populateCache is responsible for inserting the certificates into the cache -func (m *CertMonitor) populateCache(resp *structs.SignedResponse) error { - cert, err := connect.ParseCert(resp.IssuedCert.CertPEM) - if err != nil { - return fmt.Errorf("Failed to parse certificate: %w", err) - } - - // prepolutate roots cache - rootRes := cache.FetchResult{Value: &resp.ConnectCARoots, Index: resp.ConnectCARoots.QueryMeta.Index} - // getting the roots doesn't require a token so in order to potentially share the cache with another - if err := m.cache.Prepopulate(cachetype.ConnectCARootName, rootRes, m.rootsReq.Datacenter, "", m.rootsReq.CacheInfo().Key); err != nil { - return err - } - - // copy the template and update the token - leafReq := m.leafReq - leafReq.Token = m.tokens.AgentToken() - - // prepolutate leaf cache - certRes := cache.FetchResult{ - Value: &resp.IssuedCert, - Index: resp.ConnectCARoots.QueryMeta.Index, - State: cachetype.ConnectCALeafSuccess(connect.EncodeSigningKeyID(cert.AuthorityKeyId)), - } - if err := 
m.cache.Prepopulate(cachetype.ConnectCALeafName, certRes, leafReq.Datacenter, leafReq.Token, leafReq.Key()); err != nil { - return err - } - return nil -} - -// Start spawns the go routine to monitor the certificate and ensure it is -// rotated/renewed as necessary. The chan will indicate once the started -// go routine has exited -func (m *CertMonitor) Start(ctx context.Context) (<-chan struct{}, error) { - m.l.Lock() - defer m.l.Unlock() - - if m.running || m.cancel != nil { - return nil, fmt.Errorf("the CertMonitor is already running") - } - - // create the top level context to control the go - // routine executing the `run` method - ctx, cancel := context.WithCancel(ctx) - - // create the channel to get cache update events through - // really we should only ever get 10 updates - m.cacheUpdates = make(chan cache.UpdateEvent, 10) - - // setup the cache watches - cancelWatches, err := m.setupCacheWatches(ctx) - if err != nil { - cancel() - return nil, fmt.Errorf("error setting up cache watches: %w", err) - } - - // start the token update notifier - m.tokenUpdates = m.tokens.Notify(token.TokenKindAgent) - - // store the cancel funcs - m.cancel = cancel - m.cancelWatches = cancelWatches - - m.running = true - exit := make(chan struct{}) - go m.run(ctx, exit) - - m.logger.Info("certificate monitor started") - return exit, nil -} - -// Stop manually stops the go routine spawned by Start and -// returns whether the go routine was still running before -// cancelling. -// -// Note that cancelling the context passed into Start will -// also cause the go routine to stop -func (m *CertMonitor) Stop() bool { - m.l.Lock() - defer m.l.Unlock() - - if !m.running { - return false - } - - if m.cancel != nil { - m.cancel() - } - - return true -} - -// IsRunning returns whether the go routine to perform certificate monitoring -// is already running. 
-func (m *CertMonitor) IsRunning() bool { - m.l.Lock() - defer m.l.Unlock() - return m.running -} - -// setupCacheWatches will start both the roots and leaf cert watch with a new child -// context and an up to date ACL token. The watches are started with a new child context -// whose CancelFunc is also returned. -func (m *CertMonitor) setupCacheWatches(ctx context.Context) (context.CancelFunc, error) { - notificationCtx, cancel := context.WithCancel(ctx) - - // copy the request - rootsReq := m.rootsReq - - err := m.cache.Notify(notificationCtx, cachetype.ConnectCARootName, &rootsReq, rootsWatchID, m.cacheUpdates) - if err != nil { - cancel() - return nil, err - } - - // copy the request - leafReq := m.leafReq - leafReq.Token = m.tokens.AgentToken() - - err = m.cache.Notify(notificationCtx, cachetype.ConnectCALeafName, &leafReq, leafWatchID, m.cacheUpdates) - if err != nil { - cancel() - return nil, err - } - - return cancel, nil -} - -// handleCacheEvent is used to handle event notifications from the cache for the roots -// or leaf cert watches. 
-func (m *CertMonitor) handleCacheEvent(u cache.UpdateEvent) error { - switch u.CorrelationID { - case rootsWatchID: - m.logger.Debug("roots watch fired - updating CA certificates") - if u.Err != nil { - return fmt.Errorf("root watch returned an error: %w", u.Err) - } - - roots, ok := u.Result.(*structs.IndexedCARoots) - if !ok { - return fmt.Errorf("invalid type for roots watch response: %T", u.Result) - } - - m.certs.ConnectCARoots = *roots - - var pems []string - for _, root := range roots.Roots { - pems = append(pems, root.RootCert) - } - - if err := m.tlsConfigurator.UpdateAutoTLSCA(pems); err != nil { - return fmt.Errorf("failed to update Connect CA certificates: %w", err) - } - - if m.persist != nil { - copy := m.certs - if err := m.persist(&copy); err != nil { - return fmt.Errorf("failed to persist certificate package: %w", err) - } - } - case leafWatchID: - m.logger.Debug("leaf certificate watch fired - updating TLS certificate") - if u.Err != nil { - return fmt.Errorf("leaf watch returned an error: %w", u.Err) - } - - leaf, ok := u.Result.(*structs.IssuedCert) - if !ok { - return fmt.Errorf("invalid type for agent leaf cert watch response: %T", u.Result) - } - - m.certs.IssuedCert = *leaf - - if err := m.tlsConfigurator.UpdateAutoTLSCert(leaf.CertPEM, leaf.PrivateKeyPEM); err != nil { - return fmt.Errorf("failed to update the agent leaf cert: %w", err) - } - - if m.persist != nil { - copy := m.certs - if err := m.persist(&copy); err != nil { - return fmt.Errorf("failed to persist certificate package: %w", err) - } - } - } - - return nil -} - -// handleTokenUpdate is used when a notification about the agent token being updated -// is received and various watches need cancelling/restarting to use the new token. -func (m *CertMonitor) handleTokenUpdate(ctx context.Context) error { - m.logger.Debug("Agent token updated - resetting watches") - - // TODO (autoencrypt) Prepopulate the cache with the new token with - // the existing cache entry with the old token. 
The certificate doesn't - // need to change just because the token has. However there isn't a - // good way to make that happen and this behavior is benign enough - // that I am going to push off implementing it. - - // the agent token has been updated so we must update our leaf cert watch. - // this cancels the current watches before setting up new ones - m.cancelWatches() - - // recreate the chan for cache updates. This is a precautionary measure to ensure - // that we don't accidentally get notified for the new watches being setup before - // a blocking query in the cache returns and sends data to the old chan. In theory - // the code in agent/cache/watch.go should prevent this where we specifically check - // for context cancellation prior to sending the event. However we could cancel - // it after that check and finish setting up the new watches before getting the old - // events. Both the go routine scheduler and the OS thread scheduler would have to - // be acting up for this to happen. Regardless the way to ensure we don't get events - // for the old watches is to simply replace the chan we are expecting them from. - close(m.cacheUpdates) - m.cacheUpdates = make(chan cache.UpdateEvent, 10) - - // restart watches - this will be done with the correct token - cancelWatches, err := m.setupCacheWatches(ctx) - if err != nil { - return fmt.Errorf("failed to restart watches after agent token update: %w", err) - } - m.cancelWatches = cancelWatches - return nil -} - -// handleFallback is used when the current TLS certificate has expired and the normal -// updating mechanisms have failed to renew it quickly enough. This function will -// use the configured fallback mechanism to retrieve a new cert and start monitoring -// that one. -func (m *CertMonitor) handleFallback(ctx context.Context) error { - m.logger.Warn("agent's client certificate has expired") - // Background because the context is mainly useful when the agent is first starting up. 
- reply, err := m.fallback(ctx) - if err != nil { - return fmt.Errorf("error when getting new agent certificate: %w", err) - } - - if m.persist != nil { - if err := m.persist(reply); err != nil { - return fmt.Errorf("failed to persist certificate package: %w", err) - } - } - return m.Update(reply) -} - -// run is the private method to be spawn by the Start method for -// executing the main monitoring loop. -func (m *CertMonitor) run(ctx context.Context, exit chan struct{}) { - // The fallbackTimer is used to notify AFTER the agents - // leaf certificate has expired and where we need - // to fall back to the less secure RPC endpoint just like - // if the agent was starting up new. - // - // Check 10sec (fallback leeway duration) after cert - // expires. The agent cache should be handling the expiration - // and renew it before then. - // - // If there is no cert, AutoEncryptCertNotAfter returns - // a value in the past which immediately triggers the - // renew, but this case shouldn't happen because at - // this point, auto_encrypt was just being setup - // successfully. - calcFallbackInterval := func() time.Duration { - certExpiry := m.tlsConfigurator.AutoEncryptCertNotAfter() - return certExpiry.Add(m.fallbackLeeway).Sub(time.Now()) - } - fallbackTimer := time.NewTimer(calcFallbackInterval()) - - // cleanup for once we are stopped - defer func() { - // cancel the go routines performing the cache watches - m.cancelWatches() - // ensure we don't leak the timers go routine - fallbackTimer.Stop() - // stop receiving notifications for token updates - m.tokens.StopNotify(m.tokenUpdates) - - m.logger.Debug("certificate monitor has been stopped") - - m.l.Lock() - m.cancel = nil - m.running = false - m.l.Unlock() - - // this should be the final cleanup task as its what notifies - // the rest of the world that this go routine has exited. 
- close(exit) - }() - - for { - select { - case <-ctx.Done(): - m.logger.Debug("stopping the certificate monitor") - return - case <-m.tokenUpdates.Ch: - m.logger.Debug("handling a token update event") - - if err := m.handleTokenUpdate(ctx); err != nil { - m.logger.Error("error in handling token update event", "error", err) - } - case u := <-m.cacheUpdates: - m.logger.Debug("handling a cache update event", "correlation_id", u.CorrelationID) - - if err := m.handleCacheEvent(u); err != nil { - m.logger.Error("error in handling cache update event", "error", err) - } - - // reset the fallback timer as the certificate may have been updated - fallbackTimer.Stop() - fallbackTimer = time.NewTimer(calcFallbackInterval()) - case <-fallbackTimer.C: - // This is a safety net in case the auto_encrypt cert doesn't get renewed - // in time. The agent would be stuck in that case because the watches - // never use the AutoEncrypt.Sign endpoint. - - // check auto encrypt client cert expiration - if m.tlsConfigurator.AutoEncryptCertExpired() { - if err := m.handleFallback(ctx); err != nil { - m.logger.Error("error when handling a certificate expiry event", "error", err) - fallbackTimer = time.NewTimer(m.fallbackRetry) - } else { - fallbackTimer = time.NewTimer(calcFallbackInterval()) - } - } else { - // this shouldn't be possible. We calculate the timer duration to be the certificate - // expiration time + some leeway (10s default). So whenever we get here the certificate - // should be expired. Regardless its probably worth resetting the timer. 
- fallbackTimer = time.NewTimer(calcFallbackInterval()) - } - } - } -} diff --git a/agent/cert-monitor/cert_monitor_test.go b/agent/cert-monitor/cert_monitor_test.go deleted file mode 100644 index 2b6ea76d86..0000000000 --- a/agent/cert-monitor/cert_monitor_test.go +++ /dev/null @@ -1,731 +0,0 @@ -package certmon - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "sync" - "testing" - "time" - - "github.com/hashicorp/consul/agent/cache" - cachetype "github.com/hashicorp/consul/agent/cache-types" - "github.com/hashicorp/consul/agent/connect" - "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/agent/token" - "github.com/hashicorp/consul/sdk/testutil" - "github.com/hashicorp/consul/sdk/testutil/retry" - "github.com/hashicorp/consul/tlsutil" - "github.com/hashicorp/go-uuid" - - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -type mockFallback struct { - mock.Mock -} - -func (m *mockFallback) fallback(ctx context.Context) (*structs.SignedResponse, error) { - ret := m.Called() - resp, _ := ret.Get(0).(*structs.SignedResponse) - return resp, ret.Error(1) -} - -type mockPersist struct { - mock.Mock -} - -func (m *mockPersist) persist(resp *structs.SignedResponse) error { - return m.Called(resp).Error(0) -} - -type mockWatcher struct { - ch chan<- cache.UpdateEvent - done <-chan struct{} -} - -type mockCache struct { - mock.Mock - - lock sync.Mutex - watchers map[string][]mockWatcher -} - -func (m *mockCache) Notify(ctx context.Context, t string, r cache.Request, correlationID string, ch chan<- cache.UpdateEvent) error { - m.lock.Lock() - key := r.CacheInfo().Key - m.watchers[key] = append(m.watchers[key], mockWatcher{ch: ch, done: ctx.Done()}) - m.lock.Unlock() - ret := m.Called(t, r, correlationID) - return ret.Error(0) -} - -func (m *mockCache) Prepopulate(t string, result cache.FetchResult, dc string, token string, key string) error { - ret := m.Called(t, result, dc, token, key) - return ret.Error(0) -} - 
-func (m *mockCache) sendNotification(ctx context.Context, key string, u cache.UpdateEvent) bool { - m.lock.Lock() - defer m.lock.Unlock() - - watchers, ok := m.watchers[key] - if !ok || len(m.watchers) < 1 { - return false - } - - var newWatchers []mockWatcher - - for _, watcher := range watchers { - select { - case watcher.ch <- u: - newWatchers = append(newWatchers, watcher) - case <-watcher.done: - // do nothing, this watcher will be removed from the list - case <-ctx.Done(): - // return doesn't matter here really, the test is being cancelled - return true - } - } - - // this removes any already cancelled watches from being sent to - m.watchers[key] = newWatchers - - return true -} - -func newMockCache(t *testing.T) *mockCache { - mcache := mockCache{watchers: make(map[string][]mockWatcher)} - mcache.Test(t) - return &mcache -} - -func waitForChan(timer *time.Timer, ch <-chan struct{}) bool { - select { - case <-timer.C: - return false - case <-ch: - return true - } -} - -func waitForChans(timeout time.Duration, chans ...<-chan struct{}) bool { - timer := time.NewTimer(timeout) - defer timer.Stop() - - for _, ch := range chans { - if !waitForChan(timer, ch) { - return false - } - } - return true -} - -func testTLSConfigurator(t *testing.T) *tlsutil.Configurator { - t.Helper() - logger := testutil.Logger(t) - cfg, err := tlsutil.NewConfigurator(tlsutil.Config{AutoTLS: true}, logger) - require.NoError(t, err) - return cfg -} - -func newLeaf(t *testing.T, ca *structs.CARoot, idx uint64, expiration time.Duration) *structs.IssuedCert { - t.Helper() - - pub, priv, err := connect.TestAgentLeaf(t, "node", "foo", ca, expiration) - require.NoError(t, err) - cert, err := connect.ParseCert(pub) - require.NoError(t, err) - - spiffeID, err := connect.ParseCertURI(cert.URIs[0]) - require.NoError(t, err) - - agentID, ok := spiffeID.(*connect.SpiffeIDAgent) - require.True(t, ok, "certificate doesn't have an agent leaf cert URI") - - return &structs.IssuedCert{ - SerialNumber: 
cert.SerialNumber.String(), - CertPEM: pub, - PrivateKeyPEM: priv, - ValidAfter: cert.NotBefore, - ValidBefore: cert.NotAfter, - Agent: agentID.Agent, - AgentURI: agentID.URI().String(), - EnterpriseMeta: *structs.DefaultEnterpriseMeta(), - RaftIndex: structs.RaftIndex{ - CreateIndex: idx, - ModifyIndex: idx, - }, - } -} - -type testCertMonitor struct { - monitor *CertMonitor - mcache *mockCache - tls *tlsutil.Configurator - tokens *token.Store - fallback *mockFallback - persist *mockPersist - - extraCACerts []string - initialCert *structs.IssuedCert - initialRoots *structs.IndexedCARoots - - // these are some variables that the CertMonitor was created with - datacenter string - nodeName string - dns []string - ips []net.IP - verifyServerHostname bool -} - -func newTestCertMonitor(t *testing.T) testCertMonitor { - t.Helper() - - tlsConfigurator := testTLSConfigurator(t) - tokens := new(token.Store) - - id, err := uuid.GenerateUUID() - require.NoError(t, err) - tokens.UpdateAgentToken(id, token.TokenSourceConfig) - - ca := connect.TestCA(t, nil) - manualCA := connect.TestCA(t, nil) - // this cert is setup to not expire quickly. this will prevent - // the test from accidentally running the fallback routine - // before we want to force that to happen. 
- issued := newLeaf(t, ca, 1, 10*time.Minute) - - indexedRoots := structs.IndexedCARoots{ - ActiveRootID: ca.ID, - TrustDomain: connect.TestClusterID, - Roots: []*structs.CARoot{ - ca, - }, - QueryMeta: structs.QueryMeta{ - Index: 1, - }, - } - - initialCerts := &structs.SignedResponse{ - ConnectCARoots: indexedRoots, - IssuedCert: *issued, - ManualCARoots: []string{manualCA.RootCert}, - VerifyServerHostname: true, - } - - dnsSANs := []string{"test.dev"} - ipSANs := []net.IP{net.IPv4(198, 18, 0, 1)} - - fallback := &mockFallback{} - fallback.Test(t) - persist := &mockPersist{} - persist.Test(t) - - mcache := newMockCache(t) - rootRes := cache.FetchResult{Value: &indexedRoots, Index: 1} - rootsReq := structs.DCSpecificRequest{Datacenter: "foo"} - mcache.On("Prepopulate", cachetype.ConnectCARootName, rootRes, "foo", "", rootsReq.CacheInfo().Key).Return(nil).Once() - - leafReq := cachetype.ConnectCALeafRequest{ - Token: tokens.AgentToken(), - Agent: "node", - Datacenter: "foo", - DNSSAN: dnsSANs, - IPSAN: ipSANs, - } - leafRes := cache.FetchResult{ - Value: issued, - Index: 1, - State: cachetype.ConnectCALeafSuccess(ca.SigningKeyID), - } - mcache.On("Prepopulate", cachetype.ConnectCALeafName, leafRes, "foo", tokens.AgentToken(), leafReq.Key()).Return(nil).Once() - - // we can assert more later but this should always be done. - defer mcache.AssertExpectations(t) - - cfg := new(Config). - WithCache(mcache). - WithLogger(testutil.Logger(t)). - WithTLSConfigurator(tlsConfigurator). - WithTokens(tokens). - WithFallback(fallback.fallback). - WithDNSSANs(dnsSANs). - WithIPSANs(ipSANs). - WithDatacenter("foo"). - WithNodeName("node"). - WithFallbackLeeway(time.Nanosecond). - WithFallbackRetry(time.Millisecond). 
- WithPersistence(persist.persist) - - monitor, err := New(cfg) - require.NoError(t, err) - require.NotNil(t, monitor) - - require.NoError(t, monitor.Update(initialCerts)) - - return testCertMonitor{ - monitor: monitor, - tls: tlsConfigurator, - tokens: tokens, - mcache: mcache, - persist: persist, - fallback: fallback, - extraCACerts: []string{manualCA.RootCert}, - initialCert: issued, - initialRoots: &indexedRoots, - datacenter: "foo", - nodeName: "node", - dns: dnsSANs, - ips: ipSANs, - verifyServerHostname: true, - } -} - -func tlsCertificateFromIssued(t *testing.T, issued *structs.IssuedCert) *tls.Certificate { - t.Helper() - - cert, err := tls.X509KeyPair([]byte(issued.CertPEM), []byte(issued.PrivateKeyPEM)) - require.NoError(t, err) - return &cert -} - -// convenience method to get a TLS Certificate from the intial issued certificate and priv key -func (cm *testCertMonitor) initialTLSCertificate(t *testing.T) *tls.Certificate { - t.Helper() - return tlsCertificateFromIssued(t, cm.initialCert) -} - -// just a convenience method to get a list of all the CA pems that we set up regardless -// of manual vs connect. 
-func (cm *testCertMonitor) initialCACerts() []string { - pems := cm.extraCACerts - for _, root := range cm.initialRoots.Roots { - pems = append(pems, root.RootCert) - } - return pems -} - -func (cm *testCertMonitor) assertExpectations(t *testing.T) { - cm.mcache.AssertExpectations(t) - cm.fallback.AssertExpectations(t) - cm.persist.AssertExpectations(t) -} - -func TestCertMonitor_InitialCerts(t *testing.T) { - // this also ensures that the cache was prepopulated properly - cm := newTestCertMonitor(t) - - // verify that the certificate was injected into the TLS configurator correctly - require.Equal(t, cm.initialTLSCertificate(t), cm.tls.Cert()) - // verify that the CA certs (both Connect and manual ones) were injected correctly - require.ElementsMatch(t, cm.initialCACerts(), cm.tls.CAPems()) - // verify that the auto-tls verify server hostname setting was injected correctly - require.Equal(t, cm.verifyServerHostname, cm.tls.VerifyServerHostname()) -} - -func TestCertMonitor_GoRoutineManagement(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - cm := newTestCertMonitor(t) - - // ensure that the monitor is not running - require.False(t, cm.monitor.IsRunning()) - - // ensure that nothing bad happens and that it reports as stopped - require.False(t, cm.monitor.Stop()) - - // we will never send notifications so these just ignore everything - cm.mcache.On("Notify", cachetype.ConnectCARootName, &structs.DCSpecificRequest{Datacenter: cm.datacenter}, rootsWatchID).Return(nil).Times(2) - cm.mcache.On("Notify", cachetype.ConnectCALeafName, - &cachetype.ConnectCALeafRequest{ - Token: cm.tokens.AgentToken(), - Datacenter: cm.datacenter, - Agent: cm.nodeName, - DNSSAN: cm.dns, - IPSAN: cm.ips, - }, - leafWatchID, - ).Return(nil).Times(2) - - done, err := cm.monitor.Start(ctx) - require.NoError(t, err) - require.True(t, cm.monitor.IsRunning()) - _, err = cm.monitor.Start(ctx) - testutil.RequireErrorContains(t, err, "the CertMonitor is 
already running") - require.True(t, cm.monitor.Stop()) - - require.True(t, waitForChans(100*time.Millisecond, done), "monitor didn't shut down") - require.False(t, cm.monitor.IsRunning()) - done, err = cm.monitor.Start(ctx) - require.NoError(t, err) - - // ensure that context cancellation causes us to stop as well - cancel() - require.True(t, waitForChans(100*time.Millisecond, done)) - - cm.assertExpectations(t) -} - -func startedCertMonitor(t *testing.T) (context.Context, testCertMonitor) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cm := newTestCertMonitor(t) - - rootsCtx, rootsCancel := context.WithCancel(ctx) - defer rootsCancel() - leafCtx, leafCancel := context.WithCancel(ctx) - defer leafCancel() - - // initial roots watch - cm.mcache.On("Notify", cachetype.ConnectCARootName, - &structs.DCSpecificRequest{ - Datacenter: cm.datacenter, - }, - rootsWatchID). - Return(nil). - Once(). - Run(func(_ mock.Arguments) { - rootsCancel() - }) - // the initial watch after starting the monitor - cm.mcache.On("Notify", cachetype.ConnectCALeafName, - &cachetype.ConnectCALeafRequest{ - Token: cm.tokens.AgentToken(), - Datacenter: cm.datacenter, - Agent: cm.nodeName, - DNSSAN: cm.dns, - IPSAN: cm.ips, - }, - leafWatchID). - Return(nil). - Once(). 
- Run(func(_ mock.Arguments) { - leafCancel() - }) - - done, err := cm.monitor.Start(ctx) - require.NoError(t, err) - // this prevents logs after the test finishes - t.Cleanup(func() { - cm.monitor.Stop() - <-done - }) - - require.True(t, - waitForChans(100*time.Millisecond, rootsCtx.Done(), leafCtx.Done()), - "not all watches were started within the alotted time") - - return ctx, cm -} - -// This test ensures that the cache watches are restarted with the updated -// token after receiving a token update -func TestCertMonitor_TokenUpdate(t *testing.T) { - ctx, cm := startedCertMonitor(t) - - rootsCtx, rootsCancel := context.WithCancel(ctx) - defer rootsCancel() - leafCtx, leafCancel := context.WithCancel(ctx) - defer leafCancel() - - newToken := "8e4fe8db-162d-42d8-81ca-710fb2280ad0" - - // we expect a new roots watch because when the leaf cert watch is restarted so is the root cert watch - cm.mcache.On("Notify", cachetype.ConnectCARootName, - &structs.DCSpecificRequest{ - Datacenter: cm.datacenter, - }, - rootsWatchID). - Return(nil). - Once(). - Run(func(_ mock.Arguments) { - rootsCancel() - }) - - secondWatch := &cachetype.ConnectCALeafRequest{ - Token: newToken, - Datacenter: cm.datacenter, - Agent: cm.nodeName, - DNSSAN: cm.dns, - IPSAN: cm.ips, - } - // the new watch after updating the token - cm.mcache.On("Notify", cachetype.ConnectCALeafName, secondWatch, leafWatchID). - Return(nil). - Once(). 
- Run(func(args mock.Arguments) { - leafCancel() - }) - - cm.tokens.UpdateAgentToken(newToken, token.TokenSourceAPI) - - require.True(t, - waitForChans(100*time.Millisecond, rootsCtx.Done(), leafCtx.Done()), - "not all watches were restarted within the alotted time") - - cm.assertExpectations(t) -} - -func TestCertMonitor_RootsUpdate(t *testing.T) { - ctx, cm := startedCertMonitor(t) - - secondCA := connect.TestCA(t, cm.initialRoots.Roots[0]) - secondRoots := structs.IndexedCARoots{ - ActiveRootID: secondCA.ID, - TrustDomain: connect.TestClusterID, - Roots: []*structs.CARoot{ - secondCA, - cm.initialRoots.Roots[0], - }, - QueryMeta: structs.QueryMeta{ - Index: 99, - }, - } - - cm.persist.On("persist", &structs.SignedResponse{ - IssuedCert: *cm.initialCert, - ManualCARoots: cm.extraCACerts, - ConnectCARoots: secondRoots, - VerifyServerHostname: cm.verifyServerHostname, - }).Return(nil).Once() - - // assert value of the CA certs prior to updating - require.ElementsMatch(t, cm.initialCACerts(), cm.tls.CAPems()) - - req := structs.DCSpecificRequest{Datacenter: cm.datacenter} - require.True(t, cm.mcache.sendNotification(ctx, req.CacheInfo().Key, cache.UpdateEvent{ - CorrelationID: rootsWatchID, - Result: &secondRoots, - Meta: cache.ResultMeta{ - Index: secondRoots.Index, - }, - })) - - expectedCAs := append(cm.extraCACerts, secondCA.RootCert, cm.initialRoots.Roots[0].RootCert) - - // this will wait up to 200ms (8 x 25 ms waits between the 9 requests) - retry.RunWith(&retry.Counter{Count: 9, Wait: 25 * time.Millisecond}, t, func(r *retry.R) { - require.ElementsMatch(r, expectedCAs, cm.tls.CAPems()) - }) - - cm.assertExpectations(t) -} - -func TestCertMonitor_CertUpdate(t *testing.T) { - ctx, cm := startedCertMonitor(t) - - secondCert := newLeaf(t, cm.initialRoots.Roots[0], 100, 10*time.Minute) - - cm.persist.On("persist", &structs.SignedResponse{ - IssuedCert: *secondCert, - ManualCARoots: cm.extraCACerts, - ConnectCARoots: *cm.initialRoots, - VerifyServerHostname: 
cm.verifyServerHostname, - }).Return(nil).Once() - - // assert value of cert prior to updating the leaf - require.Equal(t, cm.initialTLSCertificate(t), cm.tls.Cert()) - - key := cm.monitor.leafReq.CacheInfo().Key - - // send the new certificate - this notifies only the watchers utilizing - // the new ACL token - require.True(t, cm.mcache.sendNotification(ctx, key, cache.UpdateEvent{ - CorrelationID: leafWatchID, - Result: secondCert, - Meta: cache.ResultMeta{ - Index: secondCert.ModifyIndex, - }, - })) - - tlsCert := tlsCertificateFromIssued(t, secondCert) - - // this will wait up to 200ms (8 x 25 ms waits between the 9 requests) - retry.RunWith(&retry.Counter{Count: 9, Wait: 25 * time.Millisecond}, t, func(r *retry.R) { - require.Equal(r, tlsCert, cm.tls.Cert()) - }) - - cm.assertExpectations(t) -} - -func TestCertMonitor_Fallback(t *testing.T) { - ctx, cm := startedCertMonitor(t) - - // at this point everything is operating normally and the monitor is just - // waiting for events. We are going to send a new cert that is basically - // already expired and then allow the fallback routine to kick in. 
- secondCert := newLeaf(t, cm.initialRoots.Roots[0], 100, time.Nanosecond) - secondCA := connect.TestCA(t, cm.initialRoots.Roots[0]) - secondRoots := structs.IndexedCARoots{ - ActiveRootID: secondCA.ID, - TrustDomain: connect.TestClusterID, - Roots: []*structs.CARoot{ - secondCA, - cm.initialRoots.Roots[0], - }, - QueryMeta: structs.QueryMeta{ - Index: 101, - }, - } - thirdCert := newLeaf(t, secondCA, 102, 10*time.Minute) - - // inject a fallback routine error to check that we rerun it quickly - cm.fallback.On("fallback").Return(nil, fmt.Errorf("induced error")).Once() - - fallbackResp := &structs.SignedResponse{ - ConnectCARoots: secondRoots, - IssuedCert: *thirdCert, - ManualCARoots: cm.extraCACerts, - VerifyServerHostname: true, - } - // expect the fallback routine to be executed and setup the return - cm.fallback.On("fallback").Return(fallbackResp, nil).Once() - - cm.persist.On("persist", &structs.SignedResponse{ - IssuedCert: *secondCert, - ConnectCARoots: *cm.initialRoots, - ManualCARoots: cm.extraCACerts, - VerifyServerHostname: cm.verifyServerHostname, - }).Return(nil).Once() - - cm.persist.On("persist", fallbackResp).Return(nil).Once() - - // Add another roots cache prepopulation expectation which should happen - // in response to executing the fallback mechanism - rootRes := cache.FetchResult{Value: &secondRoots, Index: 101} - rootsReq := structs.DCSpecificRequest{Datacenter: cm.datacenter} - cm.mcache.On("Prepopulate", cachetype.ConnectCARootName, rootRes, cm.datacenter, "", rootsReq.CacheInfo().Key).Return(nil).Once() - - // add another leaf cert cache prepopulation expectation which should happen - // in response to executing the fallback mechanism - leafReq := cachetype.ConnectCALeafRequest{ - Token: cm.tokens.AgentToken(), - Agent: cm.nodeName, - Datacenter: cm.datacenter, - DNSSAN: cm.dns, - IPSAN: cm.ips, - } - leafRes := cache.FetchResult{ - Value: thirdCert, - Index: 101, - State: cachetype.ConnectCALeafSuccess(secondCA.SigningKeyID), - } - 
cm.mcache.On("Prepopulate", cachetype.ConnectCALeafName, leafRes, leafReq.Datacenter, leafReq.Token, leafReq.Key()).Return(nil).Once() - - // nothing in the monitor should be looking at this as its only done - // in response to sending token updates, no need to synchronize - key := cm.monitor.leafReq.CacheInfo().Key - // send the new certificate - this notifies only the watchers utilizing - // the new ACL token - require.True(t, cm.mcache.sendNotification(ctx, key, cache.UpdateEvent{ - CorrelationID: leafWatchID, - Result: secondCert, - Meta: cache.ResultMeta{ - Index: secondCert.ModifyIndex, - }, - })) - - // if all went well we would have updated the first certificate which was pretty much expired - // causing the fallback handler to be invoked almost immediately. The fallback routine will - // return the response containing the third cert and second CA roots so now we should wait - // a little while and ensure they were applied to the TLS Configurator - tlsCert := tlsCertificateFromIssued(t, thirdCert) - expectedCAs := append(cm.extraCACerts, secondCA.RootCert, cm.initialRoots.Roots[0].RootCert) - - // this will wait up to 200ms (8 x 25 ms waits between the 9 requests) - retry.RunWith(&retry.Counter{Count: 9, Wait: 25 * time.Millisecond}, t, func(r *retry.R) { - require.Equal(r, tlsCert, cm.tls.Cert()) - require.ElementsMatch(r, expectedCAs, cm.tls.CAPems()) - }) - - cm.assertExpectations(t) -} - -func TestCertMonitor_New_Errors(t *testing.T) { - type testCase struct { - cfg Config - err string - } - - fallback := func(_ context.Context) (*structs.SignedResponse, error) { - return nil, fmt.Errorf("Unimplemented") - } - - tokens := new(token.Store) - - cases := map[string]testCase{ - "no-cache": { - cfg: Config{ - TLSConfigurator: testTLSConfigurator(t), - Fallback: fallback, - Tokens: tokens, - Datacenter: "foo", - NodeName: "bar", - }, - err: "CertMonitor creation requires a Cache", - }, - "no-tls-configurator": { - cfg: Config{ - Cache: 
cache.New(cache.Options{}), - Fallback: fallback, - Tokens: tokens, - Datacenter: "foo", - NodeName: "bar", - }, - err: "CertMonitor creation requires a TLS Configurator", - }, - "no-fallback": { - cfg: Config{ - Cache: cache.New(cache.Options{}), - TLSConfigurator: testTLSConfigurator(t), - Tokens: tokens, - Datacenter: "foo", - NodeName: "bar", - }, - err: "CertMonitor creation requires specifying a FallbackFunc", - }, - "no-tokens": { - cfg: Config{ - Cache: cache.New(cache.Options{}), - TLSConfigurator: testTLSConfigurator(t), - Fallback: fallback, - Datacenter: "foo", - NodeName: "bar", - }, - err: "CertMonitor creation requires specifying a token store", - }, - "no-datacenter": { - cfg: Config{ - Cache: cache.New(cache.Options{}), - TLSConfigurator: testTLSConfigurator(t), - Fallback: fallback, - Tokens: tokens, - NodeName: "bar", - }, - err: "CertMonitor creation requires specifying the datacenter", - }, - "no-node-name": { - cfg: Config{ - Cache: cache.New(cache.Options{}), - TLSConfigurator: testTLSConfigurator(t), - Fallback: fallback, - Tokens: tokens, - Datacenter: "foo", - }, - err: "CertMonitor creation requires specifying the agent's node name", - }, - } - - for name, tcase := range cases { - t.Run(name, func(t *testing.T) { - monitor, err := New(&tcase.cfg) - testutil.RequireErrorContains(t, err, tcase.err) - require.Nil(t, monitor) - }) - } -} diff --git a/agent/cert-monitor/config.go b/agent/cert-monitor/config.go deleted file mode 100644 index 2e4bcc57ca..0000000000 --- a/agent/cert-monitor/config.go +++ /dev/null @@ -1,150 +0,0 @@ -package certmon - -import ( - "context" - "net" - "time" - - "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/agent/token" - "github.com/hashicorp/consul/tlsutil" - "github.com/hashicorp/go-hclog" -) - -// FallbackFunc is used when the normal cache watch based Certificate -// updating fails to update the Certificate in time and a different -// method of updating the certificate is required. 
-type FallbackFunc func(context.Context) (*structs.SignedResponse, error) - -// PersistFunc is used to persist the data from a signed response -type PersistFunc func(*structs.SignedResponse) error - -type Config struct { - // Logger is the logger to be used while running. If not set - // then no logging will be performed. - Logger hclog.Logger - - // TLSConfigurator is where the certificates and roots are set when - // they are updated. This field is required. - TLSConfigurator *tlsutil.Configurator - - // Cache is an object implementing our Cache interface. The Cache - // used at runtime must be able to handle Roots and Leaf Cert watches - Cache Cache - - // Tokens is the shared token store. It is used to retrieve the current - // agent token as well as getting notifications when that token is updated. - // This field is required. - Tokens *token.Store - - // Persist is a function to run when there are new certs or keys - Persist PersistFunc - - // Fallback is a function to run when the normal cache updating of the - // agent's certificates has failed to work for one reason or another. - // This field is required. - Fallback FallbackFunc - - // FallbackLeeway is the amount of time after certificate expiration before - // invoking the fallback routine. If not set this will default to 10s. - FallbackLeeway time.Duration - - // FallbackRetry is the duration between Fallback invocations when the configured - // fallback routine returns an error. If not set this will default to 1m. - FallbackRetry time.Duration - - // DNSSANs is a list of DNS SANs that certificate requests should include. This - // field is optional and no extra DNS SANs will be requested if unset. 'localhost' - // is unconditionally requested by the cache implementation. - DNSSANs []string - - // IPSANs is a list of IP SANs to include in the certificate signing request. This - // field is optional and no extra IP SANs will be requested if unset. 
Both '127.0.0.1' - // and '::1' IP SANs are unconditionally requested by the cache implementation. - IPSANs []net.IP - - // Datacenter is the datacenter to request certificates within. This filed is required - Datacenter string - - // NodeName is the agent's node name to use when requesting certificates. This field - // is required. - NodeName string -} - -// WithCache will cause the created CertMonitor type to use the provided Cache -func (cfg *Config) WithCache(cache Cache) *Config { - cfg.Cache = cache - return cfg -} - -// WithLogger will cause the created CertMonitor type to use the provided logger -func (cfg *Config) WithLogger(logger hclog.Logger) *Config { - cfg.Logger = logger - return cfg -} - -// WithTLSConfigurator will cause the created CertMonitor type to use the provided configurator -func (cfg *Config) WithTLSConfigurator(tlsConfigurator *tlsutil.Configurator) *Config { - cfg.TLSConfigurator = tlsConfigurator - return cfg -} - -// WithTokens will cause the created CertMonitor type to use the provided token store -func (cfg *Config) WithTokens(tokens *token.Store) *Config { - cfg.Tokens = tokens - return cfg -} - -// WithFallback configures a fallback function to use if the normal update mechanisms -// fail to renew the certificate in time. 
-func (cfg *Config) WithFallback(fallback FallbackFunc) *Config { - cfg.Fallback = fallback - return cfg -} - -// WithDNSSANs configures the CertMonitor to request these DNS SANs when requesting a new -// certificate -func (cfg *Config) WithDNSSANs(sans []string) *Config { - cfg.DNSSANs = sans - return cfg -} - -// WithIPSANs configures the CertMonitor to request these IP SANs when requesting a new -// certificate -func (cfg *Config) WithIPSANs(sans []net.IP) *Config { - cfg.IPSANs = sans - return cfg -} - -// WithDatacenter configures the CertMonitor to request Certificates in this DC -func (cfg *Config) WithDatacenter(dc string) *Config { - cfg.Datacenter = dc - return cfg -} - -// WithNodeName configures the CertMonitor to request Certificates with this agent name -func (cfg *Config) WithNodeName(name string) *Config { - cfg.NodeName = name - return cfg -} - -// WithFallbackLeeway configures how long after a certificate expires before attempting to -// generarte a new certificate using the fallback mechanism. The default is 10s. -func (cfg *Config) WithFallbackLeeway(leeway time.Duration) *Config { - cfg.FallbackLeeway = leeway - return cfg -} - -// WithFallbackRetry controls how quickly we will make subsequent invocations of -// the fallback func in the case of it erroring out. -func (cfg *Config) WithFallbackRetry(after time.Duration) *Config { - cfg.FallbackRetry = after - return cfg -} - -// WithPersistence will configure the CertMonitor to use this callback for persisting -// a new TLS configuration. 
-func (cfg *Config) WithPersistence(persist PersistFunc) *Config { - cfg.Persist = persist - return cfg -} diff --git a/agent/config/builder.go b/agent/config/builder.go index 040b39aee0..4b9aab1b7d 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -22,6 +22,7 @@ import ( "github.com/hashicorp/consul/agent/consul/authmethod/ssoauth" "github.com/hashicorp/consul/agent/dns" "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/ipaddr" "github.com/hashicorp/consul/lib" libtempl "github.com/hashicorp/consul/lib/template" @@ -799,6 +800,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { // ---------------------------------------------------------------- // build runtime config // + dataDir := b.stringVal(c.DataDir) rt = RuntimeConfig{ // non-user configurable values ACLDisabledTTL: b.durationVal("acl.disabled_ttl", c.ACL.DisabledTTL), @@ -837,21 +839,25 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { GossipWANRetransmitMult: b.intVal(c.GossipWAN.RetransmitMult), // ACL - ACLsEnabled: aclsEnabled, - ACLAgentMasterToken: b.stringValWithDefault(c.ACL.Tokens.AgentMaster, b.stringVal(c.ACLAgentMasterToken)), - ACLAgentToken: b.stringValWithDefault(c.ACL.Tokens.Agent, b.stringVal(c.ACLAgentToken)), - ACLDatacenter: primaryDatacenter, - ACLDefaultPolicy: b.stringValWithDefault(c.ACL.DefaultPolicy, b.stringVal(c.ACLDefaultPolicy)), - ACLDownPolicy: b.stringValWithDefault(c.ACL.DownPolicy, b.stringVal(c.ACLDownPolicy)), - ACLEnableKeyListPolicy: b.boolValWithDefault(c.ACL.EnableKeyListPolicy, b.boolVal(c.ACLEnableKeyListPolicy)), - ACLMasterToken: b.stringValWithDefault(c.ACL.Tokens.Master, b.stringVal(c.ACLMasterToken)), - ACLReplicationToken: b.stringValWithDefault(c.ACL.Tokens.Replication, b.stringVal(c.ACLReplicationToken)), - ACLTokenTTL: b.durationValWithDefault("acl.token_ttl", c.ACL.TokenTTL, b.durationVal("acl_ttl", c.ACLTTL)), - ACLPolicyTTL: 
b.durationVal("acl.policy_ttl", c.ACL.PolicyTTL), - ACLRoleTTL: b.durationVal("acl.role_ttl", c.ACL.RoleTTL), - ACLToken: b.stringValWithDefault(c.ACL.Tokens.Default, b.stringVal(c.ACLToken)), - ACLTokenReplication: b.boolValWithDefault(c.ACL.TokenReplication, b.boolValWithDefault(c.EnableACLReplication, enableTokenReplication)), - ACLEnableTokenPersistence: b.boolValWithDefault(c.ACL.EnableTokenPersistence, false), + ACLsEnabled: aclsEnabled, + ACLDatacenter: primaryDatacenter, + ACLDefaultPolicy: b.stringValWithDefault(c.ACL.DefaultPolicy, b.stringVal(c.ACLDefaultPolicy)), + ACLDownPolicy: b.stringValWithDefault(c.ACL.DownPolicy, b.stringVal(c.ACLDownPolicy)), + ACLEnableKeyListPolicy: b.boolValWithDefault(c.ACL.EnableKeyListPolicy, b.boolVal(c.ACLEnableKeyListPolicy)), + ACLMasterToken: b.stringValWithDefault(c.ACL.Tokens.Master, b.stringVal(c.ACLMasterToken)), + ACLTokenTTL: b.durationValWithDefault("acl.token_ttl", c.ACL.TokenTTL, b.durationVal("acl_ttl", c.ACLTTL)), + ACLPolicyTTL: b.durationVal("acl.policy_ttl", c.ACL.PolicyTTL), + ACLRoleTTL: b.durationVal("acl.role_ttl", c.ACL.RoleTTL), + ACLTokenReplication: b.boolValWithDefault(c.ACL.TokenReplication, b.boolValWithDefault(c.EnableACLReplication, enableTokenReplication)), + + ACLTokens: token.Config{ + DataDir: dataDir, + EnablePersistence: b.boolValWithDefault(c.ACL.EnableTokenPersistence, false), + ACLDefaultToken: b.stringValWithDefault(c.ACL.Tokens.Default, b.stringVal(c.ACLToken)), + ACLAgentToken: b.stringValWithDefault(c.ACL.Tokens.Agent, b.stringVal(c.ACLAgentToken)), + ACLAgentMasterToken: b.stringValWithDefault(c.ACL.Tokens.AgentMaster, b.stringVal(c.ACLAgentMasterToken)), + ACLReplicationToken: b.stringValWithDefault(c.ACL.Tokens.Replication, b.stringVal(c.ACLReplicationToken)), + }, // Autopilot AutopilotCleanupDeadServers: b.boolVal(c.Autopilot.CleanupDeadServers), @@ -957,7 +963,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { ConnectTestCALeafRootChangeSpread: 
b.durationVal("connect.test_ca_leaf_root_change_spread", c.Connect.TestCALeafRootChangeSpread), ExposeMinPort: exposeMinPort, ExposeMaxPort: exposeMaxPort, - DataDir: b.stringVal(c.DataDir), + DataDir: dataDir, Datacenter: datacenter, DefaultQueryTime: b.durationVal("default_query_time", c.DefaultQueryTime), DevMode: b.boolVal(b.devMode), @@ -1072,10 +1078,8 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { return RuntimeConfig{}, fmt.Errorf("cache.entry_fetch_rate must be strictly positive, was: %v", rt.Cache.EntryFetchRate) } - if entCfg, err := b.BuildEnterpriseRuntimeConfig(&c); err != nil { - return RuntimeConfig{}, err - } else { - rt.EnterpriseRuntimeConfig = entCfg + if err := b.BuildEnterpriseRuntimeConfig(&rt, &c); err != nil { + return rt, err } if rt.BootstrapExpect == 1 { @@ -1363,7 +1367,8 @@ func (b *Builder) Validate(rt RuntimeConfig) error { b.warn(err.Error()) } - return nil + err := b.validateEnterpriseConfig(rt) + return err } // addrUnique checks if the given address is already in use for another diff --git a/agent/config/builder_oss.go b/agent/config/builder_oss.go index b585cab504..85cf081375 100644 --- a/agent/config/builder_oss.go +++ b/agent/config/builder_oss.go @@ -51,8 +51,12 @@ func (e enterpriseConfigKeyError) Error() string { return fmt.Sprintf("%q is a Consul Enterprise configuration and will have no effect", e.key) } -func (_ *Builder) BuildEnterpriseRuntimeConfig(_ *Config) (EnterpriseRuntimeConfig, error) { - return EnterpriseRuntimeConfig{}, nil +func (*Builder) BuildEnterpriseRuntimeConfig(_ *RuntimeConfig, _ *Config) error { + return nil +} + +func (*Builder) validateEnterpriseConfig(_ RuntimeConfig) error { + return nil } // validateEnterpriseConfig is a function to validate the enterprise specific diff --git a/agent/config/runtime.go b/agent/config/runtime.go index 08ddfdb854..7577854224 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -9,6 +9,7 @@ import ( 
"github.com/hashicorp/consul/agent/cache" "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/logging" @@ -63,19 +64,7 @@ type RuntimeConfig struct { // hcl: acl.enabled = boolean ACLsEnabled bool - // ACLAgentMasterToken is a special token that has full read and write - // privileges for this agent, and can be used to call agent endpoints - // when no servers are available. - // - // hcl: acl.tokens.agent_master = string - ACLAgentMasterToken string - - // ACLAgentToken is the default token used to make requests for the agent - // itself, such as for registering itself with the catalog. If not - // configured, the 'acl_token' will be used. - // - // hcl: acl.tokens.agent = string - ACLAgentToken string + ACLTokens token.Config // ACLDatacenter is the central datacenter that holds authoritative // ACL records. This must be the same for the entire cluster. @@ -123,16 +112,6 @@ type RuntimeConfig struct { // hcl: acl.tokens.master = string ACLMasterToken string - // ACLReplicationToken is used to replicate data locally from the - // PrimaryDatacenter. Replication is only available on servers in - // datacenters other than the PrimaryDatacenter - // - // DEPRECATED (ACL-Legacy-Compat): Setting this to a non-empty value - // also enables legacy ACL replication if ACLs are enabled and in legacy mode. - // - // hcl: acl.tokens.replication = string - ACLReplicationToken string - // ACLtokenReplication is used to indicate that both tokens and policies // should be replicated instead of just policies // @@ -157,16 +136,6 @@ type RuntimeConfig struct { // hcl: acl.role_ttl = "duration" ACLRoleTTL time.Duration - // ACLToken is the default token used to make requests if a per-request - // token is not provided. If not configured the 'anonymous' token is used. 
- // - // hcl: acl.tokens.default = string - ACLToken string - - // ACLEnableTokenPersistence determines whether or not tokens set via the agent HTTP API - // should be persisted to disk and reloaded when an agent restarts. - ACLEnableTokenPersistence bool - // AutopilotCleanupDeadServers enables the automatic cleanup of dead servers when new ones // are added to the peer list. Defaults to true. // diff --git a/agent/config/runtime_oss_test.go b/agent/config/runtime_oss_test.go index 3871940c51..b6eee07e27 100644 --- a/agent/config/runtime_oss_test.go +++ b/agent/config/runtime_oss_test.go @@ -6,11 +6,9 @@ var entMetaJSON = `{}` var entRuntimeConfigSanitize = `{}` -var entFullDNSJSONConfig = `` +var entTokenConfigSanitize = `"EnterpriseConfig": {},` -var entFullDNSHCLConfig = `` - -var entFullRuntimeConfig = EnterpriseRuntimeConfig{} +func entFullRuntimeConfig(rt *RuntimeConfig) {} var enterpriseNonVotingServerWarnings []string = []string{enterpriseConfigKeyError{key: "non_voting_server"}.Error()} diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index adbc269e68..9b2da792ff 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -21,6 +21,7 @@ import ( "github.com/hashicorp/consul/agent/cache" "github.com/hashicorp/consul/agent/checks" "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/logging" "github.com/hashicorp/consul/sdk/testutil" @@ -1613,7 +1614,7 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { json: []string{`{ "acl_replication_token": "a" }`}, hcl: []string{`acl_replication_token = "a"`}, patch: func(rt *RuntimeConfig) { - rt.ACLReplicationToken = "a" + rt.ACLTokens.ACLReplicationToken = "a" rt.ACLTokenReplication = true rt.DataDir = dataDir }, @@ -3436,6 +3437,10 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { { "kind": "service-defaults", "name": "web", + 
"meta" : { + "foo": "bar", + "gir": "zim" + }, "protocol": "http", "external_sni": "abc-123", "mesh_gateway": { @@ -3450,6 +3455,10 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { bootstrap { kind = "service-defaults" name = "web" + meta { + "foo" = "bar" + "gir" = "zim" + } protocol = "http" external_sni = "abc-123" mesh_gateway { @@ -3461,8 +3470,12 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { rt.DataDir = dataDir rt.ConfigEntryBootstrap = []structs.ConfigEntry{ &structs.ServiceConfigEntry{ - Kind: structs.ServiceDefaults, - Name: "web", + Kind: structs.ServiceDefaults, + Name: "web", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, EnterpriseMeta: *defaultEntMeta, Protocol: "http", ExternalSNI: "abc-123", @@ -3482,6 +3495,10 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { { "Kind": "service-defaults", "Name": "web", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Protocol": "http", "ExternalSNI": "abc-123", "MeshGateway": { @@ -3496,6 +3513,10 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { bootstrap { Kind = "service-defaults" Name = "web" + Meta { + "foo" = "bar" + "gir" = "zim" + } Protocol = "http" ExternalSNI = "abc-123" MeshGateway { @@ -3507,8 +3528,12 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { rt.DataDir = dataDir rt.ConfigEntryBootstrap = []structs.ConfigEntry{ &structs.ServiceConfigEntry{ - Kind: structs.ServiceDefaults, - Name: "web", + Kind: structs.ServiceDefaults, + Name: "web", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, EnterpriseMeta: *defaultEntMeta, Protocol: "http", ExternalSNI: "abc-123", @@ -3528,6 +3553,10 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { { "kind": "service-router", "name": "main", + "meta" : { + "foo": "bar", + "gir": "zim" + }, "routes": [ { "match": { @@ -3612,6 +3641,10 @@ func 
TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { bootstrap { kind = "service-router" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } routes = [ { match { @@ -3693,8 +3726,12 @@ func TestBuilder_BuildAndValide_ConfigFlagsAndEdgecases(t *testing.T) { rt.DataDir = dataDir rt.ConfigEntryBootstrap = []structs.ConfigEntry{ &structs.ServiceRouterConfigEntry{ - Kind: structs.ServiceRouter, - Name: "main", + Kind: structs.ServiceRouter, + Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, EnterpriseMeta: *defaultEntMeta, Routes: []structs.ServiceRoute{ { @@ -4350,6 +4387,13 @@ func testConfig(t *testing.T, tests []configTest, dataDir string) { if tt.patch != nil { tt.patch(&expected) } + + // both DataDir fields should always be the same, so test for the + // invariant, and than updated the expected, so that every test + // case does not need to set this field. + require.Equal(t, actual.DataDir, actual.ACLTokens.DataDir) + expected.ACLTokens.DataDir = actual.ACLTokens.DataDir + require.Equal(t, expected, actual) }) } @@ -5843,20 +5887,24 @@ func TestFullConfig(t *testing.T) { // user configurable values - ACLAgentMasterToken: "64fd0e08", - ACLAgentToken: "bed2377c", + ACLTokens: token.Config{ + EnablePersistence: true, + DataDir: dataDir, + ACLDefaultToken: "418fdff1", + ACLAgentToken: "bed2377c", + ACLAgentMasterToken: "64fd0e08", + ACLReplicationToken: "5795983a", + }, + ACLsEnabled: true, ACLDatacenter: "ejtmd43d", ACLDefaultPolicy: "72c2e7a0", ACLDownPolicy: "03eb2aee", ACLEnableKeyListPolicy: true, - ACLEnableTokenPersistence: true, ACLMasterToken: "8a19ac27", - ACLReplicationToken: "5795983a", ACLTokenTTL: 3321 * time.Second, ACLPolicyTTL: 1123 * time.Second, ACLRoleTTL: 9876 * time.Second, - ACLToken: "418fdff1", ACLTokenReplication: true, AdvertiseAddrLAN: ipAddr("17.99.29.16"), AdvertiseAddrWAN: ipAddr("78.63.37.19"), @@ -6485,9 +6533,10 @@ func TestFullConfig(t *testing.T) { "args": []interface{}{"dltjDJ2a", 
"flEa7C2d"}, }, }, - EnterpriseRuntimeConfig: entFullRuntimeConfig, } + entFullRuntimeConfig(&want) + warns := []string{ `The 'acl_datacenter' field is deprecated. Use the 'primary_datacenter' field instead.`, `bootstrap_expect > 0: expecting 53 servers`, @@ -6804,21 +6853,25 @@ func TestSanitize(t *testing.T) { } rtJSON := `{ - "ACLAgentMasterToken": "hidden", - "ACLAgentToken": "hidden", + "ACLTokens": { + ` + entTokenConfigSanitize + ` + "ACLAgentMasterToken": "hidden", + "ACLAgentToken": "hidden", + "ACLDefaultToken": "hidden", + "ACLReplicationToken": "hidden", + "DataDir": "", + "EnablePersistence": false + }, "ACLDatacenter": "", "ACLDefaultPolicy": "", "ACLDisabledTTL": "0s", "ACLDownPolicy": "", "ACLEnableKeyListPolicy": false, - "ACLEnableTokenPersistence": false, "ACLMasterToken": "hidden", "ACLPolicyTTL": "0s", - "ACLReplicationToken": "hidden", "ACLRoleTTL": "0s", "ACLTokenReplication": false, "ACLTokenTTL": "0s", - "ACLToken": "hidden", "ACLsEnabled": false, "AEInterval": "0s", "AdvertiseAddrLAN": "", diff --git a/agent/consul/acl_test.go b/agent/consul/acl_test.go index 8e47e0032e..e2cc884e62 100644 --- a/agent/consul/acl_test.go +++ b/agent/consul/acl_test.go @@ -1639,8 +1639,8 @@ func TestACLResolver_Client(t *testing.T) { // effectively disable caching - so the only way we end up with 1 token read is if they were // being resolved concurrently config.Config.ACLTokenTTL = 0 * time.Second - config.Config.ACLPolicyTTL = 30 * time.Millisecond - config.Config.ACLRoleTTL = 30 * time.Millisecond + config.Config.ACLPolicyTTL = 30 * time.Second + config.Config.ACLRoleTTL = 30 * time.Second config.Config.ACLDownPolicy = "extend-cache" }) diff --git a/agent/consul/auto_encrypt.go b/agent/consul/auto_encrypt.go deleted file mode 100644 index 0684e7f713..0000000000 --- a/agent/consul/auto_encrypt.go +++ /dev/null @@ -1,239 +0,0 @@ -package consul - -import ( - "context" - "fmt" - "net" - "strings" - "time" - - "github.com/hashicorp/consul/agent/connect" - 
"github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/lib" - "github.com/hashicorp/go-hclog" - "github.com/miekg/dns" -) - -const ( - dummyTrustDomain = "dummy.trustdomain" - retryJitterWindow = 30 * time.Second -) - -func (c *Client) autoEncryptCSR(extraDNSSANs []string, extraIPSANs []net.IP) (string, string, error) { - // We don't provide the correct host here, because we don't know any - // better at this point. Apart from the domain, we would need the - // ClusterID, which we don't have. This is why we go with - // dummyTrustDomain the first time. Subsequent CSRs will have the - // correct TrustDomain. - id := &connect.SpiffeIDAgent{ - Host: dummyTrustDomain, - Datacenter: c.config.Datacenter, - Agent: c.config.NodeName, - } - - conf, err := c.config.CAConfig.GetCommonConfig() - if err != nil { - return "", "", err - } - - if conf.PrivateKeyType == "" { - conf.PrivateKeyType = connect.DefaultPrivateKeyType - } - if conf.PrivateKeyBits == 0 { - conf.PrivateKeyBits = connect.DefaultPrivateKeyBits - } - - // Create a new private key - pk, pkPEM, err := connect.GeneratePrivateKeyWithConfig(conf.PrivateKeyType, conf.PrivateKeyBits) - if err != nil { - return "", "", err - } - - dnsNames := append([]string{"localhost"}, extraDNSSANs...) - ipAddresses := append([]net.IP{net.ParseIP("127.0.0.1"), net.ParseIP("::1")}, extraIPSANs...) - - // Create a CSR. - // - // The Common Name includes the dummy trust domain for now but Server will - // override this when it is signed anyway so it's OK. 
- cn := connect.AgentCN(c.config.NodeName, dummyTrustDomain) - csr, err := connect.CreateCSR(id, cn, pk, dnsNames, ipAddresses) - if err != nil { - return "", "", err - } - - return pkPEM, csr, nil -} - -func (c *Client) RequestAutoEncryptCerts(ctx context.Context, servers []string, port int, token string, extraDNSSANs []string, extraIPSANs []net.IP) (*structs.SignedResponse, error) { - errFn := func(err error) (*structs.SignedResponse, error) { - return nil, err - } - - // Check if we know about a server already through gossip. Depending on - // how the agent joined, there might already be one. Also in case this - // gets called because the cert expired. - server := c.router.FindLANServer() - if server != nil { - servers = []string{server.Addr.String()} - } - - if len(servers) == 0 { - return errFn(fmt.Errorf("No servers to request AutoEncrypt.Sign")) - } - - pkPEM, csr, err := c.autoEncryptCSR(extraDNSSANs, extraIPSANs) - if err != nil { - return errFn(err) - } - - // Prepare request and response so that it can be passed to - // RPCInsecure. - args := structs.CASignRequest{ - WriteRequest: structs.WriteRequest{Token: token}, - Datacenter: c.config.Datacenter, - CSR: csr, - } - var reply structs.SignedResponse - - // Retry implementation modeled after https://github.com/hashicorp/consul/pull/5228. - // TLDR; there is a 30s window from which a random time is picked. - // Repeat until the call is successful. - attempts := 0 - for { - select { - case <-ctx.Done(): - return errFn(fmt.Errorf("aborting AutoEncrypt because interrupted: %w", ctx.Err())) - default: - } - - // Translate host to net.TCPAddr to make life easier for - // RPCInsecure. 
- for _, s := range servers { - ips, err := resolveAddr(s, c.logger) - if err != nil { - c.logger.Warn("AutoEncrypt resolveAddr failed", "error", err) - continue - } - - for _, ip := range ips { - addr := net.TCPAddr{IP: ip, Port: port} - - if err = c.connPool.RPC(c.config.Datacenter, c.config.NodeName, &addr, "AutoEncrypt.Sign", &args, &reply); err == nil { - reply.IssuedCert.PrivateKeyPEM = pkPEM - return &reply, nil - } else { - c.logger.Warn("AutoEncrypt failed", "error", err) - } - } - } - attempts++ - - delay := lib.RandomStagger(retryJitterWindow) - interval := (time.Duration(attempts) * delay) + delay - c.logger.Warn("retrying AutoEncrypt", "retry_interval", interval) - select { - case <-time.After(interval): - continue - case <-ctx.Done(): - return errFn(fmt.Errorf("aborting AutoEncrypt because interrupted: %w", ctx.Err())) - case <-c.shutdownCh: - return errFn(fmt.Errorf("aborting AutoEncrypt because shutting down")) - } - } -} - -func missingPortError(host string, err error) bool { - return err != nil && err.Error() == fmt.Sprintf("address %s: missing port in address", host) -} - -// resolveAddr is used to resolve the host into IPs and error. -func resolveAddr(rawHost string, logger hclog.Logger) ([]net.IP, error) { - host, _, err := net.SplitHostPort(rawHost) - if err != nil { - // In case we encounter this error, we proceed with the - // rawHost. This is fine since -start-join and -retry-join - // take only hosts anyways and this is an expected case. - if missingPortError(rawHost, err) { - host = rawHost - } else { - return nil, err - } - } - - if ip := net.ParseIP(host); ip != nil { - return []net.IP{ip}, nil - } - - // First try TCP so we have the best chance for the largest list of - // hosts to join. If this fails it's not fatal since this isn't a standard - // way to query DNS, and we have a fallback below. 
- if ips, err := tcpLookupIP(host, logger); err != nil { - logger.Debug("TCP-first lookup failed for host, falling back to UDP", "host", host, "error", err) - } else if len(ips) > 0 { - return ips, nil - } - - // If TCP didn't yield anything then use the normal Go resolver which - // will try UDP, then might possibly try TCP again if the UDP response - // indicates it was truncated. - ips, err := net.LookupIP(host) - if err != nil { - return nil, err - } - return ips, nil -} - -// tcpLookupIP is a helper to initiate a TCP-based DNS lookup for the given host. -// The built-in Go resolver will do a UDP lookup first, and will only use TCP if -// the response has the truncate bit set, which isn't common on DNS servers like -// Consul's. By doing the TCP lookup directly, we get the best chance for the -// largest list of hosts to join. Since joins are relatively rare events, it's ok -// to do this rather expensive operation. -func tcpLookupIP(host string, logger hclog.Logger) ([]net.IP, error) { - // Don't attempt any TCP lookups against non-fully qualified domain - // names, since those will likely come from the resolv.conf file. - if !strings.Contains(host, ".") { - return nil, nil - } - - // Make sure the domain name is terminated with a dot (we know there's - // at least one character at this point). - dn := host - if dn[len(dn)-1] != '.' { - dn = dn + "." - } - - // See if we can find a server to try. - cc, err := dns.ClientConfigFromFile("/etc/resolv.conf") - if err != nil { - return nil, err - } - if len(cc.Servers) > 0 { - // Do the lookup. - c := new(dns.Client) - c.Net = "tcp" - msg := new(dns.Msg) - msg.SetQuestion(dn, dns.TypeANY) - in, _, err := c.Exchange(msg, cc.Servers[0]) - if err != nil { - return nil, err - } - - // Handle any IPs we get back that we can attempt to join. 
- var ips []net.IP - for _, r := range in.Answer { - switch rr := r.(type) { - case (*dns.A): - ips = append(ips, rr.A) - case (*dns.AAAA): - ips = append(ips, rr.AAAA) - case (*dns.CNAME): - logger.Debug("Ignoring CNAME RR in TCP-first answer for host", "host", host) - } - } - return ips, nil - } - - return nil, nil -} diff --git a/agent/consul/auto_encrypt_test.go b/agent/consul/auto_encrypt_test.go deleted file mode 100644 index 8dd04e4166..0000000000 --- a/agent/consul/auto_encrypt_test.go +++ /dev/null @@ -1,205 +0,0 @@ -package consul - -import ( - "context" - "crypto/x509" - "crypto/x509/pkix" - "encoding/asn1" - "net" - "net/url" - "os" - "testing" - "time" - - "github.com/hashicorp/consul/agent/connect" - "github.com/hashicorp/consul/agent/structs" - "github.com/hashicorp/consul/sdk/testutil" - "github.com/hashicorp/go-hclog" - "github.com/stretchr/testify/require" -) - -func TestAutoEncrypt_resolveAddr(t *testing.T) { - type args struct { - rawHost string - logger hclog.Logger - } - logger := testutil.Logger(t) - - tests := []struct { - name string - args args - ips []net.IP - wantErr bool - }{ - { - name: "host without port", - args: args{ - "127.0.0.1", - logger, - }, - ips: []net.IP{net.IPv4(127, 0, 0, 1)}, - wantErr: false, - }, - { - name: "host with port", - args: args{ - "127.0.0.1:1234", - logger, - }, - ips: []net.IP{net.IPv4(127, 0, 0, 1)}, - wantErr: false, - }, - { - name: "host with broken port", - args: args{ - "127.0.0.1:xyz", - logger, - }, - ips: []net.IP{net.IPv4(127, 0, 0, 1)}, - wantErr: false, - }, - { - name: "not an address", - args: args{ - "abc", - logger, - }, - ips: nil, - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - ips, err := resolveAddr(tt.args.rawHost, tt.args.logger) - if (err != nil) != tt.wantErr { - t.Errorf("resolveAddr error: %v, wantErr: %v", err, tt.wantErr) - return - } - require.Equal(t, tt.ips, ips) - }) - } -} - -func TestAutoEncrypt_missingPortError(t *testing.T) 
{ - host := "127.0.0.1" - _, _, err := net.SplitHostPort(host) - require.True(t, missingPortError(host, err)) - - host = "127.0.0.1:1234" - _, _, err = net.SplitHostPort(host) - require.False(t, missingPortError(host, err)) -} - -func TestAutoEncrypt_RequestAutoEncryptCerts(t *testing.T) { - dir1, c1 := testClient(t) - defer os.RemoveAll(dir1) - defer c1.Shutdown() - servers := []string{"localhost"} - port := 8301 - token := "" - - ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(75*time.Millisecond)) - defer cancel() - - doneCh := make(chan struct{}) - var err error - go func() { - _, err = c1.RequestAutoEncryptCerts(ctx, servers, port, token, nil, nil) - close(doneCh) - }() - select { - case <-doneCh: - // since there are no servers at this port, we shouldn't be - // done and this should be an error of some sorts that happened - // in the setup phase before entering the for loop in - // RequestAutoEncryptCerts. - require.NoError(t, err) - case <-ctx.Done(): - // this is the happy case since auto encrypt is in its loop to - // try to request certs. 
- } -} - -func TestAutoEncrypt_autoEncryptCSR(t *testing.T) { - type testCase struct { - conf *Config - extraDNSSANs []string - extraIPSANs []net.IP - err string - - // to validate the csr - expectedSubject pkix.Name - expectedSigAlg x509.SignatureAlgorithm - expectedPubAlg x509.PublicKeyAlgorithm - expectedDNSNames []string - expectedIPs []net.IP - expectedURIs []*url.URL - } - - cases := map[string]testCase{ - "sans": { - conf: &Config{ - Datacenter: "dc1", - NodeName: "test-node", - CAConfig: &structs.CAConfiguration{}, - }, - extraDNSSANs: []string{"foo.local", "bar.local"}, - extraIPSANs: []net.IP{net.IPv4(198, 18, 0, 1), net.IPv4(198, 18, 0, 2)}, - expectedSubject: pkix.Name{ - CommonName: connect.AgentCN("test-node", dummyTrustDomain), - Names: []pkix.AttributeTypeAndValue{ - { - // 2,5,4,3 is the CommonName type ASN1 identifier - Type: asn1.ObjectIdentifier{2, 5, 4, 3}, - Value: "testnode.agnt.dummy.tr.consul", - }, - }, - }, - expectedSigAlg: x509.ECDSAWithSHA256, - expectedPubAlg: x509.ECDSA, - expectedDNSNames: []string{ - "localhost", - "foo.local", - "bar.local", - }, - expectedIPs: []net.IP{ - {127, 0, 0, 1}, - net.ParseIP("::1"), - {198, 18, 0, 1}, - {198, 18, 0, 2}, - }, - expectedURIs: []*url.URL{ - { - Scheme: "spiffe", - Host: dummyTrustDomain, - Path: "/agent/client/dc/dc1/id/test-node", - }, - }, - }, - } - - for name, tcase := range cases { - t.Run(name, func(t *testing.T) { - client := Client{config: tcase.conf} - - _, csr, err := client.autoEncryptCSR(tcase.extraDNSSANs, tcase.extraIPSANs) - if tcase.err == "" { - require.NoError(t, err) - - request, err := connect.ParseCSR(csr) - require.NoError(t, err) - require.NotNil(t, request) - - require.Equal(t, tcase.expectedSubject, request.Subject) - require.Equal(t, tcase.expectedSigAlg, request.SignatureAlgorithm) - require.Equal(t, tcase.expectedPubAlg, request.PublicKeyAlgorithm) - require.Equal(t, tcase.expectedDNSNames, request.DNSNames) - require.Equal(t, tcase.expectedIPs, 
request.IPAddresses) - require.Equal(t, tcase.expectedURIs, request.URIs) - } else { - require.Error(t, err) - require.Empty(t, csr) - } - }) - } -} diff --git a/agent/consul/config.go b/agent/consul/config.go index a48effe441..4316475651 100644 --- a/agent/consul/config.go +++ b/agent/consul/config.go @@ -443,6 +443,10 @@ type Config struct { // dead servers. AutopilotInterval time.Duration + // MetricsReportingInterval is the frequency with which the server will + // report usage metrics to the configured go-metrics Sinks. + MetricsReportingInterval time.Duration + // ConnectEnabled is whether to enable Connect features such as the CA. ConnectEnabled bool @@ -589,11 +593,16 @@ func DefaultConfig() *Config { }, }, - ServerHealthInterval: 2 * time.Second, - AutopilotInterval: 10 * time.Second, - DefaultQueryTime: 300 * time.Second, - MaxQueryTime: 600 * time.Second, - EnterpriseConfig: DefaultEnterpriseConfig(), + // Stay under the 10 second aggregation interval of + // go-metrics. This ensures we always report the + // usage metrics in each cycle. + MetricsReportingInterval: 9 * time.Second, + ServerHealthInterval: 2 * time.Second, + AutopilotInterval: 10 * time.Second, + DefaultQueryTime: 300 * time.Second, + MaxQueryTime: 600 * time.Second, + + EnterpriseConfig: DefaultEnterpriseConfig(), } // Increase our reap interval to 3 days instead of 24h. 
diff --git a/agent/consul/fsm/snapshot_oss_test.go b/agent/consul/fsm/snapshot_oss_test.go index e845c41c91..f798a0efaa 100644 --- a/agent/consul/fsm/snapshot_oss_test.go +++ b/agent/consul/fsm/snapshot_oss_test.go @@ -654,6 +654,12 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) { require.NoError(t, err) require.Equal(t, fedState2, fedStateLoaded2) + // Verify usage data is correctly updated + idx, nodeCount, err := fsm2.state.NodeCount() + require.NoError(t, err) + require.Equal(t, len(nodes), nodeCount) + require.NotZero(t, idx) + // Snapshot snap, err = fsm2.Snapshot() require.NoError(t, err) diff --git a/agent/consul/leader_connect.go b/agent/consul/leader_connect.go index fcad53d65b..3a7a1ce970 100644 --- a/agent/consul/leader_connect.go +++ b/agent/consul/leader_connect.go @@ -653,7 +653,7 @@ func (s *Server) secondaryIntermediateCertRenewalWatch(ctx context.Context) erro case <-ctx.Done(): return nil case <-time.After(structs.IntermediateCertRenewInterval): - retryLoopBackoff(ctx, func() error { + retryLoopBackoffAbortOnSuccess(ctx, func() error { s.caProviderReconfigurationLock.Lock() defer s.caProviderReconfigurationLock.Unlock() @@ -835,6 +835,14 @@ func (s *Server) replicateIntentions(ctx context.Context) error { // retryLoopBackoff loops a given function indefinitely, backing off exponentially // upon errors up to a maximum of maxRetryBackoff seconds. 
func retryLoopBackoff(ctx context.Context, loopFn func() error, errFn func(error)) { + retryLoopBackoffHandleSuccess(ctx, loopFn, errFn, false) +} + +func retryLoopBackoffAbortOnSuccess(ctx context.Context, loopFn func() error, errFn func(error)) { + retryLoopBackoffHandleSuccess(ctx, loopFn, errFn, true) +} + +func retryLoopBackoffHandleSuccess(ctx context.Context, loopFn func() error, errFn func(error), abortOnSuccess bool) { var failedAttempts uint limiter := rate.NewLimiter(loopRateLimit, retryBucketSize) for { @@ -861,6 +869,8 @@ func retryLoopBackoff(ctx context.Context, loopFn func() error, errFn func(error case <-timer.C: continue } + } else if abortOnSuccess { + return } // Reset the failed attempts after a successful run. diff --git a/agent/consul/leader_connect_test.go b/agent/consul/leader_connect_test.go index 0f8edc470d..c4852d57a7 100644 --- a/agent/consul/leader_connect_test.go +++ b/agent/consul/leader_connect_test.go @@ -1,6 +1,7 @@ package consul import ( + "context" "crypto/x509" "fmt" "io/ioutil" @@ -1442,3 +1443,43 @@ func TestLeader_lessThanHalfTimePassed(t *testing.T) { require.True(t, lessThanHalfTimePassed(now, now.Add(-10*time.Second), now.Add(20*time.Second))) } + +func TestLeader_retryLoopBackoffHandleSuccess(t *testing.T) { + type test struct { + desc string + loopFn func() error + abort bool + timedOut bool + } + success := func() error { + return nil + } + failure := func() error { + return fmt.Errorf("test error") + } + tests := []test{ + {"loop without error and no abortOnSuccess keeps running", success, false, true}, + {"loop with error and no abortOnSuccess keeps running", failure, false, true}, + {"loop without error and abortOnSuccess is stopped", success, true, false}, + {"loop with error and abortOnSuccess keeps running", failure, true, true}, + } + for _, tc := range tests { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + 
retryLoopBackoffHandleSuccess(ctx, tc.loopFn, func(_ error) {}, tc.abort) + select { + case <-ctx.Done(): + if !tc.timedOut { + t.Fatal("should not have timed out") + } + default: + if tc.timedOut { + t.Fatal("should have timed out") + } + } + }) + } +} diff --git a/agent/consul/server.go b/agent/consul/server.go index 04d3b61bd3..c1c1a6d76e 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -25,6 +25,7 @@ import ( "github.com/hashicorp/consul/agent/consul/autopilot" "github.com/hashicorp/consul/agent/consul/fsm" "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/consul/usagemetrics" "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/pool" "github.com/hashicorp/consul/agent/router" @@ -589,6 +590,19 @@ func NewServer(config *Config, options ...ConsulOption) (*Server, error) { return nil, err } + reporter, err := usagemetrics.NewUsageMetricsReporter( + new(usagemetrics.Config). + WithStateProvider(s.fsm). + WithLogger(s.logger). + WithDatacenter(s.config.Datacenter). + WithReportingInterval(s.config.MetricsReportingInterval), + ) + if err != nil { + s.Shutdown() + return nil, fmt.Errorf("Failed to start usage metrics reporter: %v", err) + } + go reporter.Run(&lib.StopChannelContext{StopCh: s.shutdownCh}) + // Initialize Autopilot. This must happen before starting leadership monitoring // as establishing leadership could attempt to use autopilot and cause a panic. 
s.initAutopilot(config) diff --git a/agent/consul/state/catalog_events.go b/agent/consul/state/catalog_events.go new file mode 100644 index 0000000000..b42d47fc64 --- /dev/null +++ b/agent/consul/state/catalog_events.go @@ -0,0 +1,475 @@ +package state + +import ( + "github.com/hashicorp/consul/agent/consul/stream" + "github.com/hashicorp/consul/agent/structs" + memdb "github.com/hashicorp/go-memdb" +) + +type changeOp int + +const ( + OpDelete changeOp = iota + OpCreate + OpUpdate +) + +type eventPayload struct { + Op changeOp + Obj interface{} +} + +// serviceHealthSnapshot returns a stream.SnapshotFunc that provides a snapshot +// of stream.Events that describe the current state of a service health query. +// +// TODO: no tests for this yet +func serviceHealthSnapshot(s *Store, topic topic) stream.SnapshotFunc { + return func(req stream.SubscribeRequest, buf stream.SnapshotAppender) (index uint64, err error) { + tx := s.db.Txn(false) + defer tx.Abort() + + connect := topic == TopicServiceHealthConnect + // TODO(namespace-streaming): plumb entMeta through from SubscribeRequest + idx, nodes, err := checkServiceNodesTxn(tx, nil, req.Key, connect, nil) + if err != nil { + return 0, err + } + + for _, n := range nodes { + event := stream.Event{ + Index: idx, + Topic: topic, + Payload: eventPayload{ + Op: OpCreate, + Obj: &n, + }, + } + + if n.Service != nil { + event.Key = n.Service.Service + } + + // append each event as a separate item so that they can be serialized + // separately, to prevent the encoding of one massive message. 
+ buf.Append([]stream.Event{event}) + } + + return idx, err + } +} + +type nodeServiceTuple struct { + Node string + ServiceID string + EntMeta structs.EnterpriseMeta +} + +func newNodeServiceTupleFromServiceNode(sn *structs.ServiceNode) nodeServiceTuple { + return nodeServiceTuple{ + Node: sn.Node, + ServiceID: sn.ServiceID, + EntMeta: sn.EnterpriseMeta, + } +} + +func newNodeServiceTupleFromServiceHealthCheck(hc *structs.HealthCheck) nodeServiceTuple { + return nodeServiceTuple{ + Node: hc.Node, + ServiceID: hc.ServiceID, + EntMeta: hc.EnterpriseMeta, + } +} + +type serviceChange struct { + changeType changeType + change memdb.Change +} + +var serviceChangeIndirect = serviceChange{changeType: changeIndirect} + +// ServiceHealthEventsFromChanges returns all the service and Connect health +// events that should be emitted given a set of changes to the state store. +func ServiceHealthEventsFromChanges(tx ReadTxn, changes Changes) ([]stream.Event, error) { + var events []stream.Event + + var nodeChanges map[string]changeType + var serviceChanges map[nodeServiceTuple]serviceChange + + markNode := func(node string, typ changeType) { + if nodeChanges == nil { + nodeChanges = make(map[string]changeType) + } + // If the caller has an actual node mutation ensure we store it even if the + // node is already marked. If the caller is just marking the node dirty + // without a node change, don't overwrite any existing node change we know + // about. + if nodeChanges[node] == changeIndirect { + nodeChanges[node] = typ + } + } + markService := func(key nodeServiceTuple, svcChange serviceChange) { + if serviceChanges == nil { + serviceChanges = make(map[nodeServiceTuple]serviceChange) + } + // If the caller has an actual service mutation ensure we store it even if + // the service is already marked. If the caller is just marking the service + // dirty without a service change, don't overwrite any existing service change we + // know about. 
+ if serviceChanges[key].changeType == changeIndirect { + serviceChanges[key] = svcChange + } + } + + for _, change := range changes.Changes { + switch change.Table { + case "nodes": + // Node changed in some way, if it's not a delete, we'll need to + // re-deliver CheckServiceNode results for all services on that node but + // we mark it anyway because if it _is_ a delete then we need to know that + // later to avoid trying to deliver events when node level checks mark the + // node as "changed". + n := changeObject(change).(*structs.Node) + markNode(n.Node, changeTypeFromChange(change)) + + case "services": + sn := changeObject(change).(*structs.ServiceNode) + srvChange := serviceChange{changeType: changeTypeFromChange(change), change: change} + markService(newNodeServiceTupleFromServiceNode(sn), srvChange) + + case "checks": + // For health we only care about the scope for now to know if it's just + // affecting a single service or every service on a node. There is a + // subtle edge case where the check with same ID changes from being node + // scoped to service scoped or vice versa, in either case we need to treat + // it as affecting all services on the node. + switch { + case change.Updated(): + before := change.Before.(*structs.HealthCheck) + after := change.After.(*structs.HealthCheck) + if after.ServiceID == "" || before.ServiceID == "" { + // check before and/or after is node-scoped + markNode(after.Node, changeIndirect) + } else { + // Check changed which means we just need to emit for the linked + // service. + markService(newNodeServiceTupleFromServiceHealthCheck(after), serviceChangeIndirect) + + // Edge case - if the check with same ID was updated to link to a + // different service ID but the old service with old ID still exists, + // then the old service instance needs updating too as it has one + // fewer checks now. 
+ if before.ServiceID != after.ServiceID { + markService(newNodeServiceTupleFromServiceHealthCheck(before), serviceChangeIndirect) + } + } + + case change.Deleted(), change.Created(): + obj := changeObject(change).(*structs.HealthCheck) + if obj.ServiceID == "" { + // Node level check + markNode(obj.Node, changeIndirect) + } else { + markService(newNodeServiceTupleFromServiceHealthCheck(obj), serviceChangeIndirect) + } + } + } + } + + // Now act on those marked nodes/services + for node, changeType := range nodeChanges { + if changeType == changeDelete { + // Node deletions are a no-op here since the state store transaction will + // have also removed all the service instances which will be handled in + // the loop below. + continue + } + // Rebuild events for all services on this node + es, err := newServiceHealthEventsForNode(tx, changes.Index, node) + if err != nil { + return nil, err + } + events = append(events, es...) + } + + for tuple, srvChange := range serviceChanges { + // change may be nil if there was a change that _affected_ the service + // like a change to checks but it didn't actually change the service + // record itself. + if srvChange.changeType == changeDelete { + sn := srvChange.change.Before.(*structs.ServiceNode) + e := newServiceHealthEventDeregister(changes.Index, sn) + events = append(events, e) + continue + } + + // Check if this was a service mutation that changed it's name which + // requires special handling even if node changed and new events were + // already published. + if srvChange.changeType == changeUpdate { + before := srvChange.change.Before.(*structs.ServiceNode) + after := srvChange.change.After.(*structs.ServiceNode) + + if before.ServiceName != after.ServiceName { + // Service was renamed, the code below will ensure the new registrations + // go out to subscribers to the new service name topic key, but we need + // to fix up subscribers that were watching the old name by sending + // deregistrations. 
+ e := newServiceHealthEventDeregister(changes.Index, before) + events = append(events, e) + } + + if e, ok := isConnectProxyDestinationServiceChange(changes.Index, before, after); ok { + events = append(events, e) + } + } + + if _, ok := nodeChanges[tuple.Node]; ok { + // We already rebuilt events for everything on this node, no need to send + // a duplicate. + continue + } + // Build service event and append it + e, err := newServiceHealthEventForService(tx, changes.Index, tuple) + if err != nil { + return nil, err + } + events = append(events, e) + } + + // Duplicate any events that affected connect-enabled instances (proxies or + // native apps) to the relevant Connect topic. + events = append(events, serviceHealthToConnectEvents(events...)...) + + return events, nil +} + +// isConnectProxyDestinationServiceChange handles the case where a Connect proxy changed +// the service it is proxying. We need to issue a de-registration for the old +// service on the Connect topic. We don't actually need to deregister this sidecar +// service though as it still exists and didn't change its name. +func isConnectProxyDestinationServiceChange(idx uint64, before, after *structs.ServiceNode) (stream.Event, bool) { + if before.ServiceKind != structs.ServiceKindConnectProxy || + before.ServiceProxy.DestinationServiceName == after.ServiceProxy.DestinationServiceName { + return stream.Event{}, false + } + + e := newServiceHealthEventDeregister(idx, before) + e.Topic = TopicServiceHealthConnect + e.Key = getPayloadCheckServiceNode(e.Payload).Service.Proxy.DestinationServiceName + return e, true +} + +type changeType uint8 + +const ( + // changeIndirect indicates some other object changed which has implications + // for the target object. 
+ changeIndirect changeType = iota + changeDelete + changeCreate + changeUpdate +) + +func changeTypeFromChange(change memdb.Change) changeType { + switch { + case change.Deleted(): + return changeDelete + case change.Created(): + return changeCreate + default: + return changeUpdate + } +} + +// serviceHealthToConnectEvents converts already formatted service health +// registration events into the ones needed to publish to the Connect topic. +// This essentially means filtering out any instances that are not Connect +// enabled and so of no interest to those subscribers but also involves +// switching connection details to be the proxy instead of the actual instance +// in case of a sidecar. +func serviceHealthToConnectEvents(events ...stream.Event) []stream.Event { + var result []stream.Event + for _, event := range events { + if event.Topic != TopicServiceHealth { + // Skip non-health or any events already emitted to Connect topic + continue + } + node := getPayloadCheckServiceNode(event.Payload) + if node.Service == nil { + continue + } + + connectEvent := event + connectEvent.Topic = TopicServiceHealthConnect + + switch { + case node.Service.Connect.Native: + result = append(result, connectEvent) + + case node.Service.Kind == structs.ServiceKindConnectProxy: + connectEvent.Key = node.Service.Proxy.DestinationServiceName + result = append(result, connectEvent) + + default: + // ServiceKindTerminatingGateway changes are handled separately. + // All other cases are not relevant to the connect topic + } + } + + return result +} + +func getPayloadCheckServiceNode(payload interface{}) *structs.CheckServiceNode { + ep, ok := payload.(eventPayload) + if !ok { + return nil + } + csn, ok := ep.Obj.(*structs.CheckServiceNode) + if !ok { + return nil + } + return csn +} + +// newServiceHealthEventsForNode returns health events for all services on the +// given node. 
This mirrors some of the the logic in the oddly-named +// parseCheckServiceNodes but is more efficient since we know they are all on +// the same node. +func newServiceHealthEventsForNode(tx ReadTxn, idx uint64, node string) ([]stream.Event, error) { + // TODO(namespace-streaming): figure out the right EntMeta and mystery arg. + services, err := catalogServiceListByNode(tx, node, nil, false) + if err != nil { + return nil, err + } + + n, checksFunc, err := getNodeAndChecks(tx, node) + if err != nil { + return nil, err + } + + var events []stream.Event + for service := services.Next(); service != nil; service = services.Next() { + sn := service.(*structs.ServiceNode) + + event := newServiceHealthEventRegister(idx, n, sn, checksFunc(sn.ServiceID)) + events = append(events, event) + } + + return events, nil +} + +// getNodeAndNodeChecks returns a the node structure and a function that returns +// the full list of checks for a specific service on that node. +func getNodeAndChecks(tx ReadTxn, node string) (*structs.Node, serviceChecksFunc, error) { + // Fetch the node + nodeRaw, err := tx.First("nodes", "id", node) + if err != nil { + return nil, nil, err + } + if nodeRaw == nil { + return nil, nil, ErrMissingNode + } + n := nodeRaw.(*structs.Node) + + // TODO(namespace-streaming): work out what EntMeta is needed here, wildcard? 
+ iter, err := catalogListChecksByNode(tx, node, nil) + if err != nil { + return nil, nil, err + } + + var nodeChecks structs.HealthChecks + var svcChecks map[string]structs.HealthChecks + + for check := iter.Next(); check != nil; check = iter.Next() { + check := check.(*structs.HealthCheck) + if check.ServiceID == "" { + nodeChecks = append(nodeChecks, check) + } else { + if svcChecks == nil { + svcChecks = make(map[string]structs.HealthChecks) + } + svcChecks[check.ServiceID] = append(svcChecks[check.ServiceID], check) + } + } + serviceChecks := func(serviceID string) structs.HealthChecks { + // Create a new slice so that append does not modify the array backing nodeChecks. + result := make(structs.HealthChecks, 0, len(nodeChecks)) + result = append(result, nodeChecks...) + for _, check := range svcChecks[serviceID] { + result = append(result, check) + } + return result + } + return n, serviceChecks, nil +} + +type serviceChecksFunc func(serviceID string) structs.HealthChecks + +func newServiceHealthEventForService(tx ReadTxn, idx uint64, tuple nodeServiceTuple) (stream.Event, error) { + n, checksFunc, err := getNodeAndChecks(tx, tuple.Node) + if err != nil { + return stream.Event{}, err + } + + svc, err := getCompoundWithTxn(tx, "services", "id", &tuple.EntMeta, tuple.Node, tuple.ServiceID) + if err != nil { + return stream.Event{}, err + } + + raw := svc.Next() + if raw == nil { + return stream.Event{}, ErrMissingService + } + + sn := raw.(*structs.ServiceNode) + return newServiceHealthEventRegister(idx, n, sn, checksFunc(sn.ServiceID)), nil +} + +func newServiceHealthEventRegister( + idx uint64, + node *structs.Node, + sn *structs.ServiceNode, + checks structs.HealthChecks, +) stream.Event { + csn := &structs.CheckServiceNode{ + Node: node, + Service: sn.ToNodeService(), + Checks: checks, + } + return stream.Event{ + Topic: TopicServiceHealth, + Key: sn.ServiceName, + Index: idx, + Payload: eventPayload{ + Op: OpCreate, + Obj: csn, + }, + } +} + +func 
newServiceHealthEventDeregister(idx uint64, sn *structs.ServiceNode) stream.Event { + // We actually only need the node name populated in the node part as it's only + // used as a key to know which service was deregistered so don't bother looking + // up the node in the DB. Note that while the ServiceNode does have NodeID + // etc. fields, they are never populated in memdb per the comment on that + // struct and only filled in when we return copies of the result to users. + // This is also important because if the service was deleted as part of a + // whole node deregistering then the node record won't actually exist now + // anyway and we'd have to plumb it through from the changeset above. + csn := &structs.CheckServiceNode{ + Node: &structs.Node{ + Node: sn.Node, + }, + Service: sn.ToNodeService(), + } + + return stream.Event{ + Topic: TopicServiceHealth, + Key: sn.ServiceName, + Index: idx, + Payload: eventPayload{ + Op: OpDelete, + Obj: csn, + }, + } +} diff --git a/agent/consul/state/catalog_events_test.go b/agent/consul/state/catalog_events_test.go new file mode 100644 index 0000000000..5cf610604f --- /dev/null +++ b/agent/consul/state/catalog_events_test.go @@ -0,0 +1,1492 @@ +package state + +import ( + "fmt" + "testing" + + "github.com/hashicorp/consul/agent/consul/stream" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/api" + "github.com/hashicorp/consul/types" + "github.com/stretchr/testify/require" +) + +func TestServiceHealthEventsFromChanges(t *testing.T) { + cases := []struct { + Name string + Setup func(s *Store, tx *txn) error + Mutate func(s *Store, tx *txn) error + WantEvents []stream.Event + WantErr bool + }{ + { + Name: "irrelevant events", + Mutate: func(s *Store, tx *txn) error { + return kvsSetTxn(tx, tx.Index, &structs.DirEntry{ + Key: "foo", + Value: []byte("bar"), + }, false) + }, + WantEvents: nil, + WantErr: false, + }, + { + Name: "service reg, new node", + Mutate: func(s *Store, tx *txn) error { + return 
s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")) + }, + WantEvents: []stream.Event{ + testServiceHealthEvent(t, "web"), + }, + WantErr: false, + }, + { + Name: "service reg, existing node", + Setup: func(s *Store, tx *txn) error { + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")) + }, + Mutate: func(s *Store, tx *txn) error { + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")) + }, + WantEvents: []stream.Event{ + // Should only publish new service + testServiceHealthEvent(t, "web", evNodeUnchanged), + }, + WantErr: false, + }, + { + Name: "service dereg, existing node", + Setup: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + return nil + }, + Mutate: func(s *Store, tx *txn) error { + return s.deleteServiceTxn(tx, tx.Index, "node1", "web", nil) + }, + WantEvents: []stream.Event{ + // Should only publish deregistration for that service + testServiceHealthDeregistrationEvent(t, "web"), + }, + WantErr: false, + }, + { + Name: "node dereg", + Setup: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, testServiceRegistration(t, "db")); err != nil { + return err + } + if err := s.ensureRegistrationTxn(tx, tx.Index, false, testServiceRegistration(t, "web")); err != nil { + return err + } + return nil + }, + Mutate: func(s *Store, tx *txn) error { + return s.deleteNodeTxn(tx, tx.Index, "node1") + }, + WantEvents: []stream.Event{ + // Should publish deregistration events for all services + testServiceHealthDeregistrationEvent(t, "db"), + testServiceHealthDeregistrationEvent(t, "web"), + }, + WantErr: false, + }, + { + Name: "connect native reg, new node", + Mutate: func(s *Store, tx *txn) error { 
+ return s.ensureRegistrationTxn(tx, tx.Index, false, testServiceRegistration(t, "web", regConnectNative)) + }, + WantEvents: []stream.Event{ + // We should see both a regular service health event as well as a connect + // one. + testServiceHealthEvent(t, "web", evConnectNative), + testServiceHealthEvent(t, "web", evConnectNative, evConnectTopic), + }, + WantErr: false, + }, + { + Name: "connect native reg, existing node", + Setup: func(s *Store, tx *txn) error { + return s.ensureRegistrationTxn(tx, tx.Index, false, testServiceRegistration(t, "db")) + }, + Mutate: func(s *Store, tx *txn) error { + return s.ensureRegistrationTxn(tx, tx.Index, false, testServiceRegistration(t, "web", regConnectNative)) + }, + WantEvents: []stream.Event{ + // We should see both a regular service health event as well as a connect + // one. + testServiceHealthEvent(t, "web", + evNodeUnchanged, + evConnectNative), + testServiceHealthEvent(t, "web", + evNodeUnchanged, + evConnectNative, + evConnectTopic), + }, + WantErr: false, + }, + { + Name: "connect native dereg, existing node", + Setup: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, testServiceRegistration(t, "db")); err != nil { + return err + } + + return s.ensureRegistrationTxn(tx, tx.Index, false, testServiceRegistration(t, "web", regConnectNative)) + }, + Mutate: func(s *Store, tx *txn) error { + return s.deleteServiceTxn(tx, tx.Index, "node1", "web", nil) + }, + WantEvents: []stream.Event{ + // We should see both a regular service dereg event and a connect one + testServiceHealthDeregistrationEvent(t, "web", evConnectNative), + testServiceHealthDeregistrationEvent(t, "web", evConnectNative, evConnectTopic), + }, + WantErr: false, + }, + { + Name: "connect sidecar reg, new node", + Mutate: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, testServiceRegistration(t, "web")); err != nil { + return err + } + return s.ensureRegistrationTxn(tx, tx.Index, 
false, testServiceRegistration(t, "web", regSidecar)) + }, + WantEvents: []stream.Event{ + // We should see both a regular service health event for the web service + // another for the sidecar service and a connect event for web. + testServiceHealthEvent(t, "web"), + testServiceHealthEvent(t, "web", evSidecar), + testServiceHealthEvent(t, "web", evConnectTopic, evSidecar), + }, + WantErr: false, + }, + { + Name: "connect sidecar reg, existing node", + Setup: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")) + }, + Mutate: func(s *Store, tx *txn) error { + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)) + }, + WantEvents: []stream.Event{ + // We should see both a regular service health event for the proxy + // service and a connect one for the target service. 
+ testServiceHealthEvent(t, "web", evSidecar, evNodeUnchanged), + testServiceHealthEvent(t, "web", evConnectTopic, evSidecar, evNodeUnchanged), + }, + WantErr: false, + }, + { + Name: "connect sidecar dereg, existing node", + Setup: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)) + }, + Mutate: func(s *Store, tx *txn) error { + // Delete only the sidecar + return s.deleteServiceTxn(tx, tx.Index, "node1", "web_sidecar_proxy", nil) + }, + WantEvents: []stream.Event{ + // We should see both a regular service dereg event and a connect one + testServiceHealthDeregistrationEvent(t, "web", evSidecar), + testServiceHealthDeregistrationEvent(t, "web", evConnectTopic, evSidecar), + }, + WantErr: false, + }, + { + Name: "connect sidecar mutate svc", + Setup: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)) + }, + Mutate: func(s *Store, tx *txn) error { + // Change port of the target service instance + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regMutatePort)) + }, + WantEvents: []stream.Event{ + // We should see the service topic update but not connect since proxy + // details didn't change. 
+ testServiceHealthEvent(t, "web", + evMutatePort, + evNodeUnchanged, + evServiceMutated, + evChecksUnchanged, + ), + }, + WantErr: false, + }, + { + Name: "connect sidecar mutate sidecar", + Setup: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)) + }, + Mutate: func(s *Store, tx *txn) error { + // Change port of the sidecar service instance + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar, regMutatePort)) + }, + WantEvents: []stream.Event{ + // We should see the proxy service topic update and a connect update + testServiceHealthEvent(t, "web", + evSidecar, + evMutatePort, + evNodeUnchanged, + evServiceMutated, + evChecksUnchanged), + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evNodeUnchanged, + evMutatePort, + evServiceMutated, + evChecksUnchanged), + }, + WantErr: false, + }, + { + Name: "connect sidecar rename service", + Setup: func(s *Store, tx *txn) error { + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)) + }, + Mutate: func(s *Store, tx *txn) error { + // Change service name but not ID, update proxy too + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regRenameService)); err != nil { + return err + } + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar, 
regRenameService)) + }, + WantEvents: []stream.Event{ + // We should see events to deregister the old service instance and the + // old connect instance since we changed topic key for both. Then new + // service and connect registrations. The proxy instance should also + // change since it's not proxying a different service. + testServiceHealthDeregistrationEvent(t, "web"), + testServiceHealthEvent(t, "web", + evRenameService, + evServiceMutated, + evNodeUnchanged, + evChecksMutated, + ), + testServiceHealthDeregistrationEvent(t, "web", + evConnectTopic, + evSidecar, + ), + testServiceHealthEvent(t, "web", + evSidecar, + evRenameService, + evNodeUnchanged, + evServiceMutated, + evChecksUnchanged, + ), + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evNodeUnchanged, + evRenameService, + evServiceMutated, + evChecksUnchanged, + ), + }, + WantErr: false, + }, + { + Name: "connect sidecar change destination service", + Setup: func(s *Store, tx *txn) error { + // Register a web_changed service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web_changed")); err != nil { + return err + } + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + // And a sidecar initially for web, will be moved to target web_changed + // in Mutate. + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)) + }, + Mutate: func(s *Store, tx *txn) error { + // Change only the destination service of the proxy without a service + // rename or deleting and recreating the proxy. This is far fetched but + // still valid. + return s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar, regRenameService)) + }, + WantEvents: []stream.Event{ + // We should only see service health events for the sidecar service + // since the actual target services didn't change. 
But also should see + // Connect topic dereg for the old name to update existing subscribers + // for Connect/web. + testServiceHealthDeregistrationEvent(t, "web", + evConnectTopic, + evSidecar, + ), + testServiceHealthEvent(t, "web", + evSidecar, + evRenameService, + evNodeUnchanged, + evServiceMutated, + evChecksUnchanged, + ), + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evNodeUnchanged, + evRenameService, + evServiceMutated, + evChecksUnchanged, + ), + }, + WantErr: false, + }, + { + Name: "multi-service node update", + Setup: func(s *Store, tx *txn) error { + // Register a db service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + // With a connect sidecar + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)); err != nil { + return err + } + return nil + }, + Mutate: func(s *Store, tx *txn) error { + // Change only the node meta. + return s.ensureRegistrationTxn(tx, tx.Index, false, + testNodeRegistration(t, regNodeMeta)) + }, + WantEvents: []stream.Event{ + // We should see updates for all services and a connect update for the + // sidecar's destination. 
+ testServiceHealthEvent(t, "db", + evNodeMeta, + evNodeMutated, + evServiceUnchanged, + evChecksUnchanged, + ), + testServiceHealthEvent(t, "web", + evNodeMeta, + evNodeMutated, + evServiceUnchanged, + evChecksUnchanged, + ), + testServiceHealthEvent(t, "web", + evSidecar, + evNodeMeta, + evNodeMutated, + evServiceUnchanged, + evChecksUnchanged, + ), + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evNodeMeta, + evNodeMutated, + evServiceUnchanged, + evChecksUnchanged, + ), + }, + WantErr: false, + }, + { + Name: "multi-service node rename", + Setup: func(s *Store, tx *txn) error { + // Register a db service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + // With a connect sidecar + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)); err != nil { + return err + } + return nil + }, + Mutate: func(s *Store, tx *txn) error { + // Change only the node NAME but not it's ID. We do it for every service + // though since this is effectively what client agent anti-entropy would + // do on a node rename. If we only rename the node it will have no + // services registered afterwards. 
+ // Register a db service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db", regRenameNode)); err != nil { + return err + } + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regRenameNode)); err != nil { + return err + } + // With a connect sidecar + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar, regRenameNode)); err != nil { + return err + } + return nil + }, + WantEvents: []stream.Event{ + // Node rename is implemented internally as a node delete and new node + // insert after some renaming validation. So we should see full set of + // new events for health, then the deletions of old services, then the + // connect update and delete pair. + testServiceHealthEvent(t, "db", + evRenameNode, + // Although we delete and re-insert, we do maintain the CreatedIndex + // of the node record from the old one. + evNodeMutated, + ), + testServiceHealthEvent(t, "web", + evRenameNode, + evNodeMutated, + ), + testServiceHealthEvent(t, "web", + evSidecar, + evRenameNode, + evNodeMutated, + ), + // dereg events for old node name services + testServiceHealthDeregistrationEvent(t, "db"), + testServiceHealthDeregistrationEvent(t, "web"), + testServiceHealthDeregistrationEvent(t, "web", evSidecar), + // Connect topic updates are last due to the way we add them + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evRenameNode, + evNodeMutated, + ), + testServiceHealthDeregistrationEvent(t, "web", evConnectTopic, evSidecar), + }, + WantErr: false, + }, + { + Name: "multi-service node check failure", + Setup: func(s *Store, tx *txn) error { + // Register a db service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return 
err + } + // With a connect sidecar + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)); err != nil { + return err + } + return nil + }, + Mutate: func(s *Store, tx *txn) error { + // Change only the node-level check status + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regNodeCheckFail)); err != nil { + return err + } + return nil + }, + WantEvents: []stream.Event{ + testServiceHealthEvent(t, "db", + evNodeCheckFail, + evNodeUnchanged, + evServiceUnchanged, + // Only the node check changed. This needs to come after evNodeUnchanged + evNodeChecksMutated, + ), + testServiceHealthEvent(t, "web", + evNodeCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evNodeChecksMutated, + ), + testServiceHealthEvent(t, "web", + evSidecar, + evNodeCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evNodeChecksMutated, + ), + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evNodeCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evNodeChecksMutated, + ), + }, + WantErr: false, + }, + { + Name: "multi-service node service check failure", + Setup: func(s *Store, tx *txn) error { + // Register a db service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + // With a connect sidecar + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)); err != nil { + return err + } + return nil + }, + Mutate: func(s *Store, tx *txn) error { + // Change the service-level check status + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regServiceCheckFail)); err != nil { + return err + } + // Also change the service-level check status for the proxy. 
This is + // analogous to what would happen with an alias check on the client side + // - the proxies check would get updated at roughly the same time as the + // target service check updates. + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar, regServiceCheckFail)); err != nil { + return err + } + return nil + }, + WantEvents: []stream.Event{ + // Should only see the events for that one service change, the sidecar + // service and hence the connect topic for that service. + testServiceHealthEvent(t, "web", + evServiceCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evChecksMutated, + ), + testServiceHealthEvent(t, "web", + evSidecar, + evServiceCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evChecksMutated, + ), + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evServiceCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evChecksMutated, + ), + }, + WantErr: false, + }, + { + Name: "multi-service node node-level check delete", + Setup: func(s *Store, tx *txn) error { + // Register a db service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + // With a connect sidecar + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)); err != nil { + return err + } + return nil + }, + Mutate: func(s *Store, tx *txn) error { + // Delete only the node-level check + if err := s.deleteCheckTxn(tx, tx.Index, "node1", "serf-health", nil); err != nil { + return err + } + return nil + }, + WantEvents: []stream.Event{ + testServiceHealthEvent(t, "db", + evNodeCheckDelete, + evNodeUnchanged, + evServiceUnchanged, + ), + testServiceHealthEvent(t, "web", + evNodeCheckDelete, + evNodeUnchanged, + evServiceUnchanged, + ), + 
testServiceHealthEvent(t, "web", + evSidecar, + evNodeCheckDelete, + evNodeUnchanged, + evServiceUnchanged, + ), + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evNodeCheckDelete, + evNodeUnchanged, + evServiceUnchanged, + ), + }, + WantErr: false, + }, + { + Name: "multi-service node service check delete", + Setup: func(s *Store, tx *txn) error { + // Register a db service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + // With a connect sidecar + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)); err != nil { + return err + } + return nil + }, + Mutate: func(s *Store, tx *txn) error { + // Delete the service-level check for the main service + if err := s.deleteCheckTxn(tx, tx.Index, "node1", "service:web", nil); err != nil { + return err + } + // Also delete for a proxy + if err := s.deleteCheckTxn(tx, tx.Index, "node1", "service:web_sidecar_proxy", nil); err != nil { + return err + } + return nil + }, + WantEvents: []stream.Event{ + // Should only see the events for that one service change, the sidecar + // service and hence the connect topic for that service. 
+ testServiceHealthEvent(t, "web", + evServiceCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evServiceCheckDelete, + ), + testServiceHealthEvent(t, "web", + evSidecar, + evServiceCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evServiceCheckDelete, + ), + testServiceHealthEvent(t, "web", + evConnectTopic, + evSidecar, + evServiceCheckFail, + evNodeUnchanged, + evServiceUnchanged, + evServiceCheckDelete, + ), + }, + WantErr: false, + }, + { + Name: "many services on many nodes in one TX", + Setup: func(s *Store, tx *txn) error { + // Node1 + + // Register a db service + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "db")); err != nil { + return err + } + + // Node2 + // Also a web + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regNode2)); err != nil { + return err + } + // With a connect sidecar + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar, regNode2)); err != nil { + return err + } + + return nil + }, + Mutate: func(s *Store, tx *txn) error { + // In one transaction the operator moves the web service and it's + // sidecar from node2 back to node1 and deletes them from node2 + + if err := s.deleteServiceTxn(tx, tx.Index, "node2", "web", nil); err != nil { + return err + } + if err := s.deleteServiceTxn(tx, tx.Index, "node2", "web_sidecar_proxy", nil); err != nil { + return err + } + + // Register those on node1 + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web")); err != nil { + return err + } + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "web", regSidecar)); err != nil { + return err + } + + // And for good measure, add a new connect-native service to node2 + if err := s.ensureRegistrationTxn(tx, tx.Index, false, + testServiceRegistration(t, "api", regConnectNative, regNode2)); err != nil { + return err + } + + return nil + }, + 
WantEvents: []stream.Event{ + // We should see: + // - service dereg for web and proxy on node2 + // - connect dereg for web on node2 + // - service reg for web and proxy on node1 + // - connect reg for web on node1 + // - service reg for api on node2 + // - connect reg for api on node2 + testServiceHealthDeregistrationEvent(t, "web", evNode2), + testServiceHealthDeregistrationEvent(t, "web", evNode2, evSidecar), + testServiceHealthDeregistrationEvent(t, "web", + evConnectTopic, + evNode2, + evSidecar, + ), + + testServiceHealthEvent(t, "web", evNodeUnchanged), + testServiceHealthEvent(t, "web", evSidecar, evNodeUnchanged), + testServiceHealthEvent(t, "web", evConnectTopic, evSidecar, evNodeUnchanged), + + testServiceHealthEvent(t, "api", + evNode2, + evConnectNative, + evNodeUnchanged, + ), + testServiceHealthEvent(t, "api", + evNode2, + evConnectTopic, + evConnectNative, + evNodeUnchanged, + ), + }, + WantErr: false, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.Name, func(t *testing.T) { + s := testStateStore(t) + + if tc.Setup != nil { + // Bypass the publish mechanism for this test or we get into odd + // recursive stuff... + setupTx := s.db.WriteTxn(10) + require.NoError(t, tc.Setup(s, setupTx)) + // Commit the underlying transaction without using wrapped Commit so we + // avoid the whole event publishing system for setup here. It _should_ + // work but it makes debugging test hard as it will call the function + // under test for the setup data... + setupTx.Txn.Commit() + } + + tx := s.db.WriteTxn(100) + require.NoError(t, tc.Mutate(s, tx)) + + // Note we call the func under test directly rather than publishChanges so + // we can test this in isolation. 
+ got, err := ServiceHealthEventsFromChanges(tx, Changes{Changes: tx.Changes(), Index: 100}) + if tc.WantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + + // Make sure we have the right events, only taking ordering into account + // where it matters to account for non-determinism. + requireEventsInCorrectPartialOrder(t, tc.WantEvents, got, func(e stream.Event) string { + // We need events affecting unique registrations to be ordered, within a topic + csn := getPayloadCheckServiceNode(e.Payload) + return fmt.Sprintf("%s/%s/%s", e.Topic, csn.Node.Node, csn.Service.Service) + }) + }) + } +} + +type regOption func(req *structs.RegisterRequest) error + +func testNodeRegistration(t *testing.T, opts ...regOption) *structs.RegisterRequest { + r := &structs.RegisterRequest{ + Datacenter: "dc1", + ID: "11111111-2222-3333-4444-555555555555", + Node: "node1", + Address: "10.10.10.10", + Checks: structs.HealthChecks{ + &structs.HealthCheck{ + CheckID: "serf-health", + Name: "serf-health", + Node: "node1", + Status: api.HealthPassing, + }, + }, + } + for _, opt := range opts { + err := opt(r) + require.NoError(t, err) + } + return r +} + +func testServiceRegistration(t *testing.T, svc string, opts ...regOption) *structs.RegisterRequest { + // note: don't pass opts or they might get applied twice! 
+ r := testNodeRegistration(t) + r.Service = &structs.NodeService{ + ID: svc, + Service: svc, + Port: 8080, + } + r.Checks = append(r.Checks, + &structs.HealthCheck{ + CheckID: types.CheckID("service:" + svc), + Name: "service:" + svc, + Node: "node1", + ServiceID: svc, + ServiceName: svc, + Type: "ttl", + Status: api.HealthPassing, + }) + for _, opt := range opts { + err := opt(r) + require.NoError(t, err) + } + return r +} + +type eventOption func(e *stream.Event) error + +func testServiceHealthEvent(t *testing.T, svc string, opts ...eventOption) stream.Event { + e := newTestEventServiceHealthRegister(100, 1, svc) + + // Normalize a few things that are different in the generic event which was + // based on original code here but made more general. This means we don't have + // to change all the test loads... + csn := getPayloadCheckServiceNode(e.Payload) + csn.Node.ID = "11111111-2222-3333-4444-555555555555" + csn.Node.Address = "10.10.10.10" + + for _, opt := range opts { + err := opt(&e) + require.NoError(t, err) + } + return e +} + +func testServiceHealthDeregistrationEvent(t *testing.T, svc string, opts ...eventOption) stream.Event { + e := newTestEventServiceHealthDeregister(100, 1, svc) + for _, opt := range opts { + err := opt(&e) + require.NoError(t, err) + } + return e +} + +// regConnectNative option converts the base registration into a Connect-native +// one. +func regConnectNative(req *structs.RegisterRequest) error { + if req.Service == nil { + return nil + } + req.Service.Connect.Native = true + return nil +} + +// regSidecar option converts the base registration request +// into the registration for it's sidecar service. 
+func regSidecar(req *structs.RegisterRequest) error { + if req.Service == nil { + return nil + } + svc := req.Service.Service + + req.Service.Kind = structs.ServiceKindConnectProxy + req.Service.ID = svc + "_sidecar_proxy" + req.Service.Service = svc + "_sidecar_proxy" + req.Service.Port = 20000 + req.Service.Port + + req.Service.Proxy.DestinationServiceName = svc + req.Service.Proxy.DestinationServiceID = svc + + // Convert the check to point to the right ID now. This isn't totally + // realistic - sidecars should have alias checks etc but this is good enough + // to test this code path. + if len(req.Checks) >= 2 { + req.Checks[1].CheckID = types.CheckID("service:" + svc + "_sidecar_proxy") + req.Checks[1].ServiceID = svc + "_sidecar_proxy" + } + + return nil +} + +// regNodeCheckFail option converts the base registration request +// into a registration with the node-level health check failing +func regNodeCheckFail(req *structs.RegisterRequest) error { + req.Checks[0].Status = api.HealthCritical + return nil +} + +// regServiceCheckFail option converts the base registration request +// into a registration with the service-level health check failing +func regServiceCheckFail(req *structs.RegisterRequest) error { + req.Checks[1].Status = api.HealthCritical + return nil +} + +// regMutatePort option alters the base registration service port by a relative +// amount to simulate a service change. Can be used with regSidecar since it's a +// relative change (+10). +func regMutatePort(req *structs.RegisterRequest) error { + if req.Service == nil { + return nil + } + req.Service.Port += 10 + return nil +} + +// regRenameService option alters the base registration service name but not +// it's ID simulating a service being renamed while it's ID is maintained +// separately e.g. by a scheduler. This is an edge case but an important one as +// it changes which topic key events propagate. 
+func regRenameService(req *structs.RegisterRequest) error { + if req.Service == nil { + return nil + } + isSidecar := req.Service.Kind == structs.ServiceKindConnectProxy + + if !isSidecar { + req.Service.Service += "_changed" + // Update service checks + if len(req.Checks) >= 2 { + req.Checks[1].ServiceName += "_changed" + } + return nil + } + // This is a sidecar, it's not really realistic but lets only update the + // fields necessary to make it work again with the new service name to be sure + // we get the right result. This is certainly possible if not likely so a + // valid case. + + // We don't need to update out own details, only the name of the destination + req.Service.Proxy.DestinationServiceName += "_changed" + + return nil +} + +// regRenameNode option alters the base registration node name by adding the +// _changed suffix. +func regRenameNode(req *structs.RegisterRequest) error { + req.Node += "_changed" + for i := range req.Checks { + req.Checks[i].Node = req.Node + } + return nil +} + +// regNode2 option alters the base registration to be on a different node. +func regNode2(req *structs.RegisterRequest) error { + req.Node = "node2" + req.ID = "22222222-2222-3333-4444-555555555555" + for i := range req.Checks { + req.Checks[i].Node = req.Node + } + return nil +} + +// regNodeMeta option alters the base registration node to add some meta data. +func regNodeMeta(req *structs.RegisterRequest) error { + req.NodeMeta = map[string]string{"foo": "bar"} + return nil +} + +// evNodeUnchanged option converts the event to reset the node and node check +// raft indexes to the original value where we expect the node not to have been +// changed in the mutation. +func evNodeUnchanged(e *stream.Event) error { + // If the node wasn't touched, its modified index and check's modified + // indexes should be the original ones. 
+ csn := getPayloadCheckServiceNode(e.Payload) + + // Check this isn't a dereg event with made up/placeholder node info + if csn.Node.CreateIndex == 0 { + return nil + } + csn.Node.CreateIndex = 10 + csn.Node.ModifyIndex = 10 + csn.Checks[0].CreateIndex = 10 + csn.Checks[0].ModifyIndex = 10 + return nil +} + +// evServiceUnchanged option converts the event to reset the service and service +// check raft indexes to the original value where we expect the service record +// not to have been changed in the mutation. +func evServiceUnchanged(e *stream.Event) error { + // If the node wasn't touched, its modified index and check's modified + // indexes should be the original ones. + csn := getPayloadCheckServiceNode(e.Payload) + + csn.Service.CreateIndex = 10 + csn.Service.ModifyIndex = 10 + if len(csn.Checks) > 1 { + csn.Checks[1].CreateIndex = 10 + csn.Checks[1].ModifyIndex = 10 + } + return nil +} + +// evConnectNative option converts the base event to represent a connect-native +// service instance. +func evConnectNative(e *stream.Event) error { + getPayloadCheckServiceNode(e.Payload).Service.Connect.Native = true + return nil +} + +// evConnectTopic option converts the base event to the equivalent event that +// should be published to the connect topic. When needed it should be applied +// first as several other options (notable evSidecar) change behavior subtly +// depending on which topic they are published to and they determin this from +// the event. +func evConnectTopic(e *stream.Event) error { + e.Topic = TopicServiceHealthConnect + return nil +} + +// evSidecar option converts the base event to the health (not connect) event +// expected from the sidecar proxy registration for that service instead. When +// needed it should be applied after any option that changes topic (e.g. +// evConnectTopic) but before other options that might change behavior subtly +// depending on whether it's a sidecar or regular service event (e.g. +// evRenameService). 
+func evSidecar(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + + svc := csn.Service.Service + + csn.Service.Kind = structs.ServiceKindConnectProxy + csn.Service.ID = svc + "_sidecar_proxy" + csn.Service.Service = svc + "_sidecar_proxy" + csn.Service.Port = 20000 + csn.Service.Port + + csn.Service.Proxy.DestinationServiceName = svc + csn.Service.Proxy.DestinationServiceID = svc + + // Convert the check to point to the right ID now. This isn't totally + // realistic - sidecars should have alias checks etc but this is good enough + // to test this code path. + if len(csn.Checks) >= 2 { + csn.Checks[1].CheckID = types.CheckID("service:" + svc + "_sidecar_proxy") + csn.Checks[1].ServiceID = svc + "_sidecar_proxy" + csn.Checks[1].ServiceName = svc + "_sidecar_proxy" + } + + // Update event key to be the proxy service name, but only if this is not + // already in the connect topic + if e.Topic != TopicServiceHealthConnect { + e.Key = csn.Service.Service + } + return nil +} + +// evMutatePort option alters the base event service port by a relative +// amount to simulate a service change. Can be used with evSidecar since it's a +// relative change (+10). +func evMutatePort(e *stream.Event) error { + getPayloadCheckServiceNode(e.Payload).Service.Port += 10 + return nil +} + +// evNodeMutated option alters the base event node to set it's CreateIndex +// (but not modify index) to the setup index. This expresses that we expect the +// node record originally created in setup to have been mutated during the +// update. +func evNodeMutated(e *stream.Event) error { + getPayloadCheckServiceNode(e.Payload).Node.CreateIndex = 10 + return nil +} + +// evServiceMutated option alters the base event service to set it's CreateIndex +// (but not modify index) to the setup index. This expresses that we expect the +// service record originally created in setup to have been mutated during the +// update. 
+func evServiceMutated(e *stream.Event) error { + getPayloadCheckServiceNode(e.Payload).Service.CreateIndex = 10 + return nil +} + +// evChecksMutated option alters the base event service check to set it's +// CreateIndex (but not modify index) to the setup index. This expresses that we +// expect the service check records originally created in setup to have been +// mutated during the update. NOTE: this must be sequenced after +// evServiceUnchanged if both are used. +func evChecksMutated(e *stream.Event) error { + getPayloadCheckServiceNode(e.Payload).Checks[1].CreateIndex = 10 + getPayloadCheckServiceNode(e.Payload).Checks[1].ModifyIndex = 100 + return nil +} + +// evNodeChecksMutated option alters the base event node check to set it's +// CreateIndex (but not modify index) to the setup index. This expresses that we +// expect the node check records originally created in setup to have been +// mutated during the update. NOTE: this must be sequenced after evNodeUnchanged +// if both are used. +func evNodeChecksMutated(e *stream.Event) error { + getPayloadCheckServiceNode(e.Payload).Checks[0].CreateIndex = 10 + getPayloadCheckServiceNode(e.Payload).Checks[0].ModifyIndex = 100 + return nil +} + +// evChecksUnchanged option alters the base event service to set all check raft +// indexes to the setup index. This expresses that we expect none of the checks +// to have changed in the update. +func evChecksUnchanged(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + for i := range csn.Checks { + csn.Checks[i].CreateIndex = 10 + csn.Checks[i].ModifyIndex = 10 + } + return nil +} + +// evRenameService option alters the base event service to change the service +// name but not ID simulating an in-place service rename. 
+func evRenameService(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + isSidecar := csn.Service.Kind == structs.ServiceKindConnectProxy + + if !isSidecar { + csn.Service.Service += "_changed" + // Update service checks + if len(csn.Checks) >= 2 { + csn.Checks[1].ServiceName += "_changed" + } + e.Key += "_changed" + return nil + } + // This is a sidecar, it's not really realistic but lets only update the + // fields necessary to make it work again with the new service name to be sure + // we get the right result. This is certainly possible if not likely so a + // valid case. + + // We don't need to update out own details, only the name of the destination + csn.Service.Proxy.DestinationServiceName += "_changed" + + // If this is the connect topic we need to change the key too + if e.Topic == TopicServiceHealthConnect { + e.Key += "_changed" + } + return nil +} + +// evNodeMeta option alters the base event node to add some meta data. +func evNodeMeta(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + csn.Node.Meta = map[string]string{"foo": "bar"} + return nil +} + +// evRenameNode option alters the base event node name. +func evRenameNode(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + csn.Node.Node += "_changed" + for i := range csn.Checks { + csn.Checks[i].Node = csn.Node.Node + } + return nil +} + +// evNode2 option alters the base event to refer to a different node +func evNode2(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + csn.Node.Node = "node2" + // Only change ID if it's set (e.g. 
it's not in a deregistration event) + if csn.Node.ID != "" { + csn.Node.ID = "22222222-2222-3333-4444-555555555555" + } + for i := range csn.Checks { + csn.Checks[i].Node = csn.Node.Node + } + return nil +} + +// evNodeCheckFail option alters the base event to set the node-level health +// check to be failing +func evNodeCheckFail(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + csn.Checks[0].Status = api.HealthCritical + return nil +} + +// evNodeCheckDelete option alters the base event to remove the node-level +// health check +func evNodeCheckDelete(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + // Ensure this is idempotent as we sometimes get called multiple times.. + if len(csn.Checks) > 0 && csn.Checks[0].ServiceID == "" { + csn.Checks = csn.Checks[1:] + } + return nil +} + +// evServiceCheckFail option alters the base event to set the service-level health +// check to be failing +func evServiceCheckFail(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + csn.Checks[1].Status = api.HealthCritical + return nil +} + +// evServiceCheckDelete option alters the base event to remove the service-level +// health check +func evServiceCheckDelete(e *stream.Event) error { + csn := getPayloadCheckServiceNode(e.Payload) + // Ensure this is idempotent as we sometimes get called multiple times.. + if len(csn.Checks) > 1 && csn.Checks[1].ServiceID != "" { + csn.Checks = csn.Checks[0:1] + } + return nil +} + +// requireEventsInCorrectPartialOrder compares that the expected set of events +// was emitted. It allows for _independent_ events to be emitted in any order - +// this can be important because even though the transaction processing is all +// strictly ordered up until the processing func, grouping multiple updates that +// affect the same logical entity may be necessary and may impose random +// ordering changes on the eventual events if a map is used. 
We only care that +// events _affecting the same topic and key_ are ordered correctly with respect +// to the "expected" set of events so this helper asserts that. +// +// The caller provides a func that can return a partition key for the given +// event types and we assert that all events with the same partition key are +// deliveries in the same order. Note that this is not necessarily the same as +// topic/key since for example in Catalog only events about a specific service +// _instance_ need to be ordered while topic and key are more general. +func requireEventsInCorrectPartialOrder(t *testing.T, want, got []stream.Event, + partKey func(stream.Event) string) { + t.Helper() + + // Partion both arrays by topic/key + wantParts := make(map[string][]stream.Event) + gotParts := make(map[string][]stream.Event) + + for _, e := range want { + k := partKey(e) + wantParts[k] = append(wantParts[k], e) + } + for _, e := range got { + k := partKey(e) + gotParts[k] = append(gotParts[k], e) + } + + for k, want := range wantParts { + require.Equal(t, want, gotParts[k], "got incorrect events for partition: %s", k) + } + + for k, got := range gotParts { + if _, ok := wantParts[k]; !ok { + require.Equal(t, nil, got, "got unwanted events for partition: %s", k) + } + } +} + +// newTestEventServiceHealthRegister returns a realistically populated service +// health registration event. The nodeNum is a +// logical node and is used to create the node name ("node%d") but also change +// the node ID and IP address to make it a little more realistic for cases that +// need that. nodeNum should be less than 64k to make the IP address look +// realistic. Any other changes can be made on the returned event to avoid +// adding too many options to callers. 
+func newTestEventServiceHealthRegister(index uint64, nodeNum int, svc string) stream.Event { + node := fmt.Sprintf("node%d", nodeNum) + nodeID := types.NodeID(fmt.Sprintf("11111111-2222-3333-4444-%012d", nodeNum)) + addr := fmt.Sprintf("10.10.%d.%d", nodeNum/256, nodeNum%256) + + return stream.Event{ + Topic: TopicServiceHealth, + Key: svc, + Index: index, + Payload: eventPayload{ + Op: OpCreate, + Obj: &structs.CheckServiceNode{ + Node: &structs.Node{ + ID: nodeID, + Node: node, + Address: addr, + Datacenter: "dc1", + RaftIndex: structs.RaftIndex{ + CreateIndex: index, + ModifyIndex: index, + }, + }, + Service: &structs.NodeService{ + ID: svc, + Service: svc, + Port: 8080, + Weights: &structs.Weights{ + Passing: 1, + Warning: 1, + }, + RaftIndex: structs.RaftIndex{ + CreateIndex: index, + ModifyIndex: index, + }, + }, + Checks: []*structs.HealthCheck{ + { + Node: node, + CheckID: "serf-health", + Name: "serf-health", + Status: "passing", + RaftIndex: structs.RaftIndex{ + CreateIndex: index, + ModifyIndex: index, + }, + }, + { + Node: node, + CheckID: types.CheckID("service:" + svc), + Name: "service:" + svc, + ServiceID: svc, + ServiceName: svc, + Type: "ttl", + Status: "passing", + RaftIndex: structs.RaftIndex{ + CreateIndex: index, + ModifyIndex: index, + }, + }, + }, + }, + }, + } +} + +// TestEventServiceHealthDeregister returns a realistically populated service +// health deregistration event. The nodeNum is a +// logical node and is used to create the node name ("node%d") but also change +// the node ID and IP address to make it a little more realistic for cases that +// need that. nodeNum should be less than 64k to make the IP address look +// realistic. Any other changes can be made on the returned event to avoid +// adding too many options to callers. 
+func newTestEventServiceHealthDeregister(index uint64, nodeNum int, svc string) stream.Event { + return stream.Event{ + Topic: TopicServiceHealth, + Key: svc, + Index: index, + Payload: eventPayload{ + Op: OpDelete, + Obj: &structs.CheckServiceNode{ + Node: &structs.Node{ + Node: fmt.Sprintf("node%d", nodeNum), + }, + Service: &structs.NodeService{ + ID: svc, + Service: svc, + Port: 8080, + Weights: &structs.Weights{ + Passing: 1, + Warning: 1, + }, + RaftIndex: structs.RaftIndex{ + // The original insertion index since a delete doesn't update + // this. This magic value came from state store tests where we + // setup at index 10 and then mutate at index 100. It can be + // modified by the caller later and makes it easier than having + // yet another argument in the common case. + CreateIndex: 10, + ModifyIndex: 10, + }, + }, + }, + }, + } +} diff --git a/agent/consul/state/config_entry.go b/agent/consul/state/config_entry.go index 44113f46ad..f19205bbee 100644 --- a/agent/consul/state/config_entry.go +++ b/agent/consul/state/config_entry.go @@ -467,7 +467,7 @@ func validateProposedConfigEntryInServiceGraph( } overrides := map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: kind, Name: name}: next, + structs.NewConfigEntryKindName(kind, name, entMeta): next, } var ( @@ -909,9 +909,8 @@ func configEntryWithOverridesTxn( entMeta *structs.EnterpriseMeta, ) (uint64, structs.ConfigEntry, error) { if len(overrides) > 0 { - entry, ok := overrides[structs.ConfigEntryKindName{ - Kind: kind, Name: name, - }] + kn := structs.NewConfigEntryKindName(kind, name, entMeta) + entry, ok := overrides[kn] if ok { return 0, entry, nil // a nil entry implies it should act like it is erased } diff --git a/agent/consul/state/config_entry_test.go b/agent/consul/state/config_entry_test.go index 4cee1a6c40..fcf7624a5b 100644 --- a/agent/consul/state/config_entry_test.go +++ b/agent/consul/state/config_entry_test.go @@ -880,10 +880,10 @@ func 
TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectBefore: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), }, overrides: map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: structs.ServiceDefaults, Name: "main"}: nil, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil): nil, }, expectAfter: []structs.ConfigEntryKindName{ // nothing @@ -899,17 +899,17 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectBefore: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), }, overrides: map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: structs.ServiceDefaults, Name: "main"}: &structs.ServiceConfigEntry{ + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil): &structs.ServiceConfigEntry{ Kind: structs.ServiceDefaults, Name: "main", Protocol: "grpc", }, }, expectAfter: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), }, checkAfter: func(t *testing.T, entrySet *structs.DiscoveryChainConfigEntries) { defaults := entrySet.GetService(structs.NewServiceID("main", nil)) @@ -932,14 +932,14 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectBefore: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, - {Kind: structs.ServiceRouter, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), + structs.NewConfigEntryKindName(structs.ServiceRouter, "main", nil), }, overrides: map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: structs.ServiceRouter, Name: "main"}: nil, + structs.NewConfigEntryKindName(structs.ServiceRouter, "main", nil): nil, }, expectAfter: 
[]structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), }, }, { @@ -977,12 +977,12 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectBefore: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, - {Kind: structs.ServiceResolver, Name: "main"}, - {Kind: structs.ServiceRouter, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), + structs.NewConfigEntryKindName(structs.ServiceResolver, "main", nil), + structs.NewConfigEntryKindName(structs.ServiceRouter, "main", nil), }, overrides: map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: structs.ServiceRouter, Name: "main"}: &structs.ServiceRouterConfigEntry{ + structs.NewConfigEntryKindName(structs.ServiceRouter, "main", nil): &structs.ServiceRouterConfigEntry{ Kind: structs.ServiceRouter, Name: "main", Routes: []structs.ServiceRoute{ @@ -1000,9 +1000,9 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectAfter: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, - {Kind: structs.ServiceResolver, Name: "main"}, - {Kind: structs.ServiceRouter, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), + structs.NewConfigEntryKindName(structs.ServiceResolver, "main", nil), + structs.NewConfigEntryKindName(structs.ServiceRouter, "main", nil), }, checkAfter: func(t *testing.T, entrySet *structs.DiscoveryChainConfigEntries) { router := entrySet.GetRouter(structs.NewServiceID("main", nil)) @@ -1040,14 +1040,14 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectBefore: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, - {Kind: structs.ServiceSplitter, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), + 
structs.NewConfigEntryKindName(structs.ServiceSplitter, "main", nil), }, overrides: map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: structs.ServiceSplitter, Name: "main"}: nil, + structs.NewConfigEntryKindName(structs.ServiceSplitter, "main", nil): nil, }, expectAfter: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), }, }, { @@ -1067,11 +1067,11 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectBefore: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, - {Kind: structs.ServiceSplitter, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), + structs.NewConfigEntryKindName(structs.ServiceSplitter, "main", nil), }, overrides: map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: structs.ServiceSplitter, Name: "main"}: &structs.ServiceSplitterConfigEntry{ + structs.NewConfigEntryKindName(structs.ServiceSplitter, "main", nil): &structs.ServiceSplitterConfigEntry{ Kind: structs.ServiceSplitter, Name: "main", Splits: []structs.ServiceSplit{ @@ -1081,8 +1081,8 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectAfter: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceDefaults, Name: "main"}, - {Kind: structs.ServiceSplitter, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceDefaults, "main", nil), + structs.NewConfigEntryKindName(structs.ServiceSplitter, "main", nil), }, checkAfter: func(t *testing.T, entrySet *structs.DiscoveryChainConfigEntries) { splitter := entrySet.GetSplitter(structs.NewServiceID("main", nil)) @@ -1106,10 +1106,10 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectBefore: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceResolver, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceResolver, "main", nil), }, overrides: 
map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: structs.ServiceResolver, Name: "main"}: nil, + structs.NewConfigEntryKindName(structs.ServiceResolver, "main", nil): nil, }, expectAfter: []structs.ConfigEntryKindName{ // nothing @@ -1124,17 +1124,17 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { }, }, expectBefore: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceResolver, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceResolver, "main", nil), }, overrides: map[structs.ConfigEntryKindName]structs.ConfigEntry{ - {Kind: structs.ServiceResolver, Name: "main"}: &structs.ServiceResolverConfigEntry{ + structs.NewConfigEntryKindName(structs.ServiceResolver, "main", nil): &structs.ServiceResolverConfigEntry{ Kind: structs.ServiceResolver, Name: "main", ConnectTimeout: 33 * time.Second, }, }, expectAfter: []structs.ConfigEntryKindName{ - {Kind: structs.ServiceResolver, Name: "main"}, + structs.NewConfigEntryKindName(structs.ServiceResolver, "main", nil), }, checkAfter: func(t *testing.T, entrySet *structs.DiscoveryChainConfigEntries) { resolver := entrySet.GetResolver(structs.NewServiceID("main", nil)) @@ -1181,28 +1181,32 @@ func TestStore_ReadDiscoveryChainConfigEntries_Overrides(t *testing.T) { func entrySetToKindNames(entrySet *structs.DiscoveryChainConfigEntries) []structs.ConfigEntryKindName { var out []structs.ConfigEntryKindName for _, entry := range entrySet.Routers { - out = append(out, structs.ConfigEntryKindName{ - Kind: entry.Kind, - Name: entry.Name, - }) + out = append(out, structs.NewConfigEntryKindName( + entry.Kind, + entry.Name, + &entry.EnterpriseMeta, + )) } for _, entry := range entrySet.Splitters { - out = append(out, structs.ConfigEntryKindName{ - Kind: entry.Kind, - Name: entry.Name, - }) + out = append(out, structs.NewConfigEntryKindName( + entry.Kind, + entry.Name, + &entry.EnterpriseMeta, + )) } for _, entry := range entrySet.Resolvers { - out = append(out, 
structs.ConfigEntryKindName{ - Kind: entry.Kind, - Name: entry.Name, - }) + out = append(out, structs.NewConfigEntryKindName( + entry.Kind, + entry.Name, + &entry.EnterpriseMeta, + )) } for _, entry := range entrySet.Services { - out = append(out, structs.ConfigEntryKindName{ - Kind: entry.Kind, - Name: entry.Name, - }) + out = append(out, structs.NewConfigEntryKindName( + entry.Kind, + entry.Name, + &entry.EnterpriseMeta, + )) } return out } diff --git a/agent/consul/state/memdb.go b/agent/consul/state/memdb.go index be4f4348e2..3fd72dfaa7 100644 --- a/agent/consul/state/memdb.go +++ b/agent/consul/state/memdb.go @@ -15,6 +15,13 @@ type ReadTxn interface { Abort() } +// WriteTxn is implemented by memdb.Txn to perform write operations. +type WriteTxn interface { + ReadTxn + Insert(table string, obj interface{}) error + Commit() error +} + // Changes wraps a memdb.Changes to include the index at which these changes // were made. type Changes struct { @@ -24,8 +31,9 @@ type Changes struct { } // changeTrackerDB is a thin wrapper around memdb.DB which enables TrackChanges on -// all write transactions. When the transaction is committed the changes are -// sent to the eventPublisher which will create and emit change events. +// all write transactions. When the transaction is committed the changes are: +// 1. Used to update our internal usage tracking +// 2. 
Sent to the eventPublisher which will create and emit change events type changeTrackerDB struct { db *memdb.MemDB publisher eventPublisher @@ -77,11 +85,8 @@ func (c *changeTrackerDB) WriteTxn(idx uint64) *txn { return t } -func (c *changeTrackerDB) publish(changes Changes) error { - readOnlyTx := c.db.Txn(false) - defer readOnlyTx.Abort() - - events, err := c.processChanges(readOnlyTx, changes) +func (c *changeTrackerDB) publish(tx ReadTxn, changes Changes) error { + events, err := c.processChanges(tx, changes) if err != nil { return fmt.Errorf("failed generating events from changes: %v", err) } @@ -89,17 +94,21 @@ func (c *changeTrackerDB) publish(changes Changes) error { return nil } -// WriteTxnRestore returns a wrapped RW transaction that does NOT have change -// tracking enabled. This should only be used in Restore where we need to -// replace the entire contents of the Store without a need to track the changes. -// WriteTxnRestore uses a zero index since the whole restore doesn't really occur -// at one index - the effect is to write many values that were previously -// written across many indexes. +// WriteTxnRestore returns a wrapped RW transaction that should only be used in +// Restore where we need to replace the entire contents of the Store. +// WriteTxnRestore uses a zero index since the whole restore doesn't really +// occur at one index - the effect is to write many values that were previously +// written across many indexes. WriteTxnRestore also does not publish any +// change events to subscribers. func (c *changeTrackerDB) WriteTxnRestore() *txn { - return &txn{ + t := &txn{ Txn: c.db.Txn(true), Index: 0, } + + // We enable change tracking so that usage data is correctly populated. + t.Txn.TrackChanges() + return t } // txn wraps a memdb.Txn to capture changes and send them to the EventPublisher. @@ -115,7 +124,7 @@ type txn struct { // Index is stored so that it may be passed along to any subscribers as part // of a change event. 
Index uint64 - publish func(changes Changes) error + publish func(tx ReadTxn, changes Changes) error } // Commit first pushes changes to EventPublisher, then calls Commit on the @@ -125,15 +134,22 @@ type txn struct { // by the caller. A non-nil error indicates that a commit failed and was not // applied. func (tx *txn) Commit() error { + changes := Changes{ + Index: tx.Index, + Changes: tx.Txn.Changes(), + } + + if len(changes.Changes) > 0 { + if err := updateUsage(tx, changes); err != nil { + return err + } + } + // publish may be nil if this is a read-only or WriteTxnRestore transaction. // In those cases changes should also be empty, and there will be nothing // to publish. if tx.publish != nil { - changes := Changes{ - Index: tx.Index, - Changes: tx.Txn.Changes(), - } - if err := tx.publish(changes); err != nil { + if err := tx.publish(tx.Txn, changes); err != nil { return err } } @@ -149,11 +165,33 @@ func (t topic) String() string { return string(t) } +var ( + // TopicServiceHealth contains events for all registered service instances. + TopicServiceHealth topic = "topic-service-health" + // TopicServiceHealthConnect contains events for connect-enabled service instances. + TopicServiceHealthConnect topic = "topic-service-health-connect" +) + func processDBChanges(tx ReadTxn, changes Changes) ([]stream.Event, error) { - // TODO: add other table handlers here. - return aclChangeUnsubscribeEvent(tx, changes) + var events []stream.Event + fns := []func(tx ReadTxn, changes Changes) ([]stream.Event, error){ + aclChangeUnsubscribeEvent, + ServiceHealthEventsFromChanges, + // TODO: add other table handlers here. + } + for _, fn := range fns { + e, err := fn(tx, changes) + if err != nil { + return nil, err + } + events = append(events, e...) 
+ } + return events, nil } -func newSnapshotHandlers() stream.SnapshotHandlers { - return stream.SnapshotHandlers{} +func newSnapshotHandlers(s *Store) stream.SnapshotHandlers { + return stream.SnapshotHandlers{ + TopicServiceHealth: serviceHealthSnapshot(s, TopicServiceHealth), + TopicServiceHealthConnect: serviceHealthSnapshot(s, TopicServiceHealthConnect), + } } diff --git a/agent/consul/state/operations_oss.go b/agent/consul/state/operations_oss.go index 4c382694b9..30deb70683 100644 --- a/agent/consul/state/operations_oss.go +++ b/agent/consul/state/operations_oss.go @@ -7,30 +7,30 @@ import ( "github.com/hashicorp/go-memdb" ) -func firstWithTxn(tx *txn, +func firstWithTxn(tx ReadTxn, table, index, idxVal string, entMeta *structs.EnterpriseMeta) (interface{}, error) { return tx.First(table, index, idxVal) } -func firstWatchWithTxn(tx *txn, +func firstWatchWithTxn(tx ReadTxn, table, index, idxVal string, entMeta *structs.EnterpriseMeta) (<-chan struct{}, interface{}, error) { return tx.FirstWatch(table, index, idxVal) } -func firstWatchCompoundWithTxn(tx *txn, +func firstWatchCompoundWithTxn(tx ReadTxn, table, index string, _ *structs.EnterpriseMeta, idxVals ...interface{}) (<-chan struct{}, interface{}, error) { return tx.FirstWatch(table, index, idxVals...) } -func getWithTxn(tx *txn, +func getWithTxn(tx ReadTxn, table, index, idxVal string, entMeta *structs.EnterpriseMeta) (memdb.ResultIterator, error) { return tx.Get(table, index, idxVal) } -func getCompoundWithTxn(tx *txn, table, index string, +func getCompoundWithTxn(tx ReadTxn, table, index string, _ *structs.EnterpriseMeta, idxVals ...interface{}) (memdb.ResultIterator, error) { return tx.Get(table, index, idxVals...) 
diff --git a/agent/consul/state/state_store.go b/agent/consul/state/state_store.go index d19922eece..3a7229607c 100644 --- a/agent/consul/state/state_store.go +++ b/agent/consul/state/state_store.go @@ -162,17 +162,17 @@ func NewStateStore(gc *TombstoneGC) (*Store, error) { ctx, cancel := context.WithCancel(context.TODO()) s := &Store{ - schema: schema, - abandonCh: make(chan struct{}), - kvsGraveyard: NewGraveyard(gc), - lockDelay: NewDelay(), - db: &changeTrackerDB{ - db: db, - publisher: stream.NewEventPublisher(ctx, newSnapshotHandlers(), 10*time.Second), - processChanges: processDBChanges, - }, + schema: schema, + abandonCh: make(chan struct{}), + kvsGraveyard: NewGraveyard(gc), + lockDelay: NewDelay(), stopEventPublisher: cancel, } + s.db = &changeTrackerDB{ + db: db, + publisher: stream.NewEventPublisher(ctx, newSnapshotHandlers(s), 10*time.Second), + processChanges: processDBChanges, + } return s, nil } diff --git a/agent/consul/state/store_integration_test.go b/agent/consul/state/store_integration_test.go index 83a978bb0a..6b2e9d1fe6 100644 --- a/agent/consul/state/store_integration_test.go +++ b/agent/consul/state/store_integration_test.go @@ -376,7 +376,7 @@ var topicService stream.Topic = topic("test-topic-service") func newTestSnapshotHandlers(s *Store) stream.SnapshotHandlers { return stream.SnapshotHandlers{ - topicService: func(req *stream.SubscribeRequest, snap stream.SnapshotAppender) (uint64, error) { + topicService: func(req stream.SubscribeRequest, snap stream.SnapshotAppender) (uint64, error) { idx, nodes, err := s.ServiceNodes(nil, req.Key, nil) if err != nil { return idx, err diff --git a/agent/consul/state/usage.go b/agent/consul/state/usage.go new file mode 100644 index 0000000000..6e43f3729f --- /dev/null +++ b/agent/consul/state/usage.go @@ -0,0 +1,258 @@ +package state + +import ( + "fmt" + + "github.com/hashicorp/consul/agent/structs" + memdb "github.com/hashicorp/go-memdb" +) + +const ( + serviceNamesUsageTable = "service-names" +) + 
+// usageTableSchema returns a new table schema used for tracking usage counts +// (such as nodes and services) in the state store. +func usageTableSchema() *memdb.TableSchema { + return &memdb.TableSchema{ + Name: "usage", + Indexes: map[string]*memdb.IndexSchema{ + "id": { + Name: "id", + AllowMissing: false, + Unique: true, + Indexer: &memdb.StringFieldIndex{ + Field: "ID", + Lowercase: true, + }, + }, + }, + } +} + +func init() { + registerSchema(usageTableSchema) +} + +// UsageEntry represents a count of some arbitrary identifier within the +// state store, along with the last seen index. +type UsageEntry struct { + ID string + Index uint64 + Count int +} + +// ServiceUsage contains all of the usage data related to services +type ServiceUsage struct { + Services int + ServiceInstances int + EnterpriseServiceUsage +} + +type uniqueServiceState int + +const ( + NoChange uniqueServiceState = 0 + Deleted uniqueServiceState = 1 + Created uniqueServiceState = 2 +) + +// updateUsage takes a set of memdb changes and computes a delta for specific +// usage metrics that we track. +func updateUsage(tx WriteTxn, changes Changes) error { + usageDeltas := make(map[string]int) + for _, change := range changes.Changes { + var delta int + if change.Created() { + delta = 1 + } else if change.Deleted() { + delta = -1 + } + + switch change.Table { + case "nodes": + usageDeltas[change.Table] += delta + case "services": + svc := changeObject(change).(*structs.ServiceNode) + usageDeltas[change.Table] += delta + serviceIter, err := getWithTxn(tx, servicesTableName, "service", svc.ServiceName, &svc.EnterpriseMeta) + if err != nil { + return err + } + + var serviceState uniqueServiceState + if serviceIter.Next() == nil { + // If no services exist, we know we deleted the last service + // instance. + serviceState = Deleted + usageDeltas[serviceNamesUsageTable] -= 1 + } else if serviceIter.Next() == nil { + // If a second call to Next() returns nil, we know only a single + // instance exists. 
If, in addition, a new service name has been + // registered, either via creating a new service instance or via + // renaming an existing service, then we update our service count. + // + // We only care about two cases here: + // 1. A new service instance has been created with a unique name + // 2. An existing service instance has been updated with a new unique name + // + // These are the only ways a new unique service can be created. The + // other valid cases here: an update that does not change the service + // name, and a deletion, both do not impact the count of unique service + // names in the system. + + if change.Created() { + // Given a single existing service instance of the service: If a + // service has just been created, then we know this is a new unique + // service. + serviceState = Created + usageDeltas[serviceNamesUsageTable] += 1 + } else if serviceNameChanged(change) { + // Given a single existing service instance of the service: If a + // service has been updated with a new service name, then we know + // this is a new unique service. + serviceState = Created + usageDeltas[serviceNamesUsageTable] += 1 + + // Check whether the previous name was deleted in this rename, this + // is a special case of renaming a service which does not result in + // changing the count of unique service names. + before := change.Before.(*structs.ServiceNode) + beforeSvc, err := firstWithTxn(tx, servicesTableName, "service", before.ServiceName, &before.EnterpriseMeta) + if err != nil { + return err + } + if beforeSvc == nil { + usageDeltas[serviceNamesUsageTable] -= 1 + // set serviceState to NoChange since we have both gained and lost a + // service, cancelling each other out + serviceState = NoChange + } + } + } + addEnterpriseServiceUsage(usageDeltas, change, serviceState) + } + } + + idx := changes.Index + // This will happen when restoring from a snapshot, just take the max index + // of the tables we are tracking. 
+ if idx == 0 { + idx = maxIndexTxn(tx, "nodes", servicesTableName) + } + + return writeUsageDeltas(tx, idx, usageDeltas) +} + +// serviceNameChanged returns a boolean that indicates whether the +// provided change resulted in an update to the service's service name. +func serviceNameChanged(change memdb.Change) bool { + if change.Updated() { + before := change.Before.(*structs.ServiceNode) + after := change.After.(*structs.ServiceNode) + return before.ServiceName != after.ServiceName + } + + return false +} + +// writeUsageDeltas will take in a map of IDs to deltas and update each +// entry accordingly, checking for integer underflow. The index that is +// passed in will be recorded on the entry as well. +func writeUsageDeltas(tx WriteTxn, idx uint64, usageDeltas map[string]int) error { + for id, delta := range usageDeltas { + u, err := tx.First("usage", "id", id) + if err != nil { + return fmt.Errorf("failed to retrieve existing usage entry: %s", err) + } + + if u == nil { + if delta < 0 { + return fmt.Errorf("failed to insert usage entry for %q: delta will cause a negative count", id) + } + err := tx.Insert("usage", &UsageEntry{ + ID: id, + Count: delta, + Index: idx, + }) + if err != nil { + return fmt.Errorf("failed to update usage entry: %s", err) + } + } else if cur, ok := u.(*UsageEntry); ok { + if cur.Count+delta < 0 { + return fmt.Errorf("failed to insert usage entry for %q: delta will cause a negative count", id) + } + err := tx.Insert("usage", &UsageEntry{ + ID: id, + Count: cur.Count + delta, + Index: idx, + }) + if err != nil { + return fmt.Errorf("failed to update usage entry: %s", err) + } + } + } + return nil +} + +// NodeCount returns the latest seen Raft index, a count of the number of nodes +// registered, and any errors. 
+func (s *Store) NodeCount() (uint64, int, error) { + tx := s.db.ReadTxn() + defer tx.Abort() + + nodeUsage, err := firstUsageEntry(tx, "nodes") + if err != nil { + return 0, 0, fmt.Errorf("failed nodes lookup: %s", err) + } + return nodeUsage.Index, nodeUsage.Count, nil +} + +// ServiceUsage returns the latest seen Raft index, a compiled set of service +// usage data, and any errors. +func (s *Store) ServiceUsage() (uint64, ServiceUsage, error) { + tx := s.db.ReadTxn() + defer tx.Abort() + + serviceInstances, err := firstUsageEntry(tx, servicesTableName) + if err != nil { + return 0, ServiceUsage{}, fmt.Errorf("failed services lookup: %s", err) + } + + services, err := firstUsageEntry(tx, serviceNamesUsageTable) + if err != nil { + return 0, ServiceUsage{}, fmt.Errorf("failed services lookup: %s", err) + } + + usage := ServiceUsage{ + ServiceInstances: serviceInstances.Count, + Services: services.Count, + } + results, err := compileEnterpriseUsage(tx, usage) + if err != nil { + return 0, ServiceUsage{}, fmt.Errorf("failed services lookup: %s", err) + } + + return serviceInstances.Index, results, nil +} + +func firstUsageEntry(tx ReadTxn, id string) (*UsageEntry, error) { + usage, err := tx.First("usage", "id", id) + if err != nil { + return nil, err + } + + // If no elements have been inserted, the usage entry will not exist. We + // return a valid value so that we can be certain the return value is not nil + // when no error has occurred. 
+ if usage == nil { + return &UsageEntry{ID: id, Count: 0}, nil + } + + realUsage, ok := usage.(*UsageEntry) + if !ok { + return nil, fmt.Errorf("failed usage lookup: type %T is not *UsageEntry", usage) + } + + return realUsage, nil +} diff --git a/agent/consul/state/usage_oss.go b/agent/consul/state/usage_oss.go new file mode 100644 index 0000000000..f35576abf5 --- /dev/null +++ b/agent/consul/state/usage_oss.go @@ -0,0 +1,15 @@ +// +build !consulent + +package state + +import ( + memdb "github.com/hashicorp/go-memdb" +) + +type EnterpriseServiceUsage struct{} + +func addEnterpriseServiceUsage(map[string]int, memdb.Change, uniqueServiceState) {} + +func compileEnterpriseUsage(tx ReadTxn, usage ServiceUsage) (ServiceUsage, error) { + return usage, nil +} diff --git a/agent/consul/state/usage_oss_test.go b/agent/consul/state/usage_oss_test.go new file mode 100644 index 0000000000..b441c71635 --- /dev/null +++ b/agent/consul/state/usage_oss_test.go @@ -0,0 +1,25 @@ +// +build !consulent + +package state + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestStateStore_Usage_ServiceUsage(t *testing.T) { + s := testStateStore(t) + + testRegisterNode(t, s, 0, "node1") + testRegisterNode(t, s, 1, "node2") + testRegisterService(t, s, 8, "node1", "service1") + testRegisterService(t, s, 9, "node2", "service1") + testRegisterService(t, s, 10, "node2", "service2") + + idx, usage, err := s.ServiceUsage() + require.NoError(t, err) + require.Equal(t, idx, uint64(10)) + require.Equal(t, 2, usage.Services) + require.Equal(t, 3, usage.ServiceInstances) +} diff --git a/agent/consul/state/usage_test.go b/agent/consul/state/usage_test.go new file mode 100644 index 0000000000..f608d7d75c --- /dev/null +++ b/agent/consul/state/usage_test.go @@ -0,0 +1,194 @@ +package state + +import ( + "testing" + + "github.com/hashicorp/consul/agent/structs" + memdb "github.com/hashicorp/go-memdb" + "github.com/stretchr/testify/require" +) + +func 
TestStateStore_Usage_NodeCount(t *testing.T) { + s := testStateStore(t) + + // No nodes have been registered, and thus no usage entry exists + idx, count, err := s.NodeCount() + require.NoError(t, err) + require.Equal(t, idx, uint64(0)) + require.Equal(t, count, 0) + + testRegisterNode(t, s, 0, "node1") + testRegisterNode(t, s, 1, "node2") + + idx, count, err = s.NodeCount() + require.NoError(t, err) + require.Equal(t, idx, uint64(1)) + require.Equal(t, count, 2) +} + +func TestStateStore_Usage_NodeCount_Delete(t *testing.T) { + s := testStateStore(t) + + testRegisterNode(t, s, 0, "node1") + testRegisterNode(t, s, 1, "node2") + + idx, count, err := s.NodeCount() + require.NoError(t, err) + require.Equal(t, idx, uint64(1)) + require.Equal(t, count, 2) + + require.NoError(t, s.DeleteNode(2, "node2")) + idx, count, err = s.NodeCount() + require.NoError(t, err) + require.Equal(t, idx, uint64(2)) + require.Equal(t, count, 1) +} + +func TestStateStore_Usage_ServiceUsageEmpty(t *testing.T) { + s := testStateStore(t) + + // No services have been registered, and thus no usage entry exists + idx, usage, err := s.ServiceUsage() + require.NoError(t, err) + require.Equal(t, idx, uint64(0)) + require.Equal(t, usage.Services, 0) + require.Equal(t, usage.ServiceInstances, 0) +} + +func TestStateStore_Usage_Restore(t *testing.T) { + s := testStateStore(t) + restore := s.Restore() + restore.Registration(9, &structs.RegisterRequest{ + Node: "test-node", + Service: &structs.NodeService{ + ID: "mysql", + Service: "mysql", + Port: 8080, + Address: "198.18.0.2", + }, + }) + require.NoError(t, restore.Commit()) + + idx, count, err := s.NodeCount() + require.NoError(t, err) + require.Equal(t, idx, uint64(9)) + require.Equal(t, count, 1) +} + +func TestStateStore_Usage_updateUsage_Underflow(t *testing.T) { + s := testStateStore(t) + txn := s.db.WriteTxn(1) + + // A single delete change will cause a negative count + changes := Changes{ + Index: 1, + Changes: memdb.Changes{ + { + Table: 
"nodes", + Before: &structs.Node{}, + After: nil, + }, + }, + } + + err := updateUsage(txn, changes) + require.Error(t, err) + require.Contains(t, err.Error(), "negative count") + + // A insert a change to create a usage entry + changes = Changes{ + Index: 1, + Changes: memdb.Changes{ + { + Table: "nodes", + Before: nil, + After: &structs.Node{}, + }, + }, + } + + err = updateUsage(txn, changes) + require.NoError(t, err) + + // Two deletes will cause a negative count now + changes = Changes{ + Index: 1, + Changes: memdb.Changes{ + { + Table: "nodes", + Before: &structs.Node{}, + After: nil, + }, + { + Table: "nodes", + Before: &structs.Node{}, + After: nil, + }, + }, + } + + err = updateUsage(txn, changes) + require.Error(t, err) + require.Contains(t, err.Error(), "negative count") +} + +func TestStateStore_Usage_ServiceUsage_updatingServiceName(t *testing.T) { + s := testStateStore(t) + testRegisterNode(t, s, 1, "node1") + testRegisterService(t, s, 1, "node1", "service1") + + t.Run("rename service with a single instance", func(t *testing.T) { + svc := &structs.NodeService{ + ID: "service1", + Service: "after", + Address: "1.1.1.1", + Port: 1111, + } + require.NoError(t, s.EnsureService(2, "node1", svc)) + + // We renamed a service with a single instance, so we maintain 1 service. 
+ idx, usage, err := s.ServiceUsage() + require.NoError(t, err) + require.Equal(t, idx, uint64(2)) + require.Equal(t, usage.Services, 1) + require.Equal(t, usage.ServiceInstances, 1) + }) + + t.Run("rename service with a multiple instances", func(t *testing.T) { + svc2 := &structs.NodeService{ + ID: "service2", + Service: "before", + Address: "1.1.1.2", + Port: 1111, + } + require.NoError(t, s.EnsureService(3, "node1", svc2)) + + svc3 := &structs.NodeService{ + ID: "service3", + Service: "before", + Address: "1.1.1.3", + Port: 1111, + } + require.NoError(t, s.EnsureService(4, "node1", svc3)) + + idx, usage, err := s.ServiceUsage() + require.NoError(t, err) + require.Equal(t, idx, uint64(4)) + require.Equal(t, usage.Services, 2) + require.Equal(t, usage.ServiceInstances, 3) + + update := &structs.NodeService{ + ID: "service2", + Service: "another-name", + Address: "1.1.1.2", + Port: 1111, + } + require.NoError(t, s.EnsureService(5, "node1", update)) + + idx, usage, err = s.ServiceUsage() + require.NoError(t, err) + require.Equal(t, idx, uint64(5)) + require.Equal(t, usage.Services, 3) + require.Equal(t, usage.ServiceInstances, 3) + }) +} diff --git a/agent/consul/stream/event_publisher.go b/agent/consul/stream/event_publisher.go index 9dfb8bf9e5..815a68a261 100644 --- a/agent/consul/stream/event_publisher.go +++ b/agent/consul/stream/event_publisher.go @@ -61,7 +61,11 @@ type changeEvents struct { // SnapshotHandlers is a mapping of Topic to a function which produces a snapshot // of events for the SubscribeRequest. Events are appended to the snapshot using SnapshotAppender. // The nil Topic is reserved and should not be used. -type SnapshotHandlers map[Topic]func(*SubscribeRequest, SnapshotAppender) (index uint64, err error) +type SnapshotHandlers map[Topic]SnapshotFunc + +// SnapshotFunc builds a snapshot for the subscription request, and appends the +// events to the Snapshot using SnapshotAppender. 
+type SnapshotFunc func(SubscribeRequest, SnapshotAppender) (index uint64, err error) // SnapshotAppender appends groups of events to create a Snapshot of state. type SnapshotAppender interface { diff --git a/agent/consul/stream/event_publisher_test.go b/agent/consul/stream/event_publisher_test.go index 4deeb1503e..4448e68454 100644 --- a/agent/consul/stream/event_publisher_test.go +++ b/agent/consul/stream/event_publisher_test.go @@ -58,7 +58,7 @@ func TestEventPublisher_PublishChangesAndSubscribe_WithSnapshot(t *testing.T) { func newTestSnapshotHandlers() SnapshotHandlers { return SnapshotHandlers{ - testTopic: func(req *SubscribeRequest, buf SnapshotAppender) (uint64, error) { + testTopic: func(req SubscribeRequest, buf SnapshotAppender) (uint64, error) { if req.Topic != testTopic { return 0, fmt.Errorf("unexpected topic: %v", req.Topic) } @@ -117,7 +117,7 @@ func TestEventPublisher_ShutdownClosesSubscriptions(t *testing.T) { t.Cleanup(cancel) handlers := newTestSnapshotHandlers() - fn := func(req *SubscribeRequest, buf SnapshotAppender) (uint64, error) { + fn := func(req SubscribeRequest, buf SnapshotAppender) (uint64, error) { return 0, nil } handlers[intTopic(22)] = fn diff --git a/agent/consul/stream/event_snapshot.go b/agent/consul/stream/event_snapshot.go index 12a52ea37b..2f0d276f78 100644 --- a/agent/consul/stream/event_snapshot.go +++ b/agent/consul/stream/event_snapshot.go @@ -18,8 +18,6 @@ type eventSnapshot struct { snapBuffer *eventBuffer } -type snapFunc func(req *SubscribeRequest, buf SnapshotAppender) (uint64, error) - // newEventSnapshot creates a snapshot buffer based on the subscription request. // The current buffer head for the topic requested is passed so that once the // snapshot is complete and has been delivered into the buffer, any events @@ -27,7 +25,7 @@ type snapFunc func(req *SubscribeRequest, buf SnapshotAppender) (uint64, error) // missed. 
Once the snapshot is delivered the topic buffer is spliced onto the // snapshot buffer so that subscribers will naturally follow from the snapshot // to wait for any subsequent updates. -func newEventSnapshot(req *SubscribeRequest, topicBufferHead *bufferItem, fn snapFunc) *eventSnapshot { +func newEventSnapshot(req *SubscribeRequest, topicBufferHead *bufferItem, fn SnapshotFunc) *eventSnapshot { buf := newEventBuffer() s := &eventSnapshot{ Head: buf.Head(), @@ -35,7 +33,7 @@ func newEventSnapshot(req *SubscribeRequest, topicBufferHead *bufferItem, fn sna } go func() { - idx, err := fn(req, s.snapBuffer) + idx, err := fn(*req, s.snapBuffer) if err != nil { s.snapBuffer.AppendItem(&bufferItem{Err: err}) return diff --git a/agent/consul/stream/event_snapshot_test.go b/agent/consul/stream/event_snapshot_test.go index 5e62e7f94f..c888e844ab 100644 --- a/agent/consul/stream/event_snapshot_test.go +++ b/agent/consul/stream/event_snapshot_test.go @@ -161,8 +161,8 @@ func genSequentialIDs(start, end int) []string { return ids } -func testHealthConsecutiveSnapshotFn(size int, index uint64) snapFunc { - return func(req *SubscribeRequest, buf SnapshotAppender) (uint64, error) { +func testHealthConsecutiveSnapshotFn(size int, index uint64) SnapshotFunc { + return func(req SubscribeRequest, buf SnapshotAppender) (uint64, error) { for i := 0; i < size; i++ { // Event content is arbitrary we are just using Health because it's the // first type defined. 
We just want a set of things with consecutive diff --git a/agent/consul/usagemetrics/usagemetrics.go b/agent/consul/usagemetrics/usagemetrics.go new file mode 100644 index 0000000000..18b36cfd69 --- /dev/null +++ b/agent/consul/usagemetrics/usagemetrics.go @@ -0,0 +1,135 @@ +package usagemetrics + +import ( + "context" + "errors" + "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/logging" + "github.com/hashicorp/go-hclog" +) + +// Config holds the settings for various parameters for the +// UsageMetricsReporter +type Config struct { + logger hclog.Logger + metricLabels []metrics.Label + stateProvider StateProvider + tickerInterval time.Duration +} + +// WithDatacenter adds the datacenter as a label to all metrics emitted by the +// UsageMetricsReporter +func (c *Config) WithDatacenter(dc string) *Config { + c.metricLabels = append(c.metricLabels, metrics.Label{Name: "datacenter", Value: dc}) + return c +} + +// WithLogger takes a logger and creates a new, named sub-logger to use when +// running +func (c *Config) WithLogger(logger hclog.Logger) *Config { + c.logger = logger.Named(logging.UsageMetrics) + return c +} + +// WithReportingInterval specifies the interval on which UsageMetricsReporter +// should emit metrics +func (c *Config) WithReportingInterval(dur time.Duration) *Config { + c.tickerInterval = dur + return c +} + +func (c *Config) WithStateProvider(sp StateProvider) *Config { + c.stateProvider = sp + return c +} + +// StateProvider defines an interface for retrieving a state.Store handle. In +// non-test code, this is satisfied by the fsm.FSM struct. +type StateProvider interface { + State() *state.Store +} + +// UsageMetricsReporter provides functionality for emitting usage metrics into +// the metrics stream. This makes it essentially a translation layer +// between the state store and metrics stream. 
+type UsageMetricsReporter struct { + logger hclog.Logger + metricLabels []metrics.Label + stateProvider StateProvider + tickerInterval time.Duration +} + +func NewUsageMetricsReporter(cfg *Config) (*UsageMetricsReporter, error) { + if cfg.stateProvider == nil { + return nil, errors.New("must provide a StateProvider to usage reporter") + } + + if cfg.logger == nil { + cfg.logger = hclog.NewNullLogger() + } + + if cfg.tickerInterval == 0 { + // Metrics are aggregated every 10 seconds, so we default to that. + cfg.tickerInterval = 10 * time.Second + } + + u := &UsageMetricsReporter{ + logger: cfg.logger, + stateProvider: cfg.stateProvider, + metricLabels: cfg.metricLabels, + tickerInterval: cfg.tickerInterval, + } + + return u, nil +} + +// Run must be run in a goroutine, and can be stopped by cancelling the +// passed in context +func (u *UsageMetricsReporter) Run(ctx context.Context) { + ticker := time.NewTicker(u.tickerInterval) + for { + select { + case <-ctx.Done(): + u.logger.Debug("usage metrics reporter shutting down") + ticker.Stop() + return + case <-ticker.C: + u.runOnce() + } + } +} + +func (u *UsageMetricsReporter) runOnce() { + state := u.stateProvider.State() + _, nodes, err := state.NodeCount() + if err != nil { + u.logger.Warn("failed to retrieve nodes from state store", "error", err) + } + metrics.SetGaugeWithLabels( + []string{"consul", "state", "nodes"}, + float32(nodes), + u.metricLabels, + ) + + _, serviceUsage, err := state.ServiceUsage() + if err != nil { + u.logger.Warn("failed to retrieve services from state store", "error", err) + } + + metrics.SetGaugeWithLabels( + []string{"consul", "state", "services"}, + float32(serviceUsage.Services), + u.metricLabels, + ) + + metrics.SetGaugeWithLabels( + []string{"consul", "state", "service_instances"}, + float32(serviceUsage.ServiceInstances), + u.metricLabels, + ) + + u.emitEnterpriseUsage(serviceUsage) +} diff --git a/agent/consul/usagemetrics/usagemetrics_oss.go 
b/agent/consul/usagemetrics/usagemetrics_oss.go new file mode 100644 index 0000000000..37d71b83f8 --- /dev/null +++ b/agent/consul/usagemetrics/usagemetrics_oss.go @@ -0,0 +1,7 @@ +// +build !consulent + +package usagemetrics + +import "github.com/hashicorp/consul/agent/consul/state" + +func (u *UsageMetricsReporter) emitEnterpriseUsage(state.ServiceUsage) {} diff --git a/agent/consul/usagemetrics/usagemetrics_oss_test.go b/agent/consul/usagemetrics/usagemetrics_oss_test.go new file mode 100644 index 0000000000..3d5263c0b2 --- /dev/null +++ b/agent/consul/usagemetrics/usagemetrics_oss_test.go @@ -0,0 +1,9 @@ +// +build !consulent + +package usagemetrics + +import "github.com/hashicorp/consul/agent/consul/state" + +func newStateStore() (*state.Store, error) { + return state.NewStateStore(nil) +} diff --git a/agent/consul/usagemetrics/usagemetrics_test.go b/agent/consul/usagemetrics/usagemetrics_test.go new file mode 100644 index 0000000000..c293cbb1de --- /dev/null +++ b/agent/consul/usagemetrics/usagemetrics_test.go @@ -0,0 +1,128 @@ +package usagemetrics + +import ( + "testing" + "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/sdk/testutil" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +type mockStateProvider struct { + mock.Mock +} + +func (m *mockStateProvider) State() *state.Store { + retValues := m.Called() + return retValues.Get(0).(*state.Store) +} + +func TestUsageReporter_Run(t *testing.T) { + type testCase struct { + modfiyStateStore func(t *testing.T, s *state.Store) + expectedGauges map[string]metrics.GaugeValue + } + cases := map[string]testCase{ + "empty-state": { + expectedGauges: map[string]metrics.GaugeValue{ + "consul.usage.test.consul.state.nodes;datacenter=dc1": { + Name: "consul.usage.test.consul.state.nodes", + Value: 0, + Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}}, + }, + 
"consul.usage.test.consul.state.services;datacenter=dc1": { + Name: "consul.usage.test.consul.state.services", + Value: 0, + Labels: []metrics.Label{ + {Name: "datacenter", Value: "dc1"}, + }, + }, + "consul.usage.test.consul.state.service_instances;datacenter=dc1": { + Name: "consul.usage.test.consul.state.service_instances", + Value: 0, + Labels: []metrics.Label{ + {Name: "datacenter", Value: "dc1"}, + }, + }, + }, + }, + "nodes-and-services": { + modfiyStateStore: func(t *testing.T, s *state.Store) { + require.Nil(t, s.EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.1"})) + require.Nil(t, s.EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.2"})) + require.Nil(t, s.EnsureNode(3, &structs.Node{Node: "baz", Address: "127.0.0.2"})) + + // Typical services and some consul services spread across two nodes + require.Nil(t, s.EnsureService(4, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: nil, Address: "", Port: 5000})) + require.Nil(t, s.EnsureService(5, "bar", &structs.NodeService{ID: "api", Service: "api", Tags: nil, Address: "", Port: 5000})) + require.Nil(t, s.EnsureService(6, "foo", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil})) + require.Nil(t, s.EnsureService(7, "bar", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil})) + }, + expectedGauges: map[string]metrics.GaugeValue{ + "consul.usage.test.consul.state.nodes;datacenter=dc1": { + Name: "consul.usage.test.consul.state.nodes", + Value: 3, + Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}}, + }, + "consul.usage.test.consul.state.services;datacenter=dc1": { + Name: "consul.usage.test.consul.state.services", + Value: 3, + Labels: []metrics.Label{ + {Name: "datacenter", Value: "dc1"}, + }, + }, + "consul.usage.test.consul.state.service_instances;datacenter=dc1": { + Name: "consul.usage.test.consul.state.service_instances", + Value: 4, + Labels: []metrics.Label{ + {Name: "datacenter", Value: "dc1"}, + }, + }, + }, + }, + } + + for name, 
tcase := range cases { + t.Run(name, func(t *testing.T) { + // Only have a single interval for the test + sink := metrics.NewInmemSink(1*time.Minute, 1*time.Minute) + cfg := metrics.DefaultConfig("consul.usage.test") + cfg.EnableHostname = false + metrics.NewGlobal(cfg, sink) + + mockStateProvider := &mockStateProvider{} + s, err := newStateStore() + require.NoError(t, err) + if tcase.modfiyStateStore != nil { + tcase.modfiyStateStore(t, s) + } + mockStateProvider.On("State").Return(s) + + reporter, err := NewUsageMetricsReporter( + new(Config). + WithStateProvider(mockStateProvider). + WithLogger(testutil.Logger(t)). + WithDatacenter("dc1"), + ) + require.NoError(t, err) + + reporter.runOnce() + + intervals := sink.Data() + require.Len(t, intervals, 1) + intv := intervals[0] + + // Range over the expected values instead of just doing an Equal + // comparison on the maps because of different metrics emitted between + // OSS and Ent. The enterprise tests have a full equality comparison on + // the maps. + for key, expected := range tcase.expectedGauges { + require.Equal(t, expected, intv.Gauges[key]) + } + }) + } +} diff --git a/agent/http.go b/agent/http.go index bac6c172c2..dc9438230d 100644 --- a/agent/http.go +++ b/agent/http.go @@ -80,16 +80,14 @@ func (e ForbiddenError) Error() string { } // HTTPServer provides an HTTP api for an agent. +// +// TODO: rename this struct to something more appropriate. It is an http.Handler, +// request router or multiplexer, but it is not a Server. type HTTPServer struct { - // TODO(dnephin): remove Server field, it is not used by any of the HTTPServer methods - Server *http.Server - ln net.Listener agent *Agent denylist *Denylist - - // proto is filled by the agent to "http" or "https". 
- proto string } + type templatedFile struct { templated *bytes.Reader name string diff --git a/agent/http_test.go b/agent/http_test.go index 6574b89180..36ecf387b3 100644 --- a/agent/http_test.go +++ b/agent/http_test.go @@ -1353,7 +1353,7 @@ func TestHTTPServer_HandshakeTimeout(t *testing.T) { // Connect to it with a plain TCP client that doesn't attempt to send HTTP or // complete a TLS handshake. - conn, err := net.Dial("tcp", a.srv.ln.Addr().String()) + conn, err := net.Dial("tcp", a.HTTPAddr()) require.NoError(t, err) defer conn.Close() @@ -1413,7 +1413,7 @@ func TestRPC_HTTPSMaxConnsPerClient(t *testing.T) { }) defer a.Shutdown() - addr := a.srv.ln.Addr() + addr := a.HTTPAddr() assertConn := func(conn net.Conn, wantOpen bool) { retry.Run(t, func(r *retry.R) { @@ -1433,21 +1433,21 @@ func TestRPC_HTTPSMaxConnsPerClient(t *testing.T) { } // Connect to the server with bare TCP - conn1, err := net.DialTimeout("tcp", addr.String(), time.Second) + conn1, err := net.DialTimeout("tcp", addr, time.Second) require.NoError(t, err) defer conn1.Close() assertConn(conn1, true) // Two conns should succeed - conn2, err := net.DialTimeout("tcp", addr.String(), time.Second) + conn2, err := net.DialTimeout("tcp", addr, time.Second) require.NoError(t, err) defer conn2.Close() assertConn(conn2, true) // Third should succeed negotiating TCP handshake... 
- conn3, err := net.DialTimeout("tcp", addr.String(), time.Second) + conn3, err := net.DialTimeout("tcp", addr, time.Second) require.NoError(t, err) defer conn3.Close() @@ -1460,7 +1460,7 @@ func TestRPC_HTTPSMaxConnsPerClient(t *testing.T) { require.NoError(t, a.reloadConfigInternal(&newCfg)) // Now another conn should be allowed - conn4, err := net.DialTimeout("tcp", addr.String(), time.Second) + conn4, err := net.DialTimeout("tcp", addr, time.Second) require.NoError(t, err) defer conn4.Close() diff --git a/agent/setup.go b/agent/setup.go index 4ed2823222..d5a2d063ea 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -1,7 +1,6 @@ package agent import ( - "context" "fmt" "io" "net" @@ -10,11 +9,9 @@ import ( autoconf "github.com/hashicorp/consul/agent/auto-config" "github.com/hashicorp/consul/agent/cache" - certmon "github.com/hashicorp/consul/agent/cert-monitor" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/pool" "github.com/hashicorp/consul/agent/router" - "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/ipaddr" "github.com/hashicorp/consul/lib" @@ -82,42 +79,26 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer) (BaseDeps, error) d.RuntimeConfig = cfg d.Tokens = new(token.Store) + // cache-types are not registered yet, but they won't be used until the components are started. d.Cache = cache.New(cfg.Cache) d.ConnPool = newConnPool(cfg, d.Logger, d.TLSConfigurator) - deferredAC := &deferredAutoConfig{} - d.Router = router.NewRouter(d.Logger, cfg.Datacenter, fmt.Sprintf("%s.%s", cfg.NodeName, cfg.Datacenter)) - cmConf := new(certmon.Config). - WithCache(d.Cache). - WithTLSConfigurator(d.TLSConfigurator). - WithDNSSANs(cfg.AutoConfig.DNSSANs). - WithIPSANs(cfg.AutoConfig.IPSANs). - WithDatacenter(cfg.Datacenter). - WithNodeName(cfg.NodeName). - WithFallback(deferredAC.autoConfigFallbackTLS). - WithLogger(d.Logger.Named(logging.AutoConfig)). 
- WithTokens(d.Tokens). - WithPersistence(deferredAC.autoConfigPersist) - acCertMon, err := certmon.New(cmConf) - if err != nil { - return d, err - } - acConf := autoconf.Config{ - DirectRPC: d.ConnPool, - Logger: d.Logger, - CertMonitor: acCertMon, - Loader: configLoader, + DirectRPC: d.ConnPool, + Logger: d.Logger, + Loader: configLoader, + ServerProvider: d.Router, + TLSConfigurator: d.TLSConfigurator, + Cache: d.Cache, + Tokens: d.Tokens, } d.AutoConfig, err = autoconf.New(acConf) if err != nil { return d, err } - // TODO: can this cyclic dependency be un-cycled? - deferredAC.autoConf = d.AutoConfig return d, nil } @@ -144,21 +125,3 @@ func newConnPool(config *config.RuntimeConfig, logger hclog.Logger, tls *tlsutil } return pool } - -type deferredAutoConfig struct { - autoConf *autoconf.AutoConfig // TODO: use an interface -} - -func (a *deferredAutoConfig) autoConfigFallbackTLS(ctx context.Context) (*structs.SignedResponse, error) { - if a.autoConf == nil { - return nil, fmt.Errorf("AutoConfig manager has not been created yet") - } - return a.autoConf.FallbackTLS(ctx) -} - -func (a *deferredAutoConfig) autoConfigPersist(resp *structs.SignedResponse) error { - if a.autoConf == nil { - return fmt.Errorf("AutoConfig manager has not been created yet") - } - return a.autoConf.RecordUpdatedCerts(resp) -} diff --git a/agent/structs/config_entry.go b/agent/structs/config_entry.go index d377f83a72..b1ffd3e0d6 100644 --- a/agent/structs/config_entry.go +++ b/agent/structs/config_entry.go @@ -10,6 +10,7 @@ import ( "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/lib/decode" "github.com/hashicorp/go-msgpack/codec" + "github.com/hashicorp/go-multierror" "github.com/mitchellh/hashstructure" "github.com/mitchellh/mapstructure" ) @@ -43,6 +44,7 @@ type ConfigEntry interface { CanRead(acl.Authorizer) bool CanWrite(acl.Authorizer) bool + GetMeta() map[string]string GetEnterpriseMeta() *EnterpriseMeta GetRaftIndex() *RaftIndex } @@ -64,6 +66,7 @@ type 
ServiceConfigEntry struct { // // Connect ConnectConfiguration + Meta map[string]string `json:",omitempty"` EnterpriseMeta `hcl:",squash" mapstructure:",squash"` RaftIndex } @@ -80,6 +83,13 @@ func (e *ServiceConfigEntry) GetName() string { return e.Name } +func (e *ServiceConfigEntry) GetMeta() map[string]string { + if e == nil { + return nil + } + return e.Meta +} + func (e *ServiceConfigEntry) Normalize() error { if e == nil { return fmt.Errorf("config entry is nil") @@ -94,7 +104,7 @@ func (e *ServiceConfigEntry) Normalize() error { } func (e *ServiceConfigEntry) Validate() error { - return nil + return validateConfigEntryMeta(e.Meta) } func (e *ServiceConfigEntry) CanRead(authz acl.Authorizer) bool { @@ -137,6 +147,7 @@ type ProxyConfigEntry struct { MeshGateway MeshGatewayConfig `json:",omitempty" alias:"mesh_gateway"` Expose ExposeConfig `json:",omitempty"` + Meta map[string]string `json:",omitempty"` EnterpriseMeta `hcl:",squash" mapstructure:",squash"` RaftIndex } @@ -153,6 +164,13 @@ func (e *ProxyConfigEntry) GetName() string { return e.Name } +func (e *ProxyConfigEntry) GetMeta() map[string]string { + if e == nil { + return nil + } + return e.Meta +} + func (e *ProxyConfigEntry) Normalize() error { if e == nil { return fmt.Errorf("config entry is nil") @@ -175,6 +193,10 @@ func (e *ProxyConfigEntry) Validate() error { return fmt.Errorf("invalid name (%q), only %q is supported", e.Name, ProxyConfigGlobal) } + if err := validateConfigEntryMeta(e.Meta); err != nil { + return err + } + return e.validateEnterpriseMeta() } @@ -666,4 +688,38 @@ func (c *ConfigEntryResponse) UnmarshalBinary(data []byte) error { type ConfigEntryKindName struct { Kind string Name string + EnterpriseMeta +} + +func NewConfigEntryKindName(kind, name string, entMeta *EnterpriseMeta) ConfigEntryKindName { + ret := ConfigEntryKindName{ + Kind: kind, + Name: name, + } + if entMeta == nil { + entMeta = DefaultEnterpriseMeta() + } + + ret.EnterpriseMeta = *entMeta + 
ret.EnterpriseMeta.Normalize() + return ret +} + +func validateConfigEntryMeta(meta map[string]string) error { + var err error + if len(meta) > metaMaxKeyPairs { + err = multierror.Append(err, fmt.Errorf( + "Meta exceeds maximum element count %d", metaMaxKeyPairs)) + } + for k, v := range meta { + if len(k) > metaKeyMaxLength { + err = multierror.Append(err, fmt.Errorf( + "Meta key %q exceeds maximum length %d", k, metaKeyMaxLength)) + } + if len(v) > metaValueMaxLength { + err = multierror.Append(err, fmt.Errorf( + "Meta value for key %q exceeds maximum length %d", k, metaValueMaxLength)) + } + } + return err } diff --git a/agent/structs/config_entry_discoverychain.go b/agent/structs/config_entry_discoverychain.go index 0317f144a4..9c0a509197 100644 --- a/agent/structs/config_entry_discoverychain.go +++ b/agent/structs/config_entry_discoverychain.go @@ -69,6 +69,7 @@ type ServiceRouterConfigEntry struct { // the default service. Routes []ServiceRoute + Meta map[string]string `json:",omitempty"` EnterpriseMeta `hcl:",squash" mapstructure:",squash"` RaftIndex } @@ -85,6 +86,13 @@ func (e *ServiceRouterConfigEntry) GetName() string { return e.Name } +func (e *ServiceRouterConfigEntry) GetMeta() map[string]string { + if e == nil { + return nil + } + return e.Meta +} + func (e *ServiceRouterConfigEntry) Normalize() error { if e == nil { return fmt.Errorf("config entry is nil") @@ -120,6 +128,10 @@ func (e *ServiceRouterConfigEntry) Validate() error { return fmt.Errorf("Name is required") } + if err := validateConfigEntryMeta(e.Meta); err != nil { + return err + } + // Technically you can have no explicit routes at all where just the // catch-all is configured for you, but at that point maybe you should just // delete it so it will default? @@ -438,6 +450,7 @@ type ServiceSplitterConfigEntry struct { // to the FIRST split. 
Splits []ServiceSplit + Meta map[string]string `json:",omitempty"` EnterpriseMeta `hcl:",squash" mapstructure:",squash"` RaftIndex } @@ -454,6 +467,13 @@ func (e *ServiceSplitterConfigEntry) GetName() string { return e.Name } +func (e *ServiceSplitterConfigEntry) GetMeta() map[string]string { + if e == nil { + return nil + } + return e.Meta +} + func (e *ServiceSplitterConfigEntry) Normalize() error { if e == nil { return fmt.Errorf("config entry is nil") @@ -492,6 +512,10 @@ func (e *ServiceSplitterConfigEntry) Validate() error { return fmt.Errorf("no splits configured") } + if err := validateConfigEntryMeta(e.Meta); err != nil { + return err + } + const maxScaledWeight = 100 * 100 copyAsKey := func(s ServiceSplit) ServiceSplit { @@ -674,6 +698,7 @@ type ServiceResolverConfigEntry struct { // issuing requests to this upstream service. LoadBalancer *LoadBalancer `json:",omitempty" alias:"load_balancer"` + Meta map[string]string `json:",omitempty"` EnterpriseMeta `hcl:",squash" mapstructure:",squash"` RaftIndex } @@ -746,6 +771,13 @@ func (e *ServiceResolverConfigEntry) GetName() string { return e.Name } +func (e *ServiceResolverConfigEntry) GetMeta() map[string]string { + if e == nil { + return nil + } + return e.Meta +} + func (e *ServiceResolverConfigEntry) Normalize() error { if e == nil { return fmt.Errorf("config entry is nil") @@ -763,6 +795,10 @@ func (e *ServiceResolverConfigEntry) Validate() error { return fmt.Errorf("Name is required") } + if err := validateConfigEntryMeta(e.Meta); err != nil { + return err + } + if len(e.Subsets) > 0 { for name := range e.Subsets { if name == "" { diff --git a/agent/structs/config_entry_gateways.go b/agent/structs/config_entry_gateways.go index a5557dbafa..61b9930823 100644 --- a/agent/structs/config_entry_gateways.go +++ b/agent/structs/config_entry_gateways.go @@ -27,6 +27,7 @@ type IngressGatewayConfigEntry struct { // what services to associated to those ports. 
Listeners []IngressListener + Meta map[string]string `json:",omitempty"` EnterpriseMeta `hcl:",squash" mapstructure:",squash"` RaftIndex } @@ -73,6 +74,7 @@ type IngressService struct { // using a "tcp" listener. Hosts []string + Meta map[string]string `json:",omitempty"` EnterpriseMeta `hcl:",squash" mapstructure:",squash"` } @@ -93,6 +95,13 @@ func (e *IngressGatewayConfigEntry) GetName() string { return e.Name } +func (e *IngressGatewayConfigEntry) GetMeta() map[string]string { + if e == nil { + return nil + } + return e.Meta +} + func (e *IngressGatewayConfigEntry) Normalize() error { if e == nil { return fmt.Errorf("config entry is nil") @@ -121,6 +130,10 @@ func (e *IngressGatewayConfigEntry) Normalize() error { } func (e *IngressGatewayConfigEntry) Validate() error { + if err := validateConfigEntryMeta(e.Meta); err != nil { + return err + } + validProtocols := map[string]bool{ "tcp": true, "http": true, @@ -283,6 +296,7 @@ type TerminatingGatewayConfigEntry struct { Name string Services []LinkedService + Meta map[string]string `json:",omitempty"` EnterpriseMeta `hcl:",squash" mapstructure:",squash"` RaftIndex } @@ -322,6 +336,13 @@ func (e *TerminatingGatewayConfigEntry) GetName() string { return e.Name } +func (e *TerminatingGatewayConfigEntry) GetMeta() map[string]string { + if e == nil { + return nil + } + return e.Meta +} + func (e *TerminatingGatewayConfigEntry) Normalize() error { if e == nil { return fmt.Errorf("config entry is nil") @@ -339,6 +360,10 @@ func (e *TerminatingGatewayConfigEntry) Normalize() error { } func (e *TerminatingGatewayConfigEntry) Validate() error { + if err := validateConfigEntryMeta(e.Meta); err != nil { + return err + } + seen := make(map[ServiceID]bool) for _, svc := range e.Services { diff --git a/agent/structs/config_entry_test.go b/agent/structs/config_entry_test.go index 0b056390fd..e75698c520 100644 --- a/agent/structs/config_entry_test.go +++ b/agent/structs/config_entry_test.go @@ -46,6 +46,10 @@ func 
TestDecodeConfigEntry(t *testing.T) { snake: ` kind = "proxy-defaults" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } config { "foo" = 19 "bar" = "abc" @@ -60,6 +64,10 @@ func TestDecodeConfigEntry(t *testing.T) { camel: ` Kind = "proxy-defaults" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } Config { "foo" = 19 "bar" = "abc" @@ -74,6 +82,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &ProxyConfigEntry{ Kind: "proxy-defaults", Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Config: map[string]interface{}{ "foo": 19, "bar": "abc", @@ -91,6 +103,10 @@ func TestDecodeConfigEntry(t *testing.T) { snake: ` kind = "service-defaults" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } protocol = "http" external_sni = "abc-123" mesh_gateway { @@ -100,6 +116,10 @@ func TestDecodeConfigEntry(t *testing.T) { camel: ` Kind = "service-defaults" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } Protocol = "http" ExternalSNI = "abc-123" MeshGateway { @@ -107,8 +127,12 @@ func TestDecodeConfigEntry(t *testing.T) { } `, expect: &ServiceConfigEntry{ - Kind: "service-defaults", - Name: "main", + Kind: "service-defaults", + Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Protocol: "http", ExternalSNI: "abc-123", MeshGateway: MeshGatewayConfig{ @@ -121,6 +145,10 @@ func TestDecodeConfigEntry(t *testing.T) { snake: ` kind = "service-router" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } routes = [ { match { @@ -200,6 +228,10 @@ func TestDecodeConfigEntry(t *testing.T) { camel: ` Kind = "service-router" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } Routes = [ { Match { @@ -279,6 +311,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &ServiceRouterConfigEntry{ Kind: "service-router", Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Routes: []ServiceRoute{ { Match: &ServiceRouteMatch{ @@ -361,6 +397,10 @@ func 
TestDecodeConfigEntry(t *testing.T) { snake: ` kind = "service-splitter" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } splits = [ { weight = 99.1 @@ -376,6 +416,10 @@ func TestDecodeConfigEntry(t *testing.T) { camel: ` Kind = "service-splitter" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } Splits = [ { Weight = 99.1 @@ -391,6 +435,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &ServiceSplitterConfigEntry{ Kind: ServiceSplitter, Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Splits: []ServiceSplit{ { Weight: 99.1, @@ -409,6 +457,10 @@ func TestDecodeConfigEntry(t *testing.T) { snake: ` kind = "service-resolver" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } default_subset = "v1" connect_timeout = "15s" subsets = { @@ -434,6 +486,10 @@ func TestDecodeConfigEntry(t *testing.T) { camel: ` Kind = "service-resolver" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } DefaultSubset = "v1" ConnectTimeout = "15s" Subsets = { @@ -457,8 +513,12 @@ func TestDecodeConfigEntry(t *testing.T) { } }`, expect: &ServiceResolverConfigEntry{ - Kind: "service-resolver", - Name: "main", + Kind: "service-resolver", + Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, DefaultSubset: "v1", ConnectTimeout: 15 * time.Second, Subsets: map[string]ServiceResolverSubset{ @@ -686,6 +746,10 @@ func TestDecodeConfigEntry(t *testing.T) { snake: ` kind = "ingress-gateway" name = "ingress-web" + meta { + "foo" = "bar" + "gir" = "zim" + } tls { enabled = true @@ -728,6 +792,10 @@ func TestDecodeConfigEntry(t *testing.T) { camel: ` Kind = "ingress-gateway" Name = "ingress-web" + Meta { + "foo" = "bar" + "gir" = "zim" + } TLS { Enabled = true } @@ -768,6 +836,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &IngressGatewayConfigEntry{ Kind: "ingress-gateway", Name: "ingress-web", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, TLS: GatewayTLSConfig{ Enabled: true, }, @@ 
-811,6 +883,10 @@ func TestDecodeConfigEntry(t *testing.T) { snake: ` kind = "terminating-gateway" name = "terminating-gw-west" + meta { + "foo" = "bar" + "gir" = "zim" + } services = [ { name = "payments", @@ -831,6 +907,10 @@ func TestDecodeConfigEntry(t *testing.T) { camel: ` Kind = "terminating-gateway" Name = "terminating-gw-west" + Meta { + "foo" = "bar" + "gir" = "zim" + } Services = [ { Name = "payments", @@ -851,6 +931,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &TerminatingGatewayConfigEntry{ Kind: "terminating-gateway", Name: "terminating-gw-west", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Services: []LinkedService{ { Name: "payments", diff --git a/agent/testagent.go b/agent/testagent.go index 8f05b6ed47..fa3508ffab 100644 --- a/agent/testagent.go +++ b/agent/testagent.go @@ -73,8 +73,7 @@ type TestAgent struct { // It is valid after Start(). dns *DNSServer - // srv is a reference to the first started HTTP endpoint. - // It is valid after Start(). + // srv is an HTTPServer that may be used to test http endpoints. 
srv *HTTPServer // overrides is an hcl config source to use to override otherwise @@ -213,6 +212,8 @@ func (a *TestAgent) Start(t *testing.T) (err error) { // Start the anti-entropy syncer a.Agent.StartSync() + a.srv = &HTTPServer{agent: agent, denylist: NewDenylist(a.config.HTTPBlockEndpoints)} + if err := a.waitForUp(); err != nil { a.Shutdown() t.Logf("Error while waiting for test agent to start: %v", err) @@ -220,7 +221,6 @@ func (a *TestAgent) Start(t *testing.T) (err error) { } a.dns = a.dnsServers[0] - a.srv = a.httpServers[0] return nil } @@ -233,7 +233,7 @@ func (a *TestAgent) waitForUp() error { var retErr error var out structs.IndexedNodes for ; !time.Now().After(deadline); time.Sleep(timer.Wait) { - if len(a.httpServers) == 0 { + if len(a.apiServers.servers) == 0 { retErr = fmt.Errorf("waiting for server") continue // fail, try again } @@ -262,7 +262,7 @@ func (a *TestAgent) waitForUp() error { } else { req := httptest.NewRequest("GET", "/v1/agent/self", nil) resp := httptest.NewRecorder() - _, err := a.httpServers[0].AgentSelf(resp, req) + _, err := a.srv.AgentSelf(resp, req) if acl.IsErrPermissionDenied(err) || resp.Code == 403 { // permission denied is enough to show that the client is // connected to the servers as it would get a 503 if @@ -313,10 +313,13 @@ func (a *TestAgent) DNSAddr() string { } func (a *TestAgent) HTTPAddr() string { - if a.srv == nil { - return "" + var srv apiServer + for _, srv = range a.Agent.apiServers.servers { + if srv.Protocol == "http" { + break + } } - return a.srv.Server.Addr + return srv.Addr.String() } func (a *TestAgent) SegmentAddr(name string) string { diff --git a/agent/token/persistence.go b/agent/token/persistence.go new file mode 100644 index 0000000000..c36b903641 --- /dev/null +++ b/agent/token/persistence.go @@ -0,0 +1,192 @@ +package token + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "github.com/hashicorp/consul/lib/file" +) + +// Logger used by Store.Load to report 
warnings. +type Logger interface { + Warn(msg string, args ...interface{}) +} + +// Config used by Store.Load, which includes tokens and settings for persistence. +type Config struct { + EnablePersistence bool + DataDir string + ACLDefaultToken string + ACLAgentToken string + ACLAgentMasterToken string + ACLReplicationToken string + + EnterpriseConfig +} + +const tokensPath = "acl-tokens.json" + +// Load tokens from Config and optionally from a persisted file in the cfg.DataDir. +// If a token exists in both the persisted file and in the Config a warning will +// be logged and the persisted token will be used. +// +// Failures to load the persisted file will result in loading tokens from the +// config before returning the error. +func (t *Store) Load(cfg Config, logger Logger) error { + t.persistenceLock.RLock() + if !cfg.EnablePersistence { + t.persistence = nil + t.persistenceLock.RUnlock() + loadTokens(t, cfg, persistedTokens{}, logger) + return nil + } + + defer t.persistenceLock.RUnlock() + t.persistence = &fileStore{ + filename: filepath.Join(cfg.DataDir, tokensPath), + logger: logger, + } + return t.persistence.load(t, cfg) +} + +// WithPersistenceLock executes f while holding a lock. If f returns a nil error, +// the tokens in Store will be persisted to the tokens file. Otherwise no +// tokens will be persisted, and the error from f will be returned. +// +// The lock is held so that the writes are persisted before some other thread +// can change the value.
+func (t *Store) WithPersistenceLock(f func() error) error { + t.persistenceLock.Lock() + if t.persistence == nil { + t.persistenceLock.Unlock() + return f() + } + defer t.persistenceLock.Unlock() + return t.persistence.withPersistenceLock(t, f) +} + +type persistedTokens struct { + Replication string `json:"replication,omitempty"` + AgentMaster string `json:"agent_master,omitempty"` + Default string `json:"default,omitempty"` + Agent string `json:"agent,omitempty"` +} + +type fileStore struct { + filename string + logger Logger +} + +func (p *fileStore) load(s *Store, cfg Config) error { + tokens, err := readPersistedFromFile(p.filename) + if err != nil { + p.logger.Warn("unable to load persisted tokens", "error", err) + } + loadTokens(s, cfg, tokens, p.logger) + return err +} + +func loadTokens(s *Store, cfg Config, tokens persistedTokens, logger Logger) { + if tokens.Default != "" { + s.UpdateUserToken(tokens.Default, TokenSourceAPI) + + if cfg.ACLDefaultToken != "" { + logger.Warn("\"default\" token present in both the configuration and persisted token store, using the persisted token") + } + } else { + s.UpdateUserToken(cfg.ACLDefaultToken, TokenSourceConfig) + } + + if tokens.Agent != "" { + s.UpdateAgentToken(tokens.Agent, TokenSourceAPI) + + if cfg.ACLAgentToken != "" { + logger.Warn("\"agent\" token present in both the configuration and persisted token store, using the persisted token") + } + } else { + s.UpdateAgentToken(cfg.ACLAgentToken, TokenSourceConfig) + } + + if tokens.AgentMaster != "" { + s.UpdateAgentMasterToken(tokens.AgentMaster, TokenSourceAPI) + + if cfg.ACLAgentMasterToken != "" { + logger.Warn("\"agent_master\" token present in both the configuration and persisted token store, using the persisted token") + } + } else { + s.UpdateAgentMasterToken(cfg.ACLAgentMasterToken, TokenSourceConfig) + } + + if tokens.Replication != "" { + s.UpdateReplicationToken(tokens.Replication, TokenSourceAPI) + + if cfg.ACLReplicationToken != "" { + 
logger.Warn("\"replication\" token present in both the configuration and persisted token store, using the persisted token") + } + } else { + s.UpdateReplicationToken(cfg.ACLReplicationToken, TokenSourceConfig) + } + + loadEnterpriseTokens(s, cfg) +} + +func readPersistedFromFile(filename string) (persistedTokens, error) { + tokens := persistedTokens{} + + buf, err := ioutil.ReadFile(filename) + switch { + case os.IsNotExist(err): + // non-existence is not an error we care about + return tokens, nil + case err != nil: + return tokens, fmt.Errorf("failed reading tokens file %q: %w", filename, err) + } + + if err := json.Unmarshal(buf, &tokens); err != nil { + return tokens, fmt.Errorf("failed to decode tokens file %q: %w", filename, err) + } + + return tokens, nil +} + +func (p *fileStore) withPersistenceLock(s *Store, f func() error) error { + if err := f(); err != nil { + return err + } + + return p.saveToFile(s) +} + +func (p *fileStore) saveToFile(s *Store) error { + tokens := persistedTokens{} + if tok, source := s.UserTokenAndSource(); tok != "" && source == TokenSourceAPI { + tokens.Default = tok + } + + if tok, source := s.AgentTokenAndSource(); tok != "" && source == TokenSourceAPI { + tokens.Agent = tok + } + + if tok, source := s.AgentMasterTokenAndSource(); tok != "" && source == TokenSourceAPI { + tokens.AgentMaster = tok + } + + if tok, source := s.ReplicationTokenAndSource(); tok != "" && source == TokenSourceAPI { + tokens.Replication = tok + } + + data, err := json.Marshal(tokens) + if err != nil { + p.logger.Warn("failed to persist tokens", "error", err) + return fmt.Errorf("Failed to marshal tokens for persistence: %v", err) + } + + if err := file.WriteAtomicWithPerms(p.filename, data, 0700, 0600); err != nil { + p.logger.Warn("failed to persist tokens", "error", err) + return fmt.Errorf("Failed to persist tokens - %v", err) + } + return nil +} diff --git a/agent/token/persistence_test.go b/agent/token/persistence_test.go new file mode 100644 index 
0000000000..ec8e7e60e3 --- /dev/null +++ b/agent/token/persistence_test.go @@ -0,0 +1,213 @@ +package token + +import ( + "io/ioutil" + "path/filepath" + "testing" + + "github.com/hashicorp/consul/sdk/testutil" + "github.com/hashicorp/go-hclog" + "github.com/stretchr/testify/require" +) + +func TestStore_Load(t *testing.T) { + dataDir := testutil.TempDir(t, "datadir") + tokenFile := filepath.Join(dataDir, tokensPath) + logger := hclog.New(nil) + store := new(Store) + + t.Run("with empty store", func(t *testing.T) { + cfg := Config{ + DataDir: dataDir, + ACLAgentToken: "alfa", + ACLAgentMasterToken: "bravo", + ACLDefaultToken: "charlie", + ACLReplicationToken: "delta", + } + require.NoError(t, store.Load(cfg, logger)) + require.Equal(t, "alfa", store.AgentToken()) + require.Equal(t, "bravo", store.AgentMasterToken()) + require.Equal(t, "charlie", store.UserToken()) + require.Equal(t, "delta", store.ReplicationToken()) + }) + + t.Run("updated from Config", func(t *testing.T) { + cfg := Config{ + DataDir: dataDir, + ACLDefaultToken: "echo", + ACLAgentToken: "foxtrot", + ACLAgentMasterToken: "golf", + ACLReplicationToken: "hotel", + } + // ensures no error for missing persisted tokens file + require.NoError(t, store.Load(cfg, logger)) + require.Equal(t, "echo", store.UserToken()) + require.Equal(t, "foxtrot", store.AgentToken()) + require.Equal(t, "golf", store.AgentMasterToken()) + require.Equal(t, "hotel", store.ReplicationToken()) + }) + + t.Run("with persisted tokens", func(t *testing.T) { + cfg := Config{ + DataDir: dataDir, + ACLDefaultToken: "echo", + ACLAgentToken: "foxtrot", + ACLAgentMasterToken: "golf", + ACLReplicationToken: "hotel", + } + + tokens := `{ + "agent" : "india", + "agent_master" : "juliett", + "default": "kilo", + "replication" : "lima" + }` + + require.NoError(t, ioutil.WriteFile(tokenFile, []byte(tokens), 0600)) + require.NoError(t, store.Load(cfg, logger)) + + // no updates since token persistence is not enabled + require.Equal(t, "echo", 
store.UserToken()) + require.Equal(t, "foxtrot", store.AgentToken()) + require.Equal(t, "golf", store.AgentMasterToken()) + require.Equal(t, "hotel", store.ReplicationToken()) + + cfg.EnablePersistence = true + require.NoError(t, store.Load(cfg, logger)) + + require.Equal(t, "india", store.AgentToken()) + require.Equal(t, "juliett", store.AgentMasterToken()) + require.Equal(t, "kilo", store.UserToken()) + require.Equal(t, "lima", store.ReplicationToken()) + + // check store persistence was enabled + require.NotNil(t, store.persistence) + }) + + t.Run("with persisted tokens, persisted tokens override config", func(t *testing.T) { + tokens := `{ + "agent" : "mike", + "agent_master" : "november", + "default": "oscar", + "replication" : "papa" + }` + + cfg := Config{ + EnablePersistence: true, + DataDir: dataDir, + ACLDefaultToken: "quebec", + ACLAgentToken: "romeo", + ACLAgentMasterToken: "sierra", + ACLReplicationToken: "tango", + } + + require.NoError(t, ioutil.WriteFile(tokenFile, []byte(tokens), 0600)) + require.NoError(t, store.Load(cfg, logger)) + + require.Equal(t, "mike", store.AgentToken()) + require.Equal(t, "november", store.AgentMasterToken()) + require.Equal(t, "oscar", store.UserToken()) + require.Equal(t, "papa", store.ReplicationToken()) + }) + + t.Run("with some persisted tokens", func(t *testing.T) { + tokens := `{ + "agent" : "uniform", + "agent_master" : "victor" + }` + + cfg := Config{ + EnablePersistence: true, + DataDir: dataDir, + ACLDefaultToken: "whiskey", + ACLAgentToken: "xray", + ACLAgentMasterToken: "yankee", + ACLReplicationToken: "zulu", + } + + require.NoError(t, ioutil.WriteFile(tokenFile, []byte(tokens), 0600)) + require.NoError(t, store.Load(cfg, logger)) + + require.Equal(t, "uniform", store.AgentToken()) + require.Equal(t, "victor", store.AgentMasterToken()) + require.Equal(t, "whiskey", store.UserToken()) + require.Equal(t, "zulu", store.ReplicationToken()) + }) + + t.Run("persisted file contains invalid data", func(t *testing.T) 
{ + cfg := Config{ + EnablePersistence: true, + DataDir: dataDir, + ACLDefaultToken: "one", + ACLAgentToken: "two", + ACLAgentMasterToken: "three", + ACLReplicationToken: "four", + } + + require.NoError(t, ioutil.WriteFile(tokenFile, []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, 0600)) + err := store.Load(cfg, logger) + require.Error(t, err) + require.Contains(t, err.Error(), "failed to decode tokens file") + + require.Equal(t, "one", store.UserToken()) + require.Equal(t, "two", store.AgentToken()) + require.Equal(t, "three", store.AgentMasterToken()) + require.Equal(t, "four", store.ReplicationToken()) + }) + + t.Run("persisted file contains invalid json", func(t *testing.T) { + cfg := Config{ + EnablePersistence: true, + DataDir: dataDir, + ACLDefaultToken: "alfa", + ACLAgentToken: "bravo", + ACLAgentMasterToken: "charlie", + ACLReplicationToken: "foxtrot", + } + + require.NoError(t, ioutil.WriteFile(tokenFile, []byte("[1,2,3]"), 0600)) + err := store.Load(cfg, logger) + require.Error(t, err) + require.Contains(t, err.Error(), "failed to decode tokens file") + + require.Equal(t, "alfa", store.UserToken()) + require.Equal(t, "bravo", store.AgentToken()) + require.Equal(t, "charlie", store.AgentMasterToken()) + require.Equal(t, "foxtrot", store.ReplicationToken()) + }) +} + +func TestStore_WithPersistenceLock(t *testing.T) { + dataDir := testutil.TempDir(t, "datadir") + store := new(Store) + cfg := Config{ + EnablePersistence: true, + DataDir: dataDir, + ACLDefaultToken: "default-token", + ACLAgentToken: "agent-token", + ACLAgentMasterToken: "master-token", + ACLReplicationToken: "replication-token", + } + err := store.Load(cfg, hclog.New(nil)) + require.NoError(t, err) + + f := func() error { + updated := store.UpdateUserToken("the-new-token", TokenSourceAPI) + require.True(t, updated) + + updated = store.UpdateAgentMasterToken("the-new-master-token", TokenSourceAPI) + require.True(t, updated) + return nil + } + + err = store.WithPersistenceLock(f) + 
require.NoError(t, err) + + tokens, err := readPersistedFromFile(filepath.Join(dataDir, tokensPath)) + require.NoError(t, err) + expected := persistedTokens{ + Default: "the-new-token", + AgentMaster: "the-new-master-token", + } + require.Equal(t, expected, tokens) +} diff --git a/agent/token/store.go b/agent/token/store.go index 56ab7d806a..456190f70b 100644 --- a/agent/token/store.go +++ b/agent/token/store.go @@ -77,6 +77,12 @@ type Store struct { watchers map[int]watcher watcherIndex int + persistence *fileStore + // persistenceLock is used to synchronize access to the persisted token store + // within the data directory. This will prevent loading while writing as well as + // multiple concurrent writes. + persistenceLock sync.RWMutex + // enterpriseTokens contains tokens only used in consul-enterprise enterpriseTokens } @@ -158,7 +164,7 @@ func (t *Store) sendNotificationLocked(kinds ...TokenKind) { // Returns true if it was changed. func (t *Store) UpdateUserToken(token string, source TokenSource) bool { t.l.Lock() - changed := (t.userToken != token || t.userTokenSource != source) + changed := t.userToken != token || t.userTokenSource != source t.userToken = token t.userTokenSource = source if changed { @@ -172,7 +178,7 @@ func (t *Store) UpdateUserToken(token string, source TokenSource) bool { // Returns true if it was changed. func (t *Store) UpdateAgentToken(token string, source TokenSource) bool { t.l.Lock() - changed := (t.agentToken != token || t.agentTokenSource != source) + changed := t.agentToken != token || t.agentTokenSource != source t.agentToken = token t.agentTokenSource = source if changed { @@ -186,7 +192,7 @@ func (t *Store) UpdateAgentToken(token string, source TokenSource) bool { // Returns true if it was changed. 
func (t *Store) UpdateAgentMasterToken(token string, source TokenSource) bool { t.l.Lock() - changed := (t.agentMasterToken != token || t.agentMasterTokenSource != source) + changed := t.agentMasterToken != token || t.agentMasterTokenSource != source t.agentMasterToken = token t.agentMasterTokenSource = source if changed { @@ -200,7 +206,7 @@ func (t *Store) UpdateAgentMasterToken(token string, source TokenSource) bool { // Returns true if it was changed. func (t *Store) UpdateReplicationToken(token string, source TokenSource) bool { t.l.Lock() - changed := (t.replicationToken != token || t.replicationTokenSource != source) + changed := t.replicationToken != token || t.replicationTokenSource != source t.replicationToken = token t.replicationTokenSource = source if changed { diff --git a/agent/token/store_oss.go b/agent/token/store_oss.go index 0a182d8265..16123052ef 100644 --- a/agent/token/store_oss.go +++ b/agent/token/store_oss.go @@ -2,11 +2,18 @@ package token +type EnterpriseConfig struct { +} + // Stub for enterpriseTokens type enterpriseTokens struct { } // enterpriseAgentToken OSS stub -func (s *Store) enterpriseAgentToken() string { +func (t *Store) enterpriseAgentToken() string { return "" } + +// loadEnterpriseTokens is a noop stub for the func defined agent_ent.go +func loadEnterpriseTokens(_ *Store, _ Config) { +} diff --git a/agent/token/store_test.go b/agent/token/store_test.go index f46fcc3a98..6df8122576 100644 --- a/agent/token/store_test.go +++ b/agent/token/store_test.go @@ -7,8 +7,6 @@ import ( ) func TestStore_RegularTokens(t *testing.T) { - t.Parallel() - type tokens struct { userSource TokenSource user string @@ -89,13 +87,22 @@ func TestStore_RegularTokens(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - t.Parallel() - s := new(Store) - require.True(t, s.UpdateUserToken(tt.set.user, tt.set.userSource)) - require.True(t, s.UpdateAgentToken(tt.set.agent, tt.set.agentSource)) - require.True(t, 
s.UpdateReplicationToken(tt.set.repl, tt.set.replSource)) - require.True(t, s.UpdateAgentMasterToken(tt.set.master, tt.set.masterSource)) + if tt.set.user != "" { + require.True(t, s.UpdateUserToken(tt.set.user, tt.set.userSource)) + } + + if tt.set.agent != "" { + require.True(t, s.UpdateAgentToken(tt.set.agent, tt.set.agentSource)) + } + + if tt.set.repl != "" { + require.True(t, s.UpdateReplicationToken(tt.set.repl, tt.set.replSource)) + } + + if tt.set.master != "" { + require.True(t, s.UpdateAgentMasterToken(tt.set.master, tt.set.masterSource)) + } // If they don't change then they return false. require.False(t, s.UpdateUserToken(tt.set.user, tt.set.userSource)) @@ -128,7 +135,6 @@ func TestStore_RegularTokens(t *testing.T) { } func TestStore_AgentMasterToken(t *testing.T) { - t.Parallel() s := new(Store) verify := func(want bool, toks ...string) { @@ -152,7 +158,6 @@ func TestStore_AgentMasterToken(t *testing.T) { } func TestStore_Notify(t *testing.T) { - t.Parallel() s := new(Store) newNotification := func(t *testing.T, s *Store, kind TokenKind) Notifier { diff --git a/agent/ui_endpoint_test.go b/agent/ui_endpoint_test.go index 876d4a97c8..4640bcfebb 100644 --- a/agent/ui_endpoint_test.go +++ b/agent/ui_endpoint_test.go @@ -41,7 +41,7 @@ func TestUiIndex(t *testing.T) { // Register node req, _ := http.NewRequest("GET", "/ui/my-file", nil) req.URL.Scheme = "http" - req.URL.Host = a.srv.Server.Addr + req.URL.Host = a.HTTPAddr() // Make the request client := cleanhttp.DefaultClient() diff --git a/api/api.go b/api/api.go index 7b00be967a..38a4e98fbd 100644 --- a/api/api.go +++ b/api/api.go @@ -607,9 +607,11 @@ func NewClient(config *Config) (*Client, error) { trans.DialContext = func(_ context.Context, _, _ string) (net.Conn, error) { return net.Dial("unix", parts[1]) } - config.HttpClient = &http.Client{ - Transport: trans, + httpClient, err := NewHttpClient(trans, config.TLSConfig) + if err != nil { + return nil, err } + config.HttpClient = httpClient default: 
return nil, fmt.Errorf("Unknown protocol scheme: %s", parts[0]) } diff --git a/api/config_entry.go b/api/config_entry.go index dc31d6110f..a234f6eb25 100644 --- a/api/config_entry.go +++ b/api/config_entry.go @@ -95,6 +95,7 @@ type ServiceConfigEntry struct { MeshGateway MeshGatewayConfig `json:",omitempty" alias:"mesh_gateway"` Expose ExposeConfig `json:",omitempty"` ExternalSNI string `json:",omitempty" alias:"external_sni"` + Meta map[string]string `json:",omitempty"` CreateIndex uint64 ModifyIndex uint64 } @@ -122,6 +123,7 @@ type ProxyConfigEntry struct { Config map[string]interface{} `json:",omitempty"` MeshGateway MeshGatewayConfig `json:",omitempty" alias:"mesh_gateway"` Expose ExposeConfig `json:",omitempty"` + Meta map[string]string `json:",omitempty"` CreateIndex uint64 ModifyIndex uint64 } diff --git a/api/config_entry_discoverychain.go b/api/config_entry_discoverychain.go index 5b989e1054..bb11612f55 100644 --- a/api/config_entry_discoverychain.go +++ b/api/config_entry_discoverychain.go @@ -12,6 +12,7 @@ type ServiceRouterConfigEntry struct { Routes []ServiceRoute `json:",omitempty"` + Meta map[string]string `json:",omitempty"` CreateIndex uint64 ModifyIndex uint64 } @@ -111,6 +112,7 @@ type ServiceSplitterConfigEntry struct { Splits []ServiceSplit `json:",omitempty"` + Meta map[string]string `json:",omitempty"` CreateIndex uint64 ModifyIndex uint64 } @@ -142,6 +144,7 @@ type ServiceResolverConfigEntry struct { // issuing requests to this upstream service. LoadBalancer *LoadBalancer `json:",omitempty" alias:"load_balancer"` + Meta map[string]string `json:",omitempty"` CreateIndex uint64 ModifyIndex uint64 } diff --git a/api/config_entry_gateways.go b/api/config_entry_gateways.go index 9d3ee0a6a9..e259427d86 100644 --- a/api/config_entry_gateways.go +++ b/api/config_entry_gateways.go @@ -21,6 +21,8 @@ type IngressGatewayConfigEntry struct { // what services to associated to those ports. 
Listeners []IngressListener + Meta map[string]string `json:",omitempty"` + // CreateIndex is the Raft index this entry was created at. This is a // read-only field. CreateIndex uint64 @@ -115,6 +117,8 @@ type TerminatingGatewayConfigEntry struct { // Services is a list of service names represented by the terminating gateway. Services []LinkedService `json:",omitempty"` + Meta map[string]string `json:",omitempty"` + // CreateIndex is the Raft index this entry was created at. This is a // read-only field. CreateIndex uint64 diff --git a/api/config_entry_test.go b/api/config_entry_test.go index 3bb2239586..5c8c69c38a 100644 --- a/api/config_entry_test.go +++ b/api/config_entry_test.go @@ -271,6 +271,10 @@ func TestDecodeConfigEntry(t *testing.T) { { "Kind": "proxy-defaults", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Config": { "foo": 19, "bar": "abc", @@ -286,6 +290,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &ProxyConfigEntry{ Kind: "proxy-defaults", Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Config: map[string]interface{}{ "foo": float64(19), "bar": "abc", @@ -304,6 +312,10 @@ func TestDecodeConfigEntry(t *testing.T) { { "Kind": "service-defaults", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Protocol": "http", "ExternalSNI": "abc-123", "MeshGateway": { @@ -312,8 +324,12 @@ func TestDecodeConfigEntry(t *testing.T) { } `, expect: &ServiceConfigEntry{ - Kind: "service-defaults", - Name: "main", + Kind: "service-defaults", + Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Protocol: "http", ExternalSNI: "abc-123", MeshGateway: MeshGatewayConfig{ @@ -327,6 +343,10 @@ func TestDecodeConfigEntry(t *testing.T) { { "Kind": "service-router", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Routes": [ { "Match": { @@ -407,6 +427,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &ServiceRouterConfigEntry{ Kind: "service-router", 
Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Routes: []ServiceRoute{ { Match: &ServiceRouteMatch{ @@ -490,6 +514,10 @@ func TestDecodeConfigEntry(t *testing.T) { { "Kind": "service-splitter", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Splits": [ { "Weight": 99.1, @@ -506,6 +534,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &ServiceSplitterConfigEntry{ Kind: ServiceSplitter, Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Splits: []ServiceSplit{ { Weight: 99.1, @@ -525,6 +557,10 @@ func TestDecodeConfigEntry(t *testing.T) { { "Kind": "service-resolver", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "DefaultSubset": "v1", "ConnectTimeout": "15s", "Subsets": { @@ -549,8 +585,12 @@ func TestDecodeConfigEntry(t *testing.T) { } }`, expect: &ServiceResolverConfigEntry{ - Kind: "service-resolver", - Name: "main", + Kind: "service-resolver", + Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, DefaultSubset: "v1", ConnectTimeout: 15 * time.Second, Subsets: map[string]ServiceResolverSubset{ @@ -725,6 +765,10 @@ func TestDecodeConfigEntry(t *testing.T) { { "Kind": "ingress-gateway", "Name": "ingress-web", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Tls": { "Enabled": true }, @@ -757,6 +801,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &IngressGatewayConfigEntry{ Kind: "ingress-gateway", Name: "ingress-web", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, TLS: GatewayTLSConfig{ Enabled: true, }, @@ -792,9 +840,13 @@ func TestDecodeConfigEntry(t *testing.T) { { "Kind": "terminating-gateway", "Name": "terminating-west", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Services": [ { - "Namespace": "foo", + "Namespace": "foo", "Name": "web", "CAFile": "/etc/ca.pem", "CertFile": "/etc/cert.pem", @@ -813,6 +865,10 @@ func TestDecodeConfigEntry(t *testing.T) { expect: &TerminatingGatewayConfigEntry{ Kind: 
"terminating-gateway", Name: "terminating-west", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Services: []LinkedService{ { Namespace: "foo", diff --git a/api/lock.go b/api/lock.go index 5cacee8f7e..221a7add3c 100644 --- a/api/lock.go +++ b/api/lock.go @@ -79,6 +79,7 @@ type LockOptions struct { MonitorRetryTime time.Duration // Optional, defaults to DefaultMonitorRetryTime LockWaitTime time.Duration // Optional, defaults to DefaultLockWaitTime LockTryOnce bool // Optional, defaults to false which means try forever + LockDelay time.Duration // Optional, defaults to 15s Namespace string `json:",omitempty"` // Optional, defaults to API client config, namespace of ACL token, or "default" namespace } @@ -351,8 +352,9 @@ func (l *Lock) createSession() (string, error) { se := l.opts.SessionOpts if se == nil { se = &SessionEntry{ - Name: l.opts.SessionName, - TTL: l.opts.SessionTTL, + Name: l.opts.SessionName, + TTL: l.opts.SessionTTL, + LockDelay: l.opts.LockDelay, } } w := WriteOptions{Namespace: l.opts.Namespace} diff --git a/command/agent/agent.go b/command/agent/agent.go index 7da6613066..1e06ef90be 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -288,6 +288,9 @@ func (c *cmd) run(args []string) int { case err := <-agent.RetryJoinCh(): c.logger.Error("Retry join failed", "error", err) return 1 + case <-agent.Failed(): + // The deferred Shutdown method will log the appropriate error + return 1 case <-agent.ShutdownCh(): // agent is already down! 
return 0 diff --git a/command/config/write/config_write_test.go b/command/config/write/config_write_test.go index e4d9e27883..5d029a7ee8 100644 --- a/command/config/write/config_write_test.go +++ b/command/config/write/config_write_test.go @@ -162,6 +162,10 @@ func TestParseConfigEntry(t *testing.T) { snake: ` kind = "proxy-defaults" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } config { "foo" = 19 "bar" = "abc" @@ -176,6 +180,10 @@ func TestParseConfigEntry(t *testing.T) { camel: ` Kind = "proxy-defaults" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } Config { "foo" = 19 "bar" = "abc" @@ -191,6 +199,10 @@ func TestParseConfigEntry(t *testing.T) { { "kind": "proxy-defaults", "name": "main", + "meta" : { + "foo": "bar", + "gir": "zim" + }, "config": { "foo": 19, "bar": "abc", @@ -207,6 +219,10 @@ func TestParseConfigEntry(t *testing.T) { { "Kind": "proxy-defaults", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Config": { "foo": 19, "bar": "abc", @@ -222,6 +238,10 @@ func TestParseConfigEntry(t *testing.T) { expect: &api.ProxyConfigEntry{ Kind: "proxy-defaults", Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Config: map[string]interface{}{ "foo": 19, "bar": "abc", @@ -236,6 +256,10 @@ func TestParseConfigEntry(t *testing.T) { expectJSON: &api.ProxyConfigEntry{ Kind: "proxy-defaults", Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Config: map[string]interface{}{ "foo": float64(19), // json decoding gives float64 instead of int here "bar": "abc", @@ -254,6 +278,10 @@ func TestParseConfigEntry(t *testing.T) { kind = "terminating-gateway" name = "terminating-gw-west" namespace = "default" + meta { + "foo" = "bar" + "gir" = "zim" + } services = [ { name = "billing" @@ -273,6 +301,10 @@ func TestParseConfigEntry(t *testing.T) { Kind = "terminating-gateway" Name = "terminating-gw-west" Namespace = "default" + Meta { + "foo" = "bar" + "gir" = "zim" + } Services = [ { 
Name = "billing" @@ -293,6 +325,10 @@ func TestParseConfigEntry(t *testing.T) { "kind": "terminating-gateway", "name": "terminating-gw-west", "namespace": "default", + "meta" : { + "foo": "bar", + "gir": "zim" + }, "services": [ { "name": "billing", @@ -314,6 +350,10 @@ func TestParseConfigEntry(t *testing.T) { "Kind": "terminating-gateway", "Name": "terminating-gw-west", "Namespace": "default", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Services": [ { "Name": "billing", @@ -334,6 +374,10 @@ func TestParseConfigEntry(t *testing.T) { Kind: "terminating-gateway", Name: "terminating-gw-west", Namespace: "default", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Services: []api.LinkedService{ { Name: "billing", @@ -353,6 +397,10 @@ func TestParseConfigEntry(t *testing.T) { Kind: "terminating-gateway", Name: "terminating-gw-west", Namespace: "default", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Services: []api.LinkedService{ { Name: "billing", @@ -374,6 +422,10 @@ func TestParseConfigEntry(t *testing.T) { snake: ` kind = "service-defaults" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } protocol = "http" external_sni = "abc-123" mesh_gateway { @@ -383,6 +435,10 @@ func TestParseConfigEntry(t *testing.T) { camel: ` Kind = "service-defaults" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } Protocol = "http" ExternalSNI = "abc-123" MeshGateway { @@ -393,6 +449,10 @@ func TestParseConfigEntry(t *testing.T) { { "kind": "service-defaults", "name": "main", + "meta" : { + "foo": "bar", + "gir": "zim" + }, "protocol": "http", "external_sni": "abc-123", "mesh_gateway": { @@ -404,6 +464,10 @@ func TestParseConfigEntry(t *testing.T) { { "Kind": "service-defaults", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Protocol": "http", "ExternalSNI": "abc-123", "MeshGateway": { @@ -412,8 +476,12 @@ func TestParseConfigEntry(t *testing.T) { } `, expect: &api.ServiceConfigEntry{ - Kind: "service-defaults", - 
Name: "main", + Kind: "service-defaults", + Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Protocol: "http", ExternalSNI: "abc-123", MeshGateway: api.MeshGatewayConfig{ @@ -426,6 +494,10 @@ func TestParseConfigEntry(t *testing.T) { snake: ` kind = "service-router" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } routes = [ { match { @@ -505,6 +577,10 @@ func TestParseConfigEntry(t *testing.T) { camel: ` Kind = "service-router" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } Routes = [ { Match { @@ -585,6 +661,10 @@ func TestParseConfigEntry(t *testing.T) { { "kind": "service-router", "name": "main", + "meta" : { + "foo": "bar", + "gir": "zim" + }, "routes": [ { "match": { @@ -672,6 +752,10 @@ func TestParseConfigEntry(t *testing.T) { { "Kind": "service-router", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Routes": [ { "Match": { @@ -758,6 +842,10 @@ func TestParseConfigEntry(t *testing.T) { expect: &api.ServiceRouterConfigEntry{ Kind: "service-router", Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Routes: []api.ServiceRoute{ { Match: &api.ServiceRouteMatch{ @@ -840,6 +928,10 @@ func TestParseConfigEntry(t *testing.T) { snake: ` kind = "service-splitter" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } splits = [ { weight = 97.1 @@ -859,6 +951,10 @@ func TestParseConfigEntry(t *testing.T) { camel: ` Kind = "service-splitter" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } Splits = [ { Weight = 97.1 @@ -879,6 +975,10 @@ func TestParseConfigEntry(t *testing.T) { { "kind": "service-splitter", "name": "main", + "meta" : { + "foo": "bar", + "gir": "zim" + }, "splits": [ { "weight": 97.1, @@ -900,6 +1000,10 @@ func TestParseConfigEntry(t *testing.T) { { "Kind": "service-splitter", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Splits": [ { "Weight": 97.1, @@ -920,6 +1024,10 @@ func TestParseConfigEntry(t *testing.T) { expect: 
&api.ServiceSplitterConfigEntry{ Kind: api.ServiceSplitter, Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, Splits: []api.ServiceSplit{ { Weight: 97.1, @@ -942,6 +1050,10 @@ func TestParseConfigEntry(t *testing.T) { snake: ` kind = "service-resolver" name = "main" + meta { + "foo" = "bar" + "gir" = "zim" + } default_subset = "v1" connect_timeout = "15s" subsets = { @@ -967,6 +1079,10 @@ func TestParseConfigEntry(t *testing.T) { camel: ` Kind = "service-resolver" Name = "main" + Meta { + "foo" = "bar" + "gir" = "zim" + } DefaultSubset = "v1" ConnectTimeout = "15s" Subsets = { @@ -993,6 +1109,10 @@ func TestParseConfigEntry(t *testing.T) { { "kind": "service-resolver", "name": "main", + "meta" : { + "foo": "bar", + "gir": "zim" + }, "default_subset": "v1", "connect_timeout": "15s", "subsets": { @@ -1026,6 +1146,10 @@ func TestParseConfigEntry(t *testing.T) { { "Kind": "service-resolver", "Name": "main", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "DefaultSubset": "v1", "ConnectTimeout": "15s", "Subsets": { @@ -1056,8 +1180,12 @@ func TestParseConfigEntry(t *testing.T) { } `, expect: &api.ServiceResolverConfigEntry{ - Kind: "service-resolver", - Name: "main", + Kind: "service-resolver", + Name: "main", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, DefaultSubset: "v1", ConnectTimeout: 15 * time.Second, Subsets: map[string]api.ServiceResolverSubset{ @@ -1645,6 +1773,10 @@ func TestParseConfigEntry(t *testing.T) { snake: ` kind = "ingress-gateway" name = "ingress-web" + meta { + "foo" = "bar" + "gir" = "zim" + } tls { enabled = true } @@ -1668,6 +1800,10 @@ func TestParseConfigEntry(t *testing.T) { camel: ` Kind = "ingress-gateway" Name = "ingress-web" + Meta { + "foo" = "bar" + "gir" = "zim" + } Tls { Enabled = true } @@ -1692,6 +1828,10 @@ func TestParseConfigEntry(t *testing.T) { { "kind": "ingress-gateway", "name": "ingress-web", + "meta" : { + "foo": "bar", + "gir": "zim" + }, "tls": { "enabled": true }, @@ 
-1717,6 +1857,10 @@ func TestParseConfigEntry(t *testing.T) { { "Kind": "ingress-gateway", "Name": "ingress-web", + "Meta" : { + "foo": "bar", + "gir": "zim" + }, "Tls": { "Enabled": true }, @@ -1741,6 +1885,10 @@ func TestParseConfigEntry(t *testing.T) { expect: &api.IngressGatewayConfigEntry{ Kind: "ingress-gateway", Name: "ingress-web", + Meta: map[string]string{ + "foo": "bar", + "gir": "zim", + }, TLS: api.GatewayTLSConfig{ Enabled: true, }, diff --git a/command/connect/envoy/envoy.go b/command/connect/envoy/envoy.go index 2fe0c978bd..67530f0ec6 100644 --- a/command/connect/envoy/envoy.go +++ b/command/connect/envoy/envoy.go @@ -4,7 +4,6 @@ import ( "errors" "flag" "fmt" - "io/ioutil" "net" "os" "os/exec" @@ -20,6 +19,7 @@ import ( proxyCmd "github.com/hashicorp/consul/command/connect/proxy" "github.com/hashicorp/consul/command/flags" "github.com/hashicorp/consul/ipaddr" + "github.com/hashicorp/consul/tlsutil" ) func New(ui cli.Ui) *cmd { @@ -443,13 +443,11 @@ func (c *cmd) templateArgs() (*BootstrapTplArgs, error) { } var caPEM string - if httpCfg.TLSConfig.CAFile != "" { - content, err := ioutil.ReadFile(httpCfg.TLSConfig.CAFile) - if err != nil { - return nil, fmt.Errorf("Failed to read CA file: %s", err) - } - caPEM = strings.Replace(string(content), "\n", "\\n", -1) + pems, err := tlsutil.LoadCAs(httpCfg.TLSConfig.CAFile, httpCfg.TLSConfig.CAPath) + if err != nil { + return nil, err } + caPEM = strings.Replace(strings.Join(pems, ""), "\n", "\\n", -1) return &BootstrapTplArgs{ GRPC: grpcAddr, diff --git a/command/connect/envoy/envoy_test.go b/command/connect/envoy/envoy_test.go index d99051b1e3..cab2ae9ad9 100644 --- a/command/connect/envoy/envoy_test.go +++ b/command/connect/envoy/envoy_test.go @@ -370,6 +370,46 @@ func TestGenerateConfig(t *testing.T) { LocalAgentClusterName: xds.LocalAgentClusterName, }, }, + { + Name: "missing-ca-path", + Flags: []string{"-proxy-id", "test-proxy", "-ca-path", "some/path"}, + WantArgs: BootstrapTplArgs{ + EnvoyVersion: 
defaultEnvoyVersion, + ProxyCluster: "test-proxy", + ProxyID: "test-proxy", + // Should resolve IP, note this might not resolve the same way + // everywhere which might make this test brittle but not sure what else + // to do. + GRPC: GRPC{ + AgentAddress: "127.0.0.1", + AgentPort: "8502", + }, + }, + WantErr: "lstat some/path: no such file or directory", + }, + { + Name: "existing-ca-path", + Flags: []string{"-proxy-id", "test-proxy", "-ca-path", "../../../test/ca_path/"}, + Env: []string{"CONSUL_HTTP_SSL=1"}, + WantArgs: BootstrapTplArgs{ + EnvoyVersion: defaultEnvoyVersion, + ProxyCluster: "test-proxy", + ProxyID: "test-proxy", + // Should resolve IP, note this might not resolve the same way + // everywhere which might make this test brittle but not sure what else + // to do. + GRPC: GRPC{ + AgentAddress: "127.0.0.1", + AgentPort: "8502", + AgentTLS: true, + }, + AgentCAPEM: `-----BEGIN CERTIFICATE-----\nMIIFADCCAuqgAwIBAgIBATALBgkqhkiG9w0BAQswEzERMA8GA1UEAxMIQ2VydEF1\ndGgwHhcNMTUwNTExMjI0NjQzWhcNMjUwNTExMjI0NjU0WjATMREwDwYDVQQDEwhD\nZXJ0QXV0aDCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALcMByyynHsA\n+K4PJwo5+XHygaEZAhPGvHiKQK2Cbc9NDm0ZTzx0rA/dRTZlvouhDyzcJHm+6R1F\nj6zQv7iaSC3qQtJiPnPsfZ+/0XhFZ3fQWMnfDiGbZpF1kJF01ofB6vnsuocFC0zG\naGC+SZiLAzs+QMP3Bebw1elCBIeoN+8NWnRYmLsYIaYGJGBSbNo/lCpLTuinofUn\nL3ehWEGv1INwpHnSVeN0Ml2GFe23d7PUlj/wNIHgUdpUR+KEJxIP3klwtsI3QpSH\nc4VjWdf4aIcka6K3IFuw+K0PUh3xAAPnMpAQOtCZk0AhF5rlvUbevC6jADxpKxLp\nOONmvCTer4LtyNURAoBH52vbK0r/DNcTpPEFV0IP66nXUFgkk0mRKsu8HTb4IOkC\nX3K4mp18EiWUUtrHZAnNct0iIniDBqKK0yhSNhztG6VakVt/1WdQY9Ey3mNtxN1O\nthqWFKdpKUzPKYC3P6PfVpiE7+VbWTLLXba+8BPe8BxWPsVkjJqGSGnCte4COusz\nM8/7bbTgifwJfsepwFtZG53tvwjWlO46Exl30VoDNTaIGvs1fO0GqJlh2A7FN5F2\nS1rS5VYHtPK8QdmUSvyq+7JDBc1HNT5I2zsIQbNcLwDTZ5EsbU6QR7NHDJKxjv/w\nbs3eTXJSSNcFD74wRU10pXjgE5wOFu9TAgMBAAGjYzBhMA4GA1UdDwEB/wQEAwIA\nBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBQHazgZ3Puiuc6K2LzgcX5b6fAC\nPzAfBgNVHSMEGDAWgBQHazgZ3Puiuc6K2LzgcX5b6fACPzALBgkqhkiG9w0BAQsD\nggIBAEmeNrSUhpHg1I8dtfq
u9hCU/6IZThjtcFA+QcPkkMa+Z1k0SOtsgW8MdlcA\ngCf5g5yQZ0DdpWM9nDB6xDIhQdccm91idHgf8wmpEHUj0an4uyn2ESCt8eqrAWf7\nAClYORCASTYfguJCxcfvwtI1uqaOeCxSOdmFay79UVitVsWeonbCRGsVgBDifJxw\nG2oCQqoYAmXPM4J6syk5GHhB1O9MMq+g1+hOx9s+XHyTui9FL4V+IUO1ygVqEQB5\nPSiRBvcIsajSGVao+vK0gf2XfcXzqr3y3NhBky9rFMp1g+ykb2yWekV4WiROJlCj\nTsWwWZDRyjiGahDbho/XW8JciouHZhJdjhmO31rqW3HdFviCTdXMiGk3GQIzz/Jg\nP+enOaHXoY9lcxzDvY9z1BysWBgNvNrMnVge/fLP9o+a0a0PRIIVl8T0Ef3zeg1O\nCLCSy/1Vae5Tx63ZTFvGFdOSusYkG9rlAUHXZE364JRCKzM9Bz0bM+t+LaO0MaEb\nYoxcXEPU+gB2IvmARpInN3oHexR6ekuYHVTRGdWrdmuHFzc7eFwygRqTFdoCCU+G\nQZEkd+lOEyv0zvQqYg+Jp0AEGz2B2zB53uBVECtn0EqrSdPtRzUBSByXVs6QhSXn\neVmy+z3U3MecP63X6oSPXekqSyZFuegXpNNuHkjNoL4ep2ix\n-----END CERTIFICATE-----\n-----BEGIN CERTIFICATE-----\nMIIEtzCCA5+gAwIBAgIJAIewRMI8OnvTMA0GCSqGSIb3DQEBBQUAMIGYMQswCQYD\nVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDVNhbiBGcmFuY2lzY28xHDAa\nBgNVBAoTE0hhc2hpQ29ycCBUZXN0IENlcnQxDDAKBgNVBAsTA0RldjEWMBQGA1UE\nAxMNdGVzdC5pbnRlcm5hbDEgMB4GCSqGSIb3DQEJARYRdGVzdEBpbnRlcm5hbC5j\nb20wHhcNMTQwNDA3MTkwMTA4WhcNMjQwNDA0MTkwMTA4WjCBmDELMAkGA1UEBhMC\nVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRwwGgYDVQQK\nExNIYXNoaUNvcnAgVGVzdCBDZXJ0MQwwCgYDVQQLEwNEZXYxFjAUBgNVBAMTDXRl\nc3QuaW50ZXJuYWwxIDAeBgkqhkiG9w0BCQEWEXRlc3RAaW50ZXJuYWwuY29tMIIB\nIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAxrs6JK4NpiOItxrpNR/1ppUU\nmH7p2BgLCBZ6eHdclle9J56i68adt8J85zaqphCfz6VDP58DsFx+N50PZyjQaDsU\nd0HejRqfHRMtg2O+UQkv4Z66+Vo+gc6uGuANi2xMtSYDVTAqqzF48OOPQDgYkzcG\nxcFZzTRFFZt2vPnyHj8cHcaFo/NMNVh7C3yTXevRGNm9u2mrbxCEeiHzFC2WUnvg\nU2jQuC7Fhnl33Zd3B6d3mQH6O23ncmwxTcPUJe6xZaIRrDuzwUcyhLj5Z3faag/f\npFIIcHSiHRfoqHLGsGg+3swId/zVJSSDHr7pJUu7Cre+vZa63FqDaooqvnisrQID\nAQABo4IBADCB/TAdBgNVHQ4EFgQUo/nrOfqvbee2VklVKIFlyQEbuJUwgc0GA1Ud\nIwSBxTCBwoAUo/nrOfqvbee2VklVKIFlyQEbuJWhgZ6kgZswgZgxCzAJBgNVBAYT\nAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNU2FuIEZyYW5jaXNjbzEcMBoGA1UE\nChMTSGFzaGlDb3JwIFRlc3QgQ2VydDEMMAoGA1UECxMDRGV2MRYwFAYDVQQDEw10\nZXN0LmludGVybmFsMSAwHgYJKoZIhvcNAQkBFhF0ZXN0QGludGVybmFsLmNvbYIJ\nAIe
wRMI8OnvTMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADggEBADa9fV9h\ngjapBlkNmu64WX0Ufub5dsJrdHS8672P30S7ILB7Mk0W8sL65IezRsZnG898yHf9\n2uzmz5OvNTM9K380g7xFlyobSVq+6yqmmSAlA/ptAcIIZT727P5jig/DB7fzJM3g\njctDlEGOmEe50GQXc25VKpcpjAsNQi5ER5gowQ0v3IXNZs+yU+LvxLHc0rUJ/XSp\nlFCAMOqd5uRoMOejnT51G6krvLNzPaQ3N9jQfNVY4Q0zfs0M+6dRWvqfqB9Vyq8/\nPOLMld+HyAZEBk9zK3ZVIXx6XS4dkDnSNR91njLq7eouf6M7+7s/oMQZZRtAfQ6r\nwlW975rYa1ZqEdA=\n-----END CERTIFICATE-----\n`, + AdminAccessLogPath: "/dev/null", + AdminBindAddress: "127.0.0.1", + AdminBindPort: "19000", + LocalAgentClusterName: xds.LocalAgentClusterName, + }, + }, { Name: "custom-bootstrap", Flags: []string{"-proxy-id", "test-proxy"}, diff --git a/command/connect/envoy/testdata/existing-ca-path.golden b/command/connect/envoy/testdata/existing-ca-path.golden new file mode 100644 index 0000000000..1dd467ce4c --- /dev/null +++ b/command/connect/envoy/testdata/existing-ca-path.golden @@ -0,0 +1,125 @@ +{ + "admin": { + "access_log_path": "/dev/null", + "address": { + "socket_address": { + "address": "127.0.0.1", + "port_value": 19000 + } + } + }, + "node": { + "cluster": "test-proxy", + "id": "test-proxy", + "metadata": { + "namespace": "default", + "envoy_version": "1.15.0" + } + }, + "static_resources": { + "clusters": [ + { + "name": "local_agent", + "connect_timeout": "1s", + "type": "STATIC", + "tls_context": { + "common_tls_context": { + "validation_context": { + "trusted_ca": { + "inline_string": "-----BEGIN 
CERTIFICATE-----\nMIIFADCCAuqgAwIBAgIBATALBgkqhkiG9w0BAQswEzERMA8GA1UEAxMIQ2VydEF1\ndGgwHhcNMTUwNTExMjI0NjQzWhcNMjUwNTExMjI0NjU0WjATMREwDwYDVQQDEwhD\nZXJ0QXV0aDCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALcMByyynHsA\n+K4PJwo5+XHygaEZAhPGvHiKQK2Cbc9NDm0ZTzx0rA/dRTZlvouhDyzcJHm+6R1F\nj6zQv7iaSC3qQtJiPnPsfZ+/0XhFZ3fQWMnfDiGbZpF1kJF01ofB6vnsuocFC0zG\naGC+SZiLAzs+QMP3Bebw1elCBIeoN+8NWnRYmLsYIaYGJGBSbNo/lCpLTuinofUn\nL3ehWEGv1INwpHnSVeN0Ml2GFe23d7PUlj/wNIHgUdpUR+KEJxIP3klwtsI3QpSH\nc4VjWdf4aIcka6K3IFuw+K0PUh3xAAPnMpAQOtCZk0AhF5rlvUbevC6jADxpKxLp\nOONmvCTer4LtyNURAoBH52vbK0r/DNcTpPEFV0IP66nXUFgkk0mRKsu8HTb4IOkC\nX3K4mp18EiWUUtrHZAnNct0iIniDBqKK0yhSNhztG6VakVt/1WdQY9Ey3mNtxN1O\nthqWFKdpKUzPKYC3P6PfVpiE7+VbWTLLXba+8BPe8BxWPsVkjJqGSGnCte4COusz\nM8/7bbTgifwJfsepwFtZG53tvwjWlO46Exl30VoDNTaIGvs1fO0GqJlh2A7FN5F2\nS1rS5VYHtPK8QdmUSvyq+7JDBc1HNT5I2zsIQbNcLwDTZ5EsbU6QR7NHDJKxjv/w\nbs3eTXJSSNcFD74wRU10pXjgE5wOFu9TAgMBAAGjYzBhMA4GA1UdDwEB/wQEAwIA\nBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBQHazgZ3Puiuc6K2LzgcX5b6fAC\nPzAfBgNVHSMEGDAWgBQHazgZ3Puiuc6K2LzgcX5b6fACPzALBgkqhkiG9w0BAQsD\nggIBAEmeNrSUhpHg1I8dtfqu9hCU/6IZThjtcFA+QcPkkMa+Z1k0SOtsgW8MdlcA\ngCf5g5yQZ0DdpWM9nDB6xDIhQdccm91idHgf8wmpEHUj0an4uyn2ESCt8eqrAWf7\nAClYORCASTYfguJCxcfvwtI1uqaOeCxSOdmFay79UVitVsWeonbCRGsVgBDifJxw\nG2oCQqoYAmXPM4J6syk5GHhB1O9MMq+g1+hOx9s+XHyTui9FL4V+IUO1ygVqEQB5\nPSiRBvcIsajSGVao+vK0gf2XfcXzqr3y3NhBky9rFMp1g+ykb2yWekV4WiROJlCj\nTsWwWZDRyjiGahDbho/XW8JciouHZhJdjhmO31rqW3HdFviCTdXMiGk3GQIzz/Jg\nP+enOaHXoY9lcxzDvY9z1BysWBgNvNrMnVge/fLP9o+a0a0PRIIVl8T0Ef3zeg1O\nCLCSy/1Vae5Tx63ZTFvGFdOSusYkG9rlAUHXZE364JRCKzM9Bz0bM+t+LaO0MaEb\nYoxcXEPU+gB2IvmARpInN3oHexR6ekuYHVTRGdWrdmuHFzc7eFwygRqTFdoCCU+G\nQZEkd+lOEyv0zvQqYg+Jp0AEGz2B2zB53uBVECtn0EqrSdPtRzUBSByXVs6QhSXn\neVmy+z3U3MecP63X6oSPXekqSyZFuegXpNNuHkjNoL4ep2ix\n-----END CERTIFICATE-----\n-----BEGIN 
CERTIFICATE-----\nMIIEtzCCA5+gAwIBAgIJAIewRMI8OnvTMA0GCSqGSIb3DQEBBQUAMIGYMQswCQYD\nVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDVNhbiBGcmFuY2lzY28xHDAa\nBgNVBAoTE0hhc2hpQ29ycCBUZXN0IENlcnQxDDAKBgNVBAsTA0RldjEWMBQGA1UE\nAxMNdGVzdC5pbnRlcm5hbDEgMB4GCSqGSIb3DQEJARYRdGVzdEBpbnRlcm5hbC5j\nb20wHhcNMTQwNDA3MTkwMTA4WhcNMjQwNDA0MTkwMTA4WjCBmDELMAkGA1UEBhMC\nVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRwwGgYDVQQK\nExNIYXNoaUNvcnAgVGVzdCBDZXJ0MQwwCgYDVQQLEwNEZXYxFjAUBgNVBAMTDXRl\nc3QuaW50ZXJuYWwxIDAeBgkqhkiG9w0BCQEWEXRlc3RAaW50ZXJuYWwuY29tMIIB\nIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAxrs6JK4NpiOItxrpNR/1ppUU\nmH7p2BgLCBZ6eHdclle9J56i68adt8J85zaqphCfz6VDP58DsFx+N50PZyjQaDsU\nd0HejRqfHRMtg2O+UQkv4Z66+Vo+gc6uGuANi2xMtSYDVTAqqzF48OOPQDgYkzcG\nxcFZzTRFFZt2vPnyHj8cHcaFo/NMNVh7C3yTXevRGNm9u2mrbxCEeiHzFC2WUnvg\nU2jQuC7Fhnl33Zd3B6d3mQH6O23ncmwxTcPUJe6xZaIRrDuzwUcyhLj5Z3faag/f\npFIIcHSiHRfoqHLGsGg+3swId/zVJSSDHr7pJUu7Cre+vZa63FqDaooqvnisrQID\nAQABo4IBADCB/TAdBgNVHQ4EFgQUo/nrOfqvbee2VklVKIFlyQEbuJUwgc0GA1Ud\nIwSBxTCBwoAUo/nrOfqvbee2VklVKIFlyQEbuJWhgZ6kgZswgZgxCzAJBgNVBAYT\nAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNU2FuIEZyYW5jaXNjbzEcMBoGA1UE\nChMTSGFzaGlDb3JwIFRlc3QgQ2VydDEMMAoGA1UECxMDRGV2MRYwFAYDVQQDEw10\nZXN0LmludGVybmFsMSAwHgYJKoZIhvcNAQkBFhF0ZXN0QGludGVybmFsLmNvbYIJ\nAIewRMI8OnvTMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADggEBADa9fV9h\ngjapBlkNmu64WX0Ufub5dsJrdHS8672P30S7ILB7Mk0W8sL65IezRsZnG898yHf9\n2uzmz5OvNTM9K380g7xFlyobSVq+6yqmmSAlA/ptAcIIZT727P5jig/DB7fzJM3g\njctDlEGOmEe50GQXc25VKpcpjAsNQi5ER5gowQ0v3IXNZs+yU+LvxLHc0rUJ/XSp\nlFCAMOqd5uRoMOejnT51G6krvLNzPaQ3N9jQfNVY4Q0zfs0M+6dRWvqfqB9Vyq8/\nPOLMld+HyAZEBk9zK3ZVIXx6XS4dkDnSNR91njLq7eouf6M7+7s/oMQZZRtAfQ6r\nwlW975rYa1ZqEdA=\n-----END CERTIFICATE-----\n" + } + } + } + }, + "http2_protocol_options": {}, + "hosts": [ + { + "socket_address": { + "address": "127.0.0.1", + "port_value": 8502 + } + } + ] + } + ] + }, + "stats_config": { + "stats_tags": [ + { + "regex": 
"^cluster\\.((?:([^.]+)~)?(?:[^.]+\\.)?[^.]+\\.[^.]+\\.[^.]+\\.[^.]+\\.[^.]+\\.consul\\.)", + "tag_name": "consul.custom_hash" + }, + { + "regex": "^cluster\\.((?:[^.]+~)?(?:([^.]+)\\.)?[^.]+\\.[^.]+\\.[^.]+\\.[^.]+\\.[^.]+\\.consul\\.)", + "tag_name": "consul.service_subset" + }, + { + "regex": "^cluster\\.((?:[^.]+~)?(?:[^.]+\\.)?([^.]+)\\.[^.]+\\.[^.]+\\.[^.]+\\.[^.]+\\.consul\\.)", + "tag_name": "consul.service" + }, + { + "regex": "^cluster\\.((?:[^.]+~)?(?:[^.]+\\.)?[^.]+\\.([^.]+)\\.[^.]+\\.[^.]+\\.[^.]+\\.consul\\.)", + "tag_name": "consul.namespace" + }, + { + "regex": "^cluster\\.((?:[^.]+~)?(?:[^.]+\\.)?[^.]+\\.[^.]+\\.([^.]+)\\.[^.]+\\.[^.]+\\.consul\\.)", + "tag_name": "consul.datacenter" + }, + { + "regex": "^cluster\\.((?:[^.]+~)?(?:[^.]+\\.)?[^.]+\\.[^.]+\\.[^.]+\\.([^.]+)\\.[^.]+\\.consul\\.)", + "tag_name": "consul.routing_type" + }, + { + "regex": "^cluster\\.((?:[^.]+~)?(?:[^.]+\\.)?[^.]+\\.[^.]+\\.[^.]+\\.[^.]+\\.([^.]+)\\.consul\\.)", + "tag_name": "consul.trust_domain" + }, + { + "regex": "^cluster\\.(((?:[^.]+~)?(?:[^.]+\\.)?[^.]+\\.[^.]+\\.[^.]+)\\.[^.]+\\.[^.]+\\.consul\\.)", + "tag_name": "consul.target" + }, + { + "regex": "^cluster\\.(((?:[^.]+~)?(?:[^.]+\\.)?[^.]+\\.[^.]+\\.[^.]+\\.[^.]+\\.[^.]+)\\.consul\\.)", + "tag_name": "consul.full_target" + }, + { + "tag_name": "local_cluster", + "fixed_value": "test-proxy" + } + ], + "use_all_default_tags": true + }, + "dynamic_resources": { + "lds_config": { + "ads": {} + }, + "cds_config": { + "ads": {} + }, + "ads_config": { + "api_type": "GRPC", + "grpc_services": { + "initial_metadata": [ + { + "key": "x-consul-token", + "value": "" + } + ], + "envoy_grpc": { + "cluster_name": "local_agent" + } + } + } + }, + "layered_runtime": { + "layers": [ + { + "name": "static_layer", + "static_layer": { + "envoy.deprecated_features:envoy.api.v2.Cluster.tls_context": true, + "envoy.deprecated_features:envoy.config.trace.v2.ZipkinConfig.HTTP_JSON_V1": true, + 
"envoy.deprecated_features:envoy.config.filter.network.http_connection_manager.v2.HttpConnectionManager.Tracing.operation_name": true + } + } + ] + } +} diff --git a/go.mod b/go.mod index ba7e7c0e76..e5864976ec 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/google/go-querystring v1.0.0 // indirect github.com/google/gofuzz v1.1.0 github.com/google/tcpproxy v0.0.0-20180808230851-dfa16c61dad2 - github.com/hashicorp/consul/api v1.6.0 + github.com/hashicorp/consul/api v1.7.0 github.com/hashicorp/consul/sdk v0.6.0 github.com/hashicorp/errwrap v1.0.0 github.com/hashicorp/go-bexpr v0.1.2 diff --git a/logging/logfile_test.go b/logging/logfile_test.go index c1343d0556..115f1bcff4 100644 --- a/logging/logfile_test.go +++ b/logging/logfile_test.go @@ -11,12 +11,11 @@ import ( const ( testFileName = "Consul.log" - testDuration = 2 * time.Second + testDuration = 50 * time.Millisecond testBytes = 10 ) func TestLogFile_timeRotation(t *testing.T) { - t.Parallel() tempDir := testutil.TempDir(t, "LogWriterTime") logFile := LogFile{ fileName: testFileName, @@ -24,7 +23,7 @@ func TestLogFile_timeRotation(t *testing.T) { duration: testDuration, } logFile.Write([]byte("Hello World")) - time.Sleep(2 * time.Second) + time.Sleep(3 * testDuration) logFile.Write([]byte("Second File")) want := 2 if got, _ := ioutil.ReadDir(tempDir); len(got) != want { @@ -33,7 +32,6 @@ func TestLogFile_timeRotation(t *testing.T) { } func TestLogFile_openNew(t *testing.T) { - t.Parallel() tempDir := testutil.TempDir(t, "LogWriterOpen") logFile := LogFile{fileName: testFileName, logPath: tempDir, duration: testDuration} if err := logFile.openNew(); err != nil { @@ -46,7 +44,6 @@ func TestLogFile_openNew(t *testing.T) { } func TestLogFile_byteRotation(t *testing.T) { - t.Parallel() tempDir := testutil.TempDir(t, "LogWriterBytes") logFile := LogFile{ fileName: testFileName, @@ -64,7 +61,6 @@ func TestLogFile_byteRotation(t *testing.T) { } func TestLogFile_deleteArchives(t *testing.T) { - 
t.Parallel() tempDir := testutil.TempDir(t, "LogWriteDeleteArchives") logFile := LogFile{ fileName: testFileName, @@ -100,7 +96,6 @@ func TestLogFile_deleteArchives(t *testing.T) { } func TestLogFile_deleteArchivesDisabled(t *testing.T) { - t.Parallel() tempDir := testutil.TempDir(t, t.Name()) logFile := LogFile{ fileName: testFileName, @@ -121,7 +116,6 @@ func TestLogFile_deleteArchivesDisabled(t *testing.T) { } func TestLogFile_rotationDisabled(t *testing.T) { - t.Parallel() tempDir := testutil.TempDir(t, t.Name()) logFile := LogFile{ fileName: testFileName, diff --git a/logging/logger_test.go b/logging/logger_test.go index c6bea09191..babff2a1af 100644 --- a/logging/logger_test.go +++ b/logging/logger_test.go @@ -12,7 +12,6 @@ import ( ) func TestLogger_SetupBasic(t *testing.T) { - t.Parallel() require := require.New(t) cfg := Config{LogLevel: "INFO"} @@ -22,7 +21,6 @@ func TestLogger_SetupBasic(t *testing.T) { } func TestLogger_SetupInvalidLogLevel(t *testing.T) { - t.Parallel() cfg := Config{} _, err := Setup(cfg, nil) @@ -30,7 +28,6 @@ func TestLogger_SetupInvalidLogLevel(t *testing.T) { } func TestLogger_SetupLoggerErrorLevel(t *testing.T) { - t.Parallel() cases := []struct { desc string @@ -74,7 +71,6 @@ func TestLogger_SetupLoggerErrorLevel(t *testing.T) { } func TestLogger_SetupLoggerDebugLevel(t *testing.T) { - t.Parallel() require := require.New(t) cfg := Config{LogLevel: "DEBUG"} var buf bytes.Buffer @@ -93,7 +89,6 @@ func TestLogger_SetupLoggerDebugLevel(t *testing.T) { } func TestLogger_SetupLoggerWithName(t *testing.T) { - t.Parallel() require := require.New(t) cfg := Config{ LogLevel: "DEBUG", @@ -111,7 +106,6 @@ func TestLogger_SetupLoggerWithName(t *testing.T) { } func TestLogger_SetupLoggerWithJSON(t *testing.T) { - t.Parallel() require := require.New(t) cfg := Config{ LogLevel: "DEBUG", @@ -136,7 +130,6 @@ func TestLogger_SetupLoggerWithJSON(t *testing.T) { } func TestLogger_SetupLoggerWithValidLogPath(t *testing.T) { - t.Parallel() require := 
require.New(t) tmpDir := testutil.TempDir(t, t.Name()) @@ -153,7 +146,6 @@ func TestLogger_SetupLoggerWithValidLogPath(t *testing.T) { } func TestLogger_SetupLoggerWithInValidLogPath(t *testing.T) { - t.Parallel() require := require.New(t) cfg := Config{ @@ -169,7 +161,6 @@ func TestLogger_SetupLoggerWithInValidLogPath(t *testing.T) { } func TestLogger_SetupLoggerWithInValidLogPathPermission(t *testing.T) { - t.Parallel() require := require.New(t) tmpDir := "/tmp/" + t.Name() diff --git a/logging/monitor/monitor_test.go b/logging/monitor/monitor_test.go index d5101bed80..df289aa5eb 100644 --- a/logging/monitor/monitor_test.go +++ b/logging/monitor/monitor_test.go @@ -10,7 +10,6 @@ import ( ) func TestMonitor_Start(t *testing.T) { - t.Parallel() require := require.New(t) logger := log.NewInterceptLogger(&log.LoggerOptions{ @@ -41,7 +40,6 @@ func TestMonitor_Start(t *testing.T) { } func TestMonitor_Stop(t *testing.T) { - t.Parallel() require := require.New(t) logger := log.NewInterceptLogger(&log.LoggerOptions{ @@ -82,7 +80,6 @@ func TestMonitor_Stop(t *testing.T) { } func TestMonitor_DroppedMessages(t *testing.T) { - t.Parallel() require := require.New(t) logger := log.NewInterceptLogger(&log.LoggerOptions{ @@ -125,7 +122,6 @@ func TestMonitor_DroppedMessages(t *testing.T) { } func TestMonitor_ZeroBufSizeDefault(t *testing.T) { - t.Parallel() require := require.New(t) logger := log.NewInterceptLogger(&log.LoggerOptions{ @@ -162,7 +158,6 @@ func TestMonitor_ZeroBufSizeDefault(t *testing.T) { } func TestMonitor_WriteStopped(t *testing.T) { - t.Parallel() require := require.New(t) logger := log.NewInterceptLogger(&log.LoggerOptions{ diff --git a/logging/names.go b/logging/names.go index 6ade11bf69..02c0fbf69f 100644 --- a/logging/names.go +++ b/logging/names.go @@ -51,6 +51,7 @@ const ( TerminatingGateway string = "terminating_gateway" TLSUtil string = "tlsutil" Transaction string = "txn" + UsageMetrics string = "usage_metrics" WAN string = "wan" Watch string = "watch" 
) diff --git a/proto/translate.go b/proto/translate.go index 3619a0e6e6..6ee90c084d 100644 --- a/proto/translate.go +++ b/proto/translate.go @@ -12,6 +12,8 @@ var ( timePtrType = reflect.TypeOf((*time.Time)(nil)) timeType = timePtrType.Elem() mapStrInf = reflect.TypeOf((map[string]interface{})(nil)) + + epoch1970 = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) ) // HookPBTimestampToTime is a mapstructure decode hook to translate a protobuf timestamp @@ -19,7 +21,10 @@ var ( func HookPBTimestampToTime(from, to reflect.Type, data interface{}) (interface{}, error) { if to == timeType && from == tsType { ts := data.(*types.Timestamp) - return time.Unix(ts.Seconds, int64(ts.Nanos)), nil + if ts.Seconds == 0 && ts.Nanos == 0 { + return time.Time{}, nil + } + return time.Unix(ts.Seconds, int64(ts.Nanos)).UTC(), nil } return data, nil @@ -39,6 +44,13 @@ func HookTimeToPBTimestamp(from, to reflect.Type, data interface{}) (interface{} // seeing a *time.Time instead of a time.Time. if from == timePtrType && to == mapStrInf { ts := data.(*time.Time) + + // protobuf only supports times from Jan 1 1970 onward but the time.Time type + // can represent values back to year 1. 
Basically + if ts.Before(epoch1970) { + return map[string]interface{}{}, nil + } + nanos := ts.UnixNano() if nanos < 0 { return map[string]interface{}{}, nil diff --git a/proto/translate_test.go b/proto/translate_test.go index cd88d89339..0fbfa2b9b8 100644 --- a/proto/translate_test.go +++ b/proto/translate_test.go @@ -27,7 +27,7 @@ func TestHookPBTimestampToTime(t *testing.T) { } expected := timeTSWrapper{ - Timestamp: time.Unix(1000, 42), + Timestamp: time.Unix(1000, 42).UTC(), } var actual timeTSWrapper @@ -43,7 +43,7 @@ func TestHookPBTimestampToTime(t *testing.T) { func TestHookTimeToPBTimestamp(t *testing.T) { in := timeTSWrapper{ - Timestamp: time.Unix(999999, 123456), + Timestamp: time.Unix(999999, 123456).UTC(), } expected := pbTSWrapper{ @@ -63,3 +63,24 @@ func TestHookTimeToPBTimestamp(t *testing.T) { require.Equal(t, expected, actual) } + +func TestHookTimeToPBTimestamp_ZeroTime(t *testing.T) { + in := timeTSWrapper{} + + expected := pbTSWrapper{ + Timestamp: &types.Timestamp{ + Seconds: 0, + Nanos: 0, + }, + } + + var actual pbTSWrapper + decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + DecodeHook: HookTimeToPBTimestamp, + Result: &actual, + }) + require.NoError(t, err) + require.NoError(t, decoder.Decode(in)) + + require.Equal(t, expected, actual) +} diff --git a/sdk/testutil/server.go b/sdk/testutil/server.go index 11559a416a..40743ab430 100644 --- a/sdk/testutil/server.go +++ b/sdk/testutil/server.go @@ -134,7 +134,7 @@ type ServerConfigCallback func(c *TestServerConfig) // defaultServerConfig returns a new TestServerConfig struct // with all of the listen ports incremented by one. 
-func defaultServerConfig(t CleanupT) *TestServerConfig { +func defaultServerConfig(t TestingTB) *TestServerConfig { nodeID, err := uuid.GenerateUUID() if err != nil { panic(err) @@ -216,11 +216,11 @@ type TestServer struct { tmpdir string } -// NewTestServerConfig creates a new TestServer, and makes a call to an optional +// NewTestServerConfigT creates a new TestServer, and makes a call to an optional // callback function to modify the configuration. If there is an error // configuring or starting the server, the server will NOT be running when the // function returns (thus you do not need to stop it). -func NewTestServerConfigT(t testing.TB, cb ServerConfigCallback) (*TestServer, error) { +func NewTestServerConfigT(t TestingTB, cb ServerConfigCallback) (*TestServer, error) { path, err := exec.LookPath("consul") if err != nil || path == "" { return nil, fmt.Errorf("consul not found on $PATH - download and install " + diff --git a/sdk/testutil/testlog.go b/sdk/testutil/testlog.go index a16396cb81..a47298391e 100644 --- a/sdk/testutil/testlog.go +++ b/sdk/testutil/testlog.go @@ -10,11 +10,11 @@ import ( "github.com/hashicorp/go-hclog" ) -func Logger(t testing.TB) hclog.InterceptLogger { +func Logger(t TestingTB) hclog.InterceptLogger { return LoggerWithOutput(t, NewLogBuffer(t)) } -func LoggerWithOutput(t testing.TB, output io.Writer) hclog.InterceptLogger { +func LoggerWithOutput(t TestingTB, output io.Writer) hclog.InterceptLogger { return hclog.NewInterceptLogger(&hclog.LoggerOptions{ Name: t.Name(), Level: hclog.Trace, @@ -25,18 +25,18 @@ func LoggerWithOutput(t testing.TB, output io.Writer) hclog.InterceptLogger { var sendTestLogsToStdout = os.Getenv("NOLOGBUFFER") == "1" // NewLogBuffer returns an io.Writer which buffers all writes. When the test -// ends, t.Failed is checked. If the test has failed all log output is printed -// to stdout. +// ends, t.Failed is checked. 
If the test has failed or has been run in verbose +// mode all log output is printed to stdout. // // Set the env var NOLOGBUFFER=1 to disable buffering, resulting in all log // output being written immediately to stdout. -func NewLogBuffer(t CleanupT) io.Writer { +func NewLogBuffer(t TestingTB) io.Writer { if sendTestLogsToStdout { return os.Stdout } buf := &logBuffer{buf: new(bytes.Buffer)} t.Cleanup(func() { - if t.Failed() { + if t.Failed() || testing.Verbose() { buf.Lock() defer buf.Unlock() buf.buf.WriteTo(os.Stdout) @@ -45,11 +45,6 @@ func NewLogBuffer(t CleanupT) io.Writer { return buf } -type CleanupT interface { - Cleanup(f func()) - Failed() bool -} - type logBuffer struct { buf *bytes.Buffer sync.Mutex diff --git a/sdk/testutil/types.go b/sdk/testutil/types.go new file mode 100644 index 0000000000..ec04e45dcc --- /dev/null +++ b/sdk/testutil/types.go @@ -0,0 +1,11 @@ +package testutil + +// TestingTB is an interface that describes the implementation of the testing object. +// Using an interface that describes testing.TB instead of the actual implementation +// makes testutil usable in a wider variety of contexts (e.g. 
use with ginkgo : https://godoc.org/github.com/onsi/ginkgo#GinkgoT) +type TestingTB interface { + Cleanup(func()) + Failed() bool + Logf(format string, args ...interface{}) + Name() string +} diff --git a/tlsutil/config.go b/tlsutil/config.go index 1875dcea6c..c966ec724e 100644 --- a/tlsutil/config.go +++ b/tlsutil/config.go @@ -238,7 +238,7 @@ func (c *Configurator) Update(config Config) error { if err != nil { return err } - pems, err := loadCAs(config.CAFile, config.CAPath) + pems, err := LoadCAs(config.CAFile, config.CAPath) if err != nil { return err } @@ -420,7 +420,7 @@ func loadKeyPair(certFile, keyFile string) (*tls.Certificate, error) { return &cert, nil } -func loadCAs(caFile, caPath string) ([]string, error) { +func LoadCAs(caFile, caPath string) ([]string, error) { if caFile == "" && caPath == "" { return nil, nil } diff --git a/tlsutil/config_test.go b/tlsutil/config_test.go index 2ef550999f..59dbe17f10 100644 --- a/tlsutil/config_test.go +++ b/tlsutil/config_test.go @@ -519,7 +519,7 @@ func TestConfigurator_ErrorPropagation(t *testing.T) { if !v.excludeCheck { cert, err := v.config.KeyPair() require.NoError(t, err, info) - pems, err := loadCAs(v.config.CAFile, v.config.CAPath) + pems, err := LoadCAs(v.config.CAFile, v.config.CAPath) require.NoError(t, err, info) pool, err := pool(pems) require.NoError(t, err, info) @@ -562,7 +562,7 @@ func TestConfigurator_CommonTLSConfigServerNameNodeName(t *testing.T) { } } -func TestConfigurator_loadCAs(t *testing.T) { +func TestConfigurator_LoadCAs(t *testing.T) { type variant struct { cafile, capath string shouldErr bool @@ -579,7 +579,7 @@ func TestConfigurator_loadCAs(t *testing.T) { {"../test/ca/root.cer", "../test/ca_path", false, false, 1}, } for i, v := range variants { - pems, err1 := loadCAs(v.cafile, v.capath) + pems, err1 := LoadCAs(v.cafile, v.capath) pool, err2 := pool(pems) info := fmt.Sprintf("case %d", i) if v.shouldErr { diff --git a/ui-v2/.gitignore b/ui-v2/.gitignore index 
8596a5f28f..23b8eed2ab 100644 --- a/ui-v2/.gitignore +++ b/ui-v2/.gitignore @@ -1,3 +1,4 @@ +!bin /public/consul-api-double # See https://help.github.com/ignore-files/ for more about ignoring files. diff --git a/ui-v2/app/components/consul-intention-search-bar/index.hbs b/ui-v2/app/components/consul-intention-search-bar/index.hbs new file mode 100644 index 0000000000..c848f9a51f --- /dev/null +++ b/ui-v2/app/components/consul-intention-search-bar/index.hbs @@ -0,0 +1,74 @@ +
\ No newline at end of file diff --git a/ui-v2/app/components/healthcheck-info/index.js b/ui-v2/app/components/consul-intention-search-bar/index.js similarity index 98% rename from ui-v2/app/components/healthcheck-info/index.js rename to ui-v2/app/components/consul-intention-search-bar/index.js index abe1ccedb6..4798652642 100644 --- a/ui-v2/app/components/healthcheck-info/index.js +++ b/ui-v2/app/components/consul-intention-search-bar/index.js @@ -1,4 +1,5 @@ import Component from '@ember/component'; + export default Component.extend({ tagName: '', }); diff --git a/ui-v2/app/components/consul-kv-form/index.hbs b/ui-v2/app/components/consul-kv-form/index.hbs index f5fff53910..a0a0670814 100644 --- a/ui-v2/app/components/consul-kv-form/index.hbs +++ b/ui-v2/app/components/consul-kv-form/index.hbs @@ -31,7 +31,7 @@