// Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 package sprawl import ( "fmt" "strings" "time" "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/testing/deployer/sprawl/internal/secrets" "github.com/hashicorp/consul/testing/deployer/topology" ) // TODO: fix this by checking that a token/policy works on ALL servers before // returning from create. func isACLNotFound(err error) bool { if err == nil { return false } return strings.Contains(err.Error(), `ACL not found`) } func (s *Sprawl) bootstrapACLs(cluster string) error { var ( client = s.clients[cluster] logger = s.logger.With("cluster", cluster) mgmtToken = s.secrets.ReadGeneric(cluster, secrets.BootstrapToken) ) ac := client.ACL() if mgmtToken != "" { NOT_BOOTED: ready, err := s.isACLBootstrapped(cluster, client) if err != nil { return fmt.Errorf("error checking if the acl system is bootstrapped: %w", err) } else if !ready { logger.Warn("ACL system is not ready yet") time.Sleep(250 * time.Millisecond) goto NOT_BOOTED } TRYAGAIN: // check to see if it works _, _, err = ac.TokenReadSelf(&api.QueryOptions{Token: mgmtToken}) if err != nil { if isACLNotBootstrapped(err) { logger.Warn("system is rebooting", "error", err) time.Sleep(250 * time.Millisecond) goto TRYAGAIN } return fmt.Errorf("management token no longer works: %w", err) } logger.Debug("current management token", "token", mgmtToken) return nil } TRYAGAIN2: logger.Info("bootstrapping ACLs") tok, _, err := ac.Bootstrap() if err != nil { if isACLNotBootstrapped(err) { logger.Debug("system is rebooting", "error", err) time.Sleep(250 * time.Millisecond) goto TRYAGAIN2 } return err } mgmtToken = tok.SecretID s.secrets.SaveGeneric(cluster, secrets.BootstrapToken, mgmtToken) logger.Debug("current management token", "token", mgmtToken) return nil } func isACLNotBootstrapped(err error) bool { switch { case strings.Contains(err.Error(), "ACL system must be bootstrapped before making any requests that require authorization"): return true case strings.Contains(err.Error(), "The ACL system is currently in legacy mode"): return true } return false } func (s *Sprawl) isACLBootstrapped(cluster string, client *api.Client) (bool, error) { policy, _, err := client.ACL().PolicyReadByName("global-management", &api.QueryOptions{ Token: s.secrets.ReadGeneric(cluster, secrets.BootstrapToken), }) if err != nil { if strings.Contains(err.Error(), "Unexpected response code: 403 (ACL not found)") { return false, nil } else if isACLNotBootstrapped(err) { return false, nil } return false, err } return policy != nil, nil } func (s *Sprawl) createAnonymousToken(cluster *topology.Cluster) error { var ( client = s.clients[cluster.Name] logger = s.logger.With("cluster", cluster.Name) ) if err := s.createAnonymousPolicy(cluster); err != nil { return err } token, err := CreateOrUpdateToken(client, anonymousToken()) if err != nil { return err } logger.Debug("created anonymous token", "token", token.SecretID, ) return nil } func (s *Sprawl) createAnonymousPolicy(cluster *topology.Cluster) error { var ( client = s.clients[cluster.Name] logger = s.logger.With("cluster", cluster.Name) ) op, err := CreateOrUpdatePolicy(client, anonymousPolicy(cluster.Enterprise)) if err != nil { return err } logger.Debug("created anonymous policy", "policy-name", op.Name, "policy-id", op.ID, ) return nil } // assignAgentJoinPolicyToAnonymousToken is used only for version prior to agent token func (s *Sprawl) assignAgentJoinPolicyToAnonymousToken(cluster *topology.Cluster) error { var ( client = s.clients[cluster.Name] ) acl := client.ACL() anonymousTok, _, err := acl.TokenRead(anonymousTokenAccessorID, &api.QueryOptions{}) if err != nil { return nil } rule := ` service_prefix "" { policy = "read" } agent_prefix "" { policy = "read" } node_prefix "" { policy = "write" } operator = "write" ` policy, _, err := acl.PolicyCreate( &api.ACLPolicy{ Name: "client-join-policy", Rules: rule, }, &api.WriteOptions{}, ) if err != nil { return err } anonymousTok.Policies = append(anonymousTok.Policies, &api.ACLLink{ Name: policy.Name, }, ) _, _, err = acl.TokenUpdate(anonymousTok, &api.WriteOptions{}) if err != nil { return nil } return nil } func (s *Sprawl) createAgentTokens(cluster *topology.Cluster) error { var ( client = s.clients[cluster.Name] logger = s.logger.With("cluster", cluster.Name) ) for _, node := range cluster.Nodes { // NOTE: always create tokens even for disabled nodes. if !node.IsAgent() { continue } if node.Images.GreaterThanVersion(topology.MinVersionAgentTokenPartition) { if tok := s.secrets.ReadAgentToken(cluster.Name, node.ID()); tok == "" { token, err := CreateOrUpdateToken(client, tokenForNode(node, cluster.Enterprise)) if err != nil { return fmt.Errorf("node %s: %w", node.Name, err) } logger.Debug("created agent token", "node", node.ID(), "token", token.SecretID, ) s.secrets.SaveAgentToken(cluster.Name, node.ID(), token.SecretID) } } } return nil } // Create a policy to allow super permissive catalog reads across namespace // boundaries. func (s *Sprawl) createCrossNamespaceCatalogReadPolicies(cluster *topology.Cluster, partition string) error { if !cluster.Enterprise { return nil } var ( client = s.clients[cluster.Name] logger = s.logger.With("cluster", cluster.Name) ) op, err := CreateOrUpdatePolicy(client, policyForCrossNamespaceRead(partition)) if err != nil { return err } logger.Debug("created cross-ns-catalog-read policy", "policy-name", op.Name, "policy-id", op.ID, "partition", partition, ) return nil } func (s *Sprawl) createAllWorkloadTokens() error { for _, cluster := range s.topology.Clusters { if err := s.createWorkloadTokens(cluster); err != nil { return fmt.Errorf("createWorkloadTokens[%s]: %w", cluster.Name, err) } } return nil } func (s *Sprawl) createWorkloadTokens(cluster *topology.Cluster) error { var ( client = s.clients[cluster.Name] logger = s.logger.With("cluster", cluster.Name) ) workloadIDs := make(map[topology.ID]struct{}) for _, node := range cluster.Nodes { if !node.RunsWorkloads() || len(node.Workloads) == 0 || node.Disabled { continue } for _, wrk := range node.Workloads { if _, done := workloadIDs[wrk.ID]; done { continue } var overridePolicy *api.ACLPolicy if wrk.IsMeshGateway { var err error overridePolicy, err = CreateOrUpdatePolicy(client, policyForMeshGateway(wrk, cluster.Enterprise)) if err != nil { return fmt.Errorf("could not create policy: %w", err) } } token, err := CreateOrUpdateToken(client, tokenForWorkload(wrk, overridePolicy, cluster.Enterprise)) if err != nil { return fmt.Errorf("could not create token: %w", err) } logger.Debug("created workload token", "workload", wrk.ID.Name, "namespace", wrk.ID.Namespace, "partition", wrk.ID.Partition, "token", token.SecretID, ) s.secrets.SaveWorkloadToken(cluster.Name, wrk.ID, token.SecretID) workloadIDs[wrk.ID] = struct{}{} } } return nil } func CreateOrUpdateToken(client *api.Client, t *api.ACLToken) (*api.ACLToken, error) { ac := client.ACL() currentToken, err := getTokenByDescription(client, t.Description, &api.QueryOptions{ Partition: t.Partition, Namespace: t.Namespace, }) if err != nil { return nil, err } else if currentToken != nil { t.AccessorID = currentToken.AccessorID t.SecretID = currentToken.SecretID } if t.AccessorID != "" { t, _, err = ac.TokenUpdate(t, nil) } else { t, _, err = ac.TokenCreate(t, nil) } if err != nil { return nil, err } return t, nil } func getTokenByDescription(client *api.Client, description string, opts *api.QueryOptions) (*api.ACLToken, error) { ac := client.ACL() tokens, _, err := ac.TokenList(opts) if err != nil { return nil, err } for _, tokenEntry := range tokens { if tokenEntry.Description == description { token, _, err := ac.TokenRead(tokenEntry.AccessorID, opts) if err != nil { return nil, err } return token, nil } } return nil, nil } func CreateOrUpdatePolicy(client *api.Client, p *api.ACLPolicy) (*api.ACLPolicy, error) { ac := client.ACL() currentPolicy, _, err := ac.PolicyReadByName(p.Name, &api.QueryOptions{ Partition: p.Partition, Namespace: p.Namespace, }) // There is a quirk about Consul 1.14.x, where: if reading a policy yields // an empty result, we return "ACL not found". It's safe to ignore this here, // because if the Client's ACL token truly doesn't exist, then the create fails below. if err != nil && !strings.Contains(err.Error(), "ACL not found") { return nil, err } else if currentPolicy != nil { p.ID = currentPolicy.ID } if p.ID != "" { p, _, err = ac.PolicyUpdate(p, nil) } else { p, _, err = ac.PolicyCreate(p, nil) } if err != nil { return nil, err } return p, nil }