consul/command/agent/acl.go

252 lines
7.2 KiB
Go

package agent
import (
"errors"
"fmt"
"strings"
"sync"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/golang-lru"
)
// There's enough behavior difference with client-side ACLs that we've
// intentionally kept this code separate from the server-side ACL code in
// consul/acl.go. We may refactor some of the caching logic in the future,
// but for now we are developing this separately to see how things shake out.
// These must be kept in sync with the constants in consul/acl.go.
const (
// aclNotFound indicates there is no matching ACL.
aclNotFound = "ACL not found"
// rootDenied is returned when attempting to resolve a root ACL.
rootDenied = "Cannot resolve root ACL"
// permissionDenied is returned when an ACL based rejection happens.
permissionDenied = "Permission denied"
// aclDisabled is returned when ACL changes are not permitted since they
// are disabled.
aclDisabled = "ACL support disabled"
// anonymousToken is the token ID we re-write to if there is no token ID
// provided.
anonymousToken = "anonymous"
// Maximum number of cached ACL entries.
aclCacheSize = 10 * 1024
)
var (
permissionDeniedErr = errors.New(permissionDenied)
)
// aclCacheEntry is used to cache ACL tokens.
type aclCacheEntry struct {
// ACL is the cached ACL.
ACL acl.ACL
// Expires is set based on the TTL for the ACL.
Expires time.Time
// ETag is used as an optimization when fetching ACLs from servers to
// avoid transmitting data back when the agent has a good copy, which is
// usually the case when refreshing a TTL.
ETag string
}
// aclManager is used by the agent to keep track of state related to ACLs,
// including caching tokens from the servers. This has some internal state that
// we don't want to dump into the agent itself.
type aclManager struct {
// acls is a cache mapping ACL tokens to compiled policies.
acls *lru.TwoQueueCache
// master is the ACL to use when the agent master token is supplied.
// This may be nil if that option isn't set in the agent config.
master acl.ACL
// down is the ACL to use when the servers are down. This may be nil
// which means to try and use the cached policy if there is one (or
// deny if there isn't a policy in the cache).
down acl.ACL
// disabled is used to keep track of feedback from the servers that ACLs
// are disabled. If the manager discovers that ACLs are disabled, this
// will be set to the next time we should check to see if they have been
// enabled. This helps cut useless traffic, but allows us to turn on ACL
// support at the servers without having to restart the whole cluster.
disabled time.Time
disabledLock sync.RWMutex
}
// newACLManager returns an ACL manager based on the given config.
func newACLManager(config *Config) (*aclManager, error) {
// Set up the cache from ID to ACL (we don't cache policies like the
// servers; only one level).
acls, err := lru.New2Q(aclCacheSize)
if err != nil {
return nil, err
}
// If an agent master token is configured, build a policy and ACL for
// it, otherwise leave it nil.
var master acl.ACL
if len(config.ACLAgentMasterToken) > 0 {
policy := &acl.Policy{
Agents: []*acl.AgentPolicy{
&acl.AgentPolicy{
Node: config.NodeName,
Policy: acl.PolicyWrite,
},
},
}
acl, err := acl.New(acl.DenyAll(), policy)
if err != nil {
return nil, err
}
master = acl
}
var down acl.ACL
switch config.ACLDownPolicy {
case "allow":
down = acl.AllowAll()
case "deny":
down = acl.DenyAll()
case "extend-cache":
// Leave the down policy as nil to signal this.
default:
return nil, fmt.Errorf("invalid ACL down policy %q", config.ACLDownPolicy)
}
// Give back a manager.
return &aclManager{
acls: acls,
master: master,
down: down,
}, nil
}
// isDisabled returns true if the manager has discovered that ACLs are disabled
// on the servers.
func (m *aclManager) isDisabled() bool {
m.disabledLock.RLock()
defer m.disabledLock.RUnlock()
return time.Now().Before(m.disabled)
}
// lookupACL attempts to locate the compiled policy associated with the given
// token. The agent may be used to perform RPC calls to the servers to fetch
// policies that aren't in the cache.
func (m *aclManager) lookupACL(agent *Agent, id string) (acl.ACL, error) {
// Handle some special cases for the ID.
if len(id) == 0 {
id = anonymousToken
} else if acl.RootACL(id) != nil {
return nil, errors.New(rootDenied)
} else if m.master != nil && id == agent.config.ACLAgentMasterToken {
return m.master, nil
}
// Try the cache first.
var cached *aclCacheEntry
if raw, ok := m.acls.Get(id); ok {
cached = raw.(*aclCacheEntry)
}
if cached != nil && time.Now().Before(cached.Expires) {
metrics.IncrCounter([]string{"consul", "acl", "cache_hit"}, 1)
return cached.ACL, nil
} else {
metrics.IncrCounter([]string{"consul", "acl", "cache_miss"}, 1)
}
// At this point we might have a stale cached ACL, or none at all, so
// try to contact the servers.
args := structs.ACLPolicyRequest{
Datacenter: agent.config.Datacenter,
ACL: id,
}
if cached != nil {
args.ETag = cached.ETag
}
var reply structs.ACLPolicy
err := agent.RPC(agent.getEndpoint("ACL")+".GetPolicy", &args, &reply)
if err != nil {
if strings.Contains(err.Error(), aclDisabled) {
agent.logger.Printf("[DEBUG] agent: ACLs disabled on servers, will check again after %s", agent.config.ACLDisabledTTL)
m.disabledLock.Lock()
m.disabled = time.Now().Add(agent.config.ACLDisabledTTL)
m.disabledLock.Unlock()
return nil, nil
} else if strings.Contains(err.Error(), aclNotFound) {
return nil, errors.New(aclNotFound)
} else {
agent.logger.Printf("[DEBUG] agent: Failed to get policy for ACL from servers: %v", err)
if m.down != nil {
return m.down, nil
} else if cached != nil {
return cached.ACL, nil
} else {
return acl.DenyAll(), nil
}
}
}
// Use the old cached compiled ACL if we can, otherwise compile it and
// resolve any parents.
var compiled acl.ACL
if cached != nil && cached.ETag == reply.ETag {
compiled = cached.ACL
} else {
parent := acl.RootACL(reply.Parent)
if parent == nil {
parent, err = m.lookupACL(agent, reply.Parent)
if err != nil {
return nil, err
}
}
acl, err := acl.New(parent, reply.Policy)
if err != nil {
return nil, err
}
compiled = acl
}
// Update the cache.
cached = &aclCacheEntry{
ACL: compiled,
ETag: reply.ETag,
}
if reply.TTL > 0 {
cached.Expires = time.Now().Add(reply.TTL)
}
m.acls.Add(id, cached)
return compiled, nil
}
// resolveToken is the primary interface used by ACL-checkers in the agent
// endpoints, which is the one place where we do some ACL enforcement on
// clients. Some of the enforcement is normative (e.g. self and monitor)
// and some is informative (e.g. catalog and health).
func (a *Agent) resolveToken(id string) (acl.ACL, error) {
// Disable ACLs if version 8 enforcement isn't enabled.
if !(*a.config.ACLEnforceVersion8) {
return nil, nil
}
// Bail if the ACL manager is disabled. This happens if it gets feedback
// from the servers that ACLs are disabled.
if a.acls.isDisabled() {
return nil, nil
}
// This will look in the cache and fetch from the servers if necessary.
return a.acls.lookupACL(a, id)
}