2020-09-09 22:46:58 +00:00
|
|
|
package resolver
|
2020-09-09 20:37:43 +00:00
|
|
|
|
|
|
|
import (
|
2020-09-09 21:51:51 +00:00
|
|
|
"context"
|
2020-09-09 22:46:58 +00:00
|
|
|
"fmt"
|
2020-09-09 20:37:43 +00:00
|
|
|
"math/rand"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/hashicorp/consul/agent/metadata"
|
|
|
|
"github.com/hashicorp/consul/agent/router"
|
|
|
|
"google.golang.org/grpc/resolver"
|
|
|
|
)
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
var registerLock sync.Mutex
|
|
|
|
|
|
|
|
// RegisterWithGRPC registers the ServerResolverBuilder as a grpc/resolver.
|
|
|
|
// This function exists to synchronize registrations with a lock.
|
|
|
|
// grpc/resolver.Register expects all registration to happen at init and does
|
|
|
|
// not allow for concurrent registration. This function exists to support
|
|
|
|
// parallel testing.
|
|
|
|
func RegisterWithGRPC(b *ServerResolverBuilder) {
|
|
|
|
registerLock.Lock()
|
|
|
|
defer registerLock.Unlock()
|
|
|
|
resolver.Register(b)
|
|
|
|
}
|
2020-09-09 21:51:51 +00:00
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// Nodes reports how many nodes are in the cluster. It is very likely
// implemented by serf, returning the number of LAN members.
type Nodes interface {
	// NumNodes returns the current node count.
	NumNodes() int
}
|
|
|
|
|
|
|
|
// ServerResolverBuilder tracks the current server list and keeps any
|
|
|
|
// ServerResolvers updated when changes occur.
|
|
|
|
type ServerResolverBuilder struct {
|
2020-09-09 22:46:58 +00:00
|
|
|
// datacenter of the local agent.
|
2020-09-09 20:37:43 +00:00
|
|
|
datacenter string
|
2020-09-09 22:46:58 +00:00
|
|
|
// scheme used to query the server. Defaults to consul. Used to support
|
|
|
|
// parallel testing because gRPC registers resolvers globally.
|
|
|
|
scheme string
|
2020-09-11 18:15:02 +00:00
|
|
|
// servers is an index of Servers by Server.ID. The map contains server IDs
|
|
|
|
// for all datacenters, so it assumes the ID is globally unique.
|
2020-09-09 22:46:58 +00:00
|
|
|
servers map[string]*metadata.Server
|
|
|
|
// resolvers is an index of connections to the serverResolver which manages
|
|
|
|
// addresses of servers for that connection.
|
|
|
|
resolvers map[resolver.ClientConn]*serverResolver
|
|
|
|
// nodes provides the number of nodes in the cluster.
|
|
|
|
nodes Nodes
|
|
|
|
// lock for servers and resolvers.
|
|
|
|
lock sync.RWMutex
|
|
|
|
}
|
|
|
|
|
|
|
|
var _ resolver.Builder = (*ServerResolverBuilder)(nil)
|
|
|
|
|
|
|
|
// Config holds the settings used by NewServerResolverBuilder.
type Config struct {
	// Datacenter is the datacenter of the local agent.
	Datacenter string

	// Scheme is the resolver scheme used to connect to the server. An empty
	// value is replaced with "consul" by NewServerResolverBuilder.
	Scheme string
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
func NewServerResolverBuilder(cfg Config, nodes Nodes) *ServerResolverBuilder {
|
|
|
|
if cfg.Scheme == "" {
|
|
|
|
cfg.Scheme = "consul"
|
|
|
|
}
|
2020-09-09 20:37:43 +00:00
|
|
|
return &ServerResolverBuilder{
|
2020-09-09 22:46:58 +00:00
|
|
|
scheme: cfg.Scheme,
|
|
|
|
datacenter: cfg.Datacenter,
|
2020-09-09 21:51:51 +00:00
|
|
|
nodes: nodes,
|
2020-09-09 20:37:43 +00:00
|
|
|
servers: make(map[string]*metadata.Server),
|
2020-09-09 22:46:58 +00:00
|
|
|
resolvers: make(map[resolver.ClientConn]*serverResolver),
|
2020-09-09 20:37:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// Run periodically reshuffles the order of server addresses within the
|
|
|
|
// resolvers to ensure the load is balanced across servers.
|
|
|
|
//
|
|
|
|
// TODO: this looks very similar to agent/router.Manager.Start, which is the
|
|
|
|
// only other caller of ComputeRebalanceTimer. Are the values passed to these
|
|
|
|
// two functions different enough that we need separate goroutines to rebalance?
|
|
|
|
// or could we have a single thing handle the timers, and call both rebalance
|
|
|
|
// functions?
|
2020-09-09 21:51:51 +00:00
|
|
|
func (s *ServerResolverBuilder) Run(ctx context.Context) {
|
2020-09-09 20:37:43 +00:00
|
|
|
// Compute the rebalance timer based on the number of local servers and nodes.
|
2020-09-09 21:51:51 +00:00
|
|
|
rebalanceDuration := router.ComputeRebalanceTimer(s.serversInDC(s.datacenter), s.nodes.NumNodes())
|
2020-09-09 20:37:43 +00:00
|
|
|
timer := time.NewTimer(rebalanceDuration)
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-timer.C:
|
|
|
|
s.rebalanceResolvers()
|
|
|
|
|
|
|
|
// Re-compute the wait duration.
|
2020-09-09 21:51:51 +00:00
|
|
|
newTimerDuration := router.ComputeRebalanceTimer(s.serversInDC(s.datacenter), s.nodes.NumNodes())
|
2020-09-09 20:37:43 +00:00
|
|
|
timer.Reset(newTimerDuration)
|
2020-09-09 21:51:51 +00:00
|
|
|
case <-ctx.Done():
|
2020-09-09 20:37:43 +00:00
|
|
|
timer.Stop()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// rebalanceResolvers shuffles the server list for resolvers in all datacenters.
|
|
|
|
func (s *ServerResolverBuilder) rebalanceResolvers() {
|
2020-09-09 22:46:58 +00:00
|
|
|
s.lock.RLock()
|
|
|
|
defer s.lock.RUnlock()
|
2020-09-09 20:37:43 +00:00
|
|
|
|
|
|
|
for _, resolver := range s.resolvers {
|
|
|
|
// Shuffle the list of addresses using the last list given to the resolver.
|
|
|
|
resolver.addrLock.Lock()
|
2020-09-09 22:46:58 +00:00
|
|
|
addrs := resolver.addrs
|
2020-09-09 20:37:43 +00:00
|
|
|
rand.Shuffle(len(addrs), func(i, j int) {
|
|
|
|
addrs[i], addrs[j] = addrs[j], addrs[i]
|
|
|
|
})
|
|
|
|
// Pass the shuffled list to the resolver.
|
|
|
|
resolver.updateAddrsLocked(addrs)
|
|
|
|
resolver.addrLock.Unlock()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// serversInDC returns the number of servers in the given datacenter.
|
|
|
|
func (s *ServerResolverBuilder) serversInDC(dc string) int {
|
2020-09-09 22:46:58 +00:00
|
|
|
s.lock.RLock()
|
|
|
|
defer s.lock.RUnlock()
|
2020-09-09 20:37:43 +00:00
|
|
|
|
|
|
|
var serverCount int
|
|
|
|
for _, server := range s.servers {
|
|
|
|
if server.Datacenter == dc {
|
|
|
|
serverCount++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return serverCount
|
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// ServerForAddr returns server metadata for a server with the specified address.
|
|
|
|
func (s *ServerResolverBuilder) ServerForAddr(addr string) (*metadata.Server, error) {
|
|
|
|
s.lock.RLock()
|
|
|
|
defer s.lock.RUnlock()
|
2020-09-09 20:37:43 +00:00
|
|
|
|
|
|
|
for _, server := range s.servers {
|
2020-09-09 22:46:58 +00:00
|
|
|
if server.Addr.String() == addr {
|
|
|
|
return server, nil
|
|
|
|
}
|
2020-09-09 20:37:43 +00:00
|
|
|
}
|
2020-09-09 22:46:58 +00:00
|
|
|
return nil, fmt.Errorf("failed to find Consul server for address %q", addr)
|
2020-09-09 20:37:43 +00:00
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// Build returns a new serverResolver for the given ClientConn. The resolver
|
2020-09-09 20:37:43 +00:00
|
|
|
// will keep the ClientConn's state updated based on updates from Serf.
|
2020-09-09 21:51:51 +00:00
|
|
|
func (s *ServerResolverBuilder) Build(target resolver.Target, cc resolver.ClientConn, _ resolver.BuildOption) (resolver.Resolver, error) {
|
2020-09-09 20:37:43 +00:00
|
|
|
s.lock.Lock()
|
|
|
|
defer s.lock.Unlock()
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// If there's already a resolver for this connection, return it.
|
|
|
|
// TODO(streaming): how would this happen since we already cache connections in ClientConnPool?
|
2020-09-09 20:37:43 +00:00
|
|
|
if resolver, ok := s.resolvers[cc]; ok {
|
|
|
|
return resolver, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make a new resolver for the dc and add it to the list of active ones.
|
2020-09-09 22:46:58 +00:00
|
|
|
datacenter := strings.TrimPrefix(target.Endpoint, "server.")
|
|
|
|
resolver := &serverResolver{
|
2020-09-09 20:37:43 +00:00
|
|
|
datacenter: datacenter,
|
|
|
|
clientConn: cc,
|
2020-09-09 22:46:58 +00:00
|
|
|
close: func() {
|
|
|
|
s.lock.Lock()
|
|
|
|
defer s.lock.Unlock()
|
|
|
|
delete(s.resolvers, cc)
|
|
|
|
},
|
2020-09-09 20:37:43 +00:00
|
|
|
}
|
|
|
|
resolver.updateAddrs(s.getDCAddrs(datacenter))
|
|
|
|
|
|
|
|
s.resolvers[cc] = resolver
|
|
|
|
return resolver, nil
|
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// Scheme returns the resolver scheme this builder was configured with.
func (s *ServerResolverBuilder) Scheme() string {
	return s.scheme
}
|
2020-09-09 20:37:43 +00:00
|
|
|
|
|
|
|
// AddServer updates the resolvers' states to include the new server's address.
|
|
|
|
func (s *ServerResolverBuilder) AddServer(server *metadata.Server) {
|
|
|
|
s.lock.Lock()
|
|
|
|
defer s.lock.Unlock()
|
|
|
|
|
|
|
|
s.servers[server.ID] = server
|
|
|
|
|
|
|
|
addrs := s.getDCAddrs(server.Datacenter)
|
|
|
|
for _, resolver := range s.resolvers {
|
|
|
|
if resolver.datacenter == server.Datacenter {
|
|
|
|
resolver.updateAddrs(addrs)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// RemoveServer updates the resolvers' states with the given server removed.
|
|
|
|
func (s *ServerResolverBuilder) RemoveServer(server *metadata.Server) {
|
|
|
|
s.lock.Lock()
|
|
|
|
defer s.lock.Unlock()
|
|
|
|
|
|
|
|
delete(s.servers, server.ID)
|
|
|
|
|
|
|
|
addrs := s.getDCAddrs(server.Datacenter)
|
|
|
|
for _, resolver := range s.resolvers {
|
|
|
|
if resolver.datacenter == server.Datacenter {
|
|
|
|
resolver.updateAddrs(addrs)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// getDCAddrs returns a list of the server addresses for the given datacenter.
|
2020-09-09 22:46:58 +00:00
|
|
|
// This method requires that lock is held for reads.
|
2020-09-09 20:37:43 +00:00
|
|
|
func (s *ServerResolverBuilder) getDCAddrs(dc string) []resolver.Address {
|
|
|
|
var addrs []resolver.Address
|
|
|
|
for _, server := range s.servers {
|
|
|
|
if server.Datacenter != dc {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
addrs = append(addrs, resolver.Address{
|
|
|
|
Addr: server.Addr.String(),
|
|
|
|
Type: resolver.Backend,
|
|
|
|
ServerName: server.Name,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
return addrs
|
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// serverResolver is a grpc Resolver that will keep a grpc.ClientConn up to date
|
2020-09-09 20:37:43 +00:00
|
|
|
// on the list of server addresses to use.
|
2020-09-09 22:46:58 +00:00
|
|
|
type serverResolver struct {
|
|
|
|
// datacenter that can be reached by the clientConn. Used by ServerResolverBuilder
|
|
|
|
// to filter resolvers for those in a specific datacenter.
|
|
|
|
datacenter string
|
2020-09-09 20:37:43 +00:00
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// clientConn that this resolver is providing addresses for.
|
|
|
|
clientConn resolver.ClientConn
|
|
|
|
|
|
|
|
// close is used by ServerResolverBuilder to remove this resolver from the
|
|
|
|
// index of resolvers. It is called by grpc when the connection is closed.
|
|
|
|
close func()
|
|
|
|
|
|
|
|
// addrs stores the list of addresses passed to updateAddrs, so that they
|
|
|
|
// can be rebalanced periodically by ServerResolverBuilder.
|
|
|
|
addrs []resolver.Address
|
|
|
|
addrLock sync.Mutex
|
2020-09-09 20:37:43 +00:00
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
var _ resolver.Resolver = (*serverResolver)(nil)
|
|
|
|
|
|
|
|
// updateAddrs updates this serverResolver's ClientConn to use the given set of
|
2020-09-09 20:37:43 +00:00
|
|
|
// addrs.
|
2020-09-09 22:46:58 +00:00
|
|
|
func (r *serverResolver) updateAddrs(addrs []resolver.Address) {
|
2020-09-09 20:37:43 +00:00
|
|
|
r.addrLock.Lock()
|
|
|
|
defer r.addrLock.Unlock()
|
|
|
|
r.updateAddrsLocked(addrs)
|
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// updateAddrsLocked updates this serverResolver's ClientConn to use the given
|
|
|
|
// set of addrs. addrLock must be held by caller.
|
|
|
|
func (r *serverResolver) updateAddrsLocked(addrs []resolver.Address) {
|
2020-09-09 20:37:43 +00:00
|
|
|
// Only pass the first address initially, which will cause the
|
|
|
|
// balancer to spin down the connection for its previous first address
|
|
|
|
// if it is different. If we don't do this, it will keep using the old
|
|
|
|
// first address as long as it is still in the list, making it impossible to
|
|
|
|
// rebalance until that address is removed.
|
|
|
|
var firstAddr []resolver.Address
|
|
|
|
if len(addrs) > 0 {
|
|
|
|
firstAddr = []resolver.Address{addrs[0]}
|
|
|
|
}
|
|
|
|
r.clientConn.UpdateState(resolver.State{Addresses: firstAddr})
|
|
|
|
|
|
|
|
// Call UpdateState again with the entire list of addrs in case we need them
|
|
|
|
// for failover.
|
|
|
|
r.clientConn.UpdateState(resolver.State{Addresses: addrs})
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
r.addrs = addrs
|
2020-09-09 20:37:43 +00:00
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
func (r *serverResolver) Close() {
|
|
|
|
r.close()
|
2020-09-09 20:37:43 +00:00
|
|
|
}
|
|
|
|
|
2020-09-09 22:46:58 +00:00
|
|
|
// ResolveNow is not used
|
|
|
|
func (*serverResolver) ResolveNow(_ resolver.ResolveNowOption) {}
|