// Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 package dns import ( "errors" "fmt" "net" "regexp" "strings" "sync/atomic" "time" "github.com/armon/go-metrics" "github.com/armon/go-radix" "github.com/miekg/dns" "github.com/hashicorp/go-hclog" "github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/discovery" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/internal/dnsutil" "github.com/hashicorp/consul/logging" ) const ( addrLabel = "addr" arpaDomain = "arpa." arpaLabel = "arpa" suffixFailover = "failover." suffixNoFailover = "no-failover." maxRecursionLevelDefault = 3 // This field comes from the V1 DNS server and affects V1 catalog lookups maxRecurseRecords = 5 ) var ( errInvalidQuestion = fmt.Errorf("invalid question") errNameNotFound = fmt.Errorf("name not found") errNotImplemented = fmt.Errorf("not implemented") errRecursionFailed = fmt.Errorf("recursion failed") trailingSpacesRE = regexp.MustCompile(" +$") ) // Context is used augment a DNS message with Consul-specific metadata. type Context struct { Token string DefaultPartition string DefaultDatacenter string } // RouterDynamicConfig is the dynamic configuration that can be hot-reloaded type RouterDynamicConfig struct { ARecordLimit int DisableCompression bool EnableTruncate bool NodeMetaTXT bool NodeTTL time.Duration Recursors []string RecursorTimeout time.Duration RecursorStrategy structs.RecursorStrategy SOAConfig SOAConfig // TTLRadix sets service TTLs by prefix, eg: "database-*" TTLRadix *radix.Tree // TTLStrict sets TTLs to service by full name match. It Has higher priority than TTLRadix TTLStrict map[string]time.Duration UDPAnswerLimit int } // GetTTLForService Find the TTL for a given service. // return ttl, true if found, 0, false otherwise func (cfg *RouterDynamicConfig) GetTTLForService(service string) (time.Duration, bool) { if cfg.TTLStrict != nil { ttl, ok := cfg.TTLStrict[service] if ok { return ttl, true } } if cfg.TTLRadix != nil { _, ttlRaw, ok := cfg.TTLRadix.LongestPrefix(service) if ok { return ttlRaw.(time.Duration), true } } return 0, false } type SOAConfig struct { Refresh uint32 // 3600 by default Retry uint32 // 600 Expire uint32 // 86400 Minttl uint32 // 0 } // DiscoveryQueryProcessor is an interface that can be used by any consumer requesting Service Discovery results. // This could be attached to a gRPC endpoint in the future in addition to DNS. // Making this an interface means testing the router with a mock is trivial. type DiscoveryQueryProcessor interface { QueryByName(*discovery.Query, discovery.Context) ([]*discovery.Result, error) QueryByIP(net.IP, discovery.Context) ([]*discovery.Result, error) } // dnsRecursor is an interface that can be used to mock calls to external DNS servers for unit testing. // //go:generate mockery --name dnsRecursor --inpackage type dnsRecursor interface { handle(req *dns.Msg, cfgCtx *RouterDynamicConfig, remoteAddress net.Addr) (*dns.Msg, error) } // Router replaces miekg/dns.ServeMux with a simpler router that only checks for the 2-3 valid domains // that Consul supports and forwards to a single DiscoveryQueryProcessor handler. If there is no match, it will recurse. type Router struct { processor DiscoveryQueryProcessor recursor dnsRecursor domain string altDomain string datacenter string nodeName string logger hclog.Logger tokenFunc func() string translateAddressFunc func(dc string, addr string, taggedAddresses map[string]string, accept dnsutil.TranslateAddressAccept) string translateServiceAddressFunc func(dc string, address string, taggedAddresses map[string]structs.ServiceAddress, accept dnsutil.TranslateAddressAccept) string // dynamicConfig stores the config as an atomic value (for hot-reloading). // It is always of type *RouterDynamicConfig dynamicConfig atomic.Value } var _ = dns.Handler(&Router{}) var _ = DNSRouter(&Router{}) func NewRouter(cfg Config) (*Router, error) { // Make sure domains are FQDN, make them case-insensitive for DNSRequestRouter domain := dns.CanonicalName(cfg.AgentConfig.DNSDomain) altDomain := dns.CanonicalName(cfg.AgentConfig.DNSAltDomain) logger := cfg.Logger.Named(logging.DNS) router := &Router{ processor: cfg.Processor, recursor: newRecursor(logger), domain: domain, altDomain: altDomain, datacenter: cfg.AgentConfig.Datacenter, logger: logger, nodeName: cfg.AgentConfig.NodeName, tokenFunc: cfg.TokenFunc, translateAddressFunc: cfg.TranslateAddressFunc, translateServiceAddressFunc: cfg.TranslateServiceAddressFunc, } if err := router.ReloadConfig(cfg.AgentConfig); err != nil { return nil, err } return router, nil } // HandleRequest is used to process an individual DNS request. It returns a message in success or fail cases. func (r *Router) HandleRequest(req *dns.Msg, reqCtx Context, remoteAddress net.Addr) *dns.Msg { configCtx := r.dynamicConfig.Load().(*RouterDynamicConfig) respGenerator := dnsResponseGenerator{} err := validateAndNormalizeRequest(req) if err != nil { r.logger.Error("error parsing DNS query", "error", err) if errors.Is(err, errInvalidQuestion) { return respGenerator.createRefusedResponse(req) } return respGenerator.createServerFailureResponse(req, configCtx, false) } r.logger.Trace("received request", "question", req.Question[0].Name, "type", dns.Type(req.Question[0].Qtype).String()) defer func(s time.Time, q dns.Question) { metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s, []metrics.Label{ {Name: "node", Value: r.nodeName}, {Name: "type", Value: dns.Type(q.Qtype).String()}, }) r.logger.Trace("request served from client", "name", q.Name, "type", dns.Type(q.Qtype).String(), "class", dns.Class(q.Qclass).String(), "latency", time.Since(s).String(), "client", remoteAddress.String(), "client_network", remoteAddress.Network(), ) }(time.Now(), req.Question[0]) return r.handleRequestRecursively(req, reqCtx, configCtx, remoteAddress, maxRecursionLevelDefault) } // handleRequestRecursively is used to process an individual DNS request. It will recurse as needed // a maximum number of times and returns a message in success or fail cases. func (r *Router) handleRequestRecursively(req *dns.Msg, reqCtx Context, configCtx *RouterDynamicConfig, remoteAddress net.Addr, maxRecursionLevel int) *dns.Msg { respGenerator := dnsResponseGenerator{} r.logger.Trace( "received request", "question", req.Question[0].Name, "type", dns.Type(req.Question[0].Qtype).String(), "recursion_remaining", maxRecursionLevel) responseDomain, needRecurse := r.parseDomain(req.Question[0].Name) if needRecurse && !canRecurse(configCtx) { // This is the same error as an unmatched domain return respGenerator.createRefusedResponse(req) } if needRecurse { r.logger.Trace("checking recursors to handle request", "question", req.Question[0].Name, "type", dns.Type(req.Question[0].Qtype).String()) // This assumes `canRecurse(configCtx)` is true above resp, err := r.recursor.handle(req, configCtx, remoteAddress) if err != nil && !errors.Is(err, errRecursionFailed) { r.logger.Error("unhandled error recursing DNS query", "error", err) } if err != nil { return respGenerator.createServerFailureResponse(req, configCtx, true) } return resp } // Need to pass the question name to properly support recursion and the // trimming of the domain suffixes. qName := dns.CanonicalName(req.Question[0].Name) if maxRecursionLevel < maxRecursionLevelDefault { // Get the QName without the domain suffix qName = r.trimDomain(qName) } results, query, err := discoveryResultsFetcher{}.getQueryResults(&getQueryOptions{ req: req, reqCtx: reqCtx, qName: qName, remoteAddress: remoteAddress, processor: r.processor, logger: r.logger, domain: r.domain, altDomain: r.altDomain, }) // in case of the wrapped ECSNotGlobalError, extract the error from it. isECSGlobal := !errors.Is(err, discovery.ErrECSNotGlobal) err = getErrorFromECSNotGlobalError(err) if err != nil { return respGenerator.generateResponseFromError(&generateResponseFromErrorOpts{ req: req, err: err, qName: qName, configCtx: configCtx, responseDomain: responseDomain, isECSGlobal: isECSGlobal, query: query, canRecurse: canRecurse(configCtx), logger: r.logger, }) } r.logger.Trace("serializing results", "question", req.Question[0].Name, "results-found", len(results)) // This needs the question information because it affects the serialization format. // e.g., the Consul service has the same "results" for both NS and A/AAAA queries, but the serialization differs. serializedOpts := &serializeOptions{ req: req, reqCtx: reqCtx, query: query, results: results, cfg: configCtx, responseDomain: responseDomain, remoteAddress: remoteAddress, maxRecursionLevel: maxRecursionLevel, translateAddressFunc: r.translateAddressFunc, translateServiceAddressFunc: r.translateServiceAddressFunc, resolveCnameFunc: r.resolveCNAME, } resp, err := messageSerializer{}.serialize(serializedOpts) if err != nil { r.logger.Error("error serializing DNS results", "error", err) return respGenerator.generateResponseFromError(&generateResponseFromErrorOpts{ req: req, err: err, qName: qName, configCtx: configCtx, responseDomain: responseDomain, isECSGlobal: isECSGlobal, query: query, canRecurse: false, logger: r.logger, }) } respGenerator.trimDNSResponse(configCtx, remoteAddress, req, resp, r.logger) respGenerator.setEDNS(req, resp, isECSGlobal) return resp } // trimDomain trims the domain from the question name. func (r *Router) trimDomain(questionName string) string { longer := r.domain shorter := r.altDomain if len(shorter) > len(longer) { longer, shorter = shorter, longer } if strings.HasSuffix(questionName, "."+strings.TrimLeft(longer, ".")) { return strings.TrimSuffix(questionName, longer) } return strings.TrimSuffix(questionName, shorter) } // ServeDNS implements the miekg/dns.Handler interface. // This is a standard DNS listener, so we inject a default request context based on the agent's config. func (r *Router) ServeDNS(w dns.ResponseWriter, req *dns.Msg) { reqCtx := r.defaultAgentDNSRequestContext() out := r.HandleRequest(req, reqCtx, w.RemoteAddr()) w.WriteMsg(out) } // ReloadConfig hot-reloads the router config with new parameters func (r *Router) ReloadConfig(newCfg *config.RuntimeConfig) error { cfg, err := getDynamicRouterConfig(newCfg) if err != nil { return fmt.Errorf("error loading DNS config: %w", err) } r.dynamicConfig.Store(cfg) return nil } // resolveCNAME is used to recursively resolve CNAME records func (r *Router) resolveCNAME(cfgContext *RouterDynamicConfig, name string, reqCtx Context, remoteAddress net.Addr, maxRecursionLevel int) []dns.RR { // If the CNAME record points to a Consul address, resolve it internally // Convert query to lowercase because DNS is case-insensitive; r.domain and // r.altDomain are already converted if ln := strings.ToLower(name); strings.HasSuffix(ln, "."+r.domain) || strings.HasSuffix(ln, "."+r.altDomain) { if maxRecursionLevel < 1 { r.logger.Error("Infinite recursion detected for name, won't perform any CNAME resolution.", "name", name) return nil } req := &dns.Msg{} req.SetQuestion(name, dns.TypeANY) // TODO: handle error response (this is a comment from the V1 DNS Server) resp := r.handleRequestRecursively(req, reqCtx, cfgContext, nil, maxRecursionLevel-1) return resp.Answer } // Do nothing if we don't have a recursor if !canRecurse(cfgContext) { return nil } // Ask for any A records m := new(dns.Msg) m.SetQuestion(name, dns.TypeA) // Make a DNS lookup request recursorResponse, err := r.recursor.handle(m, cfgContext, remoteAddress) if err == nil { return recursorResponse.Answer } r.logger.Error("all resolvers failed for name", "name", name) return nil } // Request type is similar to miekg/dns.Type, but correlates to the different query processors we might need to invoke. type requestType string const ( requestTypeName requestType = "NAME" // A/AAAA/CNAME/SRV requestTypeIP requestType = "IP" // PTR requestTypeAddress requestType = "ADDR" // Custom addr. A/AAAA lookups requestTypeConsul requestType = "CONSUL" // SOA/NS ) // parseDomain converts a DNS message into a generic discovery request. // If the request domain does not match "consul." or the alternative domain, // it will return true for needRecurse. The logic is based on miekg/dns.ServeDNS matcher. // The implementation assumes that the only valid domains are "consul." and the alternative domain, and // that DS query types are not supported. func (r *Router) parseDomain(questionName string) (string, bool) { target := dns.CanonicalName(questionName) target, _ = stripAnyFailoverSuffix(target) for offset, overflow := 0, false; !overflow; offset, overflow = dns.NextLabel(target, offset) { subdomain := target[offset:] switch subdomain { case ".": // We don't support consul having a domain or altdomain attached to the root. return "", true case r.domain: return r.domain, false case r.altDomain: return r.altDomain, false case arpaDomain: // PTR queries always respond with the primary domain. return r.domain, false // Default: fallthrough } } // No match found; recurse if possible return "", true } // GetConfig returns the current router config func (r *Router) GetConfig() *RouterDynamicConfig { return r.dynamicConfig.Load().(*RouterDynamicConfig) } // defaultAgentDNSRequestContext returns a default request context based on the agent's config. func (r *Router) defaultAgentDNSRequestContext() Context { return Context{ Token: r.tokenFunc(), DefaultDatacenter: r.datacenter, // We don't need to specify the agent's partition here because that will be handled further down the stack // in the query processor. } } // getErrorFromECSNotGlobalError returns the underlying error from an ECSNotGlobalError, if it exists. func getErrorFromECSNotGlobalError(err error) error { if errors.Is(err, discovery.ErrECSNotGlobal) { return err.(discovery.ECSNotGlobalError).Unwrap() } return err } // parseRequestType inspects the DNS message type and question name to determine the requestType of request. // We assume by the time this is called, we are responding to a question with a domain we serve. // This is used internally to determine which query processor method (if any) to invoke. func parseRequestType(req *dns.Msg) requestType { switch { case req.Question[0].Qtype == dns.TypeSOA || req.Question[0].Qtype == dns.TypeNS: // SOA and NS type supersede the domain // NOTE!: In V1 of the DNS server it was possible to serve a PTR lookup using the arpa domain but a SOA question type. // This also included the SOA record. This seemed inconsistent and unnecessary - it was removed for simplicity. return requestTypeConsul case isPTRSubdomain(req.Question[0].Name): return requestTypeIP case isAddrSubdomain(req.Question[0].Name): return requestTypeAddress default: return requestTypeName } } // validateAndNormalizeRequest validates the DNS request and normalizes the request name. func validateAndNormalizeRequest(req *dns.Msg) error { // like upstream miekg/dns, we require at least one question, // but we will only answer the first. if len(req.Question) == 0 { return errInvalidQuestion } // We mutate the request name to respond with the canonical name. // This is Consul convention. req.Question[0].Name = dns.CanonicalName(req.Question[0].Name) return nil } // stripAnyFailoverSuffix strips off the suffixes that may have been added to the request name. func stripAnyFailoverSuffix(target string) (string, bool) { enableFailover := false // Strip off any suffixes that may have been added. offset, underflow := dns.PrevLabel(target, 1) if !underflow { maybeSuffix := target[offset:] switch maybeSuffix { case suffixFailover: target = target[:offset] enableFailover = true case suffixNoFailover: target = target[:offset] } } return target, enableFailover } // isAddrSubdomain returns true if the domain is a valid addr subdomain. func isAddrSubdomain(domain string) bool { labels := dns.SplitDomainName(domain) // Looking for .addr..consul. if len(labels) > 2 { return labels[1] == addrLabel } return false } // isPTRSubdomain returns true if the domain ends in the PTR domain, "in-addr.arpa.". func isPTRSubdomain(domain string) bool { labels := dns.SplitDomainName(domain) labelCount := len(labels) // We keep this check brief so we can have more specific error handling later. if labelCount < 1 { return false } return labels[labelCount-1] == arpaLabel } // getDynamicRouterConfig takes agent config and creates/resets the config used by DNS Router func getDynamicRouterConfig(conf *config.RuntimeConfig) (*RouterDynamicConfig, error) { cfg := &RouterDynamicConfig{ ARecordLimit: conf.DNSARecordLimit, EnableTruncate: conf.DNSEnableTruncate, NodeTTL: conf.DNSNodeTTL, RecursorStrategy: conf.DNSRecursorStrategy, RecursorTimeout: conf.DNSRecursorTimeout, UDPAnswerLimit: conf.DNSUDPAnswerLimit, NodeMetaTXT: conf.DNSNodeMetaTXT, DisableCompression: conf.DNSDisableCompression, SOAConfig: SOAConfig{ Expire: conf.DNSSOA.Expire, Minttl: conf.DNSSOA.Minttl, Refresh: conf.DNSSOA.Refresh, Retry: conf.DNSSOA.Retry, }, } if conf.DNSServiceTTL != nil { cfg.TTLRadix = radix.New() cfg.TTLStrict = make(map[string]time.Duration) for key, ttl := range conf.DNSServiceTTL { // All suffix with '*' are put in radix // This include '*' that will match anything if strings.HasSuffix(key, "*") { cfg.TTLRadix.Insert(key[:len(key)-1], ttl) } else { cfg.TTLStrict[key] = ttl } } } else { cfg.TTLRadix = nil cfg.TTLStrict = nil } for _, r := range conf.DNSRecursors { ra, err := formatRecursorAddress(r) if err != nil { return nil, fmt.Errorf("invalid recursor address: %w", err) } cfg.Recursors = append(cfg.Recursors, ra) } return cfg, nil } // canRecurse returns true if the router can recurse on the request. func canRecurse(cfg *RouterDynamicConfig) bool { return len(cfg.Recursors) > 0 }