mirror of https://github.com/hashicorp/consul
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
580 lines
14 KiB
580 lines
14 KiB
package agent |
|
|
|
import ( |
|
"fmt" |
|
"github.com/hashicorp/consul/consul/structs" |
|
"github.com/miekg/dns" |
|
"io" |
|
"log" |
|
"math/rand" |
|
"net" |
|
"strings" |
|
"time" |
|
) |
|
|
|
// Fixed names and limits used by the DNS interface.
const (
	// testQuery is the name looked up right after start-up to check that
	// the server is answering.
	testQuery = "_test.consul."

	// consulDomain is the zone that is always registered with the mux,
	// whatever domain has been configured.
	consulDomain = "consul."

	// maxServiceResponses caps the number of nodes returned by a service
	// lookup when the client is not using TCP.
	maxServiceResponses = 3 // For UDP only

	// maxRecurseRecords caps the number of A/AAAA records appended after a
	// CNAME record has been resolved through the recursor.
	maxRecurseRecords = 3
)
|
|
|
// DNSServer is used to wrap an Agent and expose various |
|
// service discovery endpoints using a DNS interface. |
|
type DNSServer struct { |
|
agent *Agent |
|
dnsHandler *dns.ServeMux |
|
dnsServer *dns.Server |
|
dnsServerTCP *dns.Server |
|
domain string |
|
recursor string |
|
logger *log.Logger |
|
} |
|
|
|
// NewDNSServer starts a new DNS server to provide an agent interface |
|
func NewDNSServer(agent *Agent, logOutput io.Writer, domain, bind, recursor string) (*DNSServer, error) { |
|
// Make sure domain is FQDN |
|
domain = dns.Fqdn(domain) |
|
|
|
// Construct the DNS components |
|
mux := dns.NewServeMux() |
|
|
|
// Setup the servers |
|
server := &dns.Server{ |
|
Addr: bind, |
|
Net: "udp", |
|
Handler: mux, |
|
UDPSize: 65535, |
|
} |
|
serverTCP := &dns.Server{ |
|
Addr: bind, |
|
Net: "tcp", |
|
Handler: mux, |
|
} |
|
|
|
// Create the server |
|
srv := &DNSServer{ |
|
agent: agent, |
|
dnsHandler: mux, |
|
dnsServer: server, |
|
dnsServerTCP: serverTCP, |
|
domain: domain, |
|
recursor: recursor, |
|
logger: log.New(logOutput, "", log.LstdFlags), |
|
} |
|
|
|
// Register mux handlers, always handle "consul." |
|
mux.HandleFunc(domain, srv.handleQuery) |
|
if domain != consulDomain { |
|
mux.HandleFunc(consulDomain, srv.handleTest) |
|
} |
|
if recursor != "" { |
|
recursor, err := recursorAddr(recursor) |
|
if err != nil { |
|
return nil, fmt.Errorf("Invalid recursor address: %v", err) |
|
} |
|
srv.recursor = recursor |
|
mux.HandleFunc(".", srv.handleRecurse) |
|
} |
|
|
|
// Async start the DNS Servers, handle a potential error |
|
errCh := make(chan error, 1) |
|
go func() { |
|
err := server.ListenAndServe() |
|
srv.logger.Printf("[ERR] dns: error starting udp server: %v", err) |
|
errCh <- err |
|
}() |
|
|
|
errChTCP := make(chan error, 1) |
|
go func() { |
|
err := serverTCP.ListenAndServe() |
|
srv.logger.Printf("[ERR] dns: error starting tcp server: %v", err) |
|
errChTCP <- err |
|
}() |
|
|
|
// Check the server is running, do a test lookup |
|
checkCh := make(chan error, 1) |
|
go func() { |
|
// This is jank, but we have no way to edge trigger on |
|
// the start of our server, so we just wait and hope it is up. |
|
time.Sleep(50 * time.Millisecond) |
|
|
|
m := new(dns.Msg) |
|
m.SetQuestion(testQuery, dns.TypeANY) |
|
|
|
c := new(dns.Client) |
|
in, _, err := c.Exchange(m, bind) |
|
if err != nil { |
|
checkCh <- err |
|
return |
|
} |
|
|
|
if len(in.Answer) == 0 { |
|
checkCh <- fmt.Errorf("no response to test message") |
|
return |
|
} |
|
close(checkCh) |
|
}() |
|
|
|
// Wait for either the check, listen error, or timeout |
|
select { |
|
case e := <-errCh: |
|
return srv, e |
|
case e := <-errChTCP: |
|
return srv, e |
|
case e := <-checkCh: |
|
return srv, e |
|
case <-time.After(time.Second): |
|
return srv, fmt.Errorf("timeout setting up DNS server") |
|
} |
|
return srv, nil |
|
} |
|
|
|
// recursorAddr normalizes a recursor address into "host:port" form.
//
// When the port is omitted the default DNS port 53 is appended. A bare IPv6
// literal such as "::1" is accepted and wrapped in brackets. The result is
// then resolved with net.ResolveTCPAddr and the resolved address string is
// returned. Any parse or resolution failure is returned as the error.
func recursorAddr(recursor string) (string, error) {
	_, _, err := net.SplitHostPort(recursor)
	if ae, ok := err.(*net.AddrError); ok {
		switch {
		case ae.Err == "missing port in address":
			// Add the port exactly once and re-validate. Retrying in a loop
			// never terminates for inputs such as "[::1]x:5", which keep
			// reporting a missing port however many ports are appended.
			recursor = fmt.Sprintf("%s:%d", recursor, 53)
			_, _, err = net.SplitHostPort(recursor)

		case ae.Err == "too many colons in address" && net.ParseIP(recursor) != nil:
			// A bare IPv6 literal: bracket it and add the default port.
			recursor = net.JoinHostPort(recursor, "53")
			_, _, err = net.SplitHostPort(recursor)
		}
	}
	if err != nil {
		return "", err
	}

	// Resolve the host so that a canonical address string is returned.
	addr, err := net.ResolveTCPAddr("tcp", recursor)
	if err != nil {
		return "", err
	}
	return addr.String(), nil
}
|
|
|
// handleQUery is used to handle DNS queries in the configured domain |
|
func (d *DNSServer) handleQuery(resp dns.ResponseWriter, req *dns.Msg) { |
|
q := req.Question[0] |
|
defer func(s time.Time) { |
|
d.logger.Printf("[DEBUG] dns: request for %v (%v)", q, time.Now().Sub(s)) |
|
}(time.Now()) |
|
|
|
// Check if this is potentially a test query |
|
if q.Name == testQuery { |
|
d.handleTest(resp, req) |
|
return |
|
} |
|
|
|
// Switch to TCP if the client is |
|
network := "udp" |
|
if _, ok := resp.RemoteAddr().(*net.TCPAddr); ok { |
|
network = "tcp" |
|
} |
|
|
|
// Setup the message response |
|
m := new(dns.Msg) |
|
m.SetReply(req) |
|
m.Authoritative = true |
|
m.RecursionAvailable = (d.recursor != "") |
|
|
|
// Only add the SOA if requested |
|
if req.Question[0].Qtype == dns.TypeSOA { |
|
d.addSOA(d.domain, m) |
|
} |
|
|
|
// Dispatch the correct handler |
|
d.dispatch(network, req, m) |
|
|
|
// Write out the complete response |
|
if err := resp.WriteMsg(m); err != nil { |
|
d.logger.Printf("[WARN] dns: failed to respond: %v", err) |
|
} |
|
} |
|
|
|
// handleTest is used to handle DNS queries in the ".consul." domain |
|
func (d *DNSServer) handleTest(resp dns.ResponseWriter, req *dns.Msg) { |
|
q := req.Question[0] |
|
defer func(s time.Time) { |
|
d.logger.Printf("[DEBUG] dns: request for %v (%v)", q, time.Now().Sub(s)) |
|
}(time.Now()) |
|
|
|
if !(q.Qtype == dns.TypeANY || q.Qtype == dns.TypeTXT) { |
|
return |
|
} |
|
if q.Name != testQuery { |
|
return |
|
} |
|
|
|
// Always respond with TXT "ok" |
|
m := new(dns.Msg) |
|
m.SetReply(req) |
|
m.Authoritative = true |
|
m.RecursionAvailable = true |
|
header := dns.RR_Header{Name: q.Name, Rrtype: dns.TypeTXT, Class: dns.ClassINET, Ttl: 0} |
|
txt := &dns.TXT{header, []string{"ok"}} |
|
m.Answer = append(m.Answer, txt) |
|
d.addSOA(consulDomain, m) |
|
if err := resp.WriteMsg(m); err != nil { |
|
d.logger.Printf("[WARN] dns: failed to respond: %v", err) |
|
} |
|
} |
|
|
|
// addSOA is used to add an SOA record to a message for the given domain |
|
func (d *DNSServer) addSOA(domain string, msg *dns.Msg) { |
|
soa := &dns.SOA{ |
|
Hdr: dns.RR_Header{ |
|
Name: domain, |
|
Rrtype: dns.TypeSOA, |
|
Class: dns.ClassINET, |
|
Ttl: 0, |
|
}, |
|
Ns: "ns." + domain, |
|
Mbox: "postmaster." + domain, |
|
Serial: uint32(time.Now().Unix()), |
|
Refresh: 3600, |
|
Retry: 600, |
|
Expire: 86400, |
|
Minttl: 0, |
|
} |
|
msg.Ns = append(msg.Ns, soa) |
|
} |
|
|
|
// dispatch is used to parse a request and invoke the correct handler |
|
func (d *DNSServer) dispatch(network string, req, resp *dns.Msg) { |
|
// By default the query is in the default datacenter |
|
datacenter := d.agent.config.Datacenter |
|
|
|
// Get the QName without the domain suffix |
|
qName := dns.Fqdn(req.Question[0].Name) |
|
qName = strings.TrimSuffix(qName, d.domain) |
|
|
|
// Split into the label parts |
|
labels := dns.SplitDomainName(qName) |
|
|
|
// The last label is either "node", "service" or a datacenter name |
|
PARSE: |
|
n := len(labels) |
|
if n == 0 { |
|
goto INVALID |
|
} |
|
switch labels[n-1] { |
|
case "service": |
|
if n == 1 { |
|
goto INVALID |
|
} |
|
|
|
// Extract the service |
|
service := labels[n-2] |
|
|
|
// Support "." in the label, re-join all the parts |
|
tag := "" |
|
if n >= 3 { |
|
tag = strings.Join(labels[:n-2], ".") |
|
} |
|
|
|
// Handle lookup with and without tag |
|
d.serviceLookup(network, datacenter, service, tag, req, resp) |
|
|
|
case "node": |
|
if len(labels) == 1 { |
|
goto INVALID |
|
} |
|
// Allow a "." in the node name, just join all the parts |
|
node := strings.Join(labels[:n-1], ".") |
|
d.nodeLookup(network, datacenter, node, req, resp) |
|
|
|
default: |
|
// Store the DC, and re-parse |
|
datacenter = labels[n-1] |
|
labels = labels[:n-1] |
|
goto PARSE |
|
} |
|
return |
|
INVALID: |
|
d.logger.Printf("[WARN] dns: QName invalid: %s", qName) |
|
resp.SetRcode(req, dns.RcodeNameError) |
|
} |
|
|
|
// nodeLookup is used to handle a node query |
|
func (d *DNSServer) nodeLookup(network, datacenter, node string, req, resp *dns.Msg) { |
|
// Only handle ANY and A type requests |
|
qType := req.Question[0].Qtype |
|
if qType != dns.TypeANY && qType != dns.TypeA { |
|
return |
|
} |
|
|
|
// Make an RPC request |
|
args := structs.NodeSpecificRequest{ |
|
Datacenter: datacenter, |
|
Node: node, |
|
} |
|
var out structs.IndexedNodeServices |
|
if err := d.agent.RPC("Catalog.NodeServices", &args, &out); err != nil { |
|
d.logger.Printf("[ERR] dns: rpc error: %v", err) |
|
resp.SetRcode(req, dns.RcodeServerFailure) |
|
return |
|
} |
|
|
|
// If we have no address, return not found! |
|
if out.NodeServices == nil { |
|
resp.SetRcode(req, dns.RcodeNameError) |
|
return |
|
} |
|
|
|
// Add the node record |
|
records := d.formatNodeRecord(&out.NodeServices.Node, req.Question[0].Name, qType) |
|
if records != nil { |
|
resp.Answer = append(resp.Answer, records...) |
|
} |
|
} |
|
|
|
// formatNodeRecord takes a Node and returns an A, AAAA, or CNAME record |
|
func (d *DNSServer) formatNodeRecord(node *structs.Node, qName string, qType uint16) (records []dns.RR) { |
|
// Parse the IP |
|
ip := net.ParseIP(node.Address) |
|
var ipv4 net.IP |
|
if ip != nil { |
|
ipv4 = ip.To4() |
|
} |
|
switch { |
|
case ipv4 != nil && (qType == dns.TypeANY || qType == dns.TypeA): |
|
return []dns.RR{&dns.A{ |
|
Hdr: dns.RR_Header{ |
|
Name: qName, |
|
Rrtype: dns.TypeA, |
|
Class: dns.ClassINET, |
|
Ttl: 0, |
|
}, |
|
A: ip, |
|
}} |
|
|
|
case ip != nil && ipv4 == nil && (qType == dns.TypeANY || qType == dns.TypeAAAA): |
|
return []dns.RR{&dns.AAAA{ |
|
Hdr: dns.RR_Header{ |
|
Name: qName, |
|
Rrtype: dns.TypeAAAA, |
|
Class: dns.ClassINET, |
|
Ttl: 0, |
|
}, |
|
AAAA: ip, |
|
}} |
|
|
|
case ip == nil && (qType == dns.TypeANY || qType == dns.TypeCNAME || |
|
qType == dns.TypeA || qType == dns.TypeAAAA): |
|
// Get the CNAME |
|
cnRec := &dns.CNAME{ |
|
Hdr: dns.RR_Header{ |
|
Name: qName, |
|
Rrtype: dns.TypeCNAME, |
|
Class: dns.ClassINET, |
|
Ttl: 0, |
|
}, |
|
Target: dns.Fqdn(node.Address), |
|
} |
|
records = append(records, cnRec) |
|
|
|
// Recurse |
|
more := d.resolveCNAME(cnRec.Target) |
|
extra := 0 |
|
MORE_REC: |
|
for _, rr := range more { |
|
switch rr.Header().Rrtype { |
|
case dns.TypeA: |
|
fallthrough |
|
case dns.TypeAAAA: |
|
records = append(records, rr) |
|
extra++ |
|
if extra == maxRecurseRecords { |
|
break MORE_REC |
|
} |
|
} |
|
} |
|
} |
|
return records |
|
} |
|
|
|
// serviceLookup is used to handle a service query |
|
func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, req, resp *dns.Msg) { |
|
// Make an RPC request |
|
args := structs.ServiceSpecificRequest{ |
|
Datacenter: datacenter, |
|
ServiceName: service, |
|
ServiceTag: tag, |
|
TagFilter: tag != "", |
|
} |
|
var out structs.IndexedCheckServiceNodes |
|
if err := d.agent.RPC("Health.ServiceNodes", &args, &out); err != nil { |
|
d.logger.Printf("[ERR] dns: rpc error: %v", err) |
|
resp.SetRcode(req, dns.RcodeServerFailure) |
|
return |
|
} |
|
|
|
// If we have no nodes, return not found! |
|
if len(out.Nodes) == 0 { |
|
resp.SetRcode(req, dns.RcodeNameError) |
|
return |
|
} |
|
|
|
// Filter out any service nodes due to health checks |
|
out.Nodes = d.filterServiceNodes(out.Nodes) |
|
|
|
// Perform a random shuffle |
|
shuffleServiceNodes(out.Nodes) |
|
|
|
// If the network is not TCP, restrict the number of responses |
|
if network != "tcp" && len(out.Nodes) > maxServiceResponses { |
|
out.Nodes = out.Nodes[:maxServiceResponses] |
|
} |
|
|
|
// Add various responses depending on the request |
|
qType := req.Question[0].Qtype |
|
d.serviceNodeRecords(out.Nodes, req, resp) |
|
|
|
if qType == dns.TypeSRV { |
|
d.serviceSRVRecords(datacenter, out.Nodes, req, resp) |
|
} |
|
} |
|
|
|
// filterServiceNodes is used to filter out nodes that are failing |
|
// health checks to prevent routing to unhealthy nodes |
|
func (d *DNSServer) filterServiceNodes(nodes structs.CheckServiceNodes) structs.CheckServiceNodes { |
|
n := len(nodes) |
|
for i := 0; i < n; i++ { |
|
node := nodes[i] |
|
for _, check := range node.Checks { |
|
if check.Status == structs.HealthCritical { |
|
d.logger.Printf("[WARN] dns: node '%s' failing health check '%s: %s', dropping from service '%s'", |
|
node.Node.Node, check.CheckID, check.Name, node.Service.Service) |
|
nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{} |
|
n-- |
|
i-- |
|
} |
|
} |
|
} |
|
return nodes[:n] |
|
} |
|
|
|
// shuffleServiceNodes does an in-place random shuffle using the Fisher-Yates algorithm |
|
func shuffleServiceNodes(nodes structs.CheckServiceNodes) { |
|
for i := len(nodes) - 1; i > 0; i-- { |
|
j := rand.Int31() % int32(i+1) |
|
nodes[i], nodes[j] = nodes[j], nodes[i] |
|
} |
|
} |
|
|
|
// serviceNodeRecords is used to add the node records for a service lookup |
|
func (d *DNSServer) serviceNodeRecords(nodes structs.CheckServiceNodes, req, resp *dns.Msg) { |
|
qName := req.Question[0].Name |
|
qType := req.Question[0].Qtype |
|
handled := make(map[string]struct{}) |
|
for _, node := range nodes { |
|
// Avoid duplicate entries, possible if a node has |
|
// the same service on multiple ports, etc. |
|
addr := node.Node.Address |
|
if _, ok := handled[addr]; ok { |
|
continue |
|
} |
|
handled[addr] = struct{}{} |
|
|
|
// Add the node record |
|
records := d.formatNodeRecord(&node.Node, qName, qType) |
|
if records != nil { |
|
resp.Answer = append(resp.Answer, records...) |
|
} |
|
} |
|
} |
|
|
|
// serviceARecords is used to add the SRV records for a service lookup |
|
func (d *DNSServer) serviceSRVRecords(dc string, nodes structs.CheckServiceNodes, req, resp *dns.Msg) { |
|
handled := make(map[string]struct{}) |
|
for _, node := range nodes { |
|
// Avoid duplicate entries, possible if a node has |
|
// the same service the same port, etc. |
|
tuple := fmt.Sprintf("%s:%d", node.Node.Node, node.Service.Port) |
|
if _, ok := handled[tuple]; ok { |
|
continue |
|
} |
|
handled[tuple] = struct{}{} |
|
|
|
// Add the SRV record |
|
srvRec := &dns.SRV{ |
|
Hdr: dns.RR_Header{ |
|
Name: req.Question[0].Name, |
|
Rrtype: dns.TypeSRV, |
|
Class: dns.ClassINET, |
|
Ttl: 0, |
|
}, |
|
Priority: 1, |
|
Weight: 1, |
|
Port: uint16(node.Service.Port), |
|
Target: fmt.Sprintf("%s.node.%s.%s", node.Node.Node, dc, d.domain), |
|
} |
|
resp.Answer = append(resp.Answer, srvRec) |
|
|
|
// Add the extra record |
|
records := d.formatNodeRecord(&node.Node, srvRec.Target, dns.TypeANY) |
|
if records != nil { |
|
resp.Extra = append(resp.Extra, records...) |
|
} |
|
} |
|
} |
|
|
|
// handleRecurse is used to handle recursive DNS queries |
|
func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) { |
|
q := req.Question[0] |
|
network := "udp" |
|
defer func(s time.Time) { |
|
d.logger.Printf("[DEBUG] dns: request for %v (%s) (%v)", q, network, time.Now().Sub(s)) |
|
}(time.Now()) |
|
|
|
// Switch to TCP if the client is |
|
if _, ok := resp.RemoteAddr().(*net.TCPAddr); ok { |
|
network = "tcp" |
|
} |
|
|
|
// Recursively resolve |
|
c := &dns.Client{Net: network} |
|
r, rtt, err := c.Exchange(req, d.recursor) |
|
|
|
// On failure, return a SERVFAIL message |
|
if err != nil { |
|
d.logger.Printf("[ERR] dns: recurse failed: %v", err) |
|
m := &dns.Msg{} |
|
m.SetReply(req) |
|
m.RecursionAvailable = true |
|
m.SetRcode(req, dns.RcodeServerFailure) |
|
resp.WriteMsg(m) |
|
return |
|
} |
|
d.logger.Printf("[DEBUG] dns: recurse RTT for %v (%v)", q, rtt) |
|
|
|
// Forward the response |
|
if err := resp.WriteMsg(r); err != nil { |
|
d.logger.Printf("[WARN] dns: failed to respond: %v", err) |
|
} |
|
} |
|
|
|
// resolveCNAME is used to recursively resolve CNAME records |
|
func (d *DNSServer) resolveCNAME(name string) []dns.RR { |
|
// Do nothing if we don't have a recursor |
|
if d.recursor == "" { |
|
return nil |
|
} |
|
|
|
// Ask for any A records |
|
m := new(dns.Msg) |
|
m.SetQuestion(name, dns.TypeA) |
|
|
|
// Make a DNS lookup request |
|
c := &dns.Client{Net: "udp"} |
|
r, rtt, err := c.Exchange(m, d.recursor) |
|
if err != nil { |
|
d.logger.Printf("[ERR] dns: cname recurse failed: %v", err) |
|
return nil |
|
} |
|
d.logger.Printf("[DEBUG] dns: cname recurse RTT for %v (%v)", name, rtt) |
|
|
|
// Return all the answers |
|
return r.Answer |
|
}
|
|
|