diff --git a/command/agent/command.go b/command/agent/command.go index c4e6e51180..70526d0de4 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -189,6 +189,11 @@ func (c *Command) readConfig() *Config { } } + // Verify DNS settings + if config.DNSConfig.UDPAnswerLimit < 1 { + c.Ui.Error(fmt.Sprintf("dns_config.udp_answer_limit %d too low, must always be greater than zero", config.DNSConfig.UDPAnswerLimit)) + } + if config.EncryptKey != "" { if _, err := config.EncryptBytes(); err != nil { c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err)) diff --git a/command/agent/config.go b/command/agent/config.go index 6e74d0294f..c5bc8e8d2d 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -77,13 +77,16 @@ type DNSConfig struct { // returned by default for UDP. EnableTruncate bool `mapstructure:"enable_truncate"` - // EnableSingleton is used to override default behavior - // of DNS and return only a single host in a round robin - // DNS service request rather than all healthy service - // members. This is a work around for systems using - // getaddrinfo using rule 9 sorting from RFC3484 which - // breaks round robin DNS. - EnableSingleton bool `mapstructure:"enable_singleton"` + // UDPAnswerLimit is used to limit the maximum number of DNS Resource + // Records returned in the ANSWER section of a DNS response. This is + // not normally useful and will be limited based on the querying + // protocol, however systems that implemented §6 Rule 9 in RFC3484 + // may want to set this to 1 in order to achieve round-robin DNS. + // RFC3484 sorts answers in a deterministic order, which defeats the + // purpose of round-robin DNS. This RFC has been obsoleted by + // RFC6724, however a large number of Linux hosts using glibc(3) + // implemented §6 Rule 9 (e.g. CentOS 5-6, Debian Squeeze, etc). + UDPAnswerLimit int `mapstructure:"udp_answer_limit"` // MaxStale is used to bound how stale of a result is // accepted for a DNS lookup. This can be used with @@ -535,7 +538,8 @@ func DefaultConfig() *Config { Server: 8300, }, DNSConfig: DNSConfig{ - MaxStale: 5 * time.Second, + UDPAnswerLimit: 3, + MaxStale: 5 * time.Second, }, Telemetry: Telemetry{ StatsitePrefix: "consul", @@ -1135,12 +1139,12 @@ func MergeConfig(a, b *Config) *Config { if b.DNSConfig.AllowStale { result.DNSConfig.AllowStale = true } + if b.DNSConfig.UDPAnswerLimit != 0 { + result.DNSConfig.UDPAnswerLimit = b.DNSConfig.UDPAnswerLimit + } if b.DNSConfig.EnableTruncate { result.DNSConfig.EnableTruncate = true } - if b.DNSConfig.EnableSingleton { - result.DNSConfig.EnableSingleton = true - } if b.DNSConfig.MaxStale != 0 { result.DNSConfig.MaxStale = b.DNSConfig.MaxStale } diff --git a/command/agent/config_test.go b/command/agent/config_test.go index 89239fe74d..584d3881bc 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -1252,13 +1252,14 @@ func TestMergeConfig(t *testing.T) { DataDir: "/tmp/bar", DNSRecursors: []string{"127.0.0.2:1001"}, DNSConfig: DNSConfig{ - NodeTTL: 10 * time.Second, + AllowStale: false, + EnableTruncate: true, + MaxStale: 30 * time.Second, + NodeTTL: 10 * time.Second, ServiceTTL: map[string]time.Duration{ "api": 10 * time.Second, }, - AllowStale: true, - MaxStale: 30 * time.Second, - EnableTruncate: true, + UDPAnswerLimit: 3, }, Domain: "other", LogLevel: "info", diff --git a/command/agent/dns.go b/command/agent/dns.go index 8d5d197bd7..dfe04d3c72 100644 --- a/command/agent/dns.go +++ b/command/agent/dns.go @@ -12,12 +12,13 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/consul/consul" "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/consul/lib" "github.com/miekg/dns" ) const ( - maxServiceResponses = 3 // For UDP only - maxRecurseRecords = 5 + maxUDPAnswerLimit = 8 // For UDP only + maxRecurseRecords = 5 ) // DNSServer is used to wrap an Agent and expose various @@ -547,8 +548,8 @@ RPC: } // If the network is not TCP, restrict the number of responses - if network != "tcp" && len(resp.Answer) > maxServiceResponses { - resp.Answer = resp.Answer[:maxServiceResponses] + if network != "tcp" && len(resp.Answer) > maxUDPAnswerLimit { + resp.Answer = resp.Answer[:maxUDPAnswerLimit] // Flag that there are more records to return in the UDP response if d.config.EnableTruncate { @@ -580,7 +581,7 @@ func (d *DNSServer) preparedQueryLookup(network, datacenter, query string, req, // match the previous behavior. We can optimize by pushing more filtering // into the query execution, but for now I think we need to get the full // response. We could also choose a large arbitrary number that will - // likely work in practice, like 10*maxServiceResponses which should help + // likely work in practice, like 10*maxUDPAnswerLimit which should help // reduce bandwidth if there are thousands of nodes available. endpoint := d.agent.getEndpoint(preparedQueryEndpoint) @@ -642,8 +643,8 @@ RPC: } // If the network is not TCP, restrict the number of responses. - if network != "tcp" && len(resp.Answer) > maxServiceResponses { - resp.Answer = resp.Answer[:maxServiceResponses] + if network != "tcp" && len(resp.Answer) > maxUDPAnswerLimit { + resp.Answer = resp.Answer[:maxUDPAnswerLimit] // Flag that there are more records to return in the UDP // response. @@ -664,6 +665,9 @@ func (d *DNSServer) serviceNodeRecords(dc string, nodes structs.CheckServiceNode qName := req.Question[0].Name qType := req.Question[0].Qtype handled := make(map[string]struct{}) + + // Post-shuffle: limit the number of nodes we return to the client + effectiveRecordLimit := lib.MinInt(d.config.UDPAnswerLimit, maxUDPAnswerLimit) for _, node := range nodes { // Start with the translated address but use the service address, // if specified. @@ -684,7 +688,11 @@ func (d *DNSServer) serviceNodeRecords(dc string, nodes structs.CheckServiceNode if records != nil { resp.Answer = append(resp.Answer, records...) } - if d.config.EnableSingleton { + + if len(resp.Answer) >= effectiveRecordLimit { + if d.config.EnableTruncate { + resp.Truncated = true + } break } } diff --git a/command/agent/dns_test.go b/command/agent/dns_test.go index 32f4c1f14d..df807a9beb 100644 --- a/command/agent/dns_test.go +++ b/command/agent/dns_test.go @@ -13,6 +13,13 @@ import ( "github.com/miekg/dns" ) +const ( + testUDPAnswerLimit = 4 + testMaxUDPDNSResponses = 3 + testGenerateNumNodes = 12 + testUDPTruncateLimit = 8 +) + func makeDNSServer(t *testing.T) (string, *DNSServer) { return makeDNSServerConfig(t, nil, nil) } @@ -26,7 +33,7 @@ func makeDNSServerConfig( if agentFn != nil { agentFn(agentConf) } - dnsConf := &DNSConfig{} + dnsConf := &DefaultConfig().DNSConfig if dnsFn != nil { dnsFn(dnsConf) } @@ -1809,7 +1816,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) { testutil.WaitForLeader(t, srv.agent.RPC, "dc1") // Register a large set of nodes. - for i := 0; i < 3*maxServiceResponses; i++ { + for i := 0; i < 2*testMaxUDPDNSResponses; i++ { args := &structs.RegisterRequest{ Datacenter: "dc1", Node: fmt.Sprintf("foo%d", i), @@ -1856,7 +1863,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) { m := new(dns.Msg) m.SetQuestion(question, dns.TypeANY) - c := new(dns.Client) + c := &dns.Client{Net: "udp"} in, _, err := c.Exchange(m, addr.String()) if err != nil { t.Fatalf("err: %v", err) @@ -1864,7 +1871,7 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) { // Response length should be truncated and we should get // an A record for each response. - if len(in.Answer) != maxServiceResponses { + if len(in.Answer) != testMaxUDPDNSResponses { t.Fatalf("Bad: %#v", len(in.Answer)) } @@ -1903,7 +1910,7 @@ func TestDNS_ServiceLookup_Truncate(t *testing.T) { testutil.WaitForLeader(t, srv.agent.RPC, "dc1") // Register nodes a large number of nodes. - for i := 0; i < 3*maxServiceResponses; i++ { + for i := 0; i < 2*testUDPTruncateLimit; i++ { args := &structs.RegisterRequest{ Datacenter: "dc1", Node: fmt.Sprintf("foo%d", i), @@ -1962,16 +1969,18 @@ func TestDNS_ServiceLookup_Truncate(t *testing.T) { } func TestDNS_ServiceLookup_MaxResponses(t *testing.T) { - dir, srv := makeDNSServer(t) + dir, srv := makeDNSServerConfig(t, nil, func(c *DNSConfig) { + c.UDPAnswerLimit = testUDPAnswerLimit + }) defer os.RemoveAll(dir) defer srv.agent.Shutdown() testutil.WaitForLeader(t, srv.agent.RPC, "dc1") // Register a large number of nodes. - for i := 0; i < 6*maxServiceResponses; i++ { + for i := 0; i < testUDPAnswerLimit*testMaxUDPDNSResponses; i++ { nodeAddress := fmt.Sprintf("127.0.0.%d", i+1) - if i > 3 { + if i > (testUDPAnswerLimit * testMaxUDPDNSResponses / 2) { nodeAddress = fmt.Sprintf("fe80::%d", i+1) } args := &structs.RegisterRequest{ @@ -2012,7 +2021,7 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) { "web.service.consul.", id + ".query.consul.", } - for _, question := range questions { + for i, question := range questions { m := new(dns.Msg) m.SetQuestion(question, dns.TypeANY) @@ -2023,8 +2032,8 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) { t.Fatalf("err: %v", err) } - if len(in.Answer) != 3 { - t.Fatalf("should receive 3 answers for ANY") + if len(in.Answer) != testUDPAnswerLimit { + t.Fatalf("should receive %d answers for ANY, received: %d", testUDPAnswerLimit, len(in.Answer)) } m.SetQuestion(question, dns.TypeA) @@ -2033,8 +2042,8 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) { t.Fatalf("err: %v", err) } - if len(in.Answer) != 3 { - t.Fatalf("should receive 3 answers for A") + if len(in.Answer) != testUDPAnswerLimit { + t.Fatalf("%d: should receive %d answers for A, received: %d", i, testUDPAnswerLimit, len(in.Answer)) } m.SetQuestion(question, dns.TypeAAAA) @@ -2043,8 +2052,79 @@ func TestDNS_ServiceLookup_MaxResponses(t *testing.T) { t.Fatalf("err: %v", err) } - if len(in.Answer) != 3 { - t.Fatalf("should receive 3 answers for AAAA") + if len(in.Answer) != testUDPAnswerLimit { + t.Fatalf("should receive %d answers for AAAA, received: %d", testUDPAnswerLimit, len(in.Answer)) + } + } +} + +func TestDNS_ServiceLookup_UDPAnswerLimit(t *testing.T) { + dir, srv := makeDNSServer(t) + defer os.RemoveAll(dir) + defer srv.agent.Shutdown() + + testutil.WaitForLeader(t, srv.agent.RPC, "dc1") + + // Register a large number of nodes. + for i := 0; i < testGenerateNumNodes*testMaxUDPDNSResponses; i++ { + nodeAddress := fmt.Sprintf("127.0.0.%d", i+1) + // Mix-in a few IPv6 addresses + if i > testGenerateNumNodes/2 { + nodeAddress = fmt.Sprintf("fe80::%d", i+1) + } + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: fmt.Sprintf("foo%d", i), + Address: nodeAddress, + Service: &structs.NodeService{ + Service: "web", + Port: 8000, + }, + } + + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + } + + // Look up the service directly and via prepared query. + questions := []string{ + "web.service.consul.", + } + for _, question := range questions { + m := new(dns.Msg) + m.SetQuestion(question, dns.TypeANY) + + addr, _ := srv.agent.config.ClientListener("", srv.agent.config.Ports.DNS) + c := &dns.Client{Net: "udp"} + in, _, err := c.Exchange(m, addr.String()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if len(in.Answer) != srv.agent.config.DNSConfig.UDPAnswerLimit { + t.Fatalf("%d/%d answers received for ANY", len(in.Answer), srv.agent.config.DNSConfig.UDPAnswerLimit) + } + + m.SetQuestion(question, dns.TypeA) + in, _, err = c.Exchange(m, addr.String()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if len(in.Answer) != srv.agent.config.DNSConfig.UDPAnswerLimit { + t.Fatalf("%d/%d answers for A", len(in.Answer), srv.agent.config.DNSConfig.UDPAnswerLimit) + } + + m.SetQuestion(question, dns.TypeAAAA) + in, _, err = c.Exchange(m, addr.String()) + if err != nil { + t.Fatalf("err: %v", err) + } + + if len(in.Answer) != srv.agent.config.DNSConfig.UDPAnswerLimit { + t.Fatalf("%d/%d answers for AAAA", len(in.Answer), srv.agent.config.DNSConfig.UDPAnswerLimit) } } } diff --git a/lib/math.go b/lib/math.go new file mode 100644 index 0000000000..36b483c64e --- /dev/null +++ b/lib/math.go @@ -0,0 +1,15 @@ +package lib + +func MaxInt(a, b int) int { + if a > b { + return a + } + return b +} + +func MinInt(a, b int) int { + if a > b { + return b + } + return a +} diff --git a/lib/math_test.go b/lib/math_test.go new file mode 100644 index 0000000000..8015e5297e --- /dev/null +++ b/lib/math_test.go @@ -0,0 +1,27 @@ +package lib + +import ( + "testing" +) + +func TestMathMaxInt(t *testing.T) { + tests := [][3]int{{1, 2, 2}, {-1, 1, 1}, {2, 0, 2}} + for i, _ := range tests { + expected := tests[i][2] + actual := MaxInt(tests[i][0], tests[i][1]) + if expected != actual { + t.Fatalf("in iteration %d expected %d, got %d", i, expected, actual) + } + } +} + +func TestMathMinInt(t *testing.T) { + tests := [][3]int{{1, 2, 1}, {-1, 1, -1}, {2, 0, 0}} + for i, _ := range tests { + expected := tests[i][2] + actual := MinInt(tests[i][0], tests[i][1]) + if expected != actual { + t.Fatalf("in iteration %d expected %d, got %d", i, expected, actual) + } + } +} diff --git a/website/source/docs/agent/options.html.markdown b/website/source/docs/agent/options.html.markdown index 3a27023575..7bcc17ddb3 100644 --- a/website/source/docs/agent/options.html.markdown +++ b/website/source/docs/agent/options.html.markdown @@ -467,6 +467,18 @@ definitions support being updated during a reload. nodes whose healthchecks are not passing will be excluded from DNS results. By default (or if set to false), only nodes whose healthchecks are failing as critical will be excluded. + * `udp_answer_limit` - Artificially limit the + number of resource records contained in the answer section of a UDP-based + DNS response. The default number of resource records returned is `3`. In + environments where + [RFC 3484 Section 6](https://tools.ietf.org/html/rfc3484#section-6) Rule 9 + is implemented and enforced, clients may need to set this value to `1` to + preserve randomized DNS round-robin (note: + [https://tools.ietf.org/html/rfc3484](RFC 3484) has been obsoleted by + [RFC 6724](https://tools.ietf.org/html/rfc6724) so it should be uncommon to + need to change this value). + * `domain` Equivalent to the [`-domain` command-line flag](#_domain).