diff --git a/agent/config/builder.go b/agent/config/builder.go index 5602522db7..bb4ceb952a 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -582,6 +582,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) { // DNS DNSAddrs: dnsAddrs, DNSAllowStale: b.boolVal(c.DNS.AllowStale), + DNSARecordLimit: b.intVal(c.DNS.ARecordLimit), DNSDisableCompression: b.boolVal(c.DNS.DisableCompression), DNSDomain: b.stringVal(c.DNSDomain), DNSEnableTruncate: b.boolVal(c.DNS.EnableTruncate), @@ -810,6 +811,9 @@ func (b *Builder) Validate(rt RuntimeConfig) error { if rt.DNSUDPAnswerLimit < 0 { return fmt.Errorf("dns_config.udp_answer_limit cannot be %d. Must be greater than or equal to zero", rt.DNSUDPAnswerLimit) } + if rt.DNSARecordLimit < 0 { + return fmt.Errorf("dns_config.a_record_limit cannot be %d. Must be greater than or equal to zero", rt.DNSARecordLimit) + } if err := structs.ValidateMetadata(rt.NodeMeta, false); err != nil { return fmt.Errorf("node_meta invalid: %v", err) } diff --git a/agent/config/config.go b/agent/config/config.go index ac9bcd4942..6b23449ed7 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -351,6 +351,7 @@ type CheckDefinition struct { type DNS struct { AllowStale *bool `json:"allow_stale,omitempty" hcl:"allow_stale" mapstructure:"allow_stale"` + ARecordLimit *int `json:"a_record_limit,omitempty" hcl:"a_record_limit" mapstructure:"a_record_limit"` DisableCompression *bool `json:"disable_compression,omitempty" hcl:"disable_compression" mapstructure:"disable_compression"` EnableTruncate *bool `json:"enable_truncate,omitempty" hcl:"enable_truncate" mapstructure:"enable_truncate"` MaxStale *string `json:"max_stale,omitempty" hcl:"max_stale" mapstructure:"max_stale"` diff --git a/agent/config/default.go b/agent/config/default.go index f3a0adfc5a..b266a83b90 100644 --- a/agent/config/default.go +++ b/agent/config/default.go @@ -64,6 +64,7 @@ func DefaultSource() Source { } dns_config = { allow_stale = true + a_record_limit = 0 udp_answer_limit = 3 max_stale = "87600h" recursor_timeout = "2s" diff --git a/agent/config/runtime.go b/agent/config/runtime.go index b2da426d0c..fd8e012e07 100644 --- a/agent/config/runtime.go +++ b/agent/config/runtime.go @@ -194,6 +194,25 @@ type RuntimeConfig struct { // hcl: dns_config { allow_stale = (true|false) } DNSAllowStale bool + // DNSARecordLimit is used to limit the maximum number of DNS Resource + // Records returned in the ANSWER section of a DNS response for A or AAAA + // records for both UDP and TCP queries. + // + // This is not normally useful and will be limited based on the querying + // protocol, however systems that implemented §6 Rule 9 in RFC3484 + // may want to set this to `1` in order to subvert §6 Rule 9 and + // re-obtain the effect of randomized resource records (i.e. each + // answer contains only one IP, but the IP changes every request). + // RFC3484 sorts answers in a deterministic order, which defeats the + // purpose of randomized DNS responses. This RFC has been obsoleted + // by RFC6724 and restores the desired behavior of randomized + // responses, however a large number of Linux hosts using glibc(3) + // implemented §6 Rule 9 and may need this option (e.g. CentOS 5-6, + // Debian Squeeze, etc). + // + // hcl: dns_config { a_record_limit = int } + DNSARecordLimit int + // DNSDisableCompression is used to control whether DNS responses are // compressed. In Consul 0.7 this was turned on by default and this // config was added as an opt-out. @@ -253,18 +272,11 @@ type RuntimeConfig struct { DNSServiceTTL map[string]time.Duration // DNSUDPAnswerLimit is used to limit the maximum number of DNS Resource - // Records returned in the ANSWER section of a DNS response. This is - // not normally useful and will be limited based on the querying - // protocol, however systems that implemented §6 Rule 9 in RFC3484 - // may want to set this to `1` in order to subvert §6 Rule 9 and - // re-obtain the effect of randomized resource records (i.e. each - // answer contains only one IP, but the IP changes every request). - // RFC3484 sorts answers in a deterministic order, which defeats the - // purpose of randomized DNS responses. This RFC has been obsoleted - // by RFC6724 and restores the desired behavior of randomized - // responses, however a large number of Linux hosts using glibc(3) - // implemented §6 Rule 9 and may need this option (e.g. CentOS 5-6, - // Debian Squeeze, etc). + // Records returned in the ANSWER section of a DNS response for UDP + // responses without EDNS support (limited to 512 bytes). + // This parameter is deprecated, if you want to limit the number of + // records returned by A or AAAA questions, please use DNSARecordLimit + // instead. // // hcl: dns_config { udp_answer_limit = int } DNSUDPAnswerLimit int diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 68923654d1..4e1cd6d4a2 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -1595,6 +1595,15 @@ func TestConfigFlagsAndEdgecases(t *testing.T) { hcl: []string{`dns_config = { udp_answer_limit = -1 }`}, err: "dns_config.udp_answer_limit cannot be -1. Must be greater than or equal to zero", }, + { + desc: "dns_config.a_record_limit invalid", + args: []string{ + `-data-dir=` + dataDir, + }, + json: []string{`{ "dns_config": { "a_record_limit": -1 } }`}, + hcl: []string{`dns_config = { a_record_limit = -1 }`}, + err: "dns_config.a_record_limit cannot be -1. Must be greater than or equal to zero", + }, { desc: "performance.raft_multiplier < 0", args: []string{ @@ -2288,6 +2297,7 @@ func TestFullConfig(t *testing.T) { "domain": "7W1xXSqd", "dns_config": { "allow_stale": true, + "a_record_limit": 29907, "disable_compression": true, "enable_truncate": true, "max_stale": "29685s", @@ -2723,6 +2733,7 @@ func TestFullConfig(t *testing.T) { domain = "7W1xXSqd" dns_config { allow_stale = true + a_record_limit = 29907 disable_compression = true enable_truncate = true max_stale = "29685s" @@ -3283,6 +3294,7 @@ func TestFullConfig(t *testing.T) { CheckUpdateInterval: 16507 * time.Second, ClientAddrs: []*net.IPAddr{ipAddr("93.83.18.19")}, DNSAddrs: []net.Addr{tcpAddr("93.95.95.81:7001"), udpAddr("93.95.95.81:7001")}, + DNSARecordLimit: 29907, DNSAllowStale: true, DNSDisableCompression: true, DNSDomain: "7W1xXSqd", @@ -3959,6 +3971,7 @@ func TestSanitize(t *testing.T) { "ConsulSerfWANProbeTimeout": "0s", "ConsulSerfWANSuspicionMult": 0, "ConsulServerHealthInterval": "0s", + "DNSARecordLimit": 0, "DNSAddrs": [ "tcp://1.2.3.4:5678", "udp://1.2.3.4:5678" diff --git a/agent/dns.go b/agent/dns.go index b809a2b3a3..eb59961e63 100644 --- a/agent/dns.go +++ b/agent/dns.go @@ -47,6 +47,7 @@ type dnsConfig struct { SegmentName string ServiceTTL map[string]time.Duration UDPAnswerLimit int + ARecordLimit int } // DNSServer is used to wrap an Agent and expose various @@ -94,6 +95,7 @@ func NewDNSServer(a *Agent) (*DNSServer, error) { func GetDNSConfig(conf *config.RuntimeConfig) *dnsConfig { return &dnsConfig{ AllowStale: conf.DNSAllowStale, + ARecordLimit: conf.DNSARecordLimit, Datacenter: conf.Datacenter, EnableTruncate: conf.DNSEnableTruncate, MaxStale: conf.DNSMaxStale, @@ -974,6 +976,7 @@ func (d *DNSServer) serviceNodeRecords(dc string, nodes structs.CheckServiceNode handled := make(map[string]struct{}) edns := req.IsEdns0() != nil + count := 0 for _, node := range nodes { // Start with the translated address but use the service address, // if specified. @@ -999,6 +1002,11 @@ func (d *DNSServer) serviceNodeRecords(dc string, nodes structs.CheckServiceNode records := d.formatNodeRecord(node.Node, addr, qName, qType, ttl, edns) if records != nil { resp.Answer = append(resp.Answer, records...) + count++ + if count == d.config.ARecordLimit { + // We stop only if greater than 0 or we reached the limit + return + } } } } diff --git a/agent/dns_test.go b/agent/dns_test.go index d42abbeb7a..0f29cda106 100644 --- a/agent/dns_test.go +++ b/agent/dns_test.go @@ -2997,6 +2997,163 @@ func testDNSServiceLookupResponseLimits(t *testing.T, answerLimit int, qType uin return true, nil } +func checkDNSService(t *testing.T, generateNumNodes int, aRecordLimit int, qType uint16, + expectedResultsCount int, udpSize uint16, udpAnswerLimit int) error { + a := NewTestAgent(t.Name(), ` + node_name = "test-node" + dns_config { + a_record_limit = `+fmt.Sprintf("%d", aRecordLimit)+` + udp_answer_limit = `+fmt.Sprintf("%d", aRecordLimit)+` + } + `) + defer a.Shutdown() + + for i := 0; i < generateNumNodes; i++ { + nodeAddress := fmt.Sprintf("127.0.0.%d", i+1) + if rand.Float64() < pctNodesWithIPv6 { + nodeAddress = fmt.Sprintf("fe80::%d", i+1) + } + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: fmt.Sprintf("foo%d", i), + Address: nodeAddress, + Service: &structs.NodeService{ + Service: "api-tier", + Port: 8080, + }, + } + + var out struct{} + if err := a.RPC("Catalog.Register", args, &out); err != nil { + return fmt.Errorf("err: %v", err) + } + } + var id string + { + args := &structs.PreparedQueryRequest{ + Datacenter: "dc1", + Op: structs.PreparedQueryCreate, + Query: &structs.PreparedQuery{ + Name: "api-tier", + Service: structs.ServiceQuery{ + Service: "api-tier", + }, + }, + } + + if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil { + return fmt.Errorf("err: %v", err) + } + } + + // Look up the service directly and via prepared query. + questions := []string{ + "api-tier.service.consul.", + "api-tier.query.consul.", + id + ".query.consul.", + } + for _, question := range questions { + m := new(dns.Msg) + + m.SetQuestion(question, qType) + protocol := "tcp" + if udpSize > 0 { + protocol = "udp" + } + if udpSize > 512 { + m.SetEdns0(udpSize, true) + } + c := &dns.Client{Net: protocol, UDPSize: 8192} + in, _, err := c.Exchange(m, a.DNSAddr()) + if err != nil { + return fmt.Errorf("err: %v", err) + } + if len(in.Answer) != expectedResultsCount { + return fmt.Errorf("%d/%d answers received for type %v for %s (%s)", len(in.Answer), expectedResultsCount, qType, question, protocol) + } + } + + return nil +} + +func TestDNS_ServiceLookup_ARecordLimits(t *testing.T) { + t.Parallel() + tests := []struct { + name string + aRecordLimit int + expectedAResults int + expectedAAAAResuls int + expectedSRVResults int + numNodesTotal int + udpSize uint16 + udpAnswerLimit int + }{ + // UDP + EDNS + {"udp-edns-1", 1, 1, 1, 30, 30, 8192, 3}, + {"udp-edns-2", 2, 2, 1, 30, 30, 8192, 3}, + {"udp-edns-3", 3, 3, 1, 30, 30, 8192, 3}, + {"udp-edns-4", 4, 4, 1, 30, 30, 8192, 3}, + {"udp-edns-5", 5, 5, 1, 30, 30, 8192, 3}, + {"udp-edns-6", 6, 6, 1, 30, 30, 8192, 3}, + {"udp-edns-max", 6, 3, 3, 3, 3, 8192, 3}, + // All UDP without EDNS have a limit of 2 answers due to udpAnswerLimit + // Even SRV records are limit to 2 records + {"udp-limit-1", 1, 1, 1, 1, 1, 512, 2}, + {"udp-limit-2", 2, 2, 2, 2, 2, 512, 2}, + // AAAA results limited by size of payload + {"udp-limit-3", 3, 2, 2, 2, 2, 512, 2}, + {"udp-limit-4", 4, 2, 2, 2, 2, 512, 2}, + {"udp-limit-5", 5, 2, 2, 2, 2, 512, 2}, + {"udp-limit-6", 6, 2, 2, 2, 2, 512, 2}, + {"udp-limit-max", 6, 2, 2, 2, 2, 512, 2}, + // All UDP without EDNS and no udpAnswerLimit + // Size of records is limited by UDP payload + {"udp-1", 1, 1, 1, 1, 1, 512, 0}, + {"udp-2", 2, 2, 2, 2, 2, 512, 0}, + {"udp-3", 3, 2, 2, 2, 2, 512, 0}, + {"udp-4", 4, 2, 2, 2, 2, 512, 0}, + {"udp-5", 5, 2, 2, 2, 2, 512, 0}, + {"udp-6", 6, 2, 2, 2, 2, 512, 0}, + // Only 3 A and 3 SRV records on 512 bytes + {"udp-max", 6, 2, 2, 2, 2, 512, 0}, + + {"tcp-1", 1, 1, 1, 30, 30, 0, 0}, + {"tcp-2", 2, 2, 2, 30, 30, 0, 0}, + {"tcp-3", 3, 3, 3, 30, 30, 0, 0}, + {"tcp-4", 4, 4, 4, 30, 30, 0, 0}, + {"tcp-5", 5, 5, 5, 30, 30, 0, 0}, + {"tcp-6", 6, 6, 5, 30, 30, 0, 0}, + {"tcp-max", 6, 2, 2, 2, 2, 0, 0}, + } + for _, test := range tests { + test := test // capture loop var + + queriesLimited := []uint16{ + dns.TypeA, + dns.TypeAAAA, + dns.TypeANY, + } + // All those queries should have at max queriesLimited elements + for idx, qType := range queriesLimited { + t.Run(fmt.Sprintf("ARecordLimit %d qType: %d", idx, qType), func(t *testing.T) { + t.Parallel() + err := checkDNSService(t, test.numNodesTotal, test.aRecordLimit, qType, test.expectedAResults, test.udpSize, test.udpAnswerLimit) + if err != nil { + t.Errorf("Expected lookup %s to pass: %v", test.name, err) + } + }) + } + // No limits but the size of records for SRV records, since not subject to randomization issues + t.Run("SRV lookup limitARecord", func(t *testing.T) { + t.Parallel() + err := checkDNSService(t, test.expectedSRVResults, test.aRecordLimit, dns.TypeSRV, test.numNodesTotal, test.udpSize, test.udpAnswerLimit) + if err != nil { + t.Errorf("Expected service SRV lookup %s to pass: %v", test.name, err) + } + }) + } +} + func TestDNS_ServiceLookup_AnswerLimits(t *testing.T) { t.Parallel() // Build a matrix of config parameters (udpAnswerLimit), and the diff --git a/website/source/docs/agent/options.html.md b/website/source/docs/agent/options.html.md index 970ba54afc..b503d18860 100644 --- a/website/source/docs/agent/options.html.md +++ b/website/source/docs/agent/options.html.md @@ -949,10 +949,16 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass * `udp_answer_limit` - Limit the number of resource records contained in the answer section of a UDP-based DNS - response. When answering a question, Consul will use the complete list of + response. This parameter applies only to UDP DNS queries that are less than 512 bytes. This setting is deprecated + and replaced in Consul 1.0.7 by `a_record_limit`. + + * `a_record_limit` - Limit the number of + resource records contained in the answer section of a A, AAAA or ANY DNS response (both TCP and UDP). + When answering a question, Consul will use the complete list of matching hosts, shuffle the list randomly, and then limit the number of - answers to `udp_answer_limit` (default `3`). In environments where - [RFC 3484 Section 6](https://tools.ietf.org/html/rfc3484#section-6) Rule 9 + answers to `a_record_limit` (default: no limit). This limit does not apply to SRV records. + + In environments where [RFC 3484 Section 6](https://tools.ietf.org/html/rfc3484#section-6) Rule 9 is implemented and enforced (i.e. DNS answers are always sorted and therefore never random), clients may need to set this value to `1` to preserve the expected randomized distribution behavior (note: