feat(v2dns): prepared query ttls

pull/20563/head
DanStough 2024-02-09 00:10:43 -05:00
parent 7cac918811
commit eabd7bff65
No known key found for this signature in database
GPG Key ID: 0D994ED7D73D7809
8 changed files with 288 additions and 63 deletions

View File

@ -107,11 +107,11 @@ const (
type Result struct {
Service *Location // The name and address of the service.
Node *Location // The name and address of the node.
Weight uint32 // SRV queries
PortName string // Used to generate a fgdn when a specifc port was queried
PortNumber uint32 // SRV queries
Metadata map[string]string // Used to collect metadata into TXT Records
Type ResultType // Used to reconstruct the fqdn name of the resource
DNS DNSConfig // Used for DNS-specific configuration for this result
Tenancy ResultTenancy
}
@ -122,6 +122,11 @@ type Location struct {
Address string
}
type DNSConfig struct {
TTL *uint32 // deprecated: use for V1 prepared queries only
Weight uint32 // SRV queries
}
// ResultTenancy is used to reconstruct the fqdn name of the resource.
type ResultTenancy struct {
Namespace string

View File

@ -308,35 +308,31 @@ func (f *V1DataFetcher) FetchPreparedQuery(ctx Context, req *QueryPayload) ([]*R
return nil, ECSNotGlobalError{err}
}
// (v2-dns) TODO: (v2-dns) get TTLS working. They come from the database so not having
// TTL on the discovery result poses challenges.
// TODO (slackpad) - What's a safe limit we can set here? It seems like
// with dup filtering done at this level we need to get everything to
// match the previous behavior. We can optimize by pushing more filtering
// into the query execution, but for now I think we need to get the full
// response. We could also choose a large arbitrary number that will
// likely work in practice, like 10*maxUDPAnswerLimit which should help
// reduce bandwidth if there are thousands of nodes available.
// Determine the TTL. The parse should never fail since we vet it when
// the query is created, but we check anyway. If the query didn't
// specify a TTL then we will try to use the agent's service-specific
// TTL configs.
/*
// TODO (slackpad) - What's a safe limit we can set here? It seems like
// with dup filtering done at this level we need to get everything to
// match the previous behavior. We can optimize by pushing more filtering
// into the query execution, but for now I think we need to get the full
// response. We could also choose a large arbitrary number that will
// likely work in practice, like 10*maxUDPAnswerLimit which should help
// reduce bandwidth if there are thousands of nodes available.
// Determine the TTL. The parse should never fail since we vet it when
// the query is created, but we check anyway. If the query didn't
// specify a TTL then we will try to use the agent's service-specific
// TTL configs.
var ttl time.Duration
if out.DNS.TTL != "" {
var err error
ttl, err = time.ParseDuration(out.DNS.TTL)
if err != nil {
f.logger.Warn("Failed to parse TTL for prepared query , ignoring",
"ttl", out.DNS.TTL,
"prepared_query", req.Name,
)
}
} else {
ttl, _ = cfg.GetTTLForService(out.Service)
// Check is there is a TTL provided as part of the prepared query
var ttlOverride *uint32
if out.DNS.TTL != "" {
ttl, err := time.ParseDuration(out.DNS.TTL)
if err == nil {
ttlSec := uint32(ttl / time.Second)
ttlOverride = &ttlSec
}
*/
f.logger.Warn("Failed to parse TTL for prepared query , ignoring",
"ttl", out.DNS.TTL,
"prepared_query", req.Name,
)
}
// If we have no nodes, return not found!
if len(out.Nodes) == 0 {
@ -345,7 +341,7 @@ func (f *V1DataFetcher) FetchPreparedQuery(ctx Context, req *QueryPayload) ([]*R
// Perform a random shuffle
out.Nodes.Shuffle()
return f.buildResultsFromServiceNodes(out.Nodes, req), ECSNotGlobalError{}
return f.buildResultsFromServiceNodes(out.Nodes, req, ttlOverride), ECSNotGlobalError{}
}
// executePreparedQuery is used to execute a PreparedQuery against the Consul catalog.
@ -402,7 +398,7 @@ func (f *V1DataFetcher) ValidateRequest(_ Context, req *QueryPayload) error {
}
// buildResultsFromServiceNodes builds a list of results from a list of nodes.
func (f *V1DataFetcher) buildResultsFromServiceNodes(nodes []structs.CheckServiceNode, req *QueryPayload) []*Result {
func (f *V1DataFetcher) buildResultsFromServiceNodes(nodes []structs.CheckServiceNode, req *QueryPayload, ttlOverride *uint32) []*Result {
// Convert the service endpoints to results up to the limit
limit := req.Limit
if len(nodes) < limit || limit == 0 {
@ -421,8 +417,11 @@ func (f *V1DataFetcher) buildResultsFromServiceNodes(nodes []structs.CheckServic
Name: n.Node.Node,
Address: n.Node.Address,
},
Type: ResultTypeService,
Weight: uint32(findWeight(n)),
Type: ResultTypeService,
DNS: DNSConfig{
TTL: ttlOverride,
Weight: uint32(findWeight(n)),
},
PortNumber: uint32(f.translateServicePortFunc(n.Node.Datacenter, n.Service.Port, n.Service.TaggedAddresses)),
Metadata: n.Node.Meta,
Tenancy: ResultTenancy{
@ -548,7 +547,7 @@ func (f *V1DataFetcher) fetchServiceBasedOnTenancy(ctx Context, req *QueryPayloa
// Perform a random shuffle
out.Nodes.Shuffle()
return f.buildResultsFromServiceNodes(out.Nodes, req), nil
return f.buildResultsFromServiceNodes(out.Nodes, req, nil), nil
}
// findWeight returns the weight of a service node.

View File

@ -147,8 +147,10 @@ func Test_FetchEndpoints(t *testing.T) {
Name: "service-name",
Address: "service-address",
},
Type: ResultTypeService,
Weight: 1,
Type: ResultTypeService,
DNS: DNSConfig{
Weight: 1,
},
},
}

View File

@ -116,7 +116,9 @@ func (f *V2DataFetcher) FetchEndpoints(reqContext Context, req *QueryPayload, lo
Namespace: resourceObj.GetId().GetTenancy().GetNamespace(),
Partition: resourceObj.GetId().GetTenancy().GetPartition(),
},
Weight: weight,
DNS: DNSConfig{
Weight: weight,
},
}
results = append(results, result)
}

View File

@ -261,7 +261,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
},
},
@ -360,7 +362,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 2,
DNS: DNSConfig{
Weight: 2,
},
},
{
Node: &Location{Name: "consul-2", Address: "2.3.4.5"},
@ -369,7 +373,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 3,
DNS: DNSConfig{
Weight: 3,
},
},
},
},
@ -408,7 +414,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 2,
DNS: DNSConfig{
Weight: 2,
},
},
},
},
@ -452,7 +460,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-2", Address: "10.0.0.2"},
@ -461,7 +471,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-3", Address: "10.0.0.3"},
@ -470,7 +482,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-4", Address: "10.0.0.4"},
@ -479,7 +493,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-5", Address: "10.0.0.5"},
@ -488,7 +504,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-6", Address: "10.0.0.6"},
@ -497,7 +515,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-7", Address: "10.0.0.7"},
@ -506,7 +526,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-8", Address: "10.0.0.8"},
@ -515,7 +537,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-9", Address: "10.0.0.9"},
@ -524,7 +548,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
{
Node: &Location{Name: "consul-10", Address: "10.0.0.10"},
@ -533,7 +559,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
},
verifyShuffle: true,
@ -572,7 +600,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: resource.DefaultNamespaceName,
Partition: resource.DefaultPartitionName,
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
},
},
@ -613,7 +643,9 @@ func Test_V2FetchEndpoints(t *testing.T) {
Namespace: "test-namespace",
Partition: "test-partition",
},
Weight: 1,
DNS: DNSConfig{
Weight: 1,
},
},
},
},

View File

@ -13,13 +13,12 @@ import (
"sync/atomic"
"time"
"github.com/hashicorp/consul/acl"
"github.com/armon/go-radix"
"github.com/miekg/dns"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/discovery"
"github.com/hashicorp/consul/agent/structs"
@ -286,13 +285,18 @@ func (r *Router) trimDomain(questionName string) string {
}
// getTTLForResult returns the TTL for a given result.
func getTTLForResult(name string, query *discovery.Query, cfg *RouterDynamicConfig) uint32 {
func getTTLForResult(name string, overrideTTL *uint32, query *discovery.Query, cfg *RouterDynamicConfig) uint32 {
// In the case we are not making a discovery query, such as addr. or arpa. lookups,
// use the node TTL by convention
if query == nil {
return uint32(cfg.NodeTTL / time.Second)
}
if overrideTTL != nil {
// If a result was provided with an override, use that. This is the case for some prepared queries.
return *overrideTTL
}
switch query.QueryType {
// TODO (v2-dns): currently have to do this related to the results type being changed to node whe
// the v1 data fetcher encounters a blank service address and uses the node address instead.
@ -301,7 +305,7 @@ func getTTLForResult(name string, query *discovery.Query, cfg *RouterDynamicConf
case discovery.QueryTypeWorkload:
// TODO (v2-dns): we need to discuss what we want to do for workload TTLs
return 0
case discovery.QueryTypeService:
case discovery.QueryTypeService, discovery.QueryTypePreparedQuery:
ttl, ok := cfg.getTTLForService(name)
if ok {
return uint32(ttl / time.Second)
@ -884,7 +888,9 @@ func (r *Router) getAnswerExtraAndNs(result *discovery.Result, req *dns.Msg, req
if query != nil {
ttlLookupName = query.QueryPayload.Name
}
ttl := getTTLForResult(ttlLookupName, query, cfg)
ttl := getTTLForResult(ttlLookupName, result.DNS.TTL, query, cfg)
qType := req.Question[0].Qtype
// TODO (v2-dns): skip records that refer to a workload/node that don't have a valid DNS name.
@ -1278,7 +1284,7 @@ func makeSRVRecord(name, target string, result *discovery.Result, ttl uint32) *d
Ttl: ttl,
},
Priority: 1,
Weight: uint16(result.Weight),
Weight: uint16(result.DNS.Weight),
Port: uint16(result.PortNumber),
Target: target,
}

View File

@ -1791,6 +1791,165 @@ func Test_HandleRequest(t *testing.T) {
},
},
},
// V1 Prepared Queries
{
name: "v1 prepared query w/ TTL override, ANY query, returns A record",
request: &dns.Msg{
MsgHdr: dns.MsgHdr{
Opcode: dns.OpcodeQuery,
},
Question: []dns.Question{
{
Name: "foo.query.consul.",
Qtype: dns.TypeA,
Qclass: dns.ClassINET,
},
},
},
agentConfig: &config.RuntimeConfig{
DNSDomain: "consul",
DNSNodeTTL: 123 * time.Second,
DNSSOA: config.RuntimeSOAConfig{
Refresh: 1,
Retry: 2,
Expire: 3,
Minttl: 4,
},
DNSUDPAnswerLimit: maxUDPAnswerLimit,
// We shouldn't use this if we have the override defined
DNSServiceTTL: map[string]time.Duration{
"foo": 1 * time.Second,
},
},
configureDataFetcher: func(fetcher discovery.CatalogDataFetcher) {
fetcher.(*discovery.MockCatalogDataFetcher).
On("FetchPreparedQuery", mock.Anything, mock.Anything).
Return([]*discovery.Result{
{
Service: &discovery.Location{Name: "foo", Address: "1.2.3.4"},
Node: &discovery.Location{Name: "bar", Address: "1.2.3.4"},
Type: discovery.ResultTypeService,
Tenancy: discovery.ResultTenancy{
Datacenter: "dc1",
},
DNS: discovery.DNSConfig{
TTL: getUint32Ptr(3),
Weight: 1,
},
},
}, nil).
Run(func(args mock.Arguments) {
req := args.Get(1).(*discovery.QueryPayload)
require.Equal(t, "foo", req.Name)
})
},
validateAndNormalizeExpected: true,
response: &dns.Msg{
MsgHdr: dns.MsgHdr{
Opcode: dns.OpcodeQuery,
Response: true,
Authoritative: true,
},
Compress: true,
Question: []dns.Question{
{
Name: "foo.query.consul.",
Qtype: dns.TypeA,
Qclass: dns.ClassINET,
},
},
Answer: []dns.RR{
&dns.A{
Hdr: dns.RR_Header{
Name: "foo.query.consul.",
Rrtype: dns.TypeA,
Class: dns.ClassINET,
Ttl: 3,
},
A: net.ParseIP("1.2.3.4"),
},
},
},
},
{
name: "v1 prepared query w/ matching service TTL, ANY query, returns A record",
request: &dns.Msg{
MsgHdr: dns.MsgHdr{
Opcode: dns.OpcodeQuery,
},
Question: []dns.Question{
{
Name: "foo.query.consul.",
Qtype: dns.TypeA,
Qclass: dns.ClassINET,
},
},
},
agentConfig: &config.RuntimeConfig{
DNSDomain: "consul",
DNSNodeTTL: 123 * time.Second,
DNSSOA: config.RuntimeSOAConfig{
Refresh: 1,
Retry: 2,
Expire: 3,
Minttl: 4,
},
DNSUDPAnswerLimit: maxUDPAnswerLimit,
// Results should use this as the TTL
DNSServiceTTL: map[string]time.Duration{
"foo": 1 * time.Second,
},
},
configureDataFetcher: func(fetcher discovery.CatalogDataFetcher) {
fetcher.(*discovery.MockCatalogDataFetcher).
On("FetchPreparedQuery", mock.Anything, mock.Anything).
Return([]*discovery.Result{
{
Service: &discovery.Location{Name: "foo", Address: "1.2.3.4"},
Node: &discovery.Location{Name: "bar", Address: "1.2.3.4"},
Type: discovery.ResultTypeService,
Tenancy: discovery.ResultTenancy{
Datacenter: "dc1",
},
DNS: discovery.DNSConfig{
// Intentionally no TTL here.
Weight: 1,
},
},
}, nil).
Run(func(args mock.Arguments) {
req := args.Get(1).(*discovery.QueryPayload)
require.Equal(t, "foo", req.Name)
})
},
validateAndNormalizeExpected: true,
response: &dns.Msg{
MsgHdr: dns.MsgHdr{
Opcode: dns.OpcodeQuery,
Response: true,
Authoritative: true,
},
Compress: true,
Question: []dns.Question{
{
Name: "foo.query.consul.",
Qtype: dns.TypeA,
Qclass: dns.ClassINET,
},
},
Answer: []dns.RR{
&dns.A{
Hdr: dns.RR_Header{
Name: "foo.query.consul.",
Rrtype: dns.TypeA,
Class: dns.ClassINET,
Ttl: 1,
},
A: net.ParseIP("1.2.3.4"),
},
},
},
},
}
testCases = append(testCases, getAdditionalTestCases(t)...)
@ -2174,3 +2333,8 @@ func TestDNS_syncExtra(t *testing.T) {
t.Fatalf("Bad %#v vs. %#v", *resp, *expected)
}
}
// getUint32Ptr return the pointer of an uint32 literal
func getUint32Ptr(i uint32) *uint32 {
return &i
}

View File

@ -502,6 +502,9 @@ func TestDNS_ServiceLookup(t *testing.T) {
Service: structs.ServiceQuery{
Service: "db",
},
DNS: structs.QueryDNSOptions{
TTL: "3s",
},
},
}
if err := a.RPC(context.Background(), "PreparedQuery.Apply", args, &id); err != nil {
@ -538,9 +541,6 @@ func TestDNS_ServiceLookup(t *testing.T) {
if srvRec.Target != "foo.node.dc1.consul." {
t.Fatalf("Bad: %#v", srvRec)
}
if srvRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
aRec, ok := in.Extra[0].(*dns.A)
if !ok {
@ -552,9 +552,24 @@ func TestDNS_ServiceLookup(t *testing.T) {
if aRec.A.String() != "127.0.0.1" {
t.Fatalf("Bad: %#v", in.Extra[0])
}
if aRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Extra[0])
if strings.Contains(question, "query") {
// The query should have the TTL associated with the query registration.
if srvRec.Hdr.Ttl != 3 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if aRec.Hdr.Ttl != 3 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
} else {
if srvRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Answer[0])
}
if aRec.Hdr.Ttl != 0 {
t.Fatalf("Bad: %#v", in.Extra[0])
}
}
}
// Lookup a non-existing service/query, we should receive an SOA.