mirror of https://github.com/hashicorp/consul
feat(v2dns): add v2 style query metrics
parent
35f1173689
commit
95a869a380
|
@ -13,6 +13,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/armon/go-metrics"
|
"github.com/armon/go-metrics"
|
||||||
|
"github.com/armon/go-metrics/prometheus"
|
||||||
|
|
||||||
"github.com/hashicorp/go-hclog"
|
"github.com/hashicorp/go-hclog"
|
||||||
|
|
||||||
|
@ -29,6 +30,15 @@ const (
|
||||||
staleCounterThreshold = 5 * time.Second
|
staleCounterThreshold = 5 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// DNSCounters pre-registers the staleness metric.
|
||||||
|
// This value is used by both the V1 and V2 DNS (V1 Catalog-only) servers.
|
||||||
|
var DNSCounters = []prometheus.CounterDefinition{
|
||||||
|
{
|
||||||
|
Name: []string{"dns", "stale_queries"},
|
||||||
|
Help: "Increments when an agent serves a query within the allowed stale threshold.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
// v1DataFetcherDynamicConfig is used to store the dynamic configuration of the V1 data fetcher.
|
// v1DataFetcherDynamicConfig is used to store the dynamic configuration of the V1 data fetcher.
|
||||||
type v1DataFetcherDynamicConfig struct {
|
type v1DataFetcherDynamicConfig struct {
|
||||||
// Default request tenancy
|
// Default request tenancy
|
||||||
|
|
41
agent/dns.go
41
agent/dns.go
|
@ -16,7 +16,6 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/armon/go-metrics"
|
"github.com/armon/go-metrics"
|
||||||
"github.com/armon/go-metrics/prometheus"
|
|
||||||
"github.com/armon/go-radix"
|
"github.com/armon/go-radix"
|
||||||
"github.com/hashicorp/go-hclog"
|
"github.com/hashicorp/go-hclog"
|
||||||
"github.com/miekg/dns"
|
"github.com/miekg/dns"
|
||||||
|
@ -33,24 +32,6 @@ import (
|
||||||
"github.com/hashicorp/consul/logging"
|
"github.com/hashicorp/consul/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
var DNSCounters = []prometheus.CounterDefinition{
|
|
||||||
{
|
|
||||||
Name: []string{"dns", "stale_queries"},
|
|
||||||
Help: "Increments when an agent serves a query within the allowed stale threshold.",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
var DNSSummaries = []prometheus.SummaryDefinition{
|
|
||||||
{
|
|
||||||
Name: []string{"dns", "ptr_query"},
|
|
||||||
Help: "Measures the time spent handling a reverse DNS query for the given node.",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: []string{"dns", "domain_query"},
|
|
||||||
Help: "Measures the time spent handling a domain query for the given node.",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// UDP can fit ~25 A records in a 512B response, and ~14 AAAA
|
// UDP can fit ~25 A records in a 512B response, and ~14 AAAA
|
||||||
// records. Limit further to prevent unintentional configuration
|
// records. Limit further to prevent unintentional configuration
|
||||||
|
@ -406,8 +387,17 @@ func (d *DNSServer) getResponseDomain(questionName string) string {
|
||||||
func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
|
func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
|
||||||
q := req.Question[0]
|
q := req.Question[0]
|
||||||
defer func(s time.Time) {
|
defer func(s time.Time) {
|
||||||
|
// V1 DNS-style metrics
|
||||||
metrics.MeasureSinceWithLabels([]string{"dns", "ptr_query"}, s,
|
metrics.MeasureSinceWithLabels([]string{"dns", "ptr_query"}, s,
|
||||||
[]metrics.Label{{Name: "node", Value: d.agent.config.NodeName}})
|
[]metrics.Label{{Name: "node", Value: d.agent.config.NodeName}})
|
||||||
|
|
||||||
|
// V2 DNS-style metrics for forward compatibility
|
||||||
|
metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s,
|
||||||
|
[]metrics.Label{
|
||||||
|
{Name: "node", Value: d.agent.config.NodeName},
|
||||||
|
{Name: "type", Value: dns.Type(dns.TypePTR).String()},
|
||||||
|
})
|
||||||
|
|
||||||
d.logger.Debug("request served from client",
|
d.logger.Debug("request served from client",
|
||||||
"question", q,
|
"question", q,
|
||||||
"latency", time.Since(s).String(),
|
"latency", time.Since(s).String(),
|
||||||
|
@ -519,12 +509,21 @@ func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
|
||||||
func (d *DNSServer) handleQuery(resp dns.ResponseWriter, req *dns.Msg) {
|
func (d *DNSServer) handleQuery(resp dns.ResponseWriter, req *dns.Msg) {
|
||||||
q := req.Question[0]
|
q := req.Question[0]
|
||||||
defer func(s time.Time) {
|
defer func(s time.Time) {
|
||||||
|
// V1 DNS-style metrics
|
||||||
metrics.MeasureSinceWithLabels([]string{"dns", "domain_query"}, s,
|
metrics.MeasureSinceWithLabels([]string{"dns", "domain_query"}, s,
|
||||||
[]metrics.Label{{Name: "node", Value: d.agent.config.NodeName}})
|
[]metrics.Label{{Name: "node", Value: d.agent.config.NodeName}})
|
||||||
|
|
||||||
|
// V2 DNS-style metrics for forward compatibility
|
||||||
|
metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s,
|
||||||
|
[]metrics.Label{
|
||||||
|
{Name: "node", Value: d.agent.config.NodeName},
|
||||||
|
{Name: "type", Value: dns.Type(q.Qtype).String()},
|
||||||
|
})
|
||||||
|
|
||||||
d.logger.Debug("request served from client",
|
d.logger.Debug("request served from client",
|
||||||
"name", q.Name,
|
"name", q.Name,
|
||||||
"type", dns.Type(q.Qtype),
|
"type", dns.Type(q.Qtype).String(),
|
||||||
"class", dns.Class(q.Qclass),
|
"class", dns.Class(q.Qclass).String(),
|
||||||
"latency", time.Since(s).String(),
|
"latency", time.Since(s).String(),
|
||||||
"client", resp.RemoteAddr().String(),
|
"client", resp.RemoteAddr().String(),
|
||||||
"client_network", resp.RemoteAddr().Network(),
|
"client_network", resp.RemoteAddr().Network(),
|
||||||
|
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/armon/go-metrics"
|
||||||
"github.com/armon/go-radix"
|
"github.com/armon/go-radix"
|
||||||
"github.com/miekg/dns"
|
"github.com/miekg/dns"
|
||||||
|
|
||||||
|
@ -47,8 +48,6 @@ var (
|
||||||
trailingSpacesRE = regexp.MustCompile(" +$")
|
trailingSpacesRE = regexp.MustCompile(" +$")
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO (v2-dns): metrics
|
|
||||||
|
|
||||||
// Context is used augment a DNS message with Consul-specific metadata.
|
// Context is used augment a DNS message with Consul-specific metadata.
|
||||||
type Context struct {
|
type Context struct {
|
||||||
Token string
|
Token string
|
||||||
|
@ -105,6 +104,7 @@ type Router struct {
|
||||||
domain string
|
domain string
|
||||||
altDomain string
|
altDomain string
|
||||||
datacenter string
|
datacenter string
|
||||||
|
nodeName string
|
||||||
logger hclog.Logger
|
logger hclog.Logger
|
||||||
|
|
||||||
tokenFunc func() string
|
tokenFunc func() string
|
||||||
|
@ -124,8 +124,6 @@ func NewRouter(cfg Config) (*Router, error) {
|
||||||
domain := dns.CanonicalName(cfg.AgentConfig.DNSDomain)
|
domain := dns.CanonicalName(cfg.AgentConfig.DNSDomain)
|
||||||
altDomain := dns.CanonicalName(cfg.AgentConfig.DNSAltDomain)
|
altDomain := dns.CanonicalName(cfg.AgentConfig.DNSAltDomain)
|
||||||
|
|
||||||
// TODO (v2-dns): need to figure out tenancy information here in a way that work for V2 and V1
|
|
||||||
|
|
||||||
logger := cfg.Logger.Named(logging.DNS)
|
logger := cfg.Logger.Named(logging.DNS)
|
||||||
|
|
||||||
router := &Router{
|
router := &Router{
|
||||||
|
@ -135,6 +133,7 @@ func NewRouter(cfg Config) (*Router, error) {
|
||||||
altDomain: altDomain,
|
altDomain: altDomain,
|
||||||
datacenter: cfg.AgentConfig.Datacenter,
|
datacenter: cfg.AgentConfig.Datacenter,
|
||||||
logger: logger,
|
logger: logger,
|
||||||
|
nodeName: cfg.AgentConfig.NodeName,
|
||||||
tokenFunc: cfg.TokenFunc,
|
tokenFunc: cfg.TokenFunc,
|
||||||
translateAddressFunc: cfg.TranslateAddressFunc,
|
translateAddressFunc: cfg.TranslateAddressFunc,
|
||||||
translateServiceAddressFunc: cfg.TranslateServiceAddressFunc,
|
translateServiceAddressFunc: cfg.TranslateServiceAddressFunc,
|
||||||
|
@ -148,21 +147,6 @@ func NewRouter(cfg Config) (*Router, error) {
|
||||||
|
|
||||||
// HandleRequest is used to process an individual DNS request. It returns a message in success or fail cases.
|
// HandleRequest is used to process an individual DNS request. It returns a message in success or fail cases.
|
||||||
func (r *Router) HandleRequest(req *dns.Msg, reqCtx Context, remoteAddress net.Addr) *dns.Msg {
|
func (r *Router) HandleRequest(req *dns.Msg, reqCtx Context, remoteAddress net.Addr) *dns.Msg {
|
||||||
return r.handleRequestRecursively(req, reqCtx, remoteAddress, maxRecursionLevelDefault)
|
|
||||||
}
|
|
||||||
|
|
||||||
// getErrorFromECSNotGlobalError returns the underlying error from an ECSNotGlobalError, if it exists.
|
|
||||||
func getErrorFromECSNotGlobalError(err error) error {
|
|
||||||
if errors.Is(err, discovery.ErrECSNotGlobal) {
|
|
||||||
return err.(discovery.ECSNotGlobalError).Unwrap()
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleRequestRecursively is used to process an individual DNS request. It will recurse as needed
|
|
||||||
// a maximum number of times and returns a message in success or fail cases.
|
|
||||||
func (r *Router) handleRequestRecursively(req *dns.Msg, reqCtx Context,
|
|
||||||
remoteAddress net.Addr, maxRecursionLevel int) *dns.Msg {
|
|
||||||
configCtx := r.dynamicConfig.Load().(*RouterDynamicConfig)
|
configCtx := r.dynamicConfig.Load().(*RouterDynamicConfig)
|
||||||
|
|
||||||
r.logger.Trace("received request", "question", req.Question[0].Name, "type", dns.Type(req.Question[0].Qtype).String())
|
r.logger.Trace("received request", "question", req.Question[0].Name, "type", dns.Type(req.Question[0].Qtype).String())
|
||||||
|
@ -176,6 +160,45 @@ func (r *Router) handleRequestRecursively(req *dns.Msg, reqCtx Context,
|
||||||
return createServerFailureResponse(req, configCtx, false)
|
return createServerFailureResponse(req, configCtx, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
defer func(s time.Time, q dns.Question) {
|
||||||
|
metrics.MeasureSinceWithLabels([]string{"dns", "query"}, s,
|
||||||
|
[]metrics.Label{
|
||||||
|
{Name: "node", Value: r.nodeName},
|
||||||
|
{Name: "type", Value: dns.Type(q.Qtype).String()},
|
||||||
|
})
|
||||||
|
|
||||||
|
r.logger.Debug("request served from client",
|
||||||
|
"name", q.Name,
|
||||||
|
"type", dns.Type(q.Qtype).String(),
|
||||||
|
"class", dns.Class(q.Qclass).String(),
|
||||||
|
"latency", time.Since(s).String(),
|
||||||
|
"client", remoteAddress.String(),
|
||||||
|
"client_network", remoteAddress.Network(),
|
||||||
|
)
|
||||||
|
}(time.Now(), req.Question[0])
|
||||||
|
|
||||||
|
return r.handleRequestRecursively(req, reqCtx, configCtx, remoteAddress, maxRecursionLevelDefault)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getErrorFromECSNotGlobalError returns the underlying error from an ECSNotGlobalError, if it exists.
|
||||||
|
func getErrorFromECSNotGlobalError(err error) error {
|
||||||
|
if errors.Is(err, discovery.ErrECSNotGlobal) {
|
||||||
|
return err.(discovery.ECSNotGlobalError).Unwrap()
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleRequestRecursively is used to process an individual DNS request. It will recurse as needed
|
||||||
|
// a maximum number of times and returns a message in success or fail cases.
|
||||||
|
func (r *Router) handleRequestRecursively(req *dns.Msg, reqCtx Context, configCtx *RouterDynamicConfig,
|
||||||
|
remoteAddress net.Addr, maxRecursionLevel int) *dns.Msg {
|
||||||
|
|
||||||
|
r.logger.Trace(
|
||||||
|
"received request",
|
||||||
|
"question", req.Question[0].Name,
|
||||||
|
"type", dns.Type(req.Question[0].Qtype).String(),
|
||||||
|
"recursion_remaining", maxRecursionLevel)
|
||||||
|
|
||||||
responseDomain, needRecurse := r.parseDomain(req.Question[0].Name)
|
responseDomain, needRecurse := r.parseDomain(req.Question[0].Name)
|
||||||
if needRecurse && !canRecurse(configCtx) {
|
if needRecurse && !canRecurse(configCtx) {
|
||||||
// This is the same error as an unmatched domain
|
// This is the same error as an unmatched domain
|
||||||
|
@ -655,7 +678,7 @@ func (r *Router) defaultAgentDNSRequestContext() Context {
|
||||||
}
|
}
|
||||||
|
|
||||||
// resolveCNAME is used to recursively resolve CNAME records
|
// resolveCNAME is used to recursively resolve CNAME records
|
||||||
func (r *Router) resolveCNAME(cfg *RouterDynamicConfig, name string, reqCtx Context,
|
func (r *Router) resolveCNAME(cfgContext *RouterDynamicConfig, name string, reqCtx Context,
|
||||||
remoteAddress net.Addr, maxRecursionLevel int) []dns.RR {
|
remoteAddress net.Addr, maxRecursionLevel int) []dns.RR {
|
||||||
// If the CNAME record points to a Consul address, resolve it internally
|
// If the CNAME record points to a Consul address, resolve it internally
|
||||||
// Convert query to lowercase because DNS is case-insensitive; d.domain and
|
// Convert query to lowercase because DNS is case-insensitive; d.domain and
|
||||||
|
@ -670,13 +693,13 @@ func (r *Router) resolveCNAME(cfg *RouterDynamicConfig, name string, reqCtx Cont
|
||||||
|
|
||||||
req.SetQuestion(name, dns.TypeANY)
|
req.SetQuestion(name, dns.TypeANY)
|
||||||
// TODO: handle error response
|
// TODO: handle error response
|
||||||
resp := r.handleRequestRecursively(req, reqCtx, nil, maxRecursionLevel-1)
|
resp := r.handleRequestRecursively(req, reqCtx, cfgContext, nil, maxRecursionLevel-1)
|
||||||
|
|
||||||
return resp.Answer
|
return resp.Answer
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do nothing if we don't have a recursor
|
// Do nothing if we don't have a recursor
|
||||||
if !canRecurse(cfg) {
|
if !canRecurse(cfgContext) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -685,7 +708,7 @@ func (r *Router) resolveCNAME(cfg *RouterDynamicConfig, name string, reqCtx Cont
|
||||||
m.SetQuestion(name, dns.TypeA)
|
m.SetQuestion(name, dns.TypeA)
|
||||||
|
|
||||||
// Make a DNS lookup request
|
// Make a DNS lookup request
|
||||||
recursorResponse, err := r.recursor.handle(m, cfg, remoteAddress)
|
recursorResponse, err := r.recursor.handle(m, cfgContext, remoteAddress)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return recursorResponse.Answer
|
return recursorResponse.Answer
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,12 +6,13 @@ package dns
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/hashicorp/consul/internal/dnsutil"
|
|
||||||
"net"
|
"net"
|
||||||
"reflect"
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/internal/dnsutil"
|
||||||
|
|
||||||
"github.com/miekg/dns"
|
"github.com/miekg/dns"
|
||||||
"github.com/stretchr/testify/mock"
|
"github.com/stretchr/testify/mock"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
@ -2743,7 +2744,15 @@ func runHandleTestCases(t *testing.T, tc HandleTestCase) {
|
||||||
if ctx == nil {
|
if ctx == nil {
|
||||||
ctx = &Context{}
|
ctx = &Context{}
|
||||||
}
|
}
|
||||||
actual := router.HandleRequest(tc.request, *ctx, tc.remoteAddress)
|
|
||||||
|
var remoteAddress net.Addr
|
||||||
|
if tc.remoteAddress != nil {
|
||||||
|
remoteAddress = tc.remoteAddress
|
||||||
|
} else {
|
||||||
|
remoteAddress = &net.UDPAddr{}
|
||||||
|
}
|
||||||
|
|
||||||
|
actual := router.HandleRequest(tc.request, *ctx, remoteAddress)
|
||||||
require.Equal(t, tc.response, actual)
|
require.Equal(t, tc.response, actual)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ import (
|
||||||
"github.com/hashicorp/consul/agent/consul/stream"
|
"github.com/hashicorp/consul/agent/consul/stream"
|
||||||
"github.com/hashicorp/consul/agent/consul/usagemetrics"
|
"github.com/hashicorp/consul/agent/consul/usagemetrics"
|
||||||
"github.com/hashicorp/consul/agent/consul/xdscapacity"
|
"github.com/hashicorp/consul/agent/consul/xdscapacity"
|
||||||
|
"github.com/hashicorp/consul/agent/discovery"
|
||||||
"github.com/hashicorp/consul/agent/grpc-external/limiter"
|
"github.com/hashicorp/consul/agent/grpc-external/limiter"
|
||||||
grpcInt "github.com/hashicorp/consul/agent/grpc-internal"
|
grpcInt "github.com/hashicorp/consul/agent/grpc-internal"
|
||||||
"github.com/hashicorp/consul/agent/grpc-internal/balancer"
|
"github.com/hashicorp/consul/agent/grpc-internal/balancer"
|
||||||
|
@ -434,6 +435,7 @@ func getPrometheusDefs(cfg *config.RuntimeConfig, isServer bool) ([]prometheus.G
|
||||||
consul.CatalogCounters,
|
consul.CatalogCounters,
|
||||||
consul.ClientCounters,
|
consul.ClientCounters,
|
||||||
consul.RPCCounters,
|
consul.RPCCounters,
|
||||||
|
discovery.DNSCounters,
|
||||||
grpcWare.StatsCounters,
|
grpcWare.StatsCounters,
|
||||||
local.StateCounters,
|
local.StateCounters,
|
||||||
xds.StatsCounters,
|
xds.StatsCounters,
|
||||||
|
|
Loading…
Reference in New Issue