You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
consul/agent/hcp/client/telemetry_config.go

177 lines
5.3 KiB

[HCP Telemetry] Periodic Refresh for Dynamic Telemetry Configuration (#18168) * OTElExporter now uses an EndpointProvider to discover the endpoint * OTELSink uses a ConfigProvider to obtain filters and labels configuration * improve tests for otel_sink * Regex logic is moved into client for a method on the TelemetryConfig object * Create a telemetry_config_provider and update deps to use it * Fix conversion * fix import newline * Add logger to hcp client and move telemetry_config out of the client.go file * Add a telemetry_config.go to refactor client.go * Update deps * update hcp deps test * Modify telemetry_config_providers * Check for nil filters * PR review updates * Fix comments and move around pieces * Fix comments * Remove context from client struct * Moved ctx out of sink struct and fixed filters, added a test * Remove named imports, use errors.New if not fformatting * Remove HCP dependencies in telemetry package * Add success metric and move lock only to grab the t.cfgHahs * Update hash * fix nits * Create an equals method and add tests * Improve telemetry_config_provider.go tests * Add race test * Add missing godoc * Remove mock for MetricsClient * Avoid goroutine test panics * trying to kick CI lint issues by upgrading mod * imprve test code and add hasher for testing * Use structure logging for filters, fix error constants, and default to allow all regex * removed hashin and modify logic to simplify * Improve race test and fix PR feedback by removing hash equals and avoid testing the timer.Ticker logic, and instead unit test * Ran make go-mod-tidy * Use errtypes in the test * Add changelog * add safety check for exporter endpoint * remove require.Contains by using error types, fix structure logging, and fix success metric typo in exporter * Fixed race test to have changing config values * Send success metric before modifying config * Avoid the defer and move the success metric under
1 year ago
package client
import (
"context"
"errors"
"fmt"
"net/url"
"regexp"
"strings"
"time"
"github.com/hashicorp/go-hclog"
hcptelemetry "github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/client/consul_telemetry_service"
"github.com/hashicorp/consul/agent/hcp/config"
)
var (
// defaultMetricFilters is a regex that matches all metric names.
defaultMetricFilters = regexp.MustCompile(".+")
// Validation errors for AgentTelemetryConfigOK response.
errMissingPayload = errors.New("missing payload")
errMissingTelemetryConfig = errors.New("missing telemetry config")
errMissingRefreshConfig = errors.New("missing refresh config")
errMissingMetricsConfig = errors.New("missing metrics config")
errInvalidRefreshInterval = errors.New("invalid refresh interval")
errInvalidEndpoint = errors.New("invalid metrics endpoint")
)
// TelemetryConfig contains configuration for telemetry data forwarded by Consul servers
// to the HCP Telemetry gateway.
type TelemetryConfig struct {
MetricsConfig *MetricsConfig
RefreshConfig *RefreshConfig
}
// MetricsConfig holds metrics specific configuration within TelemetryConfig.
type MetricsConfig struct {
Labels map[string]string
Filters *regexp.Regexp
Endpoint *url.URL
}
// RefreshConfig contains configuration for the periodic fetch of configuration from HCP.
type RefreshConfig struct {
RefreshInterval time.Duration
}
// MetricsEnabled returns true if metrics export is enabled, i.e. a valid metrics endpoint exists.
func (t *TelemetryConfig) MetricsEnabled() bool {
return t.MetricsConfig.Endpoint != nil
}
// validateAgentTelemetryConfigPayload ensures the returned payload from HCP is valid.
func validateAgentTelemetryConfigPayload(resp *hcptelemetry.AgentTelemetryConfigOK) error {
if resp.Payload == nil {
return errMissingPayload
}
if resp.Payload.TelemetryConfig == nil {
return errMissingTelemetryConfig
}
if resp.Payload.RefreshConfig == nil {
return errMissingRefreshConfig
}
if resp.Payload.TelemetryConfig.Metrics == nil {
return errMissingMetricsConfig
}
return nil
}
// convertAgentTelemetryResponse converts an AgentTelemetryConfig payload into a TelemetryConfig object.
func convertAgentTelemetryResponse(ctx context.Context, resp *hcptelemetry.AgentTelemetryConfigOK, cfg config.CloudConfig) (*TelemetryConfig, error) {
refreshInterval, err := time.ParseDuration(resp.Payload.RefreshConfig.RefreshInterval)
if err != nil {
return nil, fmt.Errorf("%w: %w", errInvalidRefreshInterval, err)
}
telemetryConfig := resp.Payload.TelemetryConfig
metricsEndpoint, err := convertMetricEndpoint(telemetryConfig.Endpoint, telemetryConfig.Metrics.Endpoint)
if err != nil {
return nil, errInvalidEndpoint
}
metricsFilters := convertMetricFilters(ctx, telemetryConfig.Metrics.IncludeList)
metricLabels := convertMetricLabels(telemetryConfig.Labels, cfg)
return &TelemetryConfig{
MetricsConfig: &MetricsConfig{
Endpoint: metricsEndpoint,
Labels: metricLabels,
Filters: metricsFilters,
},
RefreshConfig: &RefreshConfig{
RefreshInterval: refreshInterval,
},
}, nil
}
// convertMetricEndpoint returns a url for the export of metrics, if a valid endpoint was obtained.
// It returns no error, and no url, if an empty endpoint is retrieved (server not registered with CCM).
// It returns an error, and no url, if a bad endpoint is retrieved.
func convertMetricEndpoint(telemetryEndpoint string, metricsEndpoint string) (*url.URL, error) {
// Telemetry endpoint overriden by metrics specific endpoint, if given.
endpoint := telemetryEndpoint
if metricsEndpoint != "" {
endpoint = metricsEndpoint
}
// If endpoint is empty, server not registered with CCM, no error returned.
if endpoint == "" {
return nil, nil
}
// Endpoint from CTW has no metrics path, so it must be added.
rawUrl := endpoint + metricsGatewayPath
u, err := url.ParseRequestURI(rawUrl)
if err != nil {
return nil, fmt.Errorf("%w: %w", errInvalidEndpoint, err)
}
return u, nil
}
// convertMetricFilters returns a valid regex used to filter metrics.
// if invalid filters are given, a defaults regex that allow all metrics is returned.
func convertMetricFilters(ctx context.Context, payloadFilters []string) *regexp.Regexp {
logger := hclog.FromContext(ctx)
validFilters := make([]string, 0, len(payloadFilters))
for _, filter := range payloadFilters {
_, err := regexp.Compile(filter)
if err != nil {
logger.Error("invalid filter", "error", err)
continue
}
validFilters = append(validFilters, filter)
}
if len(validFilters) == 0 {
logger.Error("no valid filters")
return defaultMetricFilters
}
// Combine the valid regex strings with OR.
finalRegex := strings.Join(validFilters, "|")
composedRegex, err := regexp.Compile(finalRegex)
if err != nil {
logger.Error("failed to compile final regex", "error", err)
return defaultMetricFilters
}
return composedRegex
}
// convertMetricLabels returns a set of <key, value> string pairs that must be added as attributes to all exported telemetry data.
func convertMetricLabels(payloadLabels map[string]string, cfg config.CloudConfig) map[string]string {
labels := make(map[string]string)
nodeID := string(cfg.NodeID)
if nodeID != "" {
labels["node_id"] = nodeID
}
if cfg.NodeName != "" {
labels["node_name"] = cfg.NodeName
}
for k, v := range payloadLabels {
labels[k] = v
}
return labels
}