diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 51320c661..94924d2c4 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -152,6 +152,7 @@ type flagConfig struct {
 	queryConcurrency    int
 	queryMaxSamples     int
 	RemoteFlushDeadline model.Duration
+	nameEscapingScheme  string
 
 	featureList   []string
 	memlimitRatio float64
@@ -237,6 +238,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
 		case "delayed-compaction":
 			c.tsdb.EnableDelayedCompaction = true
 			level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.")
+		case "utf8-names":
+			model.NameValidationScheme = model.UTF8Validation
+			level.Info(logger).Log("msg", "Experimental UTF-8 support enabled")
 		case "":
 			continue
 		case "promql-at-modifier", "promql-negative-offset":
@@ -481,7 +485,9 @@ func main() {
 	a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
 		Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
 
-	a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
+	a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to a Prometheus instance that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme)
+
+	a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
 		Default("").StringsVar(&cfg.featureList)
 
 	promlogflag.AddFlags(a, &cfg.promlogConfig)
@@ -509,6 +515,15 @@ func main() {
 		os.Exit(1)
 	}
 
+	if cfg.nameEscapingScheme != "" {
+		scheme, err := model.ToEscapingScheme(cfg.nameEscapingScheme)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, `Invalid name escaping scheme: %q; needs to be one of "values", "underscores", or "dots"`, cfg.nameEscapingScheme)
+			os.Exit(1)
+		}
+		model.NameEscapingScheme = scheme
+	}
+
 	if agentMode && len(serverOnlyFlags) > 0 {
 		fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags)
 		os.Exit(3)
diff --git a/config/config.go b/config/config.go
index 7632a444f..4326b0a99 100644
--- a/config/config.go
+++ b/config/config.go
@@ -67,6 +67,11 @@ var (
 	}
 )
 
+const (
+	LegacyValidationConfig = "legacy"
+	UTF8ValidationConfig   = "utf8"
+)
+
 // Load parses the YAML input s into a Config.
 func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) {
 	cfg := &Config{}
@@ -446,6 +451,8 @@ type GlobalConfig struct {
 	// Keep no more than this many dropped targets per job.
 	// 0 means no limit.
 	KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
+	// Allow UTF-8 metric and label names.
+	MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
 }
 
 // ScrapeProtocol represents supported protocol for scraping metrics.
@@ -471,6 +478,7 @@ var (
 	PrometheusText0_0_4  ScrapeProtocol = "PrometheusText0.0.4"
 	OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
 	OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
+	UTF8NamesHeader      string         = model.EscapingKey + "=" + model.AllowUTF8
 
 	ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
 		PrometheusProto:      "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
@@ -656,6 +664,8 @@ type ScrapeConfig struct {
 	// Keep no more than this many dropped targets per job.
 	// 0 means no limit.
 	KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
+	// Allow UTF-8 metric and label names.
+	MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
 
 	// We cannot do proper Go type embedding below as the parser will then parse
 	// values arbitrarily into the overflow maps of further-down types.
@@ -762,6 +772,17 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
 		return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
 	}
 
+	switch globalConfig.MetricNameValidationScheme {
+	case "", LegacyValidationConfig:
+	case UTF8ValidationConfig:
+		if model.NameValidationScheme != model.UTF8Validation {
+			return fmt.Errorf("utf8 name validation requested but feature not enabled via --enable-feature=utf8-names")
+		}
+	default:
+		return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme)
+	}
+	c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme
+
 	return nil
 }
 
diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md
index b8f2e4241..a16e807e1 100644
--- a/docs/command-line/prometheus.md
+++ b/docs/command-line/prometheus.md
@@ -56,7 +56,8 @@ The Prometheus monitoring server
 | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
 | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
 | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
-| --enable-feature | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
+| --scrape.name-escaping-scheme | Method for escaping legacy invalid names when sending to a Prometheus instance that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` |
+| --enable-feature | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
 | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
 | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` |
 
diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md
index 313a7f2f3..bc684b6f9 100644
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@@ -121,6 +121,11 @@ global:
   # that will be kept in memory. 0 means no limit.
   [ keep_dropped_targets: <int> | default = 0 ]
 
+  # Specifies the validation scheme for metric and label names. Either blank or
+  # "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
+  # UTF-8 support.
+  [ metric_name_validation_scheme: <string> | default = "legacy" ]
+
   runtime:
     # Configure the Go garbage collector GOGC parameter
     # See: https://tip.golang.org/doc/gc-guide#GOGC
@@ -461,6 +466,11 @@ metric_relabel_configs:
 # that will be kept in memory. 0 means no limit.
 [ keep_dropped_targets: <int> | default = 0 ]
 
+# Specifies the validation scheme for metric and label names. Either blank or
+# "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
+# UTF-8 support.
+[ metric_name_validation_scheme: <string> | default = "legacy" ]
+
 # Limit on total number of positive and negative buckets allowed in a single
 # native histogram. The resolution of a histogram with more buckets will be
 # reduced until the number of buckets is within the limit. If the limit cannot
diff --git a/docs/feature_flags.md b/docs/feature_flags.md
index c9a3558fa..0a908bb91 100644
--- a/docs/feature_flags.md
+++ b/docs/feature_flags.md
@@ -249,3 +249,11 @@ In the event of multiple consecutive Head compactions being possible, only the f
 Note that during this delay, the Head continues its usual operations, which include serving and appending series.
 
 Despite the delay in compaction, the blocks produced are time-aligned in the same manner as they would be if the delay was not in place.
+
+## UTF-8 Name Support
+
+`--enable-feature=utf8-names`
+
+When enabled, this flag changes the metric and label name validation scheme inside Prometheus to allow the full UTF-8 character set.
+By itself, the flag does not enable requesting UTF-8 names via content negotiation.
+Users must also set `metric_name_validation_scheme` to `utf8`, either in the global config or on a per-scrape-config basis.
diff --git a/scrape/manager.go b/scrape/manager.go
index 156e949f8..6d4e8707b 100644
--- a/scrape/manager.go
+++ b/scrape/manager.go
@@ -93,6 +93,8 @@ type Options struct {
 	skipOffsetting bool
 }
 
+const DefaultNameEscapingScheme = model.ValueEncodingEscaping
+
 // Manager maintains a set of scrape pools and manages start/stop cycles
 // when receiving new target groups from the discovery manager.
 type Manager struct {
diff --git a/scrape/scrape.go b/scrape/scrape.go
index 68411a62e..9979f7361 100644
--- a/scrape/scrape.go
+++ b/scrape/scrape.go
@@ -303,6 +303,11 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
 		mrc = sp.config.MetricRelabelConfigs
 	)
 
+	validationScheme := model.LegacyValidation
+	if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig {
+		validationScheme = model.UTF8Validation
+	}
+
 	sp.targetMtx.Lock()
 
 	forcedErr := sp.refreshTargetLimitErr()
@@ -323,7 +328,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
 				client:               sp.client,
 				timeout:              timeout,
 				bodySizeLimit:        bodySizeLimit,
-				acceptHeader:         acceptHeader(sp.config.ScrapeProtocols),
+				acceptHeader:         acceptHeader(sp.config.ScrapeProtocols, validationScheme),
 				acceptEncodingHeader: acceptEncodingHeader(enableCompression),
 			}
 			newLoop = sp.newLoop(scrapeLoopOptions{
@@ -452,6 +457,11 @@ func (sp *scrapePool) sync(targets []*Target) {
 		scrapeClassicHistograms = sp.config.ScrapeClassicHistograms
 	)
 
+	validationScheme := model.LegacyValidation
+	if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig {
+		validationScheme = model.UTF8Validation
+	}
+
 	sp.targetMtx.Lock()
 	for _, t := range targets {
 		hash := t.hash()
@@ -467,7 +477,7 @@ func (sp *scrapePool) sync(targets []*Target) {
 				client:               sp.client,
 				timeout:              timeout,
 				bodySizeLimit:        bodySizeLimit,
-				acceptHeader:         acceptHeader(sp.config.ScrapeProtocols),
+				acceptHeader:         acceptHeader(sp.config.ScrapeProtocols, validationScheme),
 				acceptEncodingHeader: acceptEncodingHeader(enableCompression),
 				metrics:              sp.metrics,
 			}
@@ -714,11 +724,16 @@ var errBodySizeLimit = errors.New("body size limit exceeded")
 // acceptHeader transforms preference from the options into specific header values as
 // https://www.rfc-editor.org/rfc/rfc9110.html#name-accept defines.
 // No validation is here, we expect scrape protocols to be validated already.
-func acceptHeader(sps []config.ScrapeProtocol) string {
+func acceptHeader(sps []config.ScrapeProtocol, scheme model.ValidationScheme) string {
 	var vals []string
 	weight := len(config.ScrapeProtocolsHeaders) + 1
 	for _, sp := range sps {
-		vals = append(vals, fmt.Sprintf("%s;q=0.%d", config.ScrapeProtocolsHeaders[sp], weight))
+		val := config.ScrapeProtocolsHeaders[sp]
+		if scheme == model.UTF8Validation {
+			val += ";" + config.UTF8NamesHeader
+		}
+		val += fmt.Sprintf(";q=0.%d", weight)
+		vals = append(vals, val)
 		weight--
 	}
 	// Default match anything.
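The effect of the `acceptHeader` change is easiest to see in isolation. Below is a minimal, self-contained sketch of the same negotiation shape; `buildAcceptHeader` is a hypothetical stand-in for the unexported `acceptHeader`, and the media types in `main` are illustrative rather than the exact values from `config.ScrapeProtocolsHeaders`:

```go
package main

import (
	"fmt"
	"strings"
)

// buildAcceptHeader mirrors the patched logic: each media type is listed with a
// descending q-weight, and when UTF-8 validation is in effect the
// "escaping=allow-utf-8" parameter is appended so that only servers that
// understand UTF-8 names will reply with them unescaped.
func buildAcceptHeader(mediaTypes []string, utf8 bool) string {
	var vals []string
	weight := len(mediaTypes) + 1
	for _, mt := range mediaTypes {
		if utf8 {
			mt += ";escaping=allow-utf-8"
		}
		vals = append(vals, fmt.Sprintf("%s;q=0.%d", mt, weight))
		weight--
	}
	// Fall back to matching anything, at the lowest priority.
	vals = append(vals, "*/*;q=0.1")
	return strings.Join(vals, ",")
}

func main() {
	fmt.Println(buildAcceptHeader([]string{
		"application/openmetrics-text;version=1.0.0",
		"text/plain;version=0.0.4",
	}, true))
	// application/openmetrics-text;version=1.0.0;escaping=allow-utf-8;q=0.3,
	// text/plain;version=0.0.4;escaping=allow-utf-8;q=0.2,*/*;q=0.1
}
```

Keeping the `escaping` parameter before the q-weight leaves the weight as the final parameter of each entry, matching the header layout the tests below assert against.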
diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go
index a3fe6ac1a..be81b8677 100644
--- a/scrape/scrape_test.go
+++ b/scrape/scrape_test.go
@@ -2339,11 +2339,15 @@ func TestTargetScraperScrapeOK(t *testing.T) {
 	)
 
 	var protobufParsing bool
+	var allowUTF8 bool
 
 	server := httptest.NewServer(
 		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			accept := r.Header.Get("Accept")
+			if allowUTF8 {
+				require.Truef(t, strings.Contains(accept, "escaping=allow-utf-8"), "Expected Accept header to allow utf8, got %q", accept)
+			}
 			if protobufParsing {
-				accept := r.Header.Get("Accept")
 				require.True(t, strings.HasPrefix(accept, "application/vnd.google.protobuf;"),
 					"Expected Accept header to prefer application/vnd.google.protobuf.")
 			}
@@ -2351,7 +2355,11 @@ func TestTargetScraperScrapeOK(t *testing.T) {
 			timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds")
 			require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.")
 
-			w.Header().Set("Content-Type", `text/plain; version=0.0.4`)
+			if allowUTF8 {
+				w.Header().Set("Content-Type", `text/plain; version=1.0.0; escaping=allow-utf-8`)
+			} else {
+				w.Header().Set("Content-Type", `text/plain; version=0.0.4`)
+			}
 			w.Write([]byte("metric_a 1\nmetric_b 2\n"))
 		}),
 	)
@@ -2380,13 +2388,22 @@ func TestTargetScraperScrapeOK(t *testing.T) {
 		require.NoError(t, err)
 		contentType, err := ts.readResponse(context.Background(), resp, &buf)
 		require.NoError(t, err)
-		require.Equal(t, "text/plain; version=0.0.4", contentType)
+		if allowUTF8 {
+			require.Equal(t, "text/plain; version=1.0.0; escaping=allow-utf-8", contentType)
+		} else {
+			require.Equal(t, "text/plain; version=0.0.4", contentType)
+		}
 		require.Equal(t, "metric_a 1\nmetric_b 2\n", buf.String())
 	}
 
-	runTest(acceptHeader(config.DefaultScrapeProtocols))
+	runTest(acceptHeader(config.DefaultScrapeProtocols, model.LegacyValidation))
 	protobufParsing = true
-	runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols))
+	runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols, model.LegacyValidation))
+	protobufParsing = false
+	allowUTF8 = true
+	runTest(acceptHeader(config.DefaultScrapeProtocols, model.UTF8Validation))
+	protobufParsing = true
+	runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols, model.UTF8Validation))
 }
 
 func TestTargetScrapeScrapeCancel(t *testing.T) {
@@ -2412,7 +2429,7 @@ func TestTargetScrapeScrapeCancel(t *testing.T) {
 			),
 		},
 		client:       http.DefaultClient,
-		acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
+		acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
 	}
 
 	ctx, cancel := context.WithCancel(context.Background())
@@ -2467,7 +2484,7 @@ func TestTargetScrapeScrapeNotFound(t *testing.T) {
 			),
 		},
 		client:       http.DefaultClient,
-		acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
+		acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
 	}
 
 	resp, err := ts.scrape(context.Background())
@@ -2511,7 +2528,7 @@ func TestTargetScraperBodySizeLimit(t *testing.T) {
 		},
 		client:        http.DefaultClient,
 		bodySizeLimit: bodySizeLimit,
-		acceptHeader:  acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
+		acceptHeader:  acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
 		metrics:       newTestScrapeMetrics(t),
 	}
 
 	var buf bytes.Buffer
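End to end, opting into UTF-8 names takes two steps: the feature flag at startup, and the validation scheme in the configuration. A minimal sketch of a config that exercises both (the job name and target are placeholders):

```yaml
# Start with: prometheus --enable-feature=utf8-names --config.file=prometheus.yml
# Without the feature flag, "utf8" below is rejected at config validation time
# (see the ScrapeConfig.Validate change above).
global:
  metric_name_validation_scheme: utf8   # or "legacy" (the default)

scrape_configs:
  - job_name: "utf8-demo"               # placeholder job
    static_configs:
      - targets: ["localhost:8080"]     # placeholder target
```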
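The `--scrape.name-escaping-scheme` flag feeds `model.NameEscapingScheme` in `prometheus/common`, which governs how legacy-invalid names are rewritten for peers that cannot accept UTF-8. A rough sketch of exercising the three accepted values through `model.EscapeName`; the sample metric name is made up, and the exact escaped output is an assumption to check against your `prometheus/common` version:

```go
package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

func main() {
	// A name that is invalid under the legacy [a-zA-Z_:][a-zA-Z0-9_:]* rules.
	const name = "http.requests.total"

	for _, scheme := range []model.EscapingScheme{
		model.ValueEncodingEscaping, // "values", the flag's default
		model.UnderscoreEscaping,    // "underscores"
		model.DotsEscaping,          // "dots"
	} {
		// Print each scheme name alongside the escaped form it produces.
		fmt.Printf("%-11s -> %s\n", scheme.String(), model.EscapeName(name, scheme))
	}
}
```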