From 03995775a7674a0f571abf667b82e07b9d48df24 Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Fri, 8 Nov 2024 10:02:48 -0300 Subject: [PATCH] Allow UTF-8 characters in metric and label names as opt-in feature (#15258) * Allow UTF-8 characters in metric and label names as opt-in feature --------- Signed-off-by: Arthur Silva Sens --- config/config.go | 29 ++- config/config_test.go | 62 +++++++ config/testdata/otlp_allow_utf8.bad.yml | 4 + config/testdata/otlp_allow_utf8.good.yml | 2 + .../testdata/otlp_allow_utf8.incompatible.yml | 4 + documentation/examples/prometheus-otlp.yml | 31 ++++ .../prometheus/helpers_from_stdlib.go | 106 +++++++++++ .../prometheus/normalize_label.go | 4 +- .../prometheus/normalize_label_test.go | 27 +-- .../prometheus/normalize_name.go | 62 ++++--- .../prometheus/normalize_name_test.go | 169 ++++++++++-------- .../prometheusremotewrite/helper.go | 6 +- .../prometheusremotewrite/histograms_test.go | 2 +- .../prometheusremotewrite/metrics_to_prw.go | 3 +- .../otlp_to_openmetrics_metadata.go | 4 +- storage/remote/write_handler.go | 1 + 16 files changed, 399 insertions(+), 117 deletions(-) create mode 100644 config/testdata/otlp_allow_utf8.bad.yml create mode 100644 config/testdata/otlp_allow_utf8.good.yml create mode 100644 config/testdata/otlp_allow_utf8.incompatible.yml create mode 100644 documentation/examples/prometheus-otlp.yml create mode 100644 storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go diff --git a/config/config.go b/config/config.go index 7fb77b0e6..ef3ea5e67 100644 --- a/config/config.go +++ b/config/config.go @@ -106,6 +106,18 @@ func Load(s string, logger *slog.Logger) (*Config, error) { if !b.Labels().IsEmpty() { cfg.GlobalConfig.ExternalLabels = b.Labels() } + + switch cfg.OTLPConfig.TranslationStrategy { + case UnderscoreEscapingWithSuffixes: + case "": + case NoUTF8EscapingWithSuffixes: + if cfg.GlobalConfig.MetricNameValidationScheme == LegacyValidationConfig { + return nil, errors.New("OTLP 
translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled") + } + default: + return nil, fmt.Errorf("unsupported OTLP translation strategy %q", cfg.OTLPConfig.TranslationStrategy) + } + return cfg, nil } @@ -239,7 +251,9 @@ var ( } // DefaultOTLPConfig is the default OTLP configuration. - DefaultOTLPConfig = OTLPConfig{} + DefaultOTLPConfig = OTLPConfig{ + TranslationStrategy: UnderscoreEscapingWithSuffixes, + } ) // Config is the top-level configuration for Prometheus's config files. @@ -1402,9 +1416,20 @@ func getGoGCEnv() int { return DefaultRuntimeConfig.GoGC } +type translationStrategyOption string + +var ( + // NoUTF8EscapingWithSuffixes will keep UTF-8 characters as they are, units and type suffixes will still be added. + NoUTF8EscapingWithSuffixes translationStrategyOption = "NoUTF8EscapingWithSuffixes" + // UnderscoreEscapingWithSuffixes is the default option for translating OTLP to Prometheus. + // This option will translate all UTF-8 characters to underscores, while adding units and type suffixes. + UnderscoreEscapingWithSuffixes translationStrategyOption = "UnderscoreEscapingWithSuffixes" +) + // OTLPConfig is the configuration for writing to the OTLP endpoint. type OTLPConfig struct { - PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"` + PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"` + TranslationStrategy translationStrategyOption `yaml:"translation_strategy,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. 
diff --git a/config/config_test.go b/config/config_test.go index c3148f93a..77cbf9b2e 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -168,6 +168,7 @@ var expectedConf = &Config{ PromoteResourceAttributes: []string{ "k8s.cluster.name", "k8s.job.name", "k8s.namespace.name", }, + TranslationStrategy: UnderscoreEscapingWithSuffixes, }, RemoteReadConfigs: []*RemoteReadConfig{ @@ -1553,6 +1554,67 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { }) } +func TestOTLPAllowUTF8(t *testing.T) { + t.Run("good config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.good.yml") + verify := func(t *testing.T, conf *Config, err error) { + t.Helper() + require.NoError(t, err) + require.Equal(t, NoUTF8EscapingWithSuffixes, conf.OTLPConfig.TranslationStrategy) + } + + t.Run("LoadFile", func(t *testing.T) { + conf, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, conf, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + conf, err := Load(string(content), promslog.NewNopLogger()) + verify(t, conf, err) + }) + }) + + t.Run("incompatible config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.incompatible.yml") + verify := func(t *testing.T, err error) { + t.Helper() + require.ErrorContains(t, err, `OTLP translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled`) + } + + t.Run("LoadFile", func(t *testing.T) { + _, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + _, err = Load(string(content), promslog.NewNopLogger()) + t.Log("err", err) + verify(t, err) + }) + }) + + t.Run("bad config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.bad.yml") + verify := func(t *testing.T, err error) { + t.Helper() + require.ErrorContains(t, err, `unsupported OTLP 
translation strategy "Invalid"`) + } + + t.Run("LoadFile", func(t *testing.T) { + _, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + _, err = Load(string(content), promslog.NewNopLogger()) + verify(t, err) + }) + }) +} + func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. diff --git a/config/testdata/otlp_allow_utf8.bad.yml b/config/testdata/otlp_allow_utf8.bad.yml new file mode 100644 index 000000000..488e4b055 --- /dev/null +++ b/config/testdata/otlp_allow_utf8.bad.yml @@ -0,0 +1,4 @@ +global: + metric_name_validation_scheme: legacy +otlp: + translation_strategy: Invalid diff --git a/config/testdata/otlp_allow_utf8.good.yml b/config/testdata/otlp_allow_utf8.good.yml new file mode 100644 index 000000000..f3069d2fd --- /dev/null +++ b/config/testdata/otlp_allow_utf8.good.yml @@ -0,0 +1,2 @@ +otlp: + translation_strategy: NoUTF8EscapingWithSuffixes diff --git a/config/testdata/otlp_allow_utf8.incompatible.yml b/config/testdata/otlp_allow_utf8.incompatible.yml new file mode 100644 index 000000000..2625c2413 --- /dev/null +++ b/config/testdata/otlp_allow_utf8.incompatible.yml @@ -0,0 +1,4 @@ +global: + metric_name_validation_scheme: legacy +otlp: + translation_strategy: NoUTF8EscapingWithSuffixes diff --git a/documentation/examples/prometheus-otlp.yml b/documentation/examples/prometheus-otlp.yml new file mode 100644 index 000000000..f0a8ab8b1 --- /dev/null +++ b/documentation/examples/prometheus-otlp.yml @@ -0,0 +1,31 @@ +# my global config +global: + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + +otlp: + # Recommended attributes to be promoted to labels. 
+ promote_resource_attributes: + - service.instance.id + - service.name + - service.namespace + - cloud.availability_zone + - cloud.region + - container.name + - deployment.environment.name + - k8s.cluster.name + - k8s.container.name + - k8s.cronjob.name + - k8s.daemonset.name + - k8s.deployment.name + - k8s.job.name + - k8s.namespace.name + - k8s.pod.name + - k8s.replicaset.name + - k8s.statefulset.name + # Ingest OTLP data keeping UTF-8 characters in metric/label names. + translation_strategy: NoUTF8EscapingWithSuffixes + +storage: + # OTLP is a push-based protocol, Out of order samples is a common scenario. + tsdb: + out_of_order_time_window: 30m diff --git a/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go b/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go new file mode 100644 index 000000000..cb9257d07 --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go @@ -0,0 +1,106 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/golang/go/blob/f2d118fd5f7e872804a5825ce29797f81a28b0fa/src/strings/strings.go +// Provenance-includes-license: BSD-3-Clause +// Provenance-includes-copyright: Copyright The Go Authors. + +package prometheus + +import "strings" + +// fieldsFunc is a copy of strings.FieldsFunc from the Go standard library, +// but it also returns the separators as part of the result. 
// fieldsFunc splits s around runs of runes satisfying f, like
// strings.FieldsFunc, and additionally reports the separators it consumed.
//
// The first result holds the fields (maximal runs of runes for which f is
// false). The second result is index-aligned with the first: separators[i] is
// the separator rune that immediately followed fields[i], encoded as a string.
// Only the first rune of a run of consecutive separators is recorded, and
// separators that appear before the first field are not recorded at all. The
// last field has no entry when it ends at the end of s.
func fieldsFunc(s string, f func(rune) bool) ([]string, []string) {
	// A span is used to record a slice of s of the form s[start:end].
	// The start index is inclusive and the end index is exclusive.
	type span struct {
		start int
		end   int
	}
	spans := make([]span, 0, 32)
	separators := make([]string, 0, 32)

	// Find the field start and end indices.
	// Doing this in a separate pass (rather than slicing the string s
	// and collecting the result substrings right away) is significantly
	// more efficient, possibly due to cache effects.
	start := -1 // valid span start if >= 0
	for end, r := range s {
		if f(r) {
			if start >= 0 {
				spans = append(spans, span{start, end})
				// Set start to a negative value.
				// Note: using -1 here consistently and reproducibly
				// slows down this code by several percent on amd64.
				start = ^start
				// Record the whole separator rune. Indexing s[end] would
				// yield only the first byte of a multi-byte UTF-8 rune and
				// converting that byte to a string produces a different
				// character altogether.
				separators = append(separators, string(r))
			}
		} else if start < 0 {
			start = end
		}
	}

	// Last field might end at EOF.
	if start >= 0 {
		spans = append(spans, span{start, len(s)})
	}

	// Create strings from recorded field indices.
	a := make([]string, len(spans))
	for i, sp := range spans {
		a[i] = s[sp.start:sp.end]
	}

	return a, separators
}

// join is a copy of strings.Join from the Go standard library,
// but it also accepts a slice of separators to join the elements with.
// If the slice of separators is shorter than the slice of elements, use a default value.
// We also don't check for integer overflow.
+func join(elems []string, separators []string, def string) string { + switch len(elems) { + case 0: + return "" + case 1: + return elems[0] + } + + var n int + var sep string + sepLen := len(separators) + for i, elem := range elems { + if i >= sepLen { + sep = def + } else { + sep = separators[i] + } + n += len(sep) + len(elem) + } + + var b strings.Builder + b.Grow(n) + b.WriteString(elems[0]) + for i, s := range elems[1:] { + if i >= sepLen { + sep = def + } else { + sep = separators[i] + } + b.WriteString(sep) + b.WriteString(s) + } + return b.String() +} diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index d5de2c765..b928e6888 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -29,9 +29,9 @@ import ( // // Labels that start with non-letter rune will be prefixed with "key_". // An exception is made for double-underscores which are allowed. 
-func NormalizeLabel(label string) string { +func NormalizeLabel(label string, allowUTF8 bool) string { // Trivial case - if len(label) == 0 { + if len(label) == 0 || allowUTF8 { return label } diff --git a/storage/remote/otlptranslator/prometheus/normalize_label_test.go b/storage/remote/otlptranslator/prometheus/normalize_label_test.go index 21d4d6a6d..19ab6cd17 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label_test.go @@ -22,24 +22,27 @@ import ( func TestNormalizeLabel(t *testing.T) { tests := []struct { - label string - expected string + label string + expected string + expectedUTF8 string }{ - {"", ""}, - {"label:with:colons", "label_with_colons"}, // Without UTF-8 support, colons are only allowed in metric names - {"LabelWithCapitalLetters", "LabelWithCapitalLetters"}, - {"label!with&special$chars)", "label_with_special_chars_"}, - {"label_with_foreign_characters_字符", "label_with_foreign_characters___"}, - {"label.with.dots", "label_with_dots"}, - {"123label", "key_123label"}, - {"_label_starting_with_underscore", "key_label_starting_with_underscore"}, - {"__label_starting_with_2underscores", "__label_starting_with_2underscores"}, + {"", "", ""}, + {"label:with:colons", "label_with_colons", "label:with:colons"}, // Without UTF-8 support, colons are only allowed in metric names + {"LabelWithCapitalLetters", "LabelWithCapitalLetters", "LabelWithCapitalLetters"}, + {"label!with&special$chars)", "label_with_special_chars_", "label!with&special$chars)"}, + {"label_with_foreign_characters_字符", "label_with_foreign_characters___", "label_with_foreign_characters_字符"}, + {"label.with.dots", "label_with_dots", "label.with.dots"}, + {"123label", "key_123label", "123label"}, + {"_label_starting_with_underscore", "key_label_starting_with_underscore", "_label_starting_with_underscore"}, + {"__label_starting_with_2underscores", "__label_starting_with_2underscores", 
"__label_starting_with_2underscores"}, } for i, test := range tests { t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { - result := NormalizeLabel(test.label) + result := NormalizeLabel(test.label, false) require.Equal(t, test.expected, result) + uTF8result := NormalizeLabel(test.label, true) + require.Equal(t, test.expectedUTF8, uTF8result) }) } } diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 0119b64df..335705aa8 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -88,27 +88,32 @@ var perUnitMap = map[string]string{ // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels, // https://prometheus.io/docs/practices/naming/#metric-and-label-naming // and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. -func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes bool) string { +func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes, allowUTF8 bool) string { // Full normalization following standard Prometheus naming conventions if addMetricSuffixes { - return normalizeName(metric, namespace) + return normalizeName(metric, namespace, allowUTF8) } - // Regexp for metric name characters that should be replaced with _. - invalidMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:_]`) - - // Simple case (no full normalization, no units, etc.). - metricName := strings.Join(strings.FieldsFunc(metric.Name(), func(r rune) bool { - return invalidMetricCharRE.MatchString(string(r)) - }), "_") + var metricName string + if !allowUTF8 { + // Regexp for metric name characters that should be replaced with _. 
+ invalidMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:_]`) + + // Simple case (no full normalization, no units, etc.). + metricName = strings.Join(strings.FieldsFunc(metric.Name(), func(r rune) bool { + return invalidMetricCharRE.MatchString(string(r)) + }), "_") + } else { + metricName = metric.Name() + } // Namespace? if namespace != "" { return namespace + "_" + metricName } - // Metric name starts with a digit? Prefix it with an underscore. - if metricName != "" && unicode.IsDigit(rune(metricName[0])) { + // Metric name starts with a digit and utf8 not allowed? Prefix it with an underscore. + if metricName != "" && unicode.IsDigit(rune(metricName[0])) && !allowUTF8 { metricName = "_" + metricName } @@ -116,17 +121,18 @@ func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffix } // Build a normalized name for the specified metric. -func normalizeName(metric pmetric.Metric, namespace string) string { - // Regexp for characters that can't be in a metric name token. - nonTokenMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:]`) - +func normalizeName(metric pmetric.Metric, namespace string, allowUTF8 bool) string { + var translationFunc func(rune) bool + if !allowUTF8 { + nonTokenMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:]`) + translationFunc = func(r rune) bool { return nonTokenMetricCharRE.MatchString(string(r)) } + } else { + translationFunc = func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ':' } + } // Split metric name into "tokens" (of supported metric name runes). // Note that this has the side effect of replacing multiple consecutive underscores with a single underscore. // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. 
- nameTokens := strings.FieldsFunc( - metric.Name(), - func(r rune) bool { return nonTokenMetricCharRE.MatchString(string(r)) }, - ) + nameTokens, separators := fieldsFunc(metric.Name(), translationFunc) // Split unit at the '/' if any unitTokens := strings.SplitN(metric.Unit(), "/", 2) @@ -137,7 +143,10 @@ func normalizeName(metric pmetric.Metric, namespace string) string { var mainUnitProm, perUnitProm string mainUnitOTel := strings.TrimSpace(unitTokens[0]) if mainUnitOTel != "" && !strings.ContainsAny(mainUnitOTel, "{}") { - mainUnitProm = cleanUpUnit(unitMapGetOrDefault(mainUnitOTel)) + mainUnitProm = unitMapGetOrDefault(mainUnitOTel) + if !allowUTF8 { + mainUnitProm = cleanUpUnit(mainUnitProm) + } if slices.Contains(nameTokens, mainUnitProm) { mainUnitProm = "" } @@ -148,7 +157,10 @@ func normalizeName(metric pmetric.Metric, namespace string) string { if len(unitTokens) > 1 && unitTokens[1] != "" { perUnitOTel := strings.TrimSpace(unitTokens[1]) if perUnitOTel != "" && !strings.ContainsAny(perUnitOTel, "{}") { - perUnitProm = cleanUpUnit(perUnitMapGetOrDefault(perUnitOTel)) + perUnitProm = perUnitMapGetOrDefault(perUnitOTel) + if !allowUTF8 { + perUnitProm = cleanUpUnit(perUnitProm) + } } if perUnitProm != "" { perUnitProm = "per_" + perUnitProm @@ -189,8 +201,12 @@ func normalizeName(metric pmetric.Metric, namespace string) string { nameTokens = append([]string{namespace}, nameTokens...) } - // Build the string from the tokens, separated with underscores - normalizedName := strings.Join(nameTokens, "_") + // Build the string from the tokens + separators. + // If UTF-8 isn't allowed, we'll use underscores as separators. 
+ if !allowUTF8 { + separators = []string{} + } + normalizedName := join(nameTokens, separators, "_") // Metric name cannot start with a digit, so prefix it with "_" in this case if normalizedName != "" && unicode.IsDigit(rune(normalizedName[0])) { diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 2d5648e84..d97e7a560 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -25,92 +25,119 @@ import ( ) func TestByte(t *testing.T) { - require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("system.filesystem.usage", "By"), "")) + require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("system.filesystem.usage", "By"), "", false)) } func TestByteCounter(t *testing.T) { - require.Equal(t, "system_io_bytes_total", normalizeName(createCounter("system.io", "By"), "")) - require.Equal(t, "network_transmitted_bytes_total", normalizeName(createCounter("network_transmitted_bytes_total", "By"), "")) + require.Equal(t, "system_io_bytes_total", normalizeName(createCounter("system.io", "By"), "", false)) + require.Equal(t, "network_transmitted_bytes_total", normalizeName(createCounter("network_transmitted_bytes_total", "By"), "", false)) } func TestWhiteSpaces(t *testing.T) { - require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("\t system.filesystem.usage ", " By\t"), "")) + require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("\t system.filesystem.usage ", " By\t"), "", false)) } func TestNonStandardUnit(t *testing.T) { - require.Equal(t, "system_network_dropped", normalizeName(createGauge("system.network.dropped", "{packets}"), "")) + require.Equal(t, "system_network_dropped", normalizeName(createGauge("system.network.dropped", "{packets}"), "", false)) } func TestNonStandardUnitCounter(t *testing.T) { 
- require.Equal(t, "system_network_dropped_total", normalizeName(createCounter("system.network.dropped", "{packets}"), "")) + require.Equal(t, "system_network_dropped_total", normalizeName(createCounter("system.network.dropped", "{packets}"), "", false)) } func TestBrokenUnit(t *testing.T) { - require.Equal(t, "system_network_dropped_packets", normalizeName(createGauge("system.network.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_dropped", normalizeName(createGauge("system.network.packets.dropped", "packets"), "")) - require.Equal(t, "system_network_packets", normalizeName(createGauge("system.network.packets", "packets"), "")) + require.Equal(t, "system_network_dropped_packets", normalizeName(createGauge("system.network.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_dropped", normalizeName(createGauge("system.network.packets.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets", normalizeName(createGauge("system.network.packets", "packets"), "", false)) } func TestBrokenUnitCounter(t *testing.T) { - require.Equal(t, "system_network_dropped_packets_total", normalizeName(createCounter("system.network.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_dropped_total", normalizeName(createCounter("system.network.packets.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_total", normalizeName(createCounter("system.network.packets", "packets"), "")) + require.Equal(t, "system_network_dropped_packets_total", normalizeName(createCounter("system.network.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_dropped_total", normalizeName(createCounter("system.network.packets.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_total", normalizeName(createCounter("system.network.packets", "packets"), "", false)) } func TestRatio(t *testing.T) { - require.Equal(t, "hw_gpu_memory_utilization_ratio", 
normalizeName(createGauge("hw.gpu.memory.utilization", "1"), "")) - require.Equal(t, "hw_fan_speed_ratio", normalizeName(createGauge("hw.fan.speed_ratio", "1"), "")) - require.Equal(t, "objects_total", normalizeName(createCounter("objects", "1"), "")) + require.Equal(t, "hw_gpu_memory_utilization_ratio", normalizeName(createGauge("hw.gpu.memory.utilization", "1"), "", false)) + require.Equal(t, "hw_fan_speed_ratio", normalizeName(createGauge("hw.fan.speed_ratio", "1"), "", false)) + require.Equal(t, "objects_total", normalizeName(createCounter("objects", "1"), "", false)) } func TestHertz(t *testing.T) { - require.Equal(t, "hw_cpu_speed_limit_hertz", normalizeName(createGauge("hw.cpu.speed_limit", "Hz"), "")) + require.Equal(t, "hw_cpu_speed_limit_hertz", normalizeName(createGauge("hw.cpu.speed_limit", "Hz"), "", false)) } func TestPer(t *testing.T) { - require.Equal(t, "broken_metric_speed_km_per_hour", normalizeName(createGauge("broken.metric.speed", "km/h"), "")) - require.Equal(t, "astro_light_speed_limit_meters_per_second", normalizeName(createGauge("astro.light.speed_limit", "m/s"), "")) + require.Equal(t, "broken_metric_speed_km_per_hour", normalizeName(createGauge("broken.metric.speed", "km/h"), "", false)) + require.Equal(t, "astro_light_speed_limit_meters_per_second", normalizeName(createGauge("astro.light.speed_limit", "m/s"), "", false)) } func TestPercent(t *testing.T) { - require.Equal(t, "broken_metric_success_ratio_percent", normalizeName(createGauge("broken.metric.success_ratio", "%"), "")) - require.Equal(t, "broken_metric_success_percent", normalizeName(createGauge("broken.metric.success_percent", "%"), "")) + require.Equal(t, "broken_metric_success_ratio_percent", normalizeName(createGauge("broken.metric.success_ratio", "%"), "", false)) + require.Equal(t, "broken_metric_success_percent", normalizeName(createGauge("broken.metric.success_percent", "%"), "", false)) } func TestEmpty(t *testing.T) { - require.Equal(t, "test_metric_no_unit", 
normalizeName(createGauge("test.metric.no_unit", ""), "")) - require.Equal(t, "test_metric_spaces", normalizeName(createGauge("test.metric.spaces", " \t "), "")) -} - -func TestUnsupportedRunes(t *testing.T) { - require.Equal(t, "unsupported_metric_temperature_F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "")) - require.Equal(t, "unsupported_metric_weird", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "")) - require.Equal(t, "unsupported_metric_redundant_test_per_C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "")) + require.Equal(t, "test_metric_no_unit", normalizeName(createGauge("test.metric.no_unit", ""), "", false)) + require.Equal(t, "test_metric_spaces", normalizeName(createGauge("test.metric.spaces", " \t "), "", false)) +} + +func TestAllowUTF8(t *testing.T) { + t.Run("allow UTF8", func(t *testing.T) { + require.Equal(t, "unsupported.metric.temperature_°F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "", true)) + require.Equal(t, "unsupported.metric.weird_+=.:,!* & #", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "", true)) + require.Equal(t, "unsupported.metric.redundant___test $_per_°C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "", true)) + require.Equal(t, "metric_with_字符_foreign_characters_ど", normalizeName(createGauge("metric_with_字符_foreign_characters", "ど"), "", true)) + }) + t.Run("disallow UTF8", func(t *testing.T) { + require.Equal(t, "unsupported_metric_temperature_F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "", false)) + require.Equal(t, "unsupported_metric_weird", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "", false)) + require.Equal(t, "unsupported_metric_redundant_test_per_C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "", false)) + require.Equal(t, "metric_with_foreign_characters", 
normalizeName(createGauge("metric_with_字符_foreign_characters", "ど"), "", false)) + }) +} + +func TestAllowUTF8KnownBugs(t *testing.T) { + // Due to historical reasons, the translator code was copied from OpenTelemetry collector codebase. + // Over there, they tried to provide means to translate metric names following Prometheus conventions that are documented here: + // https://prometheus.io/docs/practices/naming/ + // + // Although not explicitly said, it was implied that words should be separated by a single underscore and the codebase was written + // with that in mind. + // + // Now that we're allowing OTel users to have their original names stored in prometheus without any transformation, we're facing problems + // where two (or more) UTF-8 characters are being used to separate words. + // TODO(arthursens): Fix it! + + // We're asserting on 'NotEqual', which proves the bug. + require.NotEqual(t, "metric....split_=+by_//utf8characters", normalizeName(createGauge("metric....split_=+by_//utf8characters", ""), "", true)) + // Here we're asserting on 'Equal', showing the current behavior.
+ require.Equal(t, "metric.split_by_utf8characters", normalizeName(createGauge("metric....split_=+by_//utf8characters", ""), "", true)) } func TestOTelReceivers(t *testing.T) { - require.Equal(t, "active_directory_ds_replication_network_io_bytes_total", normalizeName(createCounter("active_directory.ds.replication.network.io", "By"), "")) - require.Equal(t, "active_directory_ds_replication_sync_object_pending_total", normalizeName(createCounter("active_directory.ds.replication.sync.object.pending", "{objects}"), "")) - require.Equal(t, "active_directory_ds_replication_object_rate_per_second", normalizeName(createGauge("active_directory.ds.replication.object.rate", "{objects}/s"), "")) - require.Equal(t, "active_directory_ds_name_cache_hit_rate_percent", normalizeName(createGauge("active_directory.ds.name_cache.hit_rate", "%"), "")) - require.Equal(t, "active_directory_ds_ldap_bind_last_successful_time_milliseconds", normalizeName(createGauge("active_directory.ds.ldap.bind.last_successful.time", "ms"), "")) - require.Equal(t, "apache_current_connections", normalizeName(createGauge("apache.current_connections", "connections"), "")) - require.Equal(t, "apache_workers_connections", normalizeName(createGauge("apache.workers", "connections"), "")) - require.Equal(t, "apache_requests_total", normalizeName(createCounter("apache.requests", "1"), "")) - require.Equal(t, "bigip_virtual_server_request_count_total", normalizeName(createCounter("bigip.virtual_server.request.count", "{requests}"), "")) - require.Equal(t, "system_cpu_utilization_ratio", normalizeName(createGauge("system.cpu.utilization", "1"), "")) - require.Equal(t, "system_disk_operation_time_seconds_total", normalizeName(createCounter("system.disk.operation_time", "s"), "")) - require.Equal(t, "system_cpu_load_average_15m_ratio", normalizeName(createGauge("system.cpu.load_average.15m", "1"), "")) - require.Equal(t, "memcached_operation_hit_ratio_percent", 
normalizeName(createGauge("memcached.operation_hit_ratio", "%"), "")) - require.Equal(t, "mongodbatlas_process_asserts_per_second", normalizeName(createGauge("mongodbatlas.process.asserts", "{assertions}/s"), "")) - require.Equal(t, "mongodbatlas_process_journaling_data_files_mebibytes", normalizeName(createGauge("mongodbatlas.process.journaling.data_files", "MiBy"), "")) - require.Equal(t, "mongodbatlas_process_network_io_bytes_per_second", normalizeName(createGauge("mongodbatlas.process.network.io", "By/s"), "")) - require.Equal(t, "mongodbatlas_process_oplog_rate_gibibytes_per_hour", normalizeName(createGauge("mongodbatlas.process.oplog.rate", "GiBy/h"), "")) - require.Equal(t, "mongodbatlas_process_db_query_targeting_scanned_per_returned", normalizeName(createGauge("mongodbatlas.process.db.query_targeting.scanned_per_returned", "{scanned}/{returned}"), "")) - require.Equal(t, "nginx_requests", normalizeName(createGauge("nginx.requests", "requests"), "")) - require.Equal(t, "nginx_connections_accepted", normalizeName(createGauge("nginx.connections_accepted", "connections"), "")) - require.Equal(t, "nsxt_node_memory_usage_kilobytes", normalizeName(createGauge("nsxt.node.memory.usage", "KBy"), "")) - require.Equal(t, "redis_latest_fork_microseconds", normalizeName(createGauge("redis.latest_fork", "us"), "")) + require.Equal(t, "active_directory_ds_replication_network_io_bytes_total", normalizeName(createCounter("active_directory.ds.replication.network.io", "By"), "", false)) + require.Equal(t, "active_directory_ds_replication_sync_object_pending_total", normalizeName(createCounter("active_directory.ds.replication.sync.object.pending", "{objects}"), "", false)) + require.Equal(t, "active_directory_ds_replication_object_rate_per_second", normalizeName(createGauge("active_directory.ds.replication.object.rate", "{objects}/s"), "", false)) + require.Equal(t, "active_directory_ds_name_cache_hit_rate_percent", 
normalizeName(createGauge("active_directory.ds.name_cache.hit_rate", "%"), "", false)) + require.Equal(t, "active_directory_ds_ldap_bind_last_successful_time_milliseconds", normalizeName(createGauge("active_directory.ds.ldap.bind.last_successful.time", "ms"), "", false)) + require.Equal(t, "apache_current_connections", normalizeName(createGauge("apache.current_connections", "connections"), "", false)) + require.Equal(t, "apache_workers_connections", normalizeName(createGauge("apache.workers", "connections"), "", false)) + require.Equal(t, "apache_requests_total", normalizeName(createCounter("apache.requests", "1"), "", false)) + require.Equal(t, "bigip_virtual_server_request_count_total", normalizeName(createCounter("bigip.virtual_server.request.count", "{requests}"), "", false)) + require.Equal(t, "system_cpu_utilization_ratio", normalizeName(createGauge("system.cpu.utilization", "1"), "", false)) + require.Equal(t, "system_disk_operation_time_seconds_total", normalizeName(createCounter("system.disk.operation_time", "s"), "", false)) + require.Equal(t, "system_cpu_load_average_15m_ratio", normalizeName(createGauge("system.cpu.load_average.15m", "1"), "", false)) + require.Equal(t, "memcached_operation_hit_ratio_percent", normalizeName(createGauge("memcached.operation_hit_ratio", "%"), "", false)) + require.Equal(t, "mongodbatlas_process_asserts_per_second", normalizeName(createGauge("mongodbatlas.process.asserts", "{assertions}/s"), "", false)) + require.Equal(t, "mongodbatlas_process_journaling_data_files_mebibytes", normalizeName(createGauge("mongodbatlas.process.journaling.data_files", "MiBy"), "", false)) + require.Equal(t, "mongodbatlas_process_network_io_bytes_per_second", normalizeName(createGauge("mongodbatlas.process.network.io", "By/s"), "", false)) + require.Equal(t, "mongodbatlas_process_oplog_rate_gibibytes_per_hour", normalizeName(createGauge("mongodbatlas.process.oplog.rate", "GiBy/h"), "", false)) + require.Equal(t, 
"mongodbatlas_process_db_query_targeting_scanned_per_returned", normalizeName(createGauge("mongodbatlas.process.db.query_targeting.scanned_per_returned", "{scanned}/{returned}"), "", false)) + require.Equal(t, "nginx_requests", normalizeName(createGauge("nginx.requests", "requests"), "", false)) + require.Equal(t, "nginx_connections_accepted", normalizeName(createGauge("nginx.connections_accepted", "connections"), "", false)) + require.Equal(t, "nsxt_node_memory_usage_kilobytes", normalizeName(createGauge("nsxt.node.memory.usage", "KBy"), "", false)) + require.Equal(t, "redis_latest_fork_microseconds", normalizeName(createGauge("redis.latest_fork", "us"), "", false)) } func TestTrimPromSuffixes(t *testing.T) { @@ -144,8 +171,8 @@ func TestTrimPromSuffixes(t *testing.T) { } func TestNamespace(t *testing.T) { - require.Equal(t, "space_test", normalizeName(createGauge("test", ""), "space")) - require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space")) + require.Equal(t, "space_test", normalizeName(createGauge("test", ""), "space", false)) + require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space", false)) } func TestCleanUpUnit(t *testing.T) { @@ -180,28 +207,28 @@ func TestRemoveItem(t *testing.T) { } func TestBuildCompliantNameWithSuffixes(t *testing.T) { - require.Equal(t, "system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true)) - require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true)) - require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true)) - require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", true)) - require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true)) - require.Equal(t, ":foo::bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true)) + require.Equal(t, 
"system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true, false)) + require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true, false)) + require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true, false)) + require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", true, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true, false)) + require.Equal(t, ":foo::bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true, false)) // Gauges with unit 1 are considered ratios. - require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true)) + require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true, false)) // Slashes in units are converted. - require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true)) - require.Equal(t, "metric_with_foreign_characters_total", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", true)) + require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true, false)) + require.Equal(t, "metric_with_foreign_characters_total", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", true, false)) } func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { - require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "By"), "", false)) - require.Equal(t, "system_network_io", BuildCompliantName(createCounter("network.io", "By"), "system", false)) - require.Equal(t, "system_network_I_O", BuildCompliantName(createCounter("network (I/O)", "By"), "system", false)) - require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", "By"), "", false)) - 
require.Equal(t, "envoy__rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", false)) - require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", false)) - require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false)) - require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false)) - require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false)) - require.Equal(t, "metric_with___foreign_characters", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", false)) + require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "By"), "", false, false)) + require.Equal(t, "system_network_io", BuildCompliantName(createCounter("network.io", "By"), "system", false, false)) + require.Equal(t, "system_network_I_O", BuildCompliantName(createCounter("network (I/O)", "By"), "system", false, false)) + require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", "By"), "", false, false)) + require.Equal(t, "envoy__rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", false, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", false, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false, false)) + require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false, false)) + require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false, false)) + require.Equal(t, "metric_with___foreign_characters", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", false, false)) } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index f7fede258..30cfa8643 100644 --- 
a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -157,7 +157,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting // map ensures no duplicate label names. l := make(map[string]string, maxLabelCount) for _, label := range labels { - var finalKey = prometheustranslator.NormalizeLabel(label.Name) + var finalKey = prometheustranslator.NormalizeLabel(label.Name, settings.AllowUTF8) if existingValue, alreadyExists := l[finalKey]; alreadyExists { l[finalKey] = existingValue + ";" + label.Value } else { @@ -166,7 +166,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting } for _, lbl := range promotedAttrs { - normalized := prometheustranslator.NormalizeLabel(lbl.Name) + normalized := prometheustranslator.NormalizeLabel(lbl.Name, settings.AllowUTF8) if _, exists := l[normalized]; !exists { l[normalized] = lbl.Value } @@ -205,7 +205,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting } // internal labels should be maintained if !(len(name) > 4 && name[:2] == "__" && name[len(name)-2:] == "__") { - name = prometheustranslator.NormalizeLabel(name) + name = prometheustranslator.NormalizeLabel(name, settings.AllowUTF8) } l[name] = extras[i+1] } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go index 5fdd26ef2..dcd83b7f9 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go @@ -762,7 +762,7 @@ func TestPrometheusConverter_addExponentialHistogramDataPoints(t *testing.T) { Settings{ ExportCreatedMetric: true, }, - prometheustranslator.BuildCompliantName(metric, "", true), + prometheustranslator.BuildCompliantName(metric, "", true, true), ) require.NoError(t, err) require.Empty(t, annots) 
diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index 0afd2ad57..4f8baf310 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -38,6 +38,7 @@ type Settings struct { ExportCreatedMetric bool AddMetricSuffixes bool SendMetadata bool + AllowUTF8 bool PromoteResourceAttributes []string } @@ -84,7 +85,7 @@ func (c *PrometheusConverter) FromMetrics(ctx context.Context, md pmetric.Metric continue } - promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) + promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes, settings.AllowUTF8) // handle individual metrics based on type //exhaustive:enforce diff --git a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go index ba4870419..b423d2cc6 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go @@ -43,7 +43,7 @@ func otelMetricTypeToPromMetricType(otelMetric pmetric.Metric) prompb.MetricMeta return prompb.MetricMetadata_UNKNOWN } -func OtelMetricsToMetadata(md pmetric.Metrics, addMetricSuffixes bool) []*prompb.MetricMetadata { +func OtelMetricsToMetadata(md pmetric.Metrics, addMetricSuffixes, allowUTF8 bool) []*prompb.MetricMetadata { resourceMetricsSlice := md.ResourceMetrics() metadataLength := 0 @@ -65,7 +65,7 @@ func OtelMetricsToMetadata(md pmetric.Metrics, addMetricSuffixes bool) []*prompb metric := scopeMetrics.Metrics().At(k) entry := prompb.MetricMetadata{ Type: otelMetricTypeToPromMetricType(metric), - MetricFamilyName: 
prometheustranslator.BuildCompliantName(metric, "", addMetricSuffixes), + MetricFamilyName: prometheustranslator.BuildCompliantName(metric, "", addMetricSuffixes, allowUTF8), Help: metric.Description(), } metadata = append(metadata, &entry) diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 466673c99..87102a374 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -513,6 +513,7 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { converter := otlptranslator.NewPrometheusConverter() annots, err := converter.FromMetrics(r.Context(), req.Metrics(), otlptranslator.Settings{ AddMetricSuffixes: true, + AllowUTF8: otlpCfg.TranslationStrategy == config.NoUTF8EscapingWithSuffixes, PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, }) if err != nil {