otlptranslator: Harmonize non-UTF8 sanitization w/ naming rules. (#15314)

* otlptranslator: Harmonize non-UTF8 sanitization w/ naming rules.

Harmonize non-UTF8 sanitization w/ Prometheus naming rules.

---------

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
pull/15318/head
Arve Knudsen 4 weeks ago committed by GitHub
parent 9700933d18
commit 7c4f877881
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -13,6 +13,7 @@
* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694
* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941
* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941
* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251
## 3.0.0-beta.1 / 2024-10-09

@ -19,6 +19,8 @@ package prometheus
import (
"strings"
"unicode"
"github.com/prometheus/prometheus/util/strutil"
)
// NormalizeLabel normalizes the specified label to follow the Prometheus label name standard.
@ -26,7 +28,6 @@ import (
// See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels.
//
// Labels that start with a non-letter rune will be prefixed with "key_".
//
// An exception is made for double-underscores which are allowed.
func NormalizeLabel(label string) string {
// Trivial case
@ -34,8 +35,7 @@ func NormalizeLabel(label string) string {
return label
}
// Replace all non-alphanumeric runes with underscores
label = strings.Map(sanitizeRune, label)
label = strutil.SanitizeLabelName(label)
// If label starts with a number, prepend with "key_"
if unicode.IsDigit(rune(label[0])) {
@ -46,11 +46,3 @@ func NormalizeLabel(label string) string {
return label
}
// sanitizeRune returns r unchanged when it is an ASCII letter or ASCII digit,
// and '_' for every other rune.
//
// The check is deliberately limited to ASCII: the Prometheus naming rules only
// allow [a-zA-Z0-9_] in label names, whereas the unicode.IsLower, IsUpper and
// IsDigit predicates also accept non-ASCII runes (for example 'é' or the
// Arabic-Indic digit '٣'), which would otherwise leak into the sanitized name.
func sanitizeRune(r rune) rune {
	switch {
	case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
		return r
	default:
		return '_'
	}
}

@ -17,9 +17,12 @@
package prometheus
import (
"regexp"
"slices"
"strings"
"unicode"
"github.com/prometheus/prometheus/util/strutil"
"go.opentelemetry.io/collector/pdata/pmetric"
)
@ -84,24 +87,27 @@ var perUnitMap = map[string]string{
//
// See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels,
// https://prometheus.io/docs/practices/naming/#metric-and-label-naming
// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus.
// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus.
func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes bool) string {
var metricName string
// Full normalization following standard Prometheus naming conventions
if addMetricSuffixes {
return normalizeName(metric, namespace)
}
// Simple case (no full normalization, no units, etc.), we simply trim out forbidden chars
metricName = RemovePromForbiddenRunes(metric.Name())
// Regexp for metric name characters that should be replaced with _.
invalidMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:_]`)
// Simple case (no full normalization, no units, etc.).
metricName := strings.Join(strings.FieldsFunc(metric.Name(), func(r rune) bool {
return invalidMetricCharRE.MatchString(string(r))
}), "_")
// Namespace?
if namespace != "" {
return namespace + "_" + metricName
}
// Metric name starts with a digit? Prefix it with an underscore
// Metric name starts with a digit? Prefix it with an underscore.
if metricName != "" && unicode.IsDigit(rune(metricName[0])) {
metricName = "_" + metricName
}
@ -109,12 +115,17 @@ func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffix
return metricName
}
// Build a normalized name for the specified metric
// Build a normalized name for the specified metric.
func normalizeName(metric pmetric.Metric, namespace string) string {
// Split metric name into "tokens" (remove all non-alphanumerics)
// Regexp for characters that can't be in a metric name token.
nonTokenMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:]`)
// Split metric name into "tokens" (of supported metric name runes).
// Note that this has the side effect of replacing multiple consecutive underscores with a single underscore.
// This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus.
nameTokens := strings.FieldsFunc(
metric.Name(),
func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ':' },
func(r rune) bool { return nonTokenMetricCharRE.MatchString(string(r)) },
)
// Split unit at the '/' if any
@ -123,11 +134,12 @@ func normalizeName(metric pmetric.Metric, namespace string) string {
// Main unit
// Append if not blank, doesn't contain '{}', and is not present in metric name already
if len(unitTokens) > 0 {
var mainUnitProm, perUnitProm string
mainUnitOTel := strings.TrimSpace(unitTokens[0])
if mainUnitOTel != "" && !strings.ContainsAny(mainUnitOTel, "{}") {
mainUnitProm := CleanUpString(unitMapGetOrDefault(mainUnitOTel))
if mainUnitProm != "" && !contains(nameTokens, mainUnitProm) {
nameTokens = append(nameTokens, mainUnitProm)
mainUnitProm = cleanUpUnit(unitMapGetOrDefault(mainUnitOTel))
if slices.Contains(nameTokens, mainUnitProm) {
mainUnitProm = ""
}
}
@ -136,13 +148,26 @@ func normalizeName(metric pmetric.Metric, namespace string) string {
if len(unitTokens) > 1 && unitTokens[1] != "" {
perUnitOTel := strings.TrimSpace(unitTokens[1])
if perUnitOTel != "" && !strings.ContainsAny(perUnitOTel, "{}") {
perUnitProm := CleanUpString(perUnitMapGetOrDefault(perUnitOTel))
if perUnitProm != "" && !contains(nameTokens, perUnitProm) {
nameTokens = append(nameTokens, "per", perUnitProm)
perUnitProm = cleanUpUnit(perUnitMapGetOrDefault(perUnitOTel))
}
if perUnitProm != "" {
perUnitProm = "per_" + perUnitProm
if slices.Contains(nameTokens, perUnitProm) {
perUnitProm = ""
}
}
}
if perUnitProm != "" {
mainUnitProm = strings.TrimSuffix(mainUnitProm, "_")
}
if mainUnitProm != "" {
nameTokens = append(nameTokens, mainUnitProm)
}
if perUnitProm != "" {
nameTokens = append(nameTokens, perUnitProm)
}
}
// Append _total for Counters
@ -235,15 +260,15 @@ func removeSuffix(tokens []string, suffix string) []string {
return tokens
}
// CleanUpString cleans up the specified string so it is Prometheus compliant.
//
// Every run of runes that are not ASCII letters or ASCII digits is treated as
// a separator; the remaining tokens are joined with a single underscore, so
// the result never starts or ends with an underscore and never contains two
// in a row. An input without any ASCII letter or digit yields "".
//
// Only ASCII is accepted as token content: the unicode.IsUpper, IsLower and
// IsDigit predicates also match non-ASCII runes (e.g. 'é'), which are not
// valid under the Prometheus naming rules.
func CleanUpString(s string) string {
	isSeparator := func(r rune) bool {
		isASCIIAlnum := r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z' || r >= '0' && r <= '9'
		return !isASCIIAlnum
	}
	return strings.Join(strings.FieldsFunc(s, isSeparator), "_")
}
func RemovePromForbiddenRunes(s string) string {
return strings.Join(strings.FieldsFunc(s, func(r rune) bool {
return !unicode.IsUpper(r) && !unicode.IsLower(r) && !unicode.IsDigit(r) && r != '_' && r != ':'
}), "_")
// multipleUnderscoresRE matches a run of two or more consecutive underscores.
// It is compiled once at package scope so cleanUpUnit does not recompile the
// pattern on every call.
var multipleUnderscoresRE = regexp.MustCompile(`__+`)

// cleanUpUnit cleans up unit so it matches model.LabelNameRE.
//
// The unit is passed through strutil.SanitizeLabelName, runs of consecutive
// underscores are then collapsed into one, and finally a single leading
// underscore (if any) is trimmed from the result.
func cleanUpUnit(unit string) string {
	sanitized := strutil.SanitizeLabelName(unit)
	// Multiple consecutive underscores are replaced with a single underscore.
	// This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus.
	collapsed := multipleUnderscoresRE.ReplaceAllString(sanitized, "_")
	return strings.TrimPrefix(collapsed, "_")
}
// Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit
@ -264,16 +289,6 @@ func perUnitMapGetOrDefault(perUnit string) string {
return perUnit
}
// contains reports whether value is present in slice.
//
// It is a thin wrapper around the standard library's slices.Contains, kept so
// existing callers of this helper continue to work. A nil or empty slice
// yields false.
func contains(slice []string, value string) bool {
	return slices.Contains(slice, value)
}
// Remove the specified value from the slice
func removeItem(slice []string, value string) []string {
newSlice := make([]string, 0, len(slice))

@ -148,13 +148,13 @@ func TestNamespace(t *testing.T) {
require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space"))
}
func TestCleanUpString(t *testing.T) {
require.Equal(t, "", CleanUpString(""))
require.Equal(t, "a_b", CleanUpString("a b"))
require.Equal(t, "hello_world", CleanUpString("hello, world!"))
require.Equal(t, "hello_you_2", CleanUpString("hello you 2"))
require.Equal(t, "1000", CleanUpString("$1000"))
require.Equal(t, "", CleanUpString("*+$^=)"))
func TestCleanUpUnit(t *testing.T) {
require.Equal(t, "", cleanUpUnit(""))
require.Equal(t, "a_b", cleanUpUnit("a b"))
require.Equal(t, "hello_world", cleanUpUnit("hello, world"))
require.Equal(t, "hello_you_2", cleanUpUnit("hello you 2"))
require.Equal(t, "1000", cleanUpUnit("$1000"))
require.Equal(t, "", cleanUpUnit("*+$^=)"))
}
func TestUnitMapGetOrDefault(t *testing.T) {
@ -179,7 +179,7 @@ func TestRemoveItem(t *testing.T) {
require.Equal(t, []string{"b", "c"}, removeItem([]string{"a", "b", "c"}, "a"))
}
func TestBuildCompliantNameWithNormalize(t *testing.T) {
func TestBuildCompliantNameWithSuffixes(t *testing.T) {
require.Equal(t, "system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true))
require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true))
require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true))
@ -190,6 +190,7 @@ func TestBuildCompliantNameWithNormalize(t *testing.T) {
require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true))
// Slashes in units are converted.
require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true))
require.Equal(t, "metric_with_foreign_characters_total", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", true))
}
func TestBuildCompliantNameWithoutSuffixes(t *testing.T) {

@ -48,7 +48,6 @@ func TestCreateAttributes(t *testing.T) {
resource.Attributes().PutStr(k, v)
}
attrs := pcommon.NewMap()
attrs.PutStr("__name__", "test_metric")
attrs.PutStr("metric-attr", "metric value")
testCases := []struct {
@ -162,7 +161,7 @@ func TestCreateAttributes(t *testing.T) {
settings := Settings{
PromoteResourceAttributes: tc.promoteResourceAttributes,
}
lbls := createAttributes(resource, attrs, settings, nil, false)
lbls := createAttributes(resource, attrs, settings, nil, false, model.MetricNameLabel, "test_metric")
assert.ElementsMatch(t, lbls, tc.expectedLabels)
})

Loading…
Cancel
Save