OTLP receiver: Warn when encountering invalid exponential histograms

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
pull/14706/head
Arve Knudsen 2024-08-21 15:22:38 +02:00
parent 7fad1ec8ee
commit b50c5d42fe
5 changed files with 67 additions and 14 deletions

View File

@ -3,6 +3,7 @@
## unreleased ## unreleased
* [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200 * [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200
* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706
* [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042 * [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042
## 2.54.0-rc.1 / 2024-08-05 ## 2.54.0-rc.1 / 2024-08-05

View File

@ -26,6 +26,7 @@ import (
"github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/util/annotations"
) )
const defaultZeroThreshold = 1e-128 const defaultZeroThreshold = 1e-128
@ -33,13 +34,15 @@ const defaultZeroThreshold = 1e-128
// addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series // addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series
// as native histogram samples. // as native histogram samples.
func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice,
resource pcommon.Resource, settings Settings, promName string) error { resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) {
var annots annotations.Annotations
for x := 0; x < dataPoints.Len(); x++ { for x := 0; x < dataPoints.Len(); x++ {
pt := dataPoints.At(x) pt := dataPoints.At(x)
histogram, err := exponentialToNativeHistogram(pt) histogram, ws, err := exponentialToNativeHistogram(pt)
annots.Merge(ws)
if err != nil { if err != nil {
return err return annots, err
} }
lbls := createAttributes( lbls := createAttributes(
@ -58,15 +61,16 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetr
ts.Exemplars = append(ts.Exemplars, exemplars...) ts.Exemplars = append(ts.Exemplars, exemplars...)
} }
return nil return annots, nil
} }
// exponentialToNativeHistogram translates OTel Exponential Histogram data point // exponentialToNativeHistogram translates OTel Exponential Histogram data point
// to Prometheus Native Histogram. // to Prometheus Native Histogram.
func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, error) { func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) {
var annots annotations.Annotations
scale := p.Scale() scale := p.Scale()
if scale < -4 { if scale < -4 {
return prompb.Histogram{}, return prompb.Histogram{}, annots,
fmt.Errorf("cannot convert exponential to native histogram."+ fmt.Errorf("cannot convert exponential to native histogram."+
" Scale must be >= -4, was %d", scale) " Scale must be >= -4, was %d", scale)
} }
@ -114,8 +118,11 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom
h.Sum = p.Sum() h.Sum = p.Sum()
} }
h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()} h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()}
if p.Count() == 0 && h.Sum != 0 {
annots.Add(fmt.Errorf("exponential histogram data point has zero count, but non-zero sum: %f", h.Sum))
} }
return h, nil }
return h, annots, nil
} }
// convertBucketsLayout translates OTel Exponential Histogram dense buckets // convertBucketsLayout translates OTel Exponential Histogram dense buckets

View File

@ -27,6 +27,7 @@ import (
"github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/prompb"
prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus"
"github.com/prometheus/prometheus/util/annotations"
) )
type Settings struct { type Settings struct {
@ -53,7 +54,7 @@ func NewPrometheusConverter() *PrometheusConverter {
} }
// FromMetrics converts pmetric.Metrics to Prometheus remote write format. // FromMetrics converts pmetric.Metrics to Prometheus remote write format.
func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) { func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs error) {
resourceMetricsSlice := md.ResourceMetrics() resourceMetricsSlice := md.ResourceMetrics()
for i := 0; i < resourceMetricsSlice.Len(); i++ { for i := 0; i < resourceMetricsSlice.Len(); i++ {
resourceMetrics := resourceMetricsSlice.At(i) resourceMetrics := resourceMetricsSlice.At(i)
@ -107,12 +108,14 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings)
errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name()))
break break
} }
errs = multierr.Append(errs, c.addExponentialHistogramDataPoints( ws, err := c.addExponentialHistogramDataPoints(
dataPoints, dataPoints,
resource, resource,
settings, settings,
promName, promName,
)) )
annots.Merge(ws)
errs = multierr.Append(errs, err)
case pmetric.MetricTypeSummary: case pmetric.MetricTypeSummary:
dataPoints := metric.Summary().DataPoints() dataPoints := metric.Summary().DataPoints()
if dataPoints.Len() == 0 { if dataPoints.Len() == 0 {
@ -128,7 +131,7 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings)
addResourceTargetInfo(resource, settings, mostRecentTimestamp, c) addResourceTargetInfo(resource, settings, mostRecentTimestamp, c)
} }
return return annots, errs
} }
func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool {

View File

@ -27,6 +27,41 @@ import (
"go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp"
) )
func TestFromMetrics(t *testing.T) {
t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) {
request := pmetricotlp.NewExportRequest()
rm := request.Metrics().ResourceMetrics().AppendEmpty()
generateAttributes(rm.Resource().Attributes(), "resource", 10)
metrics := rm.ScopeMetrics().AppendEmpty().Metrics()
ts := pcommon.NewTimestampFromTime(time.Now())
for i := 1; i <= 10; i++ {
m := metrics.AppendEmpty()
m.SetEmptyExponentialHistogram()
m.SetName(fmt.Sprintf("histogram-%d", i))
m.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
h := m.ExponentialHistogram().DataPoints().AppendEmpty()
h.SetTimestamp(ts)
h.SetCount(0)
h.SetSum(155)
generateAttributes(h.Attributes(), "series", 10)
}
converter := NewPrometheusConverter()
annots, err := converter.FromMetrics(request.Metrics(), Settings{})
require.NoError(t, err)
require.NotEmpty(t, annots)
ws, infos := annots.AsStrings("", 0, 0)
require.Empty(t, infos)
require.Equal(t, []string{
"exponential histogram data point has zero count, but non-zero sum: 155.000000",
}, ws)
})
}
func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) {
for _, resourceAttributeCount := range []int{0, 5, 50} { for _, resourceAttributeCount := range []int{0, 5, 50} {
b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) { b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) {
@ -49,7 +84,9 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
converter := NewPrometheusConverter() converter := NewPrometheusConverter()
require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{})) annots, err := converter.FromMetrics(payload.Metrics(), Settings{})
require.NoError(b, err)
require.Empty(b, annots)
require.NotNil(b, converter.TimeSeries()) require.NotNil(b, converter.TimeSeries())
} }
}) })

View File

@ -502,12 +502,17 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
otlpCfg := h.configFunc().OTLPConfig otlpCfg := h.configFunc().OTLPConfig
converter := otlptranslator.NewPrometheusConverter() converter := otlptranslator.NewPrometheusConverter()
if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ annots, err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{
AddMetricSuffixes: true, AddMetricSuffixes: true,
PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes,
}); err != nil { })
if err != nil {
level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err)
} }
ws, _ := annots.AsStrings("", 0, 0)
if len(ws) > 0 {
level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws)
}
err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{
Timeseries: converter.TimeSeries(), Timeseries: converter.TimeSeries(),