From 1cd6c1cde5a2e44dc5869016b294c7dccba269dc Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Fri, 13 Oct 2023 10:58:26 +0300 Subject: [PATCH] ValidateHistogram: strict Count check in absence of NaNs Signed-off-by: Linas Medziunas --- promql/engine_test.go | 2 +- storage/interface.go | 1 + tsdb/db_test.go | 5 +++-- tsdb/head_append.go | 20 +++++++++++++++----- tsdb/head_test.go | 36 +++++++++++++++++++++--------------- 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index baca992b8..7532a3294 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3399,7 +3399,7 @@ func TestNativeHistogram_HistogramStdDevVar(t *testing.T) { { name: "-50, -8, 0, 3, 8, 9, 100, +Inf", h: &histogram.Histogram{ - Count: 8, + Count: 7, ZeroCount: 1, Sum: math.Inf(1), Schema: 3, diff --git a/storage/interface.go b/storage/interface.go index 211bcbc41..4da152aa4 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -44,6 +44,7 @@ var ( ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") + ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 70d1844d4..f021faba9 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -508,7 +508,7 @@ func TestAmendHistogramDatapointCausesError(t *testing.T) { h := histogram.Histogram{ Schema: 3, - Count: 61, + Count: 52, Sum: 2.7, ZeroThreshold: 0.1, ZeroCount: 42, @@ -6314,6 +6314,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { t.Run("buckets disappearing", func(t *testing.T) { h.PositiveSpans[1].Length-- h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1] + h.Count -= 3 appendHistogram(series1, 110, h, &exp1, histogram.CounterReset) testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) }) @@ -6533,7 +6534,7 @@ func TestNativeHistogramFlag(t *testing.T) { require.NoError(t, db.Close()) }) h := &histogram.Histogram{ - Count: 10, + Count: 9, ZeroCount: 4, ZeroThreshold: 0.001, Sum: 35.5, diff --git a/tsdb/head_append.go b/tsdb/head_append.go index d1f4d3035..330caad78 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -659,11 +659,21 @@ func ValidateHistogram(h *histogram.Histogram) error { return errors.Wrap(err, "positive side") } - if c := nCount + pCount + h.ZeroCount; c > h.Count { - return errors.Wrap( - storage.ErrHistogramCountNotBigEnough, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", c, h.Count), - ) + sumOfBuckets := nCount + pCount + h.ZeroCount + if math.IsNaN(h.Sum) { + if sumOfBuckets > h.Count { + return errors.Wrap( + storage.ErrHistogramCountNotBigEnough, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } else { + if sumOfBuckets != h.Count { + return errors.Wrap( + storage.ErrHistogramCountMismatch, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } } return nil diff --git a/tsdb/head_test.go b/tsdb/head_test.go index edecf8dfe..2feb745f1 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -3419,7 +3419,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { hists = tsdbutil.GenerateTestHistograms(numHistograms) } for _, h := range hists { - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s1, ts, h, nil) @@ -3442,7 +3441,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { hists = tsdbutil.GenerateTestFloatHistograms(numHistograms) } for _, h := range hists { - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s1, ts, nil, h) @@ -3484,7 +3482,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { } for _, h := range hists { ts++ - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s2, ts, h, nil) @@ -3521,7 +3518,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { } for _, h := range hists { ts++ - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s2, ts, nil, h) @@ -4907,7 +4903,7 @@ func TestHistogramValidation(t *testing.T) { "valid histogram": { h: tsdbutil.GenerateTestHistograms(1)[0], }, - "valid histogram that has its Count (4) higher than the actual total of buckets (2 + 1)": { + "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { // This case is possible if NaN values (which do not fall into any bucket) are observed. h: &histogram.Histogram{ ZeroCount: 2, @@ -4917,6 +4913,17 @@ func TestHistogramValidation(t *testing.T) { PositiveBuckets: []int64{1}, }, }, + "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + h: &histogram.Histogram{ + ZeroCount: 2, + Count: 4, + Sum: 333, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, "rejects histogram that has too few negative buckets": { h: &histogram.Histogram{ NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, @@ -4981,7 +4988,7 @@ func TestHistogramValidation(t *testing.T) { NegativeBuckets: []int64{1}, PositiveBuckets: []int64{1}, }, - errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should be at least the number of observations found in the buckets`, + errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, skipFloat: true, }, "rejects a histogram that doesn't count the zero bucket in its count": { @@ -4993,7 +5000,7 @@ func TestHistogramValidation(t *testing.T) { NegativeBuckets: []int64{1}, PositiveBuckets: []int64{1}, }, - errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should be at least the number of observations found in the buckets`, + errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, skipFloat: true, }, } @@ -5029,8 +5036,8 @@ func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histo numSpans := numBuckets / 10 bucketsPerSide := numBuckets / 2 spanLength := uint32(bucketsPerSide / numSpans) - // Given all bucket deltas are 1, sum numHistograms + 1. - observationCount := numBuckets / 2 * (1 + numBuckets) + // Given all bucket deltas are 1, sum bucketsPerSide + 1. + observationCount := bucketsPerSide * (1 + bucketsPerSide) var histograms []*histogram.Histogram for i := 0; i < numHistograms; i++ { @@ -5491,14 +5498,13 @@ func TestCuttingNewHeadChunks(t *testing.T) { numSamples int numBytes int }{ - {30, 696}, - {30, 700}, - {30, 708}, - {30, 693}, + {40, 896}, + {40, 899}, + {40, 896}, + {30, 690}, {30, 691}, - {30, 692}, - {30, 695}, {30, 694}, + {30, 693}, }, }, "really large histograms": {