From 5f366e9b62b81a8c289d3d4fc821a30ff839b47c Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@grafana.com>
Date: Sun, 11 Dec 2022 23:39:53 +0100
Subject: [PATCH] histograms: Improve tests and fix exposed bugs

This adds negative buckets and access of float histograms to
TestHistogramChunkSameBuckets and TestHistogramChunkBucketChanges.

It also exercises a specific pattern of reusing an iterator (one where
no access has happened).

This exposes two bugs (where entries for positive buckets where used
where the corresponding entries for negative buckets should have been
used). One was fixed in #11627 (not merged), which triggered the work
in this commit.

This commit fixes both issues, so #11627 can be closed.

It also simplifies the code in the histogramIterator.Next method that
aims to recycle existing slice capacity.

Furthermore, this is on top of the release-2.40 branch because we
should probably cut a bugfix release for this.

Signed-off-by: beorn7 <beorn@grafana.com>
---
 tsdb/chunkenc/histogram.go      |  39 +++-------
 tsdb/chunkenc/histogram_test.go | 132 +++++++++++++++++++++-----------
 2 files changed, 96 insertions(+), 75 deletions(-)

diff --git a/tsdb/chunkenc/histogram.go b/tsdb/chunkenc/histogram.go
index e57b96db9..5aad382b2 100644
--- a/tsdb/chunkenc/histogram.go
+++ b/tsdb/chunkenc/histogram.go
@@ -176,7 +176,7 @@ func newHistogramIterator(b []byte) *histogramIterator {
 }
 
 func (c *HistogramChunk) iterator(it Iterator) *histogramIterator {
-	// This commet is copied from XORChunk.iterator:
+	// This comment is copied from XORChunk.iterator:
 	//   Should iterators guarantee to act on a copy of the data so it doesn't lock append?
 	//   When using striped locks to guard access to chunks, probably yes.
 	//   Could only copy data if the chunk is not completed yet.
@@ -651,7 +651,7 @@ func (it *histogramIterator) Reset(b []byte) {
 	}
 
 	it.pBucketsDelta = it.pBucketsDelta[:0]
-	it.pBucketsDelta = it.pBucketsDelta[:0]
+	it.nBucketsDelta = it.nBucketsDelta[:0]
 
 	it.sum = 0
 	it.leading = 0
@@ -677,36 +677,17 @@ func (it *histogramIterator) Next() ValueType {
 		it.zThreshold = zeroThreshold
 		it.pSpans, it.nSpans = posSpans, negSpans
 		numPBuckets, numNBuckets := countSpans(posSpans), countSpans(negSpans)
-		// Allocate bucket slices as needed, recycling existing slices
-		// in case this iterator was reset and already has slices of a
-		// sufficient capacity.
+		// The code below recycles existing slices in case this iterator
+		// was reset and already has slices of a sufficient capacity.
 		if numPBuckets > 0 {
-			if cap(it.pBuckets) < numPBuckets {
-				it.pBuckets = make([]int64, numPBuckets)
-				// If cap(it.pBuckets) isn't sufficient, neither is the cap of the others.
-				it.pBucketsDelta = make([]int64, numPBuckets)
-				it.pFloatBuckets = make([]float64, numPBuckets)
-			} else {
-				for i := 0; i < numPBuckets; i++ {
-					it.pBuckets = append(it.pBuckets, 0)
-					it.pBucketsDelta = append(it.pBucketsDelta, 0)
-					it.pFloatBuckets = append(it.pFloatBuckets, 0)
-				}
-			}
+			it.pBuckets = append(it.pBuckets, make([]int64, numPBuckets)...)
+			it.pBucketsDelta = append(it.pBucketsDelta, make([]int64, numPBuckets)...)
+			it.pFloatBuckets = append(it.pFloatBuckets, make([]float64, numPBuckets)...)
 		}
 		if numNBuckets > 0 {
-			if cap(it.nBuckets) < numNBuckets {
-				it.nBuckets = make([]int64, numNBuckets)
-				// If cap(it.nBuckets) isn't sufficient, neither is the cap of the others.
-				it.nBucketsDelta = make([]int64, numNBuckets)
-				it.nFloatBuckets = make([]float64, numNBuckets)
-			} else {
-				for i := 0; i < numNBuckets; i++ {
-					it.nBuckets = append(it.nBuckets, 0)
-					it.nBucketsDelta = append(it.nBucketsDelta, 0)
-					it.pFloatBuckets = append(it.pFloatBuckets, 0)
-				}
-			}
+			it.nBuckets = append(it.nBuckets, make([]int64, numNBuckets)...)
+			it.nBucketsDelta = append(it.nBucketsDelta, make([]int64, numNBuckets)...)
+			it.nFloatBuckets = append(it.nFloatBuckets, make([]float64, numNBuckets)...)
 		}
 
 		// Now read the actual data.
diff --git a/tsdb/chunkenc/histogram_test.go b/tsdb/chunkenc/histogram_test.go
index 09c08ae3c..59483f86f 100644
--- a/tsdb/chunkenc/histogram_test.go
+++ b/tsdb/chunkenc/histogram_test.go
@@ -21,9 +21,15 @@ import (
 	"github.com/prometheus/prometheus/model/histogram"
 )
 
+type result struct {
+	t  int64
+	h  *histogram.Histogram
+	fh *histogram.FloatHistogram
+}
+
 func TestHistogramChunkSameBuckets(t *testing.T) {
 	c := NewHistogramChunk()
-	var exp []res
+	var exp []result
 
 	// Create fresh appender and add the first histogram.
 	app, err := c.Appender()
@@ -32,7 +38,7 @@ func TestHistogramChunkSameBuckets(t *testing.T) {
 
 	ts := int64(1234567890)
 	h := &histogram.Histogram{
-		Count:         5,
+		Count:         15,
 		ZeroCount:     2,
 		Sum:           18.4,
 		ZeroThreshold: 1e-100,
@@ -42,20 +48,26 @@ func TestHistogramChunkSameBuckets(t *testing.T) {
 			{Offset: 1, Length: 2},
 		},
 		PositiveBuckets: []int64{1, 1, -1, 0}, // counts: 1, 2, 1, 1 (total 5)
+		NegativeSpans: []histogram.Span{
+			{Offset: 1, Length: 1},
+			{Offset: 2, Length: 3},
+		},
+		NegativeBuckets: []int64{2, 1, -1, -1}, // counts: 2, 3, 2, 1 (total 8)
 	}
 	app.AppendHistogram(ts, h)
-	exp = append(exp, res{t: ts, h: h})
+	exp = append(exp, result{t: ts, h: h, fh: h.ToFloat()})
 	require.Equal(t, 1, c.NumSamples())
 
 	// Add an updated histogram.
 	ts += 16
 	h = h.Copy()
-	h.Count += 9
+	h.Count = 32
 	h.ZeroCount++
 	h.Sum = 24.4
 	h.PositiveBuckets = []int64{5, -2, 1, -2} // counts: 5, 3, 4, 2 (total 14)
+	h.NegativeBuckets = []int64{4, -1, 1, -1} // counts: 4, 3, 4, 4 (total 15)
 	app.AppendHistogram(ts, h)
-	exp = append(exp, res{t: ts, h: h})
+	exp = append(exp, result{t: ts, h: h, fh: h.ToFloat()})
 	require.Equal(t, 2, c.NumSamples())
 
 	// Add update with new appender.
@@ -64,59 +76,77 @@ func TestHistogramChunkSameBuckets(t *testing.T) {
 
 	ts += 14
 	h = h.Copy()
-	h.Count += 13
+	h.Count = 54
 	h.ZeroCount += 2
 	h.Sum = 24.4
 	h.PositiveBuckets = []int64{6, 1, -3, 6} // counts: 6, 7, 4, 10 (total 27)
+	h.NegativeBuckets = []int64{5, 1, -2, 3} // counts: 5, 6, 4, 7 (total 22)
 	app.AppendHistogram(ts, h)
-	exp = append(exp, res{t: ts, h: h})
+	exp = append(exp, result{t: ts, h: h, fh: h.ToFloat()})
 	require.Equal(t, 3, c.NumSamples())
 
 	// 1. Expand iterator in simple case.
-	it := c.iterator(nil)
+	it := c.Iterator(nil)
 	require.NoError(t, it.Err())
-	var act []res
+	var act []result
 	for it.Next() == ValHistogram {
 		ts, h := it.AtHistogram()
-		act = append(act, res{t: ts, h: h})
+		fts, fh := it.AtFloatHistogram()
+		require.Equal(t, ts, fts)
+		act = append(act, result{t: ts, h: h, fh: fh})
 	}
 	require.NoError(t, it.Err())
 	require.Equal(t, exp, act)
 
 	// 2. Expand second iterator while reusing first one.
 	it2 := c.Iterator(it)
-	var res2 []res
+	var act2 []result
 	for it2.Next() == ValHistogram {
 		ts, h := it2.AtHistogram()
-		res2 = append(res2, res{t: ts, h: h})
+		fts, fh := it2.AtFloatHistogram()
+		require.Equal(t, ts, fts)
+		act2 = append(act2, result{t: ts, h: h, fh: fh})
 	}
 	require.NoError(t, it2.Err())
-	require.Equal(t, exp, res2)
+	require.Equal(t, exp, act2)
 
-	// 3. Test iterator Seek.
-	//	mid := len(exp) / 2
-
-	//	it3 := c.Iterator(nil)
-	//	var res3 []pair
-	//	require.Equal(t, true, it3.Seek(exp[mid].t))
+	// 3. Now recycle an iterator that was never used to access anything.
+	itX := c.Iterator(nil)
+	for itX.Next() == ValHistogram {
+		// Just iterate through without accessing anything.
+	}
+	it3 := c.iterator(itX)
+	var act3 []result
+	for it3.Next() == ValHistogram {
+		ts, h := it3.AtHistogram()
+		fts, fh := it3.AtFloatHistogram()
+		require.Equal(t, ts, fts)
+		act3 = append(act3, result{t: ts, h: h, fh: fh})
+	}
+	require.NoError(t, it3.Err())
+	require.Equal(t, exp, act3)
+
+	// 4. Test iterator Seek.
+	mid := len(exp) / 2
+	it4 := c.Iterator(nil)
+	var act4 []result
+	require.Equal(t, ValHistogram, it4.Seek(exp[mid].t))
 	// Below ones should not matter.
-	//	require.Equal(t, true, it3.Seek(exp[mid].t))
-	//	require.Equal(t, true, it3.Seek(exp[mid].t))
-	//	ts, v = it3.At()
-	//	res3 = append(res3, pair{t: ts, v: v})
-
-	//	for it3.Next() {
-	//		ts, v := it3.At()
-	//		res3 = append(res3, pair{t: ts, v: v})
-	//	}
-	//	require.NoError(t, it3.Err())
-	//	require.Equal(t, exp[mid:], res3)
-	//	require.Equal(t, false, it3.Seek(exp[len(exp)-1].t+1))
-}
-
-type res struct {
-	t int64
-	h *histogram.Histogram
+	require.Equal(t, ValHistogram, it4.Seek(exp[mid].t))
+	require.Equal(t, ValHistogram, it4.Seek(exp[mid].t))
+	ts, h = it4.AtHistogram()
+	fts, fh := it4.AtFloatHistogram()
+	require.Equal(t, ts, fts)
+	act4 = append(act4, result{t: ts, h: h, fh: fh})
+	for it4.Next() == ValHistogram {
+		ts, h := it4.AtHistogram()
+		fts, fh := it4.AtFloatHistogram()
+		require.Equal(t, ts, fts)
+		act4 = append(act4, result{t: ts, h: h, fh: fh})
+	}
+	require.NoError(t, it4.Err())
+	require.Equal(t, exp[mid:], act4)
+	require.Equal(t, ValNone, it4.Seek(exp[len(exp)-1].t+1))
 }
 
 // Mimics the scenario described for compareSpans().
@@ -130,7 +160,7 @@ func TestHistogramChunkBucketChanges(t *testing.T) {
 
 	ts1 := int64(1234567890)
 	h1 := &histogram.Histogram{
-		Count:         5,
+		Count:         27,
 		ZeroCount:     2,
 		Sum:           18.4,
 		ZeroThreshold: 1e-125,
@@ -143,6 +173,8 @@ func TestHistogramChunkBucketChanges(t *testing.T) {
 			{Offset: 1, Length: 1},
 		},
 		PositiveBuckets: []int64{6, -3, 0, -1, 2, 1, -4}, // counts: 6, 3, 3, 2, 4, 5, 1 (total 24)
+		NegativeSpans:   []histogram.Span{{Offset: 1, Length: 1}},
+		NegativeBuckets: []int64{1},
 	}
 
 	app.AppendHistogram(ts1, h1)
@@ -157,19 +189,23 @@ func TestHistogramChunkBucketChanges(t *testing.T) {
 		{Offset: 1, Length: 4},
 		{Offset: 3, Length: 3},
 	}
-	h2.Count += 9
+	h2.NegativeSpans = []histogram.Span{{Offset: 0, Length: 2}}
+	h2.Count = 35
 	h2.ZeroCount++
 	h2.Sum = 30
 	// Existing histogram should get values converted from the above to:
 	//   6 3 0 3 0 0 2 4 5 0 1 (previous values with some new empty buckets in between)
 	// so the new histogram should have new counts >= these per-bucket counts, e.g.:
 	h2.PositiveBuckets = []int64{7, -2, -4, 2, -2, -1, 2, 3, 0, -5, 1} // 7 5 1 3 1 0 2 5 5 0 1 (total 30)
-
+	// Existing histogram should get values converted from the above to:
+	//   0 1 (previous values with some new empty buckets in between)
+	// so the new histogram should have new counts >= these per-bucket counts, e.g.:
+	h2.NegativeBuckets = []int64{2, -1} // 2 1 (total 3)
 	// This is how span changes will be handled.
 	hApp, _ := app.(*HistogramAppender)
 	posInterjections, negInterjections, ok, cr := hApp.Appendable(h2)
 	require.Greater(t, len(posInterjections), 0)
-	require.Equal(t, 0, len(negInterjections))
+	require.Greater(t, len(negInterjections), 0)
 	require.True(t, ok) // Only new buckets came in.
 	require.False(t, cr)
 	c, app = hApp.Recode(posInterjections, negInterjections, h2.PositiveSpans, h2.NegativeSpans)
@@ -182,21 +218,25 @@ func TestHistogramChunkBucketChanges(t *testing.T) {
 	// metadata as well as the expanded buckets.
 	h1.PositiveSpans = h2.PositiveSpans
 	h1.PositiveBuckets = []int64{6, -3, -3, 3, -3, 0, 2, 2, 1, -5, 1}
-	exp := []res{
-		{t: ts1, h: h1},
-		{t: ts2, h: h2},
+	h1.NegativeSpans = h2.NegativeSpans
+	h1.NegativeBuckets = []int64{0, 1}
+	exp := []result{
+		{t: ts1, h: h1, fh: h1.ToFloat()},
+		{t: ts2, h: h2, fh: h2.ToFloat()},
 	}
 	it := c.Iterator(nil)
-	var act []res
+	var act []result
 	for it.Next() == ValHistogram {
 		ts, h := it.AtHistogram()
-		act = append(act, res{t: ts, h: h})
+		fts, fh := it.AtFloatHistogram()
+		require.Equal(t, ts, fts)
+		act = append(act, result{t: ts, h: h, fh: fh})
 	}
 	require.NoError(t, it.Err())
 	require.Equal(t, exp, act)
 }
 
-func TestHistoChunkAppendable(t *testing.T) {
+func TestHistogramChunkAppendable(t *testing.T) {
 	c := Chunk(NewHistogramChunk())
 
 	// Create fresh appender and add the first histogram.