Merge pull request #12525 from fatsheep9146/native-histogram-min-max

* Add function for iterating through all buckets in reverse to find max bucket

Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com>

* enhance histogram_quantile to get min/max value

Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>

---------

Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com>
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Co-authored-by: Carrie Edwards <edwrdscarrie@gmail.com>
pull/11426/head^2
Björn Rabenstein 2023-07-12 14:39:57 +02:00 committed by GitHub
commit 5da638d527
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 287 additions and 15 deletions

View File

@ -317,6 +317,12 @@ bound of that bucket is greater than
bucket. Otherwise, the upper bound of the lowest bucket is returned for bucket. Otherwise, the upper bound of the lowest bucket is returned for
quantiles located in the lowest bucket. quantiles located in the lowest bucket.
You can use `histogram_quantile(0, v instant-vector)` to get the estimated minimum value stored in
a histogram.
You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in
a histogram.
## `holt_winters()` ## `holt_winters()`

View File

@ -615,10 +615,24 @@ func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64]
// set to the zero threshold. // set to the zero threshold.
func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] { func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
return &allFloatBucketIterator{ return &allFloatBucketIterator{
h: h, h: h,
negIter: h.NegativeReverseBucketIterator(), leftIter: h.NegativeReverseBucketIterator(),
posIter: h.PositiveBucketIterator(), rightIter: h.PositiveBucketIterator(),
state: -1, state: -1,
}
}
// AllReverseBucketIterator returns a BucketIterator to iterate over all negative,
// zero, and positive buckets in descending order (starting at the highest bucket
// and going down). If the highest negative bucket or the lowest positive bucket
// overlap with the zero bucket, their upper or lower boundary, respectively, is
// set to the zero threshold.
func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
return &allFloatBucketIterator{
h: h,
// leftIter is consumed first (while state is -1), so the positive buckets
// are visited before the zero bucket and the negative buckets.
leftIter: h.PositiveReverseBucketIterator(),
// rightIter is consumed last (state 1) and supplies the negative buckets.
rightIter: h.NegativeBucketIterator(),
state: -1,
} }
} }
@ -903,8 +917,8 @@ func (i *reverseFloatBucketIterator) Next() bool {
} }
type allFloatBucketIterator struct { type allFloatBucketIterator struct {
h *FloatHistogram h *FloatHistogram
negIter, posIter BucketIterator[float64] leftIter, rightIter BucketIterator[float64]
// -1 means we are iterating negative buckets. // -1 means we are iterating negative buckets.
// 0 means it is time for the zero bucket. // 0 means it is time for the zero bucket.
// 1 means we are iterating positive buckets. // 1 means we are iterating positive buckets.
@ -916,10 +930,13 @@ type allFloatBucketIterator struct {
func (i *allFloatBucketIterator) Next() bool { func (i *allFloatBucketIterator) Next() bool {
switch i.state { switch i.state {
case -1: case -1:
if i.negIter.Next() { if i.leftIter.Next() {
i.currBucket = i.negIter.At() i.currBucket = i.leftIter.At()
if i.currBucket.Upper > -i.h.ZeroThreshold { switch {
case i.currBucket.Upper < 0 && i.currBucket.Upper > -i.h.ZeroThreshold:
i.currBucket.Upper = -i.h.ZeroThreshold i.currBucket.Upper = -i.h.ZeroThreshold
case i.currBucket.Lower > 0 && i.currBucket.Lower < i.h.ZeroThreshold:
i.currBucket.Lower = i.h.ZeroThreshold
} }
return true return true
} }
@ -940,10 +957,13 @@ func (i *allFloatBucketIterator) Next() bool {
} }
return i.Next() return i.Next()
case 1: case 1:
if i.posIter.Next() { if i.rightIter.Next() {
i.currBucket = i.posIter.At() i.currBucket = i.rightIter.At()
if i.currBucket.Lower < i.h.ZeroThreshold { switch {
case i.currBucket.Lower > 0 && i.currBucket.Lower < i.h.ZeroThreshold:
i.currBucket.Lower = i.h.ZeroThreshold i.currBucket.Lower = i.h.ZeroThreshold
case i.currBucket.Upper < 0 && i.currBucket.Upper > -i.h.ZeroThreshold:
i.currBucket.Upper = -i.h.ZeroThreshold
} }
return true return true
} }

View File

@ -1979,3 +1979,229 @@ func TestAllFloatBucketIterator(t *testing.T) {
}) })
} }
} }
// TestAllReverseFloatBucketIterator checks that AllReverseBucketIterator yields
// the same buckets, in the same order, as a sequence assembled by hand from
// PositiveReverseBucketIterator, the zero bucket, and NegativeBucketIterator.
// Each case states via the include* flags which of those three parts are
// expected to show up in the result.
func TestAllReverseFloatBucketIterator(t *testing.T) {
cases := []struct {
h FloatHistogram
// To determine the expected buckets.
includeNeg, includeZero, includePos bool
}{
// Positive and negative buckets plus a populated zero bucket.
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: true,
includePos: true,
},
// Negative buckets and zero bucket only; no positive spans are set.
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: true,
includePos: false,
},
// Positive buckets and zero bucket only; no negative spans are set.
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
},
includeNeg: false,
includeZero: true,
includePos: true,
},
// Only the zero bucket; neither positive nor negative spans are set.
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
},
includeNeg: false,
includeZero: true,
includePos: false,
},
// ZeroCount is 0 here, and the expectation omits the zero bucket.
{
h: FloatHistogram{
Count: 405,
ZeroCount: 0,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: false,
includePos: true,
},
{
h: FloatHistogram{
Count: 447,
ZeroCount: 42,
ZeroThreshold: 0.5, // Coinciding with bucket boundary.
Sum: 1008.4,
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: true,
includePos: true,
},
{
h: FloatHistogram{
Count: 447,
ZeroCount: 42,
ZeroThreshold: 0.6, // Within the bucket closest to zero.
Sum: 1008.4,
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: true,
includePos: true,
},
}
for i, c := range cases {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
var expBuckets, actBuckets []Bucket[float64]
// Expected order: positive buckets first, then the zero bucket, then
// the negative buckets.
if c.includePos {
it := c.h.PositiveReverseBucketIterator()
for it.Next() {
b := it.At()
// A positive bucket reaching below the zero threshold is expected
// to have its lower bound raised to the threshold.
if c.includeZero && b.Lower < c.h.ZeroThreshold {
b.Lower = c.h.ZeroThreshold
}
expBuckets = append(expBuckets, b)
}
}
if c.includeZero {
// The zero bucket spans [-ZeroThreshold, ZeroThreshold], inclusive
// on both ends.
expBuckets = append(expBuckets, Bucket[float64]{
Lower: -c.h.ZeroThreshold,
Upper: c.h.ZeroThreshold,
LowerInclusive: true,
UpperInclusive: true,
Count: c.h.ZeroCount,
})
}
if c.includeNeg {
it := c.h.NegativeBucketIterator()
for it.Next() {
b := it.At()
// Mirror image of the positive case: the upper bound is lowered to
// the negated zero threshold.
if c.includeZero && b.Upper > -c.h.ZeroThreshold {
b.Upper = -c.h.ZeroThreshold
}
expBuckets = append(expBuckets, b)
}
}
// Collect what the iterator under test actually produces.
it := c.h.AllReverseBucketIterator()
for it.Next() {
actBuckets = append(actBuckets, it.At())
}
require.Equal(t, expBuckets, actBuckets)
})
}
}

View File

@ -158,9 +158,21 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
var ( var (
bucket histogram.Bucket[float64] bucket histogram.Bucket[float64]
count float64 count float64
it = h.AllBucketIterator() it histogram.BucketIterator[float64]
rank = q * h.Count rank float64
) )
// If there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator.
// If q < 0.5, use the forward iterator.
// If q >= 0.5, use the reverse iterator.
if math.IsNaN(h.Sum) || q < 0.5 {
it = h.AllBucketIterator()
rank = q * h.Count
} else {
it = h.AllReverseBucketIterator()
rank = (1 - q) * h.Count
}
for it.Next() { for it.Next() {
bucket = it.At() bucket = it.At()
count += bucket.Count count += bucket.Count
@ -193,7 +205,15 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
return bucket.Upper return bucket.Upper
} }
rank -= count - bucket.Count // if there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator
// if the q < 0.5, use the forward iterator
// if the q >= 0.5, use the reverse iterator
if math.IsNaN(h.Sum) || q < 0.5 {
rank -= count - bucket.Count
} else {
rank = count - rank
}
// TODO(codesome): Use a better estimation than linear. // TODO(codesome): Use a better estimation than linear.
return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count) return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
} }