From 4503145c8be0ad3691ea53b76113e5a4b7daadca Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Wed, 3 Jul 2024 17:56:48 +0800 Subject: [PATCH] convert classic histograms to int nhcb where possible instead Signed-off-by: Jeanette Tan --- promql/promqltest/test.go | 11 ++++- scrape/scrape.go | 25 ++++++---- util/convertnhcb/convertnhcb.go | 81 +++++++++++++++++++++++++++------ 3 files changed, 93 insertions(+), 24 deletions(-) diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 576b30d5b..7e6554312 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -534,10 +534,17 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { // Convert the collated classic histogram data into native histograms // with custom bounds and append them to the storage. for _, histogramWrapper := range histogramMap { - upperBounds, fhBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds) + upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds, true) + fhBase := hBase.ToFloat(nil) samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs)) for t, histogram := range histogramWrapper.histogramByTs { - fh := convertnhcb.ConvertHistogramWrapper(histogram, upperBounds, fhBase) + h, fh := convertnhcb.ConvertHistogramWrapper(histogram, upperBounds, hBase, fhBase) + if fh == nil { + if err := h.Validate(); err != nil { + return err + } + fh = h.ToFloat(nil) + } if err := fh.Validate(); err != nil { return err } diff --git a/scrape/scrape.go b/scrape/scrape.go index c3005304e..16766a352 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -1812,15 +1812,24 @@ loop: for b := range th.BucketCounts { ub = append(ub, b) } - upperBounds, fhBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(ub) - fh := convertnhcb.ConvertHistogramWrapper(th, upperBounds, fhBase) - if err := fh.Validate(); err != nil { - continue - } + upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(ub, false) + fhBase := hBase.ToFloat(nil) + h, fh := convertnhcb.ConvertHistogramWrapper(th, upperBounds, hBase, fhBase) // fmt.Printf("FINAL lset: %s, timestamp: %v, val: %v\n", lset, defTime, fh) - _, err = app.AppendHistogram(0, lset, defTime, nil, fh) - if err != nil { - continue + if h != nil { + if err := h.Validate(); err != nil { + continue + } + if _, err = app.AppendHistogram(0, lset, defTime, h, nil); err != nil { + continue + } + } else if fh != nil { + if err := fh.Validate(); err != nil { + continue + } + if _, err = app.AppendHistogram(0, lset, defTime, nil, fh); err != nil { + continue + } } } sl.cache.resetNhcb() diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go index 8a9655386..face43628 100644 --- a/util/convertnhcb/convertnhcb.go +++ b/util/convertnhcb/convertnhcb.go @@ -14,6 +14,7 @@ package convertnhcb import ( + "fmt" "math" "sort" "strings" @@ -26,6 +27,7 @@ type TempHistogram struct { BucketCounts map[float64]float64 Count float64 Sum float64 + HasFloat bool } func NewTempHistogram() TempHistogram { @@ -34,15 +36,32 @@ func NewTempHistogram() TempHistogram { } } -func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64, *histogram.FloatHistogram) { +func (h TempHistogram) getIntBucketCounts() (map[float64]int64, error) { + bucketCounts := map[float64]int64{} + for le, count := range h.BucketCounts { + intCount := int64(math.Round(count)) + if float64(intCount) != count { + return nil, fmt.Errorf("bucket count %f for le %g is not an integer", count, le) + } + bucketCounts[le] = intCount + } + return bucketCounts, nil +} + +func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64, needsDedup bool) ([]float64, *histogram.Histogram) { sort.Float64s(upperBounds0) - upperBounds := make([]float64, 0, len(upperBounds0)) - prevLE := math.Inf(-1) - for _, le := range upperBounds0 { - if le != prevLE { // deduplicate - upperBounds = append(upperBounds, le) - prevLE = le + var upperBounds []float64 + if needsDedup { + upperBounds = make([]float64, 0, len(upperBounds0)) + prevLE := math.Inf(-1) + for _, le := range upperBounds0 { + if le != prevLE { + upperBounds = append(upperBounds, le) + prevLE = le + } } + } else { + upperBounds = upperBounds0 } var customBounds []float64 if upperBounds[len(upperBounds)-1] == math.Inf(1) { @@ -50,23 +69,57 @@ func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64 } else { customBounds = upperBounds } - return upperBounds, &histogram.FloatHistogram{ + return upperBounds, &histogram.Histogram{ Count: 0, Sum: 0, Schema: histogram.CustomBucketsSchema, PositiveSpans: []histogram.Span{ {Offset: 0, Length: uint32(len(upperBounds))}, }, - PositiveBuckets: make([]float64, len(upperBounds)), + PositiveBuckets: make([]int64, len(upperBounds)), CustomValues: customBounds, } } -func ConvertHistogramWrapper(hist TempHistogram, upperBounds []float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram { +func ConvertHistogramWrapper(histogram TempHistogram, upperBounds []float64, hBase *histogram.Histogram, fhBase *histogram.FloatHistogram) (*histogram.Histogram, *histogram.FloatHistogram) { + intBucketCounts, err := histogram.getIntBucketCounts() + if err != nil { + return nil, convertFloatHistogramWrapper(histogram, upperBounds, histogram.BucketCounts, fhBase) + } + return convertIntHistogramWrapper(histogram, upperBounds, intBucketCounts, hBase), nil +} + +func convertIntHistogramWrapper(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]int64, hBase *histogram.Histogram) *histogram.Histogram { + h := hBase.Copy() + absBucketCounts := make([]int64, len(h.PositiveBuckets)) + var prevCount, total int64 + for i, le := range upperBounds { + currCount, exists := bucketCounts[le] + if !exists { + currCount = 0 + } + count := currCount - prevCount + absBucketCounts[i] = count + total += count + prevCount = currCount + } + h.PositiveBuckets[0] = absBucketCounts[0] + for i := 1; i < len(h.PositiveBuckets); i++ { + h.PositiveBuckets[i] = absBucketCounts[i] - absBucketCounts[i-1] + } + h.Sum = histogram.Sum + if histogram.Count != 0 { + total = int64(histogram.Count) + } + h.Count = uint64(total) + return h.Compact(0) +} + +func convertFloatHistogramWrapper(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram { fh := fhBase.Copy() var prevCount, total float64 for i, le := range upperBounds { - currCount, exists := hist.BucketCounts[le] + currCount, exists := bucketCounts[le] if !exists { currCount = 0 } @@ -75,9 +128,9 @@ func ConvertHistogramWrapper(hist TempHistogram, upperBounds []float64, fhBase * total += count prevCount = currCount } - fh.Sum = hist.Sum - if hist.Count != 0 { - total = hist.Count + fh.Sum = histogram.Sum + if histogram.Count != 0 { + total = histogram.Count } fh.Count = total return fh.Compact(0)