From 41f051e9a494bfe88d18bcd5a5fb2a6b97320d0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 25 Nov 2024 14:46:22 +0100 Subject: [PATCH 1/3] Small improvement in handling cases without count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- util/convertnhcb/convertnhcb.go | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go index ee5bcb72d..ce8fdda89 100644 --- a/util/convertnhcb/convertnhcb.go +++ b/util/convertnhcb/convertnhcb.go @@ -139,19 +139,15 @@ func (h TempHistogram) Convert() (*histogram.Histogram, *histogram.FloatHistogra return nil, nil, h.err } - if len(h.buckets) == 0 || h.buckets[len(h.buckets)-1].le != math.Inf(1) { - // No +Inf bucket. - if !h.hasCount && len(h.buckets) > 0 { - // No count either, so set count to the last known bucket's count. - h.count = h.buckets[len(h.buckets)-1].count - } - // Let the last bucket be +Inf with the overall count. - h.buckets = append(h.buckets, tempHistogramBucket{le: math.Inf(1), count: h.count}) + if !h.hasCount && len(h.buckets) > 0 { + // No count, so set count to the highest known bucket's count. + h.count = h.buckets[len(h.buckets)-1].count } - if !h.hasCount { - h.count = h.buckets[len(h.buckets)-1].count - h.hasCount = true + if len(h.buckets) == 0 || h.buckets[len(h.buckets)-1].le != math.Inf(1) { + // No +Inf bucket. + // Let the last bucket be +Inf with the overall count. + h.buckets = append(h.buckets, tempHistogramBucket{le: math.Inf(1), count: h.count}) } for _, b := range h.buckets { From a48d05912d80e03532d1883a492e7e8de21a1786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 25 Nov 2024 15:01:26 +0100 Subject: [PATCH 2/3] nhcb: optimize, do not recalculate suffixes multiple times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce string manipulation by just cutting off the histogram suffixes from the series name label once. Signed-off-by: György Krajcsovits --- model/textparse/nhcbparse.go | 34 +++++++++++++++++++-------------- promql/promqltest/test.go | 22 ++++++++++++--------- util/convertnhcb/convertnhcb.go | 24 +++++++++++++++-------- 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go index 6fe2e8e54..ff756965f 100644 --- a/model/textparse/nhcbparse.go +++ b/model/textparse/nhcbparse.go @@ -243,7 +243,8 @@ func (p *NHCBParser) compareLabels() bool { // Different metric type. return true } - if p.lastHistogramName != convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) { + _, name := convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) + if p.lastHistogramName != name { // Different metric name. return true } @@ -253,8 +254,8 @@ func (p *NHCBParser) compareLabels() bool { } // Save the label set of the classic histogram without suffix and bucket `le` label. -func (p *NHCBParser) storeClassicLabels() { - p.lastHistogramName = convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) +func (p *NHCBParser) storeClassicLabels(name string) { + p.lastHistogramName = name p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) p.lastHistogramExponential = false } @@ -275,25 +276,30 @@ func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { } mName := lset.Get(labels.MetricName) // Sanity check to ensure that the TYPE metadata entry name is the same as the base name. - if convertnhcb.GetHistogramMetricBaseName(mName) != string(p.bName) { + suffixType, name := convertnhcb.GetHistogramMetricBaseName(mName) + if name != string(p.bName) { return false } - switch { - case strings.HasSuffix(mName, "_bucket") && lset.Has(labels.BucketLabel): + switch suffixType { + case convertnhcb.SuffixBucket: + if !lset.Has(labels.BucketLabel) { + // This should not really happen. + return false + } le, err := strconv.ParseFloat(lset.Get(labels.BucketLabel), 64) if err == nil && !math.IsNaN(le) { - p.processClassicHistogramSeries(lset, "_bucket", func(hist *convertnhcb.TempHistogram) { + p.processClassicHistogramSeries(lset, name, func(hist *convertnhcb.TempHistogram) { _ = hist.SetBucketCount(le, p.value) }) return true } - case strings.HasSuffix(mName, "_count"): - p.processClassicHistogramSeries(lset, "_count", func(hist *convertnhcb.TempHistogram) { + case convertnhcb.SuffixCount: + p.processClassicHistogramSeries(lset, name, func(hist *convertnhcb.TempHistogram) { _ = hist.SetCount(p.value) }) return true - case strings.HasSuffix(mName, "_sum"): - p.processClassicHistogramSeries(lset, "_sum", func(hist *convertnhcb.TempHistogram) { + case convertnhcb.SuffixSum: + p.processClassicHistogramSeries(lset, name, func(hist *convertnhcb.TempHistogram) { _ = hist.SetSum(p.value) }) return true @@ -301,12 +307,12 @@ func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { return false } -func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, suffix string, updateHist func(*convertnhcb.TempHistogram)) { +func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, name string, updateHist func(*convertnhcb.TempHistogram)) { if p.state != stateCollecting { - p.storeClassicLabels() + p.storeClassicLabels(name) p.tempCT = p.parser.CreatedTimestamp() p.state = stateCollecting - p.tempLsetNHCB = convertnhcb.GetHistogramMetricBase(lset, suffix) + p.tempLsetNHCB = convertnhcb.GetHistogramMetricBase(lset, name) } p.storeExemplars() updateHist(&p.tempNHCB) diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 36519561a..a4dbd64ff 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -491,8 +491,8 @@ func newTempHistogramWrapper() tempHistogramWrapper { } } -func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogram func(*convertnhcb.TempHistogram, float64)) { - m2 := convertnhcb.GetHistogramMetricBase(m, suffix) +func processClassicHistogramSeries(m labels.Labels, name string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogram func(*convertnhcb.TempHistogram, float64)) { + m2 := convertnhcb.GetHistogramMetricBase(m, name) m2hash := m2.Hash() histogramWrapper, exists := histogramMap[m2hash] if !exists { @@ -523,21 +523,25 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { for hash, smpls := range cmd.defs { m := cmd.metrics[hash] mName := m.Get(labels.MetricName) - switch { - case strings.HasSuffix(mName, "_bucket") && m.Has(labels.BucketLabel): + suffixType, name := convertnhcb.GetHistogramMetricBaseName(mName) + switch suffixType { + case convertnhcb.SuffixBucket: + if !m.Has(labels.BucketLabel) { + panic(fmt.Sprintf("expected bucket label in metric %s", m)) + } le, err := strconv.ParseFloat(m.Get(labels.BucketLabel), 64) if err != nil || math.IsNaN(le) { continue } - processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + processClassicHistogramSeries(m, name, histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { _ = histogram.SetBucketCount(le, f) }) - case strings.HasSuffix(mName, "_count"): - processClassicHistogramSeries(m, "_count", histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + case convertnhcb.SuffixCount: + processClassicHistogramSeries(m, name, histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { _ = histogram.SetCount(f) }) - case strings.HasSuffix(mName, "_sum"): - processClassicHistogramSeries(m, "_sum", histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + case convertnhcb.SuffixSum: + processClassicHistogramSeries(m, name, histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { _ = histogram.SetSum(f) }) } diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go index ce8fdda89..b3dc851da 100644 --- a/util/convertnhcb/convertnhcb.go +++ b/util/convertnhcb/convertnhcb.go @@ -228,26 +228,34 @@ func (h TempHistogram) convertToFloatHistogram() (*histogram.Histogram, *histogr return nil, rh.Compact(0), nil } -func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels { - mName := m.Get(labels.MetricName) +func GetHistogramMetricBase(m labels.Labels, name string) labels.Labels { return labels.NewBuilder(m). - Set(labels.MetricName, strings.TrimSuffix(mName, suffix)). + Set(labels.MetricName, name). Del(labels.BucketLabel). Labels() } +type SuffixType int + +const ( + SuffixNone SuffixType = iota + SuffixBucket + SuffixSum + SuffixCount +) + // GetHistogramMetricBaseName removes the suffixes _bucket, _sum, _count from // the metric name. We specifically do not remove the _created suffix as that // should be removed by the caller. -func GetHistogramMetricBaseName(s string) string { +func GetHistogramMetricBaseName(s string) (SuffixType, string) { if r, ok := strings.CutSuffix(s, "_bucket"); ok { - return r + return SuffixBucket, r } if r, ok := strings.CutSuffix(s, "_sum"); ok { - return r + return SuffixSum, r } if r, ok := strings.CutSuffix(s, "_count"); ok { - return r + return SuffixCount, r } - return s + return SuffixNone, s } From eb9ce70c3e7fcaf6b748168110c0527804998d2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 26 Nov 2024 13:56:22 +0100 Subject: [PATCH 3/3] Set hasCount after setting count to be consistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- util/convertnhcb/convertnhcb.go | 1 + 1 file changed, 1 insertion(+) diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go index b3dc851da..21ae62b3c 100644 --- a/util/convertnhcb/convertnhcb.go +++ b/util/convertnhcb/convertnhcb.go @@ -142,6 +142,7 @@ func (h TempHistogram) Convert() (*histogram.Histogram, *histogram.FloatHistogra if !h.hasCount && len(h.buckets) > 0 { // No count, so set count to the highest known bucket's count. h.count = h.buckets[len(h.buckets)-1].count + h.hasCount = true } if len(h.buckets) == 0 || h.buckets[len(h.buckets)-1].le != math.Inf(1) {