mirror of https://github.com/prometheus/prometheus
[BUGFIX] PromQL: Fix behaviour of some aggregations with histograms (#15432)
promql: fix some aggregations for histograms This PR fixes the behaviour of `topk`, `bottomk`, `limitk` and `limit_ratio` with histograms. The fixed behaviours are as follows: - For `topk` and `bottomk`, histograms are ignored and info annotations are added. - For `limitk` and `limit_ratio`, histograms are included in the results (if applicable). Signed-off-by: Neeraj Gartia <neerajgartia211002@gmail.com>pull/15473/head
parent
634afbc130
commit
38bb6ece25
|
@ -3187,18 +3187,19 @@ func (ev *evaluator) aggregationK(e *parser.AggregateExpr, k int, r float64, inp
|
|||
|
||||
seriesLoop:
|
||||
for si := range inputMatrix {
|
||||
f, _, ok := ev.nextValues(enh.Ts, &inputMatrix[si])
|
||||
f, h, ok := ev.nextValues(enh.Ts, &inputMatrix[si])
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
s = Sample{Metric: inputMatrix[si].Metric, F: f, DropName: inputMatrix[si].DropName}
|
||||
s = Sample{Metric: inputMatrix[si].Metric, F: f, H: h, DropName: inputMatrix[si].DropName}
|
||||
|
||||
group := &groups[seriesToResult[si]]
|
||||
// Initialize this group if it's the first time we've seen it.
|
||||
if !group.seen {
|
||||
// LIMIT_RATIO is a special case, as we may not add this very sample to the heap,
|
||||
// while we also don't know the final size of it.
|
||||
if op == parser.LIMIT_RATIO {
|
||||
switch op {
|
||||
case parser.LIMIT_RATIO:
|
||||
*group = groupedAggregation{
|
||||
seen: true,
|
||||
heap: make(vectorByValueHeap, 0),
|
||||
|
@ -3206,12 +3207,34 @@ seriesLoop:
|
|||
if ratiosampler.AddRatioSample(r, &s) {
|
||||
heap.Push(&group.heap, &s)
|
||||
}
|
||||
} else {
|
||||
case parser.LIMITK:
|
||||
*group = groupedAggregation{
|
||||
seen: true,
|
||||
heap: make(vectorByValueHeap, 1, k),
|
||||
}
|
||||
group.heap[0] = s
|
||||
case parser.TOPK:
|
||||
*group = groupedAggregation{
|
||||
seen: true,
|
||||
heap: make(vectorByValueHeap, 0, k),
|
||||
}
|
||||
if s.H != nil {
|
||||
group.seen = false
|
||||
annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("topk", e.PosRange))
|
||||
} else {
|
||||
heap.Push(&group.heap, &s)
|
||||
}
|
||||
case parser.BOTTOMK:
|
||||
*group = groupedAggregation{
|
||||
seen: true,
|
||||
heap: make(vectorByValueHeap, 0, k),
|
||||
}
|
||||
if s.H != nil {
|
||||
group.seen = false
|
||||
annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("bottomk", e.PosRange))
|
||||
} else {
|
||||
heap.Push(&group.heap, &s)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
@ -3220,6 +3243,9 @@ seriesLoop:
|
|||
case parser.TOPK:
|
||||
// We build a heap of up to k elements, with the smallest element at heap[0].
|
||||
switch {
|
||||
case s.H != nil:
|
||||
// Ignore histogram sample and add info annotation.
|
||||
annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("topk", e.PosRange))
|
||||
case len(group.heap) < k:
|
||||
heap.Push(&group.heap, &s)
|
||||
case group.heap[0].F < s.F || (math.IsNaN(group.heap[0].F) && !math.IsNaN(s.F)):
|
||||
|
@ -3233,6 +3259,9 @@ seriesLoop:
|
|||
case parser.BOTTOMK:
|
||||
// We build a heap of up to k elements, with the biggest element at heap[0].
|
||||
switch {
|
||||
case s.H != nil:
|
||||
// Ignore histogram sample and add info annotation.
|
||||
annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("bottomk", e.PosRange))
|
||||
case len(group.heap) < k:
|
||||
heap.Push((*vectorByReverseValueHeap)(&group.heap), &s)
|
||||
case group.heap[0].F > s.F || (math.IsNaN(group.heap[0].F) && !math.IsNaN(s.F)):
|
||||
|
@ -3275,10 +3304,14 @@ seriesLoop:
|
|||
mat = make(Matrix, 0, len(groups))
|
||||
}
|
||||
|
||||
add := func(lbls labels.Labels, f float64, dropName bool) {
|
||||
add := func(lbls labels.Labels, f float64, h *histogram.FloatHistogram, dropName bool) {
|
||||
// If this could be an instant query, add directly to the matrix so the result is in consistent order.
|
||||
if ev.endTimestamp == ev.startTimestamp {
|
||||
mat = append(mat, Series{Metric: lbls, Floats: []FPoint{{T: enh.Ts, F: f}}, DropName: dropName})
|
||||
if h != nil {
|
||||
mat = append(mat, Series{Metric: lbls, Histograms: []HPoint{{T: enh.Ts, H: h}}, DropName: dropName})
|
||||
} else {
|
||||
mat = append(mat, Series{Metric: lbls, Floats: []FPoint{{T: enh.Ts, F: f}}, DropName: dropName})
|
||||
}
|
||||
} else {
|
||||
// Otherwise the results are added into seriess elements.
|
||||
hash := lbls.Hash()
|
||||
|
@ -3286,7 +3319,7 @@ seriesLoop:
|
|||
if !ok {
|
||||
ss = Series{Metric: lbls, DropName: dropName}
|
||||
}
|
||||
addToSeries(&ss, enh.Ts, f, nil, numSteps)
|
||||
addToSeries(&ss, enh.Ts, f, h, numSteps)
|
||||
seriess[hash] = ss
|
||||
}
|
||||
}
|
||||
|
@ -3301,7 +3334,7 @@ seriesLoop:
|
|||
sort.Sort(sort.Reverse(aggr.heap))
|
||||
}
|
||||
for _, v := range aggr.heap {
|
||||
add(v.Metric, v.F, v.DropName)
|
||||
add(v.Metric, v.F, v.H, v.DropName)
|
||||
}
|
||||
|
||||
case parser.BOTTOMK:
|
||||
|
@ -3310,12 +3343,12 @@ seriesLoop:
|
|||
sort.Sort(sort.Reverse((*vectorByReverseValueHeap)(&aggr.heap)))
|
||||
}
|
||||
for _, v := range aggr.heap {
|
||||
add(v.Metric, v.F, v.DropName)
|
||||
add(v.Metric, v.F, v.H, v.DropName)
|
||||
}
|
||||
|
||||
case parser.LIMITK, parser.LIMIT_RATIO:
|
||||
for _, v := range aggr.heap {
|
||||
add(v.Metric, v.F, v.DropName)
|
||||
add(v.Metric, v.F, v.H, v.DropName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -272,6 +272,8 @@ load 5m
|
|||
http_requests{job="app-server", instance="1", group="production"} 0+60x10
|
||||
http_requests{job="app-server", instance="0", group="canary"} 0+70x10
|
||||
http_requests{job="app-server", instance="1", group="canary"} 0+80x10
|
||||
http_requests_histogram{job="app-server", instance="2", group="canary"} {{schema:0 sum:10 count:10}}x11
|
||||
http_requests_histogram{job="api-server", instance="3", group="production"} {{schema:0 sum:20 count:20}}x11
|
||||
foo 3+0x10
|
||||
|
||||
eval_ordered instant at 50m topk(3, http_requests)
|
||||
|
@ -338,6 +340,47 @@ eval_ordered instant at 50m topk(scalar(foo), http_requests)
|
|||
http_requests{group="canary", instance="0", job="app-server"} 700
|
||||
http_requests{group="production", instance="1", job="app-server"} 600
|
||||
|
||||
# Tests for histogram: should ignore histograms.
|
||||
eval_info instant at 50m topk(100, http_requests_histogram)
|
||||
#empty
|
||||
|
||||
eval_info range from 0 to 50m step 5m topk(100, http_requests_histogram)
|
||||
#empty
|
||||
|
||||
eval_info instant at 50m topk(1, {__name__=~"http_requests(_histogram)?"})
|
||||
{__name__="http_requests", group="canary", instance="1", job="app-server"} 800
|
||||
|
||||
eval_info instant at 50m count(topk(1000, {__name__=~"http_requests(_histogram)?"}))
|
||||
{} 9
|
||||
|
||||
eval_info range from 0 to 50m step 5m count(topk(1000, {__name__=~"http_requests(_histogram)?"}))
|
||||
{} 9x10
|
||||
|
||||
eval_info instant at 50m topk by (instance) (1, {__name__=~"http_requests(_histogram)?"})
|
||||
{__name__="http_requests", group="canary", instance="0", job="app-server"} 700
|
||||
{__name__="http_requests", group="canary", instance="1", job="app-server"} 800
|
||||
{__name__="http_requests", group="production", instance="2", job="api-server"} NaN
|
||||
|
||||
eval_info instant at 50m bottomk(100, http_requests_histogram)
|
||||
#empty
|
||||
|
||||
eval_info range from 0 to 50m step 5m bottomk(100, http_requests_histogram)
|
||||
#empty
|
||||
|
||||
eval_info instant at 50m bottomk(1, {__name__=~"http_requests(_histogram)?"})
|
||||
{__name__="http_requests", group="production", instance="0", job="api-server"} 100
|
||||
|
||||
eval_info instant at 50m count(bottomk(1000, {__name__=~"http_requests(_histogram)?"}))
|
||||
{} 9
|
||||
|
||||
eval_info range from 0 to 50m step 5m count(bottomk(1000, {__name__=~"http_requests(_histogram)?"}))
|
||||
{} 9x10
|
||||
|
||||
eval_info instant at 50m bottomk by (instance) (1, {__name__=~"http_requests(_histogram)?"})
|
||||
{__name__="http_requests", group="production", instance="0", job="api-server"} 100
|
||||
{__name__="http_requests", group="production", instance="1", job="api-server"} 200
|
||||
{__name__="http_requests", group="production", instance="2", job="api-server"} NaN
|
||||
|
||||
clear
|
||||
|
||||
# Tests for count_values.
|
||||
|
|
|
@ -9,6 +9,8 @@ load 5m
|
|||
http_requests{job="api-server", instance="1", group="canary"} 0+40x10
|
||||
http_requests{job="api-server", instance="2", group="canary"} 0+50x10
|
||||
http_requests{job="api-server", instance="3", group="canary"} 0+60x10
|
||||
http_requests{job="api-server", instance="histogram_1", group="canary"} {{schema:0 sum:10 count:10}}x11
|
||||
http_requests{job="api-server", instance="histogram_2", group="canary"} {{schema:0 sum:20 count:20}}x11
|
||||
|
||||
eval instant at 50m count(limitk by (group) (0, http_requests))
|
||||
# empty
|
||||
|
@ -16,25 +18,56 @@ eval instant at 50m count(limitk by (group) (0, http_requests))
|
|||
eval instant at 50m count(limitk by (group) (-1, http_requests))
|
||||
# empty
|
||||
|
||||
# Exercise k==1 special case (as sample is added before the main series loop
|
||||
# Exercise k==1 special case (as sample is added before the main series loop).
|
||||
eval instant at 50m count(limitk by (group) (1, http_requests) and http_requests)
|
||||
{} 2
|
||||
{} 2
|
||||
|
||||
eval instant at 50m count(limitk by (group) (2, http_requests) and http_requests)
|
||||
{} 4
|
||||
{} 4
|
||||
|
||||
eval instant at 50m count(limitk(100, http_requests) and http_requests)
|
||||
{} 6
|
||||
{} 8
|
||||
|
||||
# Exercise k==1 special case (as sample is added before the main series loop
|
||||
# Exercise k==1 special case (as sample is added before the main series loop).
|
||||
eval instant at 50m count(limitk by (group) (1, http_requests) and http_requests)
|
||||
{} 2
|
||||
{} 2
|
||||
|
||||
eval instant at 50m count(limitk by (group) (2, http_requests) and http_requests)
|
||||
{} 4
|
||||
{} 4
|
||||
|
||||
eval instant at 50m count(limitk(100, http_requests) and http_requests)
|
||||
{} 6
|
||||
{} 8
|
||||
|
||||
# Test for histograms.
|
||||
# k==1: verify that histogram is included in the result.
|
||||
eval instant at 50m limitk(1, http_requests{instance="histogram_1"})
|
||||
{__name__="http_requests", group="canary", instance="histogram_1", job="api-server"} {{count:10 sum:10}}
|
||||
|
||||
eval range from 0 to 50m step 5m limitk(1, http_requests{instance="histogram_1"})
|
||||
{__name__="http_requests", group="canary", instance="histogram_1", job="api-server"} {{count:10 sum:10}}x10
|
||||
|
||||
# Histogram is included with mix of floats as well.
|
||||
eval instant at 50m limitk(8, http_requests{instance=~"(histogram_2|0)"})
|
||||
{__name__="http_requests", group="canary", instance="histogram_2", job="api-server"} {{count:20 sum:20}}
|
||||
{__name__="http_requests", group="production", instance="0", job="api-server"} 100
|
||||
{__name__="http_requests", group="canary", instance="0", job="api-server"} 300
|
||||
|
||||
eval range from 0 to 50m step 5m limitk(8, http_requests{instance=~"(histogram_2|0)"})
|
||||
{__name__="http_requests", group="canary", instance="histogram_2", job="api-server"} {{count:20 sum:20}}x10
|
||||
{__name__="http_requests", group="production", instance="0", job="api-server"} 0+10x10
|
||||
{__name__="http_requests", group="canary", instance="0", job="api-server"} 0+30x10
|
||||
|
||||
eval instant at 50m count(limitk(2, http_requests{instance=~"histogram_[0-9]"}))
|
||||
{} 2
|
||||
|
||||
eval range from 0 to 50m step 5m count(limitk(2, http_requests{instance=~"histogram_[0-9]"}))
|
||||
{} 2+0x10
|
||||
|
||||
eval instant at 50m count(limitk(1000, http_requests{instance=~"histogram_[0-9]"}))
|
||||
{} 2
|
||||
|
||||
eval range from 0 to 50m step 5m count(limitk(1000, http_requests{instance=~"histogram_[0-9]"}))
|
||||
{} 2+0x10
|
||||
|
||||
# limit_ratio
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.0, http_requests))
|
||||
|
@ -42,9 +75,9 @@ eval range from 0 to 50m step 5m count(limit_ratio(0.0, http_requests))
|
|||
|
||||
# limitk(2, ...) should always return a 2-count subset of the timeseries (hence the AND'ing)
|
||||
eval range from 0 to 50m step 5m count(limitk(2, http_requests) and http_requests)
|
||||
{} 2+0x10
|
||||
{} 2+0x10
|
||||
|
||||
# Tests for limit_ratio
|
||||
# Tests for limit_ratio.
|
||||
#
|
||||
# NB: below 0.5 ratio will depend on some hashing "luck" (also there's no guarantee that
|
||||
# an integer comes from: total number of series * ratio), as it depends on:
|
||||
|
@ -56,50 +89,50 @@ eval range from 0 to 50m step 5m count(limitk(2, http_requests) and http_request
|
|||
#
|
||||
# See `AddRatioSample()` in promql/engine.go for more details.
|
||||
|
||||
# Half~ish samples: verify we get "near" 3 (of 0.5 * 6)
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and http_requests) <= bool (3+1)
|
||||
{} 1+0x10
|
||||
# Half~ish samples: verify we get "near" 3 (of 0.5 * 6).
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and http_requests) <= bool (4+1)
|
||||
{} 1+0x10
|
||||
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and http_requests) >= bool (3-1)
|
||||
{} 1+0x10
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and http_requests) >= bool (4-1)
|
||||
{} 1+0x10
|
||||
|
||||
# All samples
|
||||
# All samples.
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(1.0, http_requests) and http_requests)
|
||||
{} 6+0x10
|
||||
{} 8+0x10
|
||||
|
||||
# All samples
|
||||
# All samples.
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(-1.0, http_requests) and http_requests)
|
||||
{} 6+0x10
|
||||
{} 8+0x10
|
||||
|
||||
# Capped to 1.0 -> all samples
|
||||
# Capped to 1.0 -> all samples.
|
||||
eval_warn range from 0 to 50m step 5m count(limit_ratio(1.1, http_requests) and http_requests)
|
||||
{} 6+0x10
|
||||
{} 8+0x10
|
||||
|
||||
# Capped to -1.0 -> all samples
|
||||
# Capped to -1.0 -> all samples.
|
||||
eval_warn range from 0 to 50m step 5m count(limit_ratio(-1.1, http_requests) and http_requests)
|
||||
{} 6+0x10
|
||||
{} 8+0x10
|
||||
|
||||
# Verify that limit_ratio(value) and limit_ratio(1.0-value) return the "complement" of each other
|
||||
# Complement below for [0.2, -0.8]
|
||||
# Verify that limit_ratio(value) and limit_ratio(1.0-value) return the "complement" of each other.
|
||||
# Complement below for [0.2, -0.8].
|
||||
#
|
||||
# Complement 1of2: `or` should return all samples
|
||||
# Complement 1of2: `or` should return all samples.
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.2, http_requests) or limit_ratio(-0.8, http_requests))
|
||||
{} 6+0x10
|
||||
{} 8+0x10
|
||||
|
||||
# Complement 2of2: `and` should return no samples
|
||||
# Complement 2of2: `and` should return no samples.
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.2, http_requests) and limit_ratio(-0.8, http_requests))
|
||||
# empty
|
||||
|
||||
# Complement below for [0.5, -0.5]
|
||||
# Complement below for [0.5, -0.5].
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) or limit_ratio(-0.5, http_requests))
|
||||
{} 6+0x10
|
||||
{} 8+0x10
|
||||
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and limit_ratio(-0.5, http_requests))
|
||||
# empty
|
||||
|
||||
# Complement below for [0.8, -0.2]
|
||||
# Complement below for [0.8, -0.2].
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.8, http_requests) or limit_ratio(-0.2, http_requests))
|
||||
{} 6+0x10
|
||||
{} 8+0x10
|
||||
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(0.8, http_requests) and limit_ratio(-0.2, http_requests))
|
||||
# empty
|
||||
|
@ -107,13 +140,19 @@ eval range from 0 to 50m step 5m count(limit_ratio(0.8, http_requests) and limit
|
|||
# Complement below for [some_ratio, 1.0 - some_ratio], some_ratio derived from time(),
|
||||
# using a small prime number to avoid rounded ratio values, and a small set of them.
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(time() % 17/17, http_requests) or limit_ratio(1.0 - (time() % 17/17), http_requests))
|
||||
{} 6+0x10
|
||||
{} 8+0x10
|
||||
|
||||
eval range from 0 to 50m step 5m count(limit_ratio(time() % 17/17, http_requests) and limit_ratio(1.0 - (time() % 17/17), http_requests))
|
||||
# empty
|
||||
|
||||
# Poor man's normality check: ok (loaded samples follow a nice linearity over labels and time)
|
||||
# The check giving: 1 (i.e. true)
|
||||
eval range from 0 to 50m step 5m abs(avg(limit_ratio(0.5, http_requests)) - avg(limit_ratio(-0.5, http_requests))) <= bool stddev(http_requests)
|
||||
# Poor man's normality check: ok (loaded samples follow a nice linearity over labels and time).
|
||||
# The check giving: 1 (i.e. true).
|
||||
eval range from 0 to 50m step 5m abs(avg(limit_ratio(0.5, http_requests{instance!~"histogram_[0-9]"})) - avg(limit_ratio(-0.5, http_requests{instance!~"histogram_[0-9]"}))) <= bool stddev(http_requests{instance!~"histogram_[0-9]"})
|
||||
{} 1+0x10
|
||||
|
||||
# All specified histograms are included for r=1.
|
||||
eval instant at 50m limit_ratio(1, http_requests{instance="histogram_1"})
|
||||
{__name__="http_requests", group="canary", instance="histogram_1", job="api-server"} {{count:10 sum:10}}
|
||||
|
||||
eval range from 0 to 50m step 5m limit_ratio(1, http_requests{instance="histogram_1"})
|
||||
{__name__="http_requests", group="canary", instance="histogram_1", job="api-server"} {{count:10 sum:10}}x10
|
||||
|
|
Loading…
Reference in New Issue