mirror of https://github.com/prometheus/prometheus
Fix OOM when a large K is used in topk queries (#4087)
This attempts to close #3973. Handles cases where the length of the input vector to an aggregate topk / bottomk function is less than the K parameter. The change updates Prometheus to allocate a result vector the same length as the input vector in these cases. Previously Prometheus would out-of-memory panic for large K values. This change makes that unlikely unless the size of the input vector is equally large. Signed-off-by: David King <dave@davbo.org> pull/4091/head
parent
e7584ee345
commit
6286c10df0
|
@ -1333,14 +1333,19 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
|
|||
valuesSquaredSum: s.V * s.V,
|
||||
groupCount: 1,
|
||||
}
|
||||
input_vec_len := int64(len(vec))
|
||||
result_size := k
|
||||
if k > input_vec_len {
|
||||
result_size = input_vec_len
|
||||
}
|
||||
if op == itemTopK || op == itemQuantile {
|
||||
result[groupingKey].heap = make(vectorByValueHeap, 0, k)
|
||||
result[groupingKey].heap = make(vectorByValueHeap, 0, result_size)
|
||||
heap.Push(&result[groupingKey].heap, &Sample{
|
||||
Point: Point{V: s.V},
|
||||
Metric: s.Metric,
|
||||
})
|
||||
} else if op == itemBottomK {
|
||||
result[groupingKey].reverseHeap = make(vectorByReverseValueHeap, 0, k)
|
||||
result[groupingKey].reverseHeap = make(vectorByReverseValueHeap, 0, result_size)
|
||||
heap.Push(&result[groupingKey].reverseHeap, &Sample{
|
||||
Point: Point{V: s.V},
|
||||
Metric: s.Metric,
|
||||
|
|
|
@ -184,6 +184,16 @@ eval_ordered instant at 50m bottomk(3, http_requests{job="api-server",group="pro
|
|||
http_requests{job="api-server", instance="1", group="production"} 200
|
||||
http_requests{job="api-server", instance="2", group="production"} NaN
|
||||
|
||||
# Test topk and bottomk allocate min(k, input_vector) for results vector
|
||||
eval_ordered instant at 50m bottomk(9999999999, http_requests{job="app-server",group="canary"})
|
||||
http_requests{group="canary", instance="0", job="app-server"} 700
|
||||
http_requests{group="canary", instance="1", job="app-server"} 800
|
||||
|
||||
eval_ordered instant at 50m topk(9999999999, http_requests{job="api-server",group="production"})
|
||||
http_requests{job="api-server", instance="1", group="production"} 200
|
||||
http_requests{job="api-server", instance="0", group="production"} 100
|
||||
http_requests{job="api-server", instance="2", group="production"} NaN
|
||||
|
||||
clear
|
||||
|
||||
# Tests for count_values.
|
||||
|
|
Loading…
Reference in New Issue