From 896f951e6846ce252d9d19fd4707a4110ceda5ee Mon Sep 17 00:00:00 2001
From: Jack Neely <jjneely@gmail.com>
Date: Fri, 14 Apr 2017 10:21:49 -0400
Subject: [PATCH] Force buckets in a histogram to be monotonic for quantile
 estimation (#2610)

* Force buckets in a histogram to be monotonic for quantile estimation

The assumption that bucket counts increase monotonically with increasing
upperBound may be violated during:

  * Recording rule evaluation of histogram_quantile, especially when rate()
     has been applied to the underlying bucket timeseries.
  * Evaluation of histogram_quantile computed over federated bucket
     timeseries, especially when rate() has been applied

This is because scraped data is not made available to RR evalution or
federation atomically, so some buckets are computed with data from the N
most recent scrapes, but the other buckets are missing the most recent
observations.

Monotonicity is usually guaranteed because if a bucket with upper bound
u1 has count c1, then any bucket with a higher upper bound u > u1 must
have counted all c1 observations and perhaps more, so that c  >= c1.

Randomly interspersed partial sampling breaks that guarantee, and rate()
exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
4 samples but the bucket with le=2000 has a count of 7, from 3 samples. The
monotonicity is broken. It is exacerbated by rate() because under normal
operation, cumulative counting of buckets will cause the bucket counts to
diverge such that small differences from missing samples are not a problem.
rate() removes this divergence.)

bucketQuantile depends on that monotonicity to do a binary search for the
bucket with the qth percentile count, so breaking the monotonicity
guarantee causes bucketQuantile() to return undefined (nonsense) results.

As a somewhat hacky solution until the Prometheus project is ready to
accept the changes required to make scrapes atomic, we calculate the
"envelope" of the histogram buckets, essentially removing any decreases
in the count between successive buckets.

* Fix up comment docs for ensureMonotonic

* ensureMonotonic: Use switch statement

Use switch statement rather than if/else for better readability.
Process the most frequent cases first.
---
 promql/quantile.go              | 47 +++++++++++++++++++++++++++++++++
 promql/testdata/histograms.test | 18 +++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/promql/quantile.go b/promql/quantile.go
index a4a6f136b..4250ec388 100644
--- a/promql/quantile.go
+++ b/promql/quantile.go
@@ -85,6 +85,8 @@ func bucketQuantile(q model.SampleValue, buckets buckets) float64 {
 		return math.NaN()
 	}
 
+	ensureMonotonic(buckets)
+
 	rank := q * buckets[len(buckets)-1].count
 	b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
 
@@ -107,6 +109,51 @@ func bucketQuantile(q model.SampleValue, buckets buckets) float64 {
 	return bucketStart + (bucketEnd-bucketStart)*float64(rank/count)
 }
 
+// The assumption that bucket counts increase monotonically with increasing
+// upperBound may be violated during:
+//
+//   * Recording rule evaluation of histogram_quantile, especially when rate()
+//      has been applied to the underlying bucket timeseries.
+//   * Evaluation of histogram_quantile computed over federated bucket
+//      timeseries, especially when rate() has been applied.
+//
+// This is because scraped data is not made available to rule evaluation or
+// federation atomically, so some buckets are computed with data from the
+// most recent scrapes, but the other buckets are missing data from the most
+// recent scrape.
+//
+// Monotonicity is usually guaranteed because if a bucket with upper bound
+// u1 has count c1, then any bucket with a higher upper bound u > u1 must
+// have counted all c1 observations and perhaps more, so that c  >= c1.
+//
+// Randomly interspersed partial sampling breaks that guarantee, and rate()
+// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
+// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The
+// monotonicity is broken. It is exacerbated by rate() because under normal
+// operation, cumulative counting of buckets will cause the bucket counts to
+// diverge such that small differences from missing samples are not a problem.
+// rate() removes this divergence.)
+//
+// bucketQuantile depends on that monotonicity to do a binary search for the
+// bucket with the φ-quantile count, so breaking the monotonicity
+// guarantee causes bucketQuantile() to return undefined (nonsense) results.
+//
+// As a somewhat hacky solution until ingestion is atomic per scrape, we
+// calculate the "envelope" of the histogram buckets, essentially removing
+// any decreases in the count between successive buckets.
+
+func ensureMonotonic(buckets buckets) {
+	max := buckets[0].count
+	for i := range buckets[1:] {
+		switch {
+		case buckets[i].count > max:
+			max = buckets[i].count
+		case buckets[i].count < max:
+			buckets[i].count = max
+		}
+	}
+}
+
 // qauntile calculates the given quantile of a vector of samples.
 //
 // The vector will be sorted.
diff --git a/promql/testdata/histograms.test b/promql/testdata/histograms.test
index 2478c3484..b1e76ab63 100644
--- a/promql/testdata/histograms.test
+++ b/promql/testdata/histograms.test
@@ -139,3 +139,21 @@ eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket
 	{instance="ins2", job="job1"} 0.13333333333333333
 	{instance="ins1", job="job2"} 0.1
 	{instance="ins2", job="job2"} 0.11666666666666667
+
+# A histogram with nonmonotonic bucket counts. This may happen when recording
+# rule evaluation or federation races scrape ingestion, causing some buckets
+# counts to be derived from fewer samples. The wrong answer we want to avoid
+# is for histogram_quantile(0.99, nonmonotonic_bucket) to return ~1000 instead
+# of 1.
+
+load 5m
+    nonmonotonic_bucket{le="0.1"}   0+1x10
+    nonmonotonic_bucket{le="1"}     0+9x10
+    nonmonotonic_bucket{le="10"}    0+8x10
+    nonmonotonic_bucket{le="100"}   0+8x10
+    nonmonotonic_bucket{le="1000"}  0+9x10
+    nonmonotonic_bucket{le="+Inf"}  0+9x10
+
+# Nonmonotonic buckets
+eval instant at 50m histogram_quantile(0.99, nonmonotonic_bucket)
+    {} 0.989875