From feaa93da7796e3e8dd3e35ef96808a253982f296 Mon Sep 17 00:00:00 2001
From: Jeanette Tan <jeanette.tan@grafana.com>
Date: Wed, 4 Oct 2023 18:53:55 +0800
Subject: [PATCH 1/2] Add warning when monotonicity is forced in the input to
 histogram_quantile

Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---
 promql/functions.go             |  6 +++++-
 promql/quantile.go              | 32 +++++++++++++++++++-------------
 util/annotations/annotations.go | 18 ++++++++++++++----
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/promql/functions.go b/promql/functions.go
index b1245f5a131..2a1a0b03008 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -1168,10 +1168,14 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 
 	for _, mb := range enh.signatureToMetricWithBuckets {
 		if len(mb.buckets) > 0 {
+			res, forcedMonotonicity := bucketQuantile(q, mb.buckets)
 			enh.Out = append(enh.Out, Sample{
 				Metric: mb.metric,
-				F:      bucketQuantile(q, mb.buckets),
+				F:      res,
 			})
+			if forcedMonotonicity {
+				annos.Add(annotations.NewHistogramQuantileForcedMonotonicityWarning(mb.metric.Get(labels.MetricName), args[1].PositionRange()))
+			}
 		}
 	}
 
diff --git a/promql/quantile.go b/promql/quantile.go
index 06a02a8d424..1ebc39aa28a 100644
--- a/promql/quantile.go
+++ b/promql/quantile.go
@@ -71,15 +71,17 @@ type metricWithBuckets struct {
 // If q<0, -Inf is returned.
 //
 // If q>1, +Inf is returned.
-func bucketQuantile(q float64, buckets buckets) float64 {
+//
+// We also return a bool to indicate if monotonicity needed to be forced.
+func bucketQuantile(q float64, buckets buckets) (float64, bool) {
 	if math.IsNaN(q) {
-		return math.NaN()
+		return math.NaN(), false
 	}
 	if q < 0 {
-		return math.Inf(-1)
+		return math.Inf(-1), false
 	}
 	if q > 1 {
-		return math.Inf(+1)
+		return math.Inf(+1), false
 	}
 	slices.SortFunc(buckets, func(a, b bucket) int {
 		// We don't expect the bucket boundary to be a NaN.
@@ -92,27 +94,27 @@ func bucketQuantile(q float64, buckets buckets) float64 {
 		return 0
 	})
 	if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
-		return math.NaN()
+		return math.NaN(), false
 	}
 
 	buckets = coalesceBuckets(buckets)
-	ensureMonotonic(buckets)
+	forcedMonotonic := ensureMonotonic(buckets)
 
 	if len(buckets) < 2 {
-		return math.NaN()
+		return math.NaN(), false
 	}
 	observations := buckets[len(buckets)-1].count
 	if observations == 0 {
-		return math.NaN()
+		return math.NaN(), false
 	}
 	rank := q * observations
 	b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
 
 	if b == len(buckets)-1 {
-		return buckets[len(buckets)-2].upperBound
+		return buckets[len(buckets)-2].upperBound, forcedMonotonic
 	}
 	if b == 0 && buckets[0].upperBound <= 0 {
-		return buckets[0].upperBound
+		return buckets[0].upperBound, forcedMonotonic
 	}
 	var (
 		bucketStart float64
@@ -124,7 +126,7 @@ func bucketQuantile(q float64, buckets buckets) float64 {
 		count -= buckets[b-1].count
 		rank -= buckets[b-1].count
 	}
-	return bucketStart + (bucketEnd-bucketStart)*(rank/count)
+	return bucketStart + (bucketEnd-bucketStart)*(rank/count), forcedMonotonic
 }
 
 // histogramQuantile calculates the quantile 'q' based on the given histogram.
@@ -370,9 +372,11 @@ func coalesceBuckets(buckets buckets) buckets {
 //
 // As a somewhat hacky solution until ingestion is atomic per scrape, we
 // calculate the "envelope" of the histogram buckets, essentially removing
-// any decreases in the count between successive buckets.
+// any decreases in the count between successive buckets. We return a bool
+// to indicate if this monotonicity was forced or not.
 
-func ensureMonotonic(buckets buckets) {
+func ensureMonotonic(buckets buckets) bool {
+	forced := false
 	max := buckets[0].count
 	for i := 1; i < len(buckets); i++ {
 		switch {
@@ -380,8 +384,10 @@ func ensureMonotonic(buckets buckets) {
 			max = buckets[i].count
 		case buckets[i].count < max:
 			buckets[i].count = max
+			forced = true
 		}
 	}
+	return forced
 }
 
 // quantile calculates the given quantile of a vector of samples.
diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go
index e8b59dc7f6c..bff0100d910 100644
--- a/util/annotations/annotations.go
+++ b/util/annotations/annotations.go
@@ -100,10 +100,11 @@ var (
 	PromQLInfo    = errors.New("PromQL info")
 	PromQLWarning = errors.New("PromQL warning")
 
-	InvalidQuantileWarning              = fmt.Errorf("%w: quantile value should be between 0 and 1", PromQLWarning)
-	BadBucketLabelWarning               = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel)
-	MixedFloatsHistogramsWarning        = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning)
-	MixedClassicNativeHistogramsWarning = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning)
+	InvalidQuantileWarning                     = fmt.Errorf("%w: quantile value should be between 0 and 1", PromQLWarning)
+	BadBucketLabelWarning                      = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel)
+	MixedFloatsHistogramsWarning               = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning)
+	MixedClassicNativeHistogramsWarning        = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning)
+	HistogramQuantileForcedMonotonicityWarning = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (and may give inaccurate results) for metric name", PromQLWarning)
 
 	PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count:", PromQLInfo)
 )
@@ -155,6 +156,15 @@ func NewMixedClassicNativeHistogramsWarning(metricName string, pos posrange.Posi
 	}
 }
 
+// NewHistogramQuantileForcedMonotonicityWarning is used when the input (classic histograms) to
+// histogram_quantile needs to be forced to be monotonic.
+func NewHistogramQuantileForcedMonotonicityWarning(metricName string, pos posrange.PositionRange) annoErr {
+	return annoErr{
+		PositionRange: pos,
+		Err:           fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityWarning, metricName),
+	}
+}
+
 // NewPossibleNonCounterInfo is used when a counter metric does not have the suffixes
 // _total, _sum or _count.
 func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) annoErr {

From 0cbf0c1c68b7d5590d770db13707e62fac96277c Mon Sep 17 00:00:00 2001
From: Jeanette Tan <jeanette.tan@grafana.com>
Date: Fri, 6 Oct 2023 19:09:32 +0800
Subject: [PATCH 2/2] Revise according to code review

Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
---
 promql/functions.go             |  2 +-
 promql/quantile.go              | 31 ++++++++-----------------------
 util/annotations/annotations.go | 28 ++++++++++++++--------------
 3 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/promql/functions.go b/promql/functions.go
index 2a1a0b03008..8eb0cad8adb 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -1174,7 +1174,7 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 				F:      res,
 			})
 			if forcedMonotonicity {
-				annos.Add(annotations.NewHistogramQuantileForcedMonotonicityWarning(mb.metric.Get(labels.MetricName), args[1].PositionRange()))
+				annos.Add(annotations.NewHistogramQuantileForcedMonotonicityInfo(mb.metric.Get(labels.MetricName), args[1].PositionRange()))
 			}
 		}
 	}
diff --git a/promql/quantile.go b/promql/quantile.go
index 1ebc39aa28a..f2894486826 100644
--- a/promql/quantile.go
+++ b/promql/quantile.go
@@ -344,37 +344,22 @@ func coalesceBuckets(buckets buckets) buckets {
 // The assumption that bucket counts increase monotonically with increasing
 // upperBound may be violated during:
 //
-//   * Recording rule evaluation of histogram_quantile, especially when rate()
-//      has been applied to the underlying bucket timeseries.
-//   * Evaluation of histogram_quantile computed over federated bucket
-//      timeseries, especially when rate() has been applied.
-//
-// This is because scraped data is not made available to rule evaluation or
-// federation atomically, so some buckets are computed with data from the
-// most recent scrapes, but the other buckets are missing data from the most
-// recent scrape.
+//   - Circumstances where data is already inconsistent at the target's side.
+//   - Ingestion via the remote write receiver that Prometheus implements.
+//   - Optimisation of query execution where precision is sacrificed for other
+//     benefits, not by Prometheus but by systems built on top of it.
 //
 // Monotonicity is usually guaranteed because if a bucket with upper bound
 // u1 has count c1, then any bucket with a higher upper bound u > u1 must
-// have counted all c1 observations and perhaps more, so that c  >= c1.
-//
-// Randomly interspersed partial sampling breaks that guarantee, and rate()
-// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
-// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The
-// monotonicity is broken. It is exacerbated by rate() because under normal
-// operation, cumulative counting of buckets will cause the bucket counts to
-// diverge such that small differences from missing samples are not a problem.
-// rate() removes this divergence.)
+// have counted all c1 observations and perhaps more, so that c >= c1.
 //
 // bucketQuantile depends on that monotonicity to do a binary search for the
 // bucket with the φ-quantile count, so breaking the monotonicity
 // guarantee causes bucketQuantile() to return undefined (nonsense) results.
 //
-// As a somewhat hacky solution until ingestion is atomic per scrape, we
-// calculate the "envelope" of the histogram buckets, essentially removing
-// any decreases in the count between successive buckets. We return a bool
-// to indicate if this monotonicity was forced or not.
-
+// As a somewhat hacky solution, we calculate the "envelope" of the histogram
+// buckets, essentially removing any decreases in the count between successive
+// buckets. We return a bool to indicate if this monotonicity was forced or not.
 func ensureMonotonic(buckets buckets) bool {
 	forced := false
 	max := buckets[0].count
diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go
index bff0100d910..082909411cc 100644
--- a/util/annotations/annotations.go
+++ b/util/annotations/annotations.go
@@ -100,13 +100,13 @@ var (
 	PromQLInfo    = errors.New("PromQL info")
 	PromQLWarning = errors.New("PromQL warning")
 
-	InvalidQuantileWarning                     = fmt.Errorf("%w: quantile value should be between 0 and 1", PromQLWarning)
-	BadBucketLabelWarning                      = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel)
-	MixedFloatsHistogramsWarning               = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning)
-	MixedClassicNativeHistogramsWarning        = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning)
-	HistogramQuantileForcedMonotonicityWarning = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (and may give inaccurate results) for metric name", PromQLWarning)
+	InvalidQuantileWarning              = fmt.Errorf("%w: quantile value should be between 0 and 1", PromQLWarning)
+	BadBucketLabelWarning               = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel)
+	MixedFloatsHistogramsWarning        = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning)
+	MixedClassicNativeHistogramsWarning = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning)
 
-	PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count:", PromQLInfo)
+	PossibleNonCounterInfo                  = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count:", PromQLInfo)
+	HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (and may give inaccurate results) for metric name", PromQLInfo)
 )
 
 type annoErr struct {
@@ -156,20 +156,20 @@ func NewMixedClassicNativeHistogramsWarning(metricName string, pos posrange.Posi
 	}
 }
 
-// NewHistogramQuantileForcedMonotonicityWarning is used when the input (classic histograms) to
-// histogram_quantile needs to be forced to be monotonic.
-func NewHistogramQuantileForcedMonotonicityWarning(metricName string, pos posrange.PositionRange) annoErr {
+// NewPossibleNonCounterInfo is used when a counter metric does not have the suffixes
+// _total, _sum or _count.
+func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) annoErr {
 	return annoErr{
 		PositionRange: pos,
-		Err:           fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityWarning, metricName),
+		Err:           fmt.Errorf("%w %q", PossibleNonCounterInfo, metricName),
 	}
 }
 
-// NewPossibleNonCounterInfo is used when a counter metric does not have the suffixes
-// _total, _sum or _count.
-func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) annoErr {
+// NewHistogramQuantileForcedMonotonicityInfo is used when the input (classic histograms) to
+// histogram_quantile needs to be forced to be monotonic.
+func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos posrange.PositionRange) annoErr {
 	return annoErr{
 		PositionRange: pos,
-		Err:           fmt.Errorf("%w %q", PossibleNonCounterInfo, metricName),
+		Err:           fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityInfo, metricName),
 	}
 }