From b7ef0b45e8e0b338eacd70650d696a71058d5ab6 Mon Sep 17 00:00:00 2001
From: Brian Brazil <brian.brazil@robustperception.io>
Date: Sun, 7 Feb 2016 18:00:23 +0000
Subject: [PATCH 1/2] Break aggregation tests out. Add missing tests.

---
 promql/testdata/aggregators.test | 96 ++++++++++++++++++++++++++++++++
 promql/testdata/legacy.test      | 41 --------------
 promql/testdata/operators.test   | 22 --------
 3 files changed, 96 insertions(+), 63 deletions(-)
 create mode 100644 promql/testdata/aggregators.test
 delete mode 100644 promql/testdata/operators.test

diff --git a/promql/testdata/aggregators.test b/promql/testdata/aggregators.test
new file mode 100644
index 000000000..4e71cd08a
--- /dev/null
+++ b/promql/testdata/aggregators.test
@@ -0,0 +1,96 @@
+load 5m
+  http_requests{job="api-server", instance="0", group="production"} 0+10x10
+  http_requests{job="api-server", instance="1", group="production"} 0+20x10
+  http_requests{job="api-server", instance="0", group="canary"}   0+30x10
+  http_requests{job="api-server", instance="1", group="canary"}   0+40x10
+  http_requests{job="app-server", instance="0", group="production"} 0+50x10
+  http_requests{job="app-server", instance="1", group="production"} 0+60x10
+  http_requests{job="app-server", instance="0", group="canary"}   0+70x10
+  http_requests{job="app-server", instance="1", group="canary"}   0+80x10
+
+# Simple sum.
+eval instant at 50m SUM BY (group) (http_requests{job="api-server"})
+  {group="canary"} 700
+  {group="production"} 300
+
+# Test alternative "by"-clause order.
+eval instant at 50m sum by (group) (http_requests{job="api-server"})
+  {group="canary"} 700
+  {group="production"} 300
+
+# Simple average.
+eval instant at 50m avg by (group) (http_requests{job="api-server"})
+  {group="canary"} 350
+  {group="production"} 150
+
+# Simple count.
+eval instant at 50m count by (group) (http_requests{job="api-server"})
+  {group="canary"} 2
+  {group="production"} 2
+
+# Lower-cased aggregation operators should work too.
+eval instant at 50m sum(http_requests) by (job) + min(http_requests) by (job) + max(http_requests) by (job) + avg(http_requests) by (job)
+  {job="app-server"} 4550
+  {job="api-server"} 1750
+
+# Test alternative "by"-clause order with "keep_common".
+eval instant at 50m sum by (group) keep_common (http_requests{job="api-server"})
+  {group="canary", job="api-server"} 700
+  {group="production", job="api-server"} 300
+
+# Test both alternative "by"-clause orders in one expression.
+# Public health warning: stick to one form within an expression (or even
+# in an organization), or risk serious user confusion.
+eval instant at 50m sum(sum by (group) keep_common (http_requests{job="api-server"})) by (job)
+  {job="api-server"} 1000
+
+
+# Standard deviation and variance.
+eval instant at 50m stddev(http_requests)
+  {} 229.12878474779
+
+eval instant at 50m stddev by (instance)(http_requests)
+  {instance="0"} 223.60679774998
+  {instance="1"} 223.60679774998
+
+eval instant at 50m stdvar(http_requests)
+  {} 52500
+
+eval instant at 50m stdvar by (instance)(http_requests)
+  {instance="0"} 50000
+  {instance="1"} 50000
+
+
+# Regression test for missing separator byte in labelsToGroupingKey.
+clear
+load 5m
+  label_grouping_test{a="aa", b="bb"} 0+10x10
+  label_grouping_test{a="a", b="abb"} 0+20x10
+
+eval instant at 50m sum(label_grouping_test) by (a, b)
+  {a="a", b="abb"} 200
+  {a="aa", b="bb"} 100
+
+
+# Tests for min/max.
+clear
+load 5m
+  http_requests{job="api-server", instance="0", group="production"}	1
+  http_requests{job="api-server", instance="1", group="production"}	2
+  http_requests{job="api-server", instance="0", group="canary"}		NaN
+  http_requests{job="api-server", instance="1", group="canary"}		3
+  http_requests{job="api-server", instance="2", group="canary"}		4
+
+eval instant at 0m max(http_requests)
+  {} 4
+
+eval instant at 0m min(http_requests)
+  {} 1
+
+eval instant at 0m max by (group) (http_requests)
+  {group="production"} 2
+  {group="canary"} 4
+
+eval instant at 0m min by (group) (http_requests)
+  {group="production"} 1
+  {group="canary"} 3
diff --git a/promql/testdata/legacy.test b/promql/testdata/legacy.test
index d80293f0f..c4c811107 100644
--- a/promql/testdata/legacy.test
+++ b/promql/testdata/legacy.test
@@ -147,12 +147,6 @@ eval instant at 50m x{y="testvalue"}
 	x{y="testvalue"} 100 
 
 
-# Lower-cased aggregation operators should work too.
-eval instant at 50m sum(http_requests) by (job) + min(http_requests) by (job) + max(http_requests) by (job) + avg(http_requests) by (job)
-	{job="app-server"} 4550 
-	{job="api-server"} 1750 
-
-
 # Deltas should be adjusted for target interval vs. samples under target interval.
 eval instant at 50m delta(http_requests{group="canary", instance="1", job="app-server"}[18m])
 	{group="canary", instance="1", job="app-server"} 288
@@ -343,22 +337,6 @@ eval instant at 50m {job=~".+-server", job!~"api-.+"}
 	http_requests{group="production", instance="0", job="app-server"} 500 
 	http_requests{group="production", instance="1", job="app-server"} 600 
 
-# Test alternative "by"-clause order.
-eval instant at 50m sum by (group) (http_requests{job="api-server"})
-	{group="canary"} 700 
-	{group="production"} 300 
-
-# Test alternative "by"-clause order with "keep_common".
-eval instant at 50m sum by (group) keep_common (http_requests{job="api-server"})
-	{group="canary", job="api-server"} 700 
-	{group="production", job="api-server"} 300 
-
-# Test both alternative "by"-clause orders in one expression.
-# Public health warning: stick to one form within an expression (or even
-# in an organization), or risk serious user confusion.
-eval instant at 50m sum(sum by (group) keep_common (http_requests{job="api-server"})) by (job)
-	{job="api-server"} 1000 
-
 eval instant at 50m http_requests{group="canary"} and http_requests{instance="0"}
 	http_requests{group="canary", instance="0", job="api-server"} 300 
 	http_requests{group="canary", instance="0", job="app-server"} 700 
@@ -487,11 +465,6 @@ eval instant at 50m rate(http_requests{group="production",job="api-server"}[10m]
 	{group="production", instance="0", job="api-server"} 0.03333333333333333 
 	{group="production", instance="1", job="api-server"} 0.06666666666666667 
 
-# Regression test for missing separator byte in labelsToGroupingKey.
-eval instant at 50m sum(label_grouping_test) by (a, b)
-	{a="a", b="abb"} 200 
-	{a="aa", b="bb"} 100 
-
 eval instant at 50m http_requests{group="canary", instance="0", job="api-server"} / 0
 	{group="canary", instance="0", job="api-server"} +Inf 
 
@@ -560,20 +533,6 @@ eval instant at 50m log10(vector_matching_a - 20)
 	{l="x"} NaN 
 	{l="y"} -Inf 
 
-eval instant at 50m stddev(http_requests)
-	{} 229.12878474779 
-
-eval instant at 50m stddev by (instance)(http_requests)
-	{instance="0"} 223.60679774998 
-	{instance="1"} 223.60679774998 
-
-eval instant at 50m stdvar(http_requests)
-	{} 52500 
-
-eval instant at 50m stdvar by (instance)(http_requests)
-	{instance="0"} 50000 
-	{instance="1"} 50000
-
 
 # Matrix tests.
 
diff --git a/promql/testdata/operators.test b/promql/testdata/operators.test
deleted file mode 100644
index fbef5030c..000000000
--- a/promql/testdata/operators.test
+++ /dev/null
@@ -1,22 +0,0 @@
-# Tests for min/max.
-clear
-load 5m
-  http_requests{job="api-server", instance="0", group="production"}	1
-  http_requests{job="api-server", instance="1", group="production"}	2
-  http_requests{job="api-server", instance="0", group="canary"}		NaN
-  http_requests{job="api-server", instance="1", group="canary"}		3
-  http_requests{job="api-server", instance="2", group="canary"}		4
-
-eval instant at 0m max(http_requests)
-  {} 4
-
-eval instant at 0m min(http_requests)
-  {} 1
-
-eval instant at 0m max by (group) (http_requests)
-  {group="production"} 2
-  {group="canary"} 4
-
-eval instant at 0m min by (group) (http_requests)
-  {group="production"} 1
-  {group="canary"} 3

From 9d0112d7cfe8c1c36c862f7c8bf77111c2ae04ff Mon Sep 17 00:00:00 2001
From: Brian Brazil <brian.brazil@robustperception.io>
Date: Sun, 7 Feb 2016 18:03:16 +0000
Subject: [PATCH 2/2] Add without aggregator modifier.

This has the advantage that the user doesn't need
to list all labels they want to keep (as with "by")
but without having to worry about inconsistent labels
as when there's only one time series (as with "keeping_common").

Almost all aggregation should use this rather than the existing
two options as it's much less error prone and easier to maintain
due to not having to always add in "job" plus whatever other common
job-level labels you have like "region".
---
 promql/ast.go                    |  1 +
 promql/engine.go                 | 21 +++++++++++++++---
 promql/lex.go                    |  2 ++
 promql/lex_test.go               |  3 +++
 promql/parse.go                  | 17 +++++++++++---
 promql/parse_test.go             | 38 ++++++++++++++++++++++++++++++++
 promql/printer.go                |  7 +++++-
 promql/printer_test.go           |  3 +++
 promql/testdata/aggregators.test | 19 ++++++++++++++++
 9 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/promql/ast.go b/promql/ast.go
index 09c6fd44d..bc19d76f1 100644
--- a/promql/ast.go
+++ b/promql/ast.go
@@ -104,6 +104,7 @@ type AggregateExpr struct {
 	Op              itemType         // The used aggregation operation.
 	Expr            Expr             // The vector expression over which is aggregated.
 	Grouping        model.LabelNames // The labels by which to group the vector.
+	Without         bool             // Whether to drop the given labels rather than keep them.
 	KeepExtraLabels bool             // Whether to keep extra labels common among result elements.
 }
 
diff --git a/promql/engine.go b/promql/engine.go
index c1cceb048..f52bbc71a 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -619,7 +619,7 @@ func (ev *evaluator) eval(expr Expr) model.Value {
 	switch e := expr.(type) {
 	case *AggregateExpr:
 		vector := ev.evalVector(e.Expr)
-		return ev.aggregation(e.Op, e.Grouping, e.KeepExtraLabels, vector)
+		return ev.aggregation(e.Op, e.Grouping, e.Without, e.KeepExtraLabels, vector)
 
 	case *BinaryExpr:
 		lhs := ev.evalOneOf(e.LHS, model.ValScalar, model.ValVector)
@@ -1039,12 +1039,25 @@ type groupedAggregation struct {
 }
 
 // aggregation evaluates an aggregation operation on a vector.
-func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, keepExtra bool, vec vector) vector {
+func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, without bool, keepExtra bool, vec vector) vector {
 
 	result := map[uint64]*groupedAggregation{}
 
 	for _, sample := range vec {
-		groupingKey := model.SignatureForLabels(sample.Metric.Metric, grouping...)
+		withoutMetric := sample.Metric
+		if without {
+			for _, l := range grouping {
+				withoutMetric.Del(l)
+			}
+			withoutMetric.Del(model.MetricNameLabel)
+		}
+
+		var groupingKey uint64
+		if without {
+			groupingKey = uint64(withoutMetric.Metric.Fingerprint())
+		} else {
+			groupingKey = model.SignatureForLabels(sample.Metric.Metric, grouping...)
+		}
 
 		groupedResult, ok := result[groupingKey]
 		// Add a new group if it doesn't exist.
@@ -1053,6 +1066,8 @@ func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, keepExt
 			if keepExtra {
 				m = sample.Metric
 				m.Del(model.MetricNameLabel)
+			} else if without {
+				m = withoutMetric
 			} else {
 				m = metric.Metric{
 					Metric: model.Metric{},
diff --git a/promql/lex.go b/promql/lex.go
index 7817d3421..90f82f373 100644
--- a/promql/lex.go
+++ b/promql/lex.go
@@ -158,6 +158,7 @@ const (
 	itemKeepCommon
 	itemOffset
 	itemBy
+	itemWithout
 	itemOn
 	itemGroupLeft
 	itemGroupRight
@@ -192,6 +193,7 @@ var key = map[string]itemType{
 	"annotations":   itemAnnotations,
 	"offset":        itemOffset,
 	"by":            itemBy,
+	"without":       itemWithout,
 	"keeping_extra": itemKeepCommon,
 	"keep_common":   itemKeepCommon,
 	"on":            itemOn,
diff --git a/promql/lex_test.go b/promql/lex_test.go
index e3f0b4c4e..cb1642aec 100644
--- a/promql/lex_test.go
+++ b/promql/lex_test.go
@@ -261,6 +261,9 @@ var tests = []struct {
 	}, {
 		input:    "by",
 		expected: []item{{itemBy, 0, "by"}},
+	}, {
+		input:    "without",
+		expected: []item{{itemWithout, 0, "without"}},
 	}, {
 		input:    "on",
 		expected: []item{{itemOn, 0, "on"}},
diff --git a/promql/parse.go b/promql/parse.go
index a23986973..5912809bc 100644
--- a/promql/parse.go
+++ b/promql/parse.go
@@ -732,11 +732,14 @@ func (p *parser) aggrExpr() *AggregateExpr {
 		p.errorf("expected aggregation operator but got %s", agop)
 	}
 	var grouping model.LabelNames
-	var keepExtra bool
+	var keepExtra, without bool
 
 	modifiersFirst := false
 
-	if p.peek().typ == itemBy {
+	if t := p.peek().typ; t == itemBy || t == itemWithout {
+		if t == itemWithout {
+			without = true
+		}
 		p.next()
 		grouping = p.labels()
 		modifiersFirst = true
@@ -752,10 +755,13 @@ func (p *parser) aggrExpr() *AggregateExpr {
 	p.expect(itemRightParen, ctx)
 
 	if !modifiersFirst {
-		if p.peek().typ == itemBy {
+		if t := p.peek().typ; t == itemBy || t == itemWithout {
 			if len(grouping) > 0 {
 				p.errorf("aggregation must only contain one grouping clause")
 			}
+			if t == itemWithout {
+				without = true
+			}
 			p.next()
 			grouping = p.labels()
 		}
@@ -765,10 +771,15 @@ func (p *parser) aggrExpr() *AggregateExpr {
 		}
 	}
 
+	if keepExtra && without {
+		p.errorf("cannot use 'keep_common' with 'without'")
+	}
+
 	return &AggregateExpr{
 		Op:              agop.typ,
 		Expr:            e,
 		Grouping:        grouping,
+		Without:         without,
 		KeepExtraLabels: keepExtra,
 	}
 }
diff --git a/promql/parse_test.go b/promql/parse_test.go
index 21744f6d2..07e5ce0c0 100644
--- a/promql/parse_test.go
+++ b/promql/parse_test.go
@@ -869,6 +869,32 @@ var testExpr = []struct {
 			},
 			Grouping: model.LabelNames{"foo"},
 		},
+	}, {
+		input: "sum without (foo) (some_metric)",
+		expected: &AggregateExpr{
+			Op:      itemSum,
+			Without: true,
+			Expr: &VectorSelector{
+				Name: "some_metric",
+				LabelMatchers: metric.LabelMatchers{
+					{Type: metric.Equal, Name: model.MetricNameLabel, Value: "some_metric"},
+				},
+			},
+			Grouping: model.LabelNames{"foo"},
+		},
+	}, {
+		input: "sum (some_metric) without (foo)",
+		expected: &AggregateExpr{
+			Op:      itemSum,
+			Without: true,
+			Expr: &VectorSelector{
+				Name: "some_metric",
+				LabelMatchers: metric.LabelMatchers{
+					{Type: metric.Equal, Name: model.MetricNameLabel, Value: "some_metric"},
+				},
+			},
+			Grouping: model.LabelNames{"foo"},
+		},
 	}, {
 		input: "stddev(some_metric)",
 		expected: &AggregateExpr{
@@ -920,6 +946,18 @@ var testExpr = []struct {
 		input:  "MIN by(test) (some_metric) keep_common",
 		fail:   true,
 		errMsg: "could not parse remaining input \"keep_common\"...",
+	}, {
+		input:  `sum (some_metric) without (test) keep_common`,
+		fail:   true,
+		errMsg: "cannot use 'keep_common' with 'without'",
+	}, {
+		input:  `sum (some_metric) without (test) by (test)`,
+		fail:   true,
+		errMsg: "could not parse remaining input \"by (test)\"...",
+	}, {
+		input:  `sum without (test) (some_metric) by (test)`,
+		fail:   true,
+		errMsg: "could not parse remaining input \"by (test)\"...",
 	},
 	// Test function calls.
 	{
diff --git a/promql/printer.go b/promql/printer.go
index 7be0de04e..e35699d7d 100644
--- a/promql/printer.go
+++ b/promql/printer.go
@@ -137,7 +137,12 @@ func (es Expressions) String() (s string) {
 func (node *AggregateExpr) String() string {
 	aggrString := fmt.Sprintf("%s(%s)", node.Op, node.Expr)
 	if len(node.Grouping) > 0 {
-		format := "%s BY (%s)"
+		var format string
+		if node.Without {
+			format = "%s WITHOUT (%s)"
+		} else {
+			format = "%s BY (%s)"
+		}
 		if node.KeepExtraLabels {
 			format += " KEEP_COMMON"
 		}
diff --git a/promql/printer_test.go b/promql/printer_test.go
index 473305b61..890adc661 100644
--- a/promql/printer_test.go
+++ b/promql/printer_test.go
@@ -30,6 +30,9 @@ func TestExprString(t *testing.T) {
 		{
 			in: `sum(task:errors:rate10s{job="s"}) BY (code) KEEP_COMMON`,
 		},
+		{
+			in: `sum(task:errors:rate10s{job="s"}) WITHOUT (instance)`,
+		},
 		{
 			in: `up > BOOL 0`,
 		},
diff --git a/promql/testdata/aggregators.test b/promql/testdata/aggregators.test
index 4e71cd08a..de4dd1e83 100644
--- a/promql/testdata/aggregators.test
+++ b/promql/testdata/aggregators.test
@@ -8,6 +8,10 @@ load 5m
   http_requests{job="app-server", instance="0", group="canary"}   0+70x10
   http_requests{job="app-server", instance="1", group="canary"}   0+80x10
 
+load 5m
+  foo{job="api-server", instance="0", region="europe"} 0+90x10
+  foo{job="api-server"} 0+100x10
+
 # Simple sum.
 eval instant at 50m SUM BY (group) (http_requests{job="api-server"})
   {group="canary"} 700
@@ -28,6 +32,18 @@ eval instant at 50m count by (group) (http_requests{job="api-server"})
   {group="canary"} 2
   {group="production"} 2
 
+# Simple without.
+eval instant at 50m sum without (instance) (http_requests{job="api-server"})
+  {group="canary",job="api-server"} 700
+  {group="production",job="api-server"} 300
+
+# Without with mismatched and missing labels. Do not do this.
+eval instant at 50m sum without (instance) (http_requests{job="api-server"} or foo)
+  {group="canary",job="api-server"} 700
+  {group="production",job="api-server"} 300
+  {region="europe",job="api-server"} 900
+  {job="api-server"} 1000
+
 # Lower-cased aggregation operators should work too.
 eval instant at 50m sum(http_requests) by (job) + min(http_requests) by (job) + max(http_requests) by (job) + avg(http_requests) by (job)
   {job="app-server"} 4550
@@ -45,6 +61,7 @@ eval instant at 50m sum(sum by (group) keep_common (http_requests{job="api-serve
   {job="api-server"} 1000
 
 
+
 # Standard deviation and variance.
 eval instant at 50m stddev(http_requests)
   {} 229.12878474779
@@ -61,6 +78,7 @@ eval instant at 50m stdvar by (instance)(http_requests)
   {instance="1"} 50000
 
 
+
 # Regression test for missing separator byte in labelsToGroupingKey.
 clear
 load 5m
@@ -72,6 +90,7 @@ eval instant at 50m sum(label_grouping_test) by (a, b)
   {a="aa", b="bb"} 100
 
 
+
 # Tests for min/max.
 clear
 load 5m