From e45f9787cd40b5e6281b7bb41ce7eff897313c74 Mon Sep 17 00:00:00 2001
From: Lukasz Mierzwa <lukasz@cloudflare.com>
Date: Wed, 2 Oct 2024 15:35:01 +0100
Subject: [PATCH] Warn about topk, bottomk and other sampling functions

Fixes #820.
---
 docs/changelog.md                         |  12 +
 internal/checks/alerts_template.go        |   1 -
 internal/checks/base.go                   |   1 -
 internal/checks/promql_aggregation.go     |   4 -
 internal/checks/promql_fragile.go         |  48 +++-
 internal/checks/promql_fragile_test.go    |  42 ++-
 internal/checks/promql_range_query.go     |   1 -
 internal/checks/promql_rate.go            |   5 -
 internal/checks/promql_vector_matching.go |   9 -
 internal/parser/utils/source.go           | 214 +++++++++++++++
 internal/parser/utils/source_test.go      | 300 ++++++++++++++++++++++
 11 files changed, 606 insertions(+), 31 deletions(-)
 create mode 100644 internal/parser/utils/source.go
 create mode 100644 internal/parser/utils/source_test.go

diff --git a/docs/changelog.md b/docs/changelog.md
index 216d4f1e..dbf03073 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,17 @@
 # Changelog
 
+## v0.68.0
+
+### Added
+
+- [promql/fragile](checks/promql/fragile.md) will now warn when alerting rules are using
+  one of the aggregation operation that can return different series on every evaluation,
+  which can cause alert floppiness - [#820](https://github.com/cloudflare/pint/issues/820).
+
+### Fixed
+
+- Don't try to create GitLab comments on unmodified lines - [#1147](https://github.com/cloudflare/pint/pull/1147).
+
 ## v0.67.0
 
 ### Added
diff --git a/internal/checks/alerts_template.go b/internal/checks/alerts_template.go
index 6de491c1..f651994d 100644
--- a/internal/checks/alerts_template.go
+++ b/internal/checks/alerts_template.go
@@ -344,7 +344,6 @@ func (c TemplateCheck) Check(ctx context.Context, _ discovery.Path, rule parser.
 func (c TemplateCheck) checkHumanizeIsNeeded(node *parser.PromQLNode) (problems []exprProblem) {
 	for _, call := range utils.HasOuterRate(node) {
 		problems = append(problems, exprProblem{
-			expr:     call.String(),
 			text:     fmt.Sprintf("Using the value of `%s` inside this annotation might be hard to read, consider using one of humanize template functions to make it more human friendly.", call),
 			details:  "[Click here](https://prometheus.io/docs/prometheus/latest/configuration/template_reference/) for a full list of all available template functions.",
 			severity: Information,
diff --git a/internal/checks/base.go b/internal/checks/base.go
index 070f3747..8ee88042 100644
--- a/internal/checks/base.go
+++ b/internal/checks/base.go
@@ -131,7 +131,6 @@ type RuleChecker interface {
 }
 
 type exprProblem struct {
-	expr     string
 	text     string
 	details  string
 	severity Severity
diff --git a/internal/checks/promql_aggregation.go b/internal/checks/promql_aggregation.go
index 4aff2f57..421a2230 100644
--- a/internal/checks/promql_aggregation.go
+++ b/internal/checks/promql_aggregation.go
@@ -131,14 +131,12 @@ func (c AggregationCheck) checkNode(node *parser.PromQLNode) (problems []exprPro
 		if n.Without {
 			if found && c.keep {
 				problems = append(problems, exprProblem{
-					expr: node.Expr.String(),
 					text: fmt.Sprintf("`%s` label is required and should be preserved when aggregating `%s` rules, remove %s from `without()`.", c.label, c.nameRegex.anchored, c.label),
 				})
 			}
 
 			if !found && !c.keep {
 				problems = append(problems, exprProblem{
-					expr: node.Expr.String(),
 					text: fmt.Sprintf("`%s` label should be removed when aggregating `%s` rules, use `without(%s, ...)`.", c.label, c.nameRegex.anchored, c.label),
 				})
 			}
@@ -151,14 +149,12 @@ func (c AggregationCheck) checkNode(node *parser.PromQLNode) (problems []exprPro
 		} else {
 			if found && !c.keep {
 				problems = append(problems, exprProblem{
-					expr: node.Expr.String(),
 					text: fmt.Sprintf("`%s` label should be removed when aggregating `%s` rules, remove %s from `by()`.", c.label, c.nameRegex.anchored, c.label),
 				})
 			}
 
 			if !found && c.keep {
 				problems = append(problems, exprProblem{
-					expr: node.Expr.String(),
 					text: fmt.Sprintf("`%s` label is required and should be preserved when aggregating `%s` rules, use `by(%s, ...)`.", c.label, c.nameRegex.anchored, c.label),
 				})
 			}
diff --git a/internal/checks/promql_fragile.go b/internal/checks/promql_fragile.go
index 07a53282..29b4b8fb 100644
--- a/internal/checks/promql_fragile.go
+++ b/internal/checks/promql_fragile.go
@@ -2,6 +2,8 @@ package checks
 
 import (
 	"context"
+	"fmt"
+	"slices"
 
 	promParser "github.com/prometheus/prometheus/promql/parser"
 
@@ -12,6 +14,11 @@ import (
 
 const (
 	FragileCheckName = "promql/fragile"
+
+	FragileCheckSamplingDetails = `Alerts are identied by labels, two alerts with identical sets of labels are identical.
+If two alerts have the same name but the rest of labels isn't 100% identical then they are two different alerts.
+If the same alert query returns results that over time have different labels on them then previous alert instances will resolve and new alerts will be fired.
+This can happen when using one of the aggregation operation like topk or bottomk as they can return a different time series each time they are evaluated.`
 )
 
 func NewFragileCheck() FragileCheck {
@@ -46,18 +53,32 @@ func (c FragileCheck) Check(_ context.Context, _ discovery.Path, rule parser.Rul
 		return nil
 	}
 
-	for _, problem := range c.checkNode(expr.Query) {
+	for _, problem := range c.checkAggregation(expr.Query) {
 		problems = append(problems, Problem{
 			Lines:    expr.Value.Lines,
 			Reporter: c.Reporter(),
 			Text:     problem.text,
-			Severity: Warning,
+			Details:  problem.details,
+			Severity: problem.severity,
 		})
 	}
+
+	if rule.AlertingRule != nil {
+		for _, problem := range c.checkSampling(expr.Query) {
+			problems = append(problems, Problem{
+				Lines:    expr.Value.Lines,
+				Reporter: c.Reporter(),
+				Text:     problem.text,
+				Details:  problem.details,
+				Severity: problem.severity,
+			})
+		}
+	}
+
 	return problems
 }
 
-func (c FragileCheck) checkNode(node *parser.PromQLNode) (problems []exprProblem) {
+func (c FragileCheck) checkAggregation(node *parser.PromQLNode) (problems []exprProblem) {
 	if n := utils.HasOuterBinaryExpr(node); n != nil && n.Op != promParser.LOR && n.Op != promParser.LUNLESS {
 		if n.VectorMatching != nil && n.VectorMatching.On {
 			goto NEXT
@@ -89,7 +110,6 @@ func (c FragileCheck) checkNode(node *parser.PromQLNode) (problems []exprProblem
 		}
 		if len(series) >= 2 {
 			p := exprProblem{
-				expr:     node.Expr.String(),
 				text:     "Aggregation using `without()` can be fragile when used inside binary expression because both sides must have identical sets of labels to produce any results, adding or removing labels to metrics used here can easily break the query, consider aggregating using `by()` to ensure consistent labels.",
 				severity: Warning,
 			}
@@ -100,8 +120,26 @@ func (c FragileCheck) checkNode(node *parser.PromQLNode) (problems []exprProblem
 
 NEXT:
 	for _, child := range node.Children {
-		problems = append(problems, c.checkNode(child)...)
+		problems = append(problems, c.checkAggregation(child)...)
 	}
 
 	return problems
 }
+
+func (c FragileCheck) checkSampling(node *parser.PromQLNode) (problems []exprProblem) {
+	src := utils.LabelsSource(node)
+	if src.Type != utils.AggregateSource {
+		return nil
+	}
+	if src.EmptyLabels {
+		return nil
+	}
+	if !slices.Contains([]string{"topk", "bottomk", "limit", "limit_ratio"}, src.Operation) {
+		return nil
+	}
+	return append(problems, exprProblem{
+		text:     fmt.Sprintf("Using `%s` to select time series might return different set of time series on every query, which would cause flapping alerts.", src.Operation),
+		details:  FragileCheckSamplingDetails,
+		severity: Warning,
+	})
+}
diff --git a/internal/checks/promql_fragile_test.go b/internal/checks/promql_fragile_test.go
index 10d5a208..549d3ed7 100644
--- a/internal/checks/promql_fragile_test.go
+++ b/internal/checks/promql_fragile_test.go
@@ -1,6 +1,7 @@
 package checks_test
 
 import (
+	"fmt"
 	"testing"
 
 	"github.com/cloudflare/pint/internal/checks"
@@ -12,6 +13,10 @@ func newFragileCheck(_ *promapi.FailoverGroup) checks.RuleChecker {
 	return checks.NewFragileCheck()
 }
 
+func fragileSampleFunc(s string) string {
+	return fmt.Sprintf("Using `%s` to select time series might return different set of time series on every query, which would cause flapping alerts.", s)
+}
+
 func TestFragileCheck(t *testing.T) {
 	text := "Aggregation using `without()` can be fragile when used inside binary expression because both sides must have identical sets of labels to produce any results, adding or removing labels to metrics used here can easily break the query, consider aggregating using `by()` to ensure consistent labels."
 
@@ -63,7 +68,7 @@ func TestFragileCheck(t *testing.T) {
 							First: 2,
 							Last:  2,
 						},
-						Reporter: "promql/fragile",
+						Reporter: checks.FragileCheckName,
 						Text:     text,
 						Severity: checks.Warning,
 					},
@@ -82,7 +87,7 @@ func TestFragileCheck(t *testing.T) {
 							First: 2,
 							Last:  2,
 						},
-						Reporter: "promql/fragile",
+						Reporter: checks.FragileCheckName,
 						Text:     text,
 						Severity: checks.Warning,
 					},
@@ -101,7 +106,7 @@ func TestFragileCheck(t *testing.T) {
 							First: 2,
 							Last:  2,
 						},
-						Reporter: "promql/fragile",
+						Reporter: checks.FragileCheckName,
 						Text:     text,
 						Severity: checks.Warning,
 					},
@@ -120,7 +125,7 @@ func TestFragileCheck(t *testing.T) {
 							First: 2,
 							Last:  2,
 						},
-						Reporter: "promql/fragile",
+						Reporter: checks.FragileCheckName,
 						Text:     text,
 						Severity: checks.Warning,
 					},
@@ -139,7 +144,7 @@ func TestFragileCheck(t *testing.T) {
 							First: 2,
 							Last:  2,
 						},
-						Reporter: "promql/fragile",
+						Reporter: checks.FragileCheckName,
 						Text:     text,
 						Severity: checks.Warning,
 					},
@@ -216,6 +221,33 @@ func TestFragileCheck(t *testing.T) {
 			prometheus: noProm,
 			problems:   noProblems,
 		},
+		{
+			description: "warns about topk() as source of series",
+			content:     "- alert: foo\n  expr: topk(10, foo)\n",
+			checker:     newFragileCheck,
+			prometheus:  noProm,
+			problems: func(_ string) []checks.Problem {
+				return []checks.Problem{
+					{
+						Lines: parser.LineRange{
+							First: 2,
+							Last:  2,
+						},
+						Reporter: checks.FragileCheckName,
+						Text:     fragileSampleFunc("topk"),
+						Details:  checks.FragileCheckSamplingDetails,
+						Severity: checks.Warning,
+					},
+				}
+			},
+		},
+		{
+			description: "ignores aggregated topk()",
+			content:     "- alert: foo\n  expr: min(topk(10, foo)) > 5000\n",
+			checker:     newFragileCheck,
+			prometheus:  noProm,
+			problems:    noProblems,
+		},
 	}
 
 	runTests(t, testCases)
diff --git a/internal/checks/promql_range_query.go b/internal/checks/promql_range_query.go
index 9db4e2dd..0ef939ed 100644
--- a/internal/checks/promql_range_query.go
+++ b/internal/checks/promql_range_query.go
@@ -122,7 +122,6 @@ func (c RangeQueryCheck) checkNode(ctx context.Context, node *parser.PromQLNode,
 	if n, ok := node.Expr.(*promParser.MatrixSelector); ok {
 		if n.Range > retention {
 			problems = append(problems, exprProblem{
-				expr: node.Expr.String(),
 				text: fmt.Sprintf("`%s` selector is trying to query Prometheus for %s worth of metrics, but %s",
 					node.Expr, model.Duration(n.Range), reason),
 				severity: Warning,
diff --git a/internal/checks/promql_rate.go b/internal/checks/promql_rate.go
index 61467a46..2a1b745d 100644
--- a/internal/checks/promql_rate.go
+++ b/internal/checks/promql_rate.go
@@ -101,7 +101,6 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
 			}
 			if m.Range < cfg.Config.Global.ScrapeInterval*time.Duration(c.minIntervals) {
 				p := exprProblem{
-					expr: node.Expr.String(),
 					text: fmt.Sprintf("Duration for `%s()` must be at least %d x scrape_interval, %s is using `%s` scrape_interval.",
 						n.Func.Name, c.minIntervals, promText(c.prom.Name(), cfg.URI), output.HumanizeDuration(cfg.Config.Global.ScrapeInterval)),
 					details:  RateCheckDetails,
@@ -121,7 +120,6 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
 				if err != nil {
 					text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
 					problems = append(problems, exprProblem{
-						expr:     s.Name,
 						text:     text,
 						severity: severity,
 					})
@@ -130,7 +128,6 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
 				for _, m := range metadata.Metadata {
 					if m.Type != v1.MetricTypeCounter && m.Type != v1.MetricTypeUnknown {
 						problems = append(problems, exprProblem{
-							expr: s.Name,
 							text: fmt.Sprintf("`%s()` should only be used with counters but `%s` is a %s according to metrics metadata from %s.",
 								n.Func.Name, s.Name, m.Type, promText(c.prom.Name(), metadata.URI)),
 							details:  RateCheckDetails,
@@ -153,7 +150,6 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
 								if err != nil {
 									text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
 									problems = append(problems, exprProblem{
-										expr:     sv.Name,
 										text:     text,
 										severity: severity,
 									})
@@ -162,7 +158,6 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
 								for _, m := range metadata.Metadata {
 									if m.Type == v1.MetricTypeCounter {
 										problems = append(problems, exprProblem{
-											expr: node.Expr.String(),
 											text: fmt.Sprintf("`rate(sum(counter))` chain detected, `%s` is called here on results of `%s`, calling `rate()` on `sum()` results will return bogus results, always `sum(rate(counter))`, never `rate(sum(counter))`.",
 												node.Expr, sm),
 											severity: Bug,
diff --git a/internal/checks/promql_vector_matching.go b/internal/checks/promql_vector_matching.go
index 6160aabe..3d693474 100644
--- a/internal/checks/promql_vector_matching.go
+++ b/internal/checks/promql_vector_matching.go
@@ -82,7 +82,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 		if err != nil {
 			text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
 			problems = append(problems, exprProblem{
-				expr:     node.Expr.String(),
 				text:     text,
 				severity: severity,
 			})
@@ -125,7 +124,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 			for k, lv := range lhsMatchers {
 				if rv, ok := rhsMatchers[k]; ok && rv != lv {
 					problems = append(problems, exprProblem{
-						expr:     node.Expr.String(),
 						text:     fmt.Sprintf("The left hand side uses `{%s=%q}` while the right hand side uses `{%s=%q}`, this will never match.", k, lv, k, rv),
 						details:  VectorMatchingCheckDetails,
 						severity: Bug,
@@ -139,7 +137,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 		if err != nil {
 			text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
 			problems = append(problems, exprProblem{
-				expr:     node.Expr.String(),
 				text:     text,
 				severity: severity,
 			})
@@ -153,7 +150,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 		if err != nil {
 			text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
 			problems = append(problems, exprProblem{
-				expr:     node.Expr.String(),
 				text:     text,
 				severity: severity,
 			})
@@ -167,7 +163,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 			for _, name := range n.VectorMatching.MatchingLabels {
 				if !leftLabels.hasName(name) && rightLabels.hasName(name) {
 					problems = append(problems, exprProblem{
-						expr: node.Expr.String(),
 						text: fmt.Sprintf(
 							"Using `on(%s)` won't produce any results on %s because results from the left hand side of the query don't have this label: `%s`.",
 							name, promText(c.prom.Name(), qr.URI), node.Expr.(*promParser.BinaryExpr).LHS),
@@ -177,7 +172,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 				}
 				if leftLabels.hasName(name) && !rightLabels.hasName(name) {
 					problems = append(problems, exprProblem{
-						expr: node.Expr.String(),
 						text: fmt.Sprintf("Using `on(%s)` won't produce any results on %s because results from the right hand side of the query don't have this label: `%s`.",
 							name, promText(c.prom.Name(), qr.URI), node.Expr.(*promParser.BinaryExpr).RHS),
 						details:  VectorMatchingCheckDetails,
@@ -186,7 +180,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 				}
 				if !leftLabels.hasName(name) && !rightLabels.hasName(name) {
 					problems = append(problems, exprProblem{
-						expr: node.Expr.String(),
 						text: fmt.Sprintf("Using `on(%s)` won't produce any results on %s because results from both sides of the query don't have this label: `%s`.",
 							name, promText(c.prom.Name(), qr.URI), node.Expr),
 						details:  VectorMatchingCheckDetails,
@@ -198,7 +191,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 			l, r := leftLabels.getFirstNonOverlap(rightLabels)
 			if len(n.VectorMatching.MatchingLabels) == 0 {
 				problems = append(problems, exprProblem{
-					expr: node.Expr.String(),
 					text: fmt.Sprintf("This query will never return anything on %s because results from the right and the left hand side have different labels: `%s` != `%s`. Failing query: `%s`.",
 						promText(c.prom.Name(), qr.URI), l, r, node.Expr),
 					details:  VectorMatchingCheckDetails,
@@ -206,7 +198,6 @@ func (c VectorMatchingCheck) checkNode(ctx context.Context, node *parser.PromQLN
 				})
 			} else {
 				problems = append(problems, exprProblem{
-					expr: node.Expr.String(),
 					text: fmt.Sprintf("Using `ignoring(%s)` won't produce any results on %s because results from both sides of the query have different labels: `%s` != `%s`. Failing query: `%s`.",
 						strings.Join(n.VectorMatching.MatchingLabels, ","), promText(c.prom.Name(), qr.URI), l, r, node.Expr),
 					details:  VectorMatchingCheckDetails,
diff --git a/internal/parser/utils/source.go b/internal/parser/utils/source.go
new file mode 100644
index 00000000..555cdea3
--- /dev/null
+++ b/internal/parser/utils/source.go
@@ -0,0 +1,214 @@
+package utils
+
+import (
+	"slices"
+
+	"github.com/cloudflare/pint/internal/parser"
+
+	"github.com/prometheus/prometheus/model/labels"
+	promParser "github.com/prometheus/prometheus/promql/parser"
+)
+
+type SourceType int
+
+const (
+	UnknownSource SourceType = iota
+	NumberSource
+	StringSource
+	SelectorSource
+	FuncSource
+	AggregateSource
+)
+
+type Source struct {
+	Selector         *promParser.VectorSelector
+	Call             *promParser.Call
+	Operation        string
+	IncludedLabels   []string // Labels that are included by filters, they will be present if exist on source series.
+	ExcludedLabels   []string // Labels guaranteed to be excluded from the results (without).
+	OnlyLabels       []string // Labels that are the only ones that can be present on the results (by).
+	GuaranteedLabels []string // Labels guaranteed to be present on the results (matchers).
+	Type             SourceType
+	EmptyLabels      bool // No labels at all will be present on the results.
+}
+
+func LabelsSource(node *parser.PromQLNode) Source {
+	return walkNode(node.Expr)
+}
+
+func removeFromSlice(sl []string, s string) []string {
+	idx := slices.Index(sl, s)
+	if idx >= 0 {
+		if len(sl) == 1 {
+			return nil
+		}
+		return slices.Delete(sl, idx, idx+1)
+	}
+	return sl
+}
+
+func parseAggregation(n *promParser.AggregateExpr) (s Source) {
+	s = walkNode(n.Expr)
+	if n.Without {
+		s.ExcludedLabels = append(s.ExcludedLabels, n.Grouping...)
+		for _, name := range n.Grouping {
+			s.GuaranteedLabels = removeFromSlice(s.GuaranteedLabels, name)
+			s.OnlyLabels = removeFromSlice(s.OnlyLabels, name)
+		}
+	} else {
+		if len(n.Grouping) == 0 {
+			s.EmptyLabels = true
+			s.GuaranteedLabels = nil
+			s.OnlyLabels = nil
+		} else {
+			s.OnlyLabels = append(s.OnlyLabels, n.Grouping...)
+			for _, name := range n.Grouping {
+				s.ExcludedLabels = removeFromSlice(s.ExcludedLabels, name)
+			}
+		}
+	}
+	s.Type = AggregateSource
+	s.Call = nil
+	return s
+}
+
+func walkNode(node promParser.Node) (s Source) {
+	switch n := node.(type) {
+	case *promParser.AggregateExpr:
+		switch n.Op {
+		case promParser.SUM:
+			s = parseAggregation(n)
+			s.Operation = "sum"
+		case promParser.MIN:
+			s = parseAggregation(n)
+			s.Operation = "min"
+		case promParser.MAX:
+			s = parseAggregation(n)
+			s.Operation = "max"
+		case promParser.AVG:
+			s = parseAggregation(n)
+			s.Operation = "avg"
+		case promParser.GROUP:
+			s = parseAggregation(n)
+			s.Operation = "group"
+		case promParser.STDDEV:
+			s = parseAggregation(n)
+			s.Operation = "stddev"
+		case promParser.STDVAR:
+			s = parseAggregation(n)
+			s.Operation = "stdvar"
+		case promParser.COUNT:
+			s = parseAggregation(n)
+			s.Operation = "count"
+		case promParser.COUNT_VALUES:
+			s = parseAggregation(n)
+			s.Operation = "count_values"
+			// Param is the label to store the count value in.
+			s.GuaranteedLabels = append(s.GuaranteedLabels, n.Param.(*promParser.StringLiteral).Val)
+			if s.EmptyLabels || len(s.OnlyLabels) > 0 {
+				s.OnlyLabels = append(s.OnlyLabels, n.Param.(*promParser.StringLiteral).Val)
+				s.EmptyLabels = false
+			}
+			if s.EmptyLabels {
+				s.EmptyLabels = false
+			}
+		case promParser.QUANTILE:
+			s = parseAggregation(n)
+			s.Operation = "quantile"
+		case promParser.TOPK:
+			s = walkNode(n.Expr)
+			s.Type = AggregateSource
+			s.Operation = "topk"
+		case promParser.BOTTOMK:
+			s = walkNode(n.Expr)
+			s.Type = AggregateSource
+			s.Operation = "bottomk"
+		case promParser.LIMITK:
+			s = walkNode(n.Expr)
+			s.Type = AggregateSource
+			s.Operation = "limitk"
+		case promParser.LIMIT_RATIO:
+			s = walkNode(n.Expr)
+			s.Type = AggregateSource
+			s.Operation = "limit_ratio"
+		}
+
+	case *promParser.BinaryExpr:
+		switch {
+		case n.VectorMatching == nil:
+			s = walkNode(n.LHS)
+		case n.VectorMatching.Card == promParser.CardOneToOne:
+			s = walkNode(n.LHS)
+		case n.VectorMatching.Card == promParser.CardOneToMany:
+			s = walkNode(n.RHS)
+		case n.VectorMatching.Card == promParser.CardManyToMany:
+			// FIXME need to handle 'foo or bar' here, we might have multiple selectors as the source.
+			s = walkNode(n.LHS)
+		case n.VectorMatching.Card == promParser.CardManyToOne:
+			s = walkNode(n.LHS)
+		}
+		if n.VectorMatching != nil {
+			s.IncludedLabels = append(s.IncludedLabels, n.VectorMatching.Include...)
+			s.IncludedLabels = append(s.IncludedLabels, n.VectorMatching.MatchingLabels...)
+		}
+
+	case *promParser.Call:
+		s.Type = FuncSource
+		s.Operation = n.Func.Name
+		s.Call = n
+
+		var vt promParser.ValueType
+		for i, e := range n.Args {
+			if i >= len(n.Func.ArgTypes) {
+				vt = n.Func.ArgTypes[len(n.Func.ArgTypes)-1]
+			} else {
+				vt = n.Func.ArgTypes[i]
+			}
+			// nolint: exhaustive
+			switch vt {
+			case promParser.ValueTypeVector:
+				s.Selector, _ = e.(*promParser.VectorSelector)
+			case promParser.ValueTypeMatrix:
+				s.Selector, _ = e.(*promParser.MatrixSelector).VectorSelector.(*promParser.VectorSelector)
+			}
+		}
+
+	case *promParser.MatrixSelector:
+		s = walkNode(n.VectorSelector)
+
+	case *promParser.SubqueryExpr:
+		s = walkNode(n.Expr)
+
+	case *promParser.NumberLiteral:
+		s.Type = NumberSource
+
+	case *promParser.ParenExpr:
+		s = walkNode(n.Expr)
+
+	case *promParser.StringLiteral:
+		s.Type = StringSource
+
+	case *promParser.UnaryExpr:
+		s = walkNode(n.Expr)
+
+	case *promParser.StepInvariantExpr:
+		// Not possible to get this from the parser.
+
+	case *promParser.VectorSelector:
+		s.Type = SelectorSource
+		s.Selector = n
+		// Any label used in positive filters is gurnateed to be present.
+		for _, lm := range s.Selector.LabelMatchers {
+			if lm.Name == labels.MetricName {
+				continue
+			}
+			if lm.Type == labels.MatchEqual || lm.Type == labels.MatchRegexp {
+				s.GuaranteedLabels = append(s.GuaranteedLabels, lm.Name)
+			}
+		}
+
+	default:
+		// unhandled type
+	}
+	return s
+}
diff --git a/internal/parser/utils/source_test.go b/internal/parser/utils/source_test.go
new file mode 100644
index 00000000..9c77e03a
--- /dev/null
+++ b/internal/parser/utils/source_test.go
@@ -0,0 +1,300 @@
+package utils_test
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/cloudflare/pint/internal/parser"
+	"github.com/cloudflare/pint/internal/parser/utils"
+
+	promParser "github.com/prometheus/prometheus/promql/parser"
+	"github.com/prometheus/prometheus/promql/parser/posrange"
+)
+
+func TestLabelsSource(t *testing.T) {
+	type testCaseT struct {
+		expr   string
+		output utils.Source
+	}
+
+	mustParseVector := func(s string, offset int) *promParser.VectorSelector {
+		m, err := promParser.ParseExpr(s)
+		require.NoErrorf(t, err, "failed to parse vector selector: %s", s)
+		v := m.(*promParser.VectorSelector)
+		v.PosRange.Start += posrange.Pos(offset)
+		v.PosRange.End += posrange.Pos(offset)
+		return v
+	}
+
+	mustParseMatrix := func(s string, offset int) *promParser.MatrixSelector {
+		e, err := promParser.ParseExpr(s)
+		require.NoErrorf(t, err, "failed to parse matrix selector: %s", s)
+		m := e.(*promParser.MatrixSelector)
+		m.VectorSelector = mustParseVector(m.VectorSelector.String(), offset)
+		m.EndPos += posrange.Pos(offset)
+		return m
+	}
+
+	testCases := []testCaseT{
+		{
+			expr: "1",
+			output: utils.Source{
+				Type: utils.NumberSource,
+			},
+		},
+		{
+			expr: `"test"`,
+			output: utils.Source{
+				Type: utils.StringSource,
+			},
+		},
+		{
+			expr: "foo",
+			output: utils.Source{
+				Type:     utils.SelectorSource,
+				Selector: mustParseVector("foo", 0),
+			},
+		},
+		{
+			expr: "foo - 1",
+			output: utils.Source{
+				Type:     utils.SelectorSource,
+				Selector: mustParseVector("foo", 0),
+			},
+		},
+		{
+			expr: `sum(foo{job="myjob"})`,
+			output: utils.Source{
+				Type:        utils.AggregateSource,
+				Operation:   "sum",
+				Selector:    mustParseVector(`foo{job="myjob"}`, 4),
+				EmptyLabels: true,
+			},
+		},
+		{
+			expr: `sum(foo{job="myjob"}) without(job)`,
+			output: utils.Source{
+				Type:           utils.AggregateSource,
+				Operation:      "sum",
+				Selector:       mustParseVector(`foo{job="myjob"}`, 4),
+				ExcludedLabels: []string{"job"},
+			},
+		},
+		{
+			expr: `sum(foo) by(job)`,
+			output: utils.Source{
+				Type:       utils.AggregateSource,
+				Operation:  "sum",
+				Selector:   mustParseVector(`foo`, 4),
+				OnlyLabels: []string{"job"},
+			},
+		},
+		{
+			expr: `sum(foo{job="myjob"}) by(job)`,
+			output: utils.Source{
+				Type:             utils.AggregateSource,
+				Operation:        "sum",
+				Selector:         mustParseVector(`foo{job="myjob"}`, 4),
+				OnlyLabels:       []string{"job"},
+				GuaranteedLabels: []string{"job"},
+			},
+		},
+		{
+			expr: `sum(foo{job="myjob"}) without(instance)`,
+			output: utils.Source{
+				Type:             utils.AggregateSource,
+				Operation:        "sum",
+				Selector:         mustParseVector(`foo{job="myjob"}`, 4),
+				GuaranteedLabels: []string{"job"},
+				ExcludedLabels:   []string{"instance"},
+			},
+		},
+		{
+			expr: `count_values("version", build_version)`,
+			output: utils.Source{
+				Type:             utils.AggregateSource,
+				Operation:        "count_values",
+				Selector:         mustParseVector(`build_version`, 24),
+				GuaranteedLabels: []string{"version"},
+				OnlyLabels:       []string{"version"},
+			},
+		},
+		{
+			expr: `count_values("version", build_version) without(job)`,
+			output: utils.Source{
+				Type:             utils.AggregateSource,
+				Operation:        "count_values",
+				Selector:         mustParseVector(`build_version`, 24),
+				GuaranteedLabels: []string{"version"},
+				ExcludedLabels:   []string{"job"},
+			},
+		},
+		{
+			expr: `count_values("version", build_version) by(job)`,
+			output: utils.Source{
+				Type:             utils.AggregateSource,
+				Operation:        "count_values",
+				Selector:         mustParseVector(`build_version`, 24),
+				GuaranteedLabels: []string{"version"},
+				OnlyLabels:       []string{"job", "version"},
+			},
+		},
+		{
+			expr: `topk(10, foo{job="myjob"}) > 10`,
+			output: utils.Source{
+				Type:             utils.AggregateSource,
+				Operation:        "topk",
+				Selector:         mustParseVector(`foo{job="myjob"}`, 9),
+				GuaranteedLabels: []string{"job"},
+			},
+		},
+		{
+			expr: `rate(foo[10m])`,
+			output: utils.Source{
+				Type:      utils.FuncSource,
+				Operation: "rate",
+				Selector:  mustParseVector(`foo`, 5),
+				Call: &promParser.Call{
+					Func: &promParser.Function{
+						Name: "rate",
+						ArgTypes: []promParser.ValueType{
+							promParser.ValueTypeMatrix,
+						},
+						Variadic:   0,
+						ReturnType: promParser.ValueTypeVector,
+					},
+					Args: promParser.Expressions{
+						mustParseMatrix(`foo[10m]`, 5),
+					},
+					PosRange: posrange.PositionRange{
+						Start: 0,
+						End:   14,
+					},
+				},
+			},
+		},
+		{
+			expr: `sum(rate(foo[10m])) without(instance)`,
+			output: utils.Source{
+				Type:           utils.AggregateSource,
+				Operation:      "sum",
+				Selector:       mustParseVector(`foo`, 9),
+				ExcludedLabels: []string{"instance"},
+			},
+		},
+		{
+			expr: `foo{job="foo"} / bar`,
+			output: utils.Source{
+				Type:             utils.SelectorSource,
+				Selector:         mustParseVector(`foo{job="foo"}`, 0),
+				GuaranteedLabels: []string{"job"},
+			},
+		},
+		{
+			expr: `foo{job="foo"} * on(instance) bar`,
+			output: utils.Source{
+				Type:             utils.SelectorSource,
+				Selector:         mustParseVector(`foo{job="foo"}`, 0),
+				GuaranteedLabels: []string{"job"},
+				IncludedLabels:   []string{"instance"},
+			},
+		},
+		{
+			expr: `foo{job="foo"} * on(instance) group_left(bar) bar`,
+			output: utils.Source{
+				Type:             utils.SelectorSource,
+				Selector:         mustParseVector(`foo{job="foo"}`, 0),
+				IncludedLabels:   []string{"bar", "instance"},
+				GuaranteedLabels: []string{"job"},
+			},
+		},
+		{
+			expr: `foo{job="foo"} * on(instance) group_left(cluster) bar{cluster="bar", ignored="true"}`,
+			output: utils.Source{
+				Type:             utils.SelectorSource,
+				Selector:         mustParseVector(`foo{job="foo"}`, 0),
+				IncludedLabels:   []string{"cluster", "instance"},
+				GuaranteedLabels: []string{"job"},
+			},
+		},
+		{
+			expr: `foo{job="foo", ignored="true"} * on(instance) group_right(job) bar{cluster="bar"}`,
+			output: utils.Source{
+				Type:             utils.SelectorSource,
+				Selector:         mustParseVector(`bar{cluster="bar"}`, 63),
+				IncludedLabels:   []string{"job", "instance"},
+				GuaranteedLabels: []string{"cluster"},
+			},
+		},
+		{
+			expr: `count(foo / bar)`,
+			output: utils.Source{
+				Type:        utils.AggregateSource,
+				Operation:   "count",
+				Selector:    mustParseVector(`foo`, 6),
+				EmptyLabels: true,
+			},
+		},
+		{
+			expr: `count(up{job="a"} / on () up{job="b"})`,
+			output: utils.Source{
+				Type:        utils.AggregateSource,
+				Operation:   "count",
+				Selector:    mustParseVector(`up{job="a"}`, 6),
+				EmptyLabels: true,
+			},
+		},
+		{
+			expr: `foo{job="foo", instance="1"} and bar`,
+			output: utils.Source{
+				Type:             utils.SelectorSource,
+				Selector:         mustParseVector(`foo{job="foo", instance="1"}`, 0),
+				GuaranteedLabels: []string{"job", "instance"},
+			},
+		},
+		{
+			expr: `foo{job="foo", instance="1"} and on(cluster) bar`,
+			output: utils.Source{
+				Type:             utils.SelectorSource,
+				Selector:         mustParseVector(`foo{job="foo", instance="1"}`, 0),
+				IncludedLabels:   []string{"cluster"},
+				GuaranteedLabels: []string{"job", "instance"},
+			},
+		},
+		{
+			expr: `topk(10, foo)`,
+			output: utils.Source{
+				Type:      utils.AggregateSource,
+				Operation: "topk",
+				Selector:  mustParseVector(`foo`, 9),
+			},
+		},
+		{
+			expr: `foo or bar`,
+			output: utils.Source{
+				Type:     utils.SelectorSource,
+				Selector: mustParseVector(`foo`, 0),
+			},
+		},
+		{
+			expr: `(foo or bar) or baz`,
+			output: utils.Source{
+				Type:     utils.SelectorSource,
+				Selector: mustParseVector(`foo`, 1),
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.expr, func(t *testing.T) {
+			n, err := parser.DecodeExpr(tc.expr)
+			if err != nil {
+				t.Error(err)
+				t.FailNow()
+			}
+			output := utils.LabelsSource(n)
+			require.Equal(t, tc.output, output)
+		})
+	}
+}