diff --git a/src/go/rpk/pkg/cli/cmd/generate/graf/graph.go b/src/go/rpk/pkg/cli/cmd/generate/graf/graph.go
index b9a229e11be6e..88118cf3e4431 100644
--- a/src/go/rpk/pkg/cli/cmd/generate/graf/graph.go
+++ b/src/go/rpk/pkg/cli/cmd/generate/graf/graph.go
@@ -26,6 +26,7 @@ type GraphPanel struct {
Tooltip Tooltip `json:"tooltip"`
AliasColors AliasColors `json:"aliasColors"`
SteppedLine bool `json:"steppedLine"`
+ Interval string `json:"interval,omitempty"`
}
func (*GraphPanel) Type() string {
diff --git a/src/go/rpk/pkg/cli/cmd/generate/grafana.go b/src/go/rpk/pkg/cli/cmd/generate/grafana.go
index ab23a03171682..30c16ac3ef597 100644
--- a/src/go/rpk/pkg/cli/cmd/generate/grafana.go
+++ b/src/go/rpk/pkg/cli/cmd/generate/grafana.go
@@ -107,7 +107,8 @@ func executeGrafanaDashboard(metricsEndpoint string) error {
if err != nil {
return err
}
- dashboard := buildGrafanaDashboard(metricFamilies)
+ isPublicMetrics := strings.Contains(metricsEndpoint, "public_metrics")
+ dashboard := buildGrafanaDashboard(metricFamilies, isPublicMetrics)
jsonSpec, err := json.MarshalIndent(dashboard, "", " ")
if err != nil {
return err
@@ -124,13 +125,19 @@ func executeGrafanaDashboard(metricsEndpoint string) error {
func buildGrafanaDashboard(
metricFamilies map[string]*dto.MetricFamily,
+ isPublicMetrics bool,
) graf.Dashboard {
intervals := []string{"5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"}
timeOptions := []string{"5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"}
- summaryPanels := buildSummary(metricFamilies)
+ var summaryPanels []graf.Panel
+ if isPublicMetrics {
+ summaryPanels = buildPublicMetricsSummary(metricFamilies)
+ } else {
+ summaryPanels = buildSummary(metricFamilies)
+ }
lastY := summaryPanels[len(summaryPanels)-1].GetGridPos().Y + panelHeight
rowSet := newRowSet()
- rowSet.processRows(metricFamilies)
+ rowSet.processRows(metricFamilies, isPublicMetrics)
rowSet.addCachePerformancePanels(metricFamilies)
rows := rowSet.finalize(lastY)
return graf.Dashboard{
@@ -173,7 +180,7 @@ func (rowSet *RowSet) finalize(fromY int) []graf.Panel {
return rows
}
-func (rowSet *RowSet) processRows(metricFamilies map[string]*dto.MetricFamily) {
+func (rowSet *RowSet) processRows(metricFamilies map[string]*dto.MetricFamily, isPublicMetrics bool) {
names := []string{}
for k := range metricFamilies {
names = append(names, k)
@@ -182,12 +189,14 @@ func (rowSet *RowSet) processRows(metricFamilies map[string]*dto.MetricFamily) {
for _, name := range names {
family := metricFamilies[name]
var panel graf.Panel
- if family.GetType() == dto.MetricType_COUNTER {
- panel = newCounterPanel(family)
+ // hack around redpanda_storage_* metrics: these should be gauge
+ // panels, but their metric type is reported as COUNTER
+ if family.GetType() == dto.MetricType_COUNTER && !strings.Contains(name, "redpanda_storage") {
+ panel = newCounterPanel(family, isPublicMetrics)
} else if subtype(family) == "histogram" {
- panel = newPercentilePanel(family, 0.95)
+ panel = newPercentilePanel(family, 0.95, isPublicMetrics)
} else {
- panel = newGaugePanel(family)
+ panel = newGaugePanel(family, isPublicMetrics)
}
if panel == nil {
@@ -286,6 +295,8 @@ func buildTemplating() graf.Templating {
}
}
+// buildSummary builds the Summary section of the Redpanda generated grafana
+// dashboard that uses the /metrics endpoint.
func buildSummary(metricFamilies map[string]*dto.MetricFamily) []graf.Panel {
maxWidth := 24
singleStatW := 2
@@ -334,7 +345,7 @@ func buildSummary(metricFamilies map[string]*dto.MetricFamily) []graf.Panel {
if kafkaExists {
width := (maxWidth - (singleStatW * 2)) / percentilesNo
for i, p := range percentiles {
- panel := newPercentilePanel(kafkaFamily, p)
+ panel := newPercentilePanel(kafkaFamily, p, false)
panel.GridPos = graf.GridPos{
H: panelHeight,
W: width,
@@ -355,7 +366,7 @@ func buildSummary(metricFamilies map[string]*dto.MetricFamily) []graf.Panel {
y += rpcLatencyTitle.GridPos.H
panels = append(panels, rpcLatencyTitle)
for i, p := range percentiles {
- panel := newPercentilePanel(rpcFamily, p)
+ panel := newPercentilePanel(rpcFamily, p, false)
panel.GridPos = graf.GridPos{
H: panelHeight,
W: width,
@@ -380,7 +391,7 @@ func buildSummary(metricFamilies map[string]*dto.MetricFamily) []graf.Panel {
readBytesFamily, readBytesExist := metricFamilies["vectorized_storage_log_read_bytes"]
writtenBytesFamily, writtenBytesExist := metricFamilies["vectorized_storage_log_written_bytes"]
if readBytesExist && writtenBytesExist {
- readPanel := newCounterPanel(readBytesFamily)
+ readPanel := newCounterPanel(readBytesFamily, false)
readPanel.GridPos = graf.GridPos{
H: panelHeight,
W: width,
@@ -389,7 +400,7 @@ func buildSummary(metricFamilies map[string]*dto.MetricFamily) []graf.Panel {
}
panels = append(panels, readPanel)
- writtenPanel := newCounterPanel(writtenBytesFamily)
+ writtenPanel := newCounterPanel(writtenBytesFamily, false)
writtenPanel.GridPos = graf.GridPos{
H: panelHeight,
W: width,
@@ -402,6 +413,186 @@ func buildSummary(metricFamilies map[string]*dto.MetricFamily) []graf.Panel {
return panels
}
+// buildPublicMetricsSummary builds the Summary section of the Redpanda generated grafana dashboard that uses the
+// /public_metrics endpoint; latency and throughput graphs are added only if metricFamilies contains their family.
+func buildPublicMetricsSummary(metricFamilies map[string]*dto.MetricFamily) []graf.Panel {
+ maxWidth := 24 // full-row width; used as-is for the summary title
+ singleStatW := 2
+ percentiles := []float32{0.95, 0.99}
+ percentilesNo := len(percentiles)
+ panels := []graf.Panel{}
+ y := 0 // running vertical offset for the next row of panels
+
+ summaryText := htmlHeader("Redpanda Summary")
+ summaryTitle := graf.NewTextPanel(summaryText, "html")
+ summaryTitle.GridPos = graf.GridPos{H: 2, W: maxWidth, X: 0, Y: y}
+ summaryTitle.Transparent = true
+ panels = append(panels, summaryTitle)
+ y += summaryTitle.GridPos.H
+
+ // Nodes Up panel: instant query of redpanda_cluster_brokers
+ nodesUp := graf.NewSingleStatPanel("Nodes Up")
+ nodesUp.Datasource = datasource
+ nodesUp.GridPos = graf.GridPos{H: 6, W: singleStatW, X: 0, Y: y}
+ nodesUp.Targets = []graf.Target{{
+ Expr: `redpanda_cluster_brokers`,
+ Step: 40,
+ IntervalFactor: 1,
+ LegendFormat: "Nodes Up",
+ Instant: true,
+ }}
+ nodesUp.Transparent = true
+ panels = append(panels, nodesUp)
+ y += nodesUp.GridPos.H
+
+ // Partitions panel: instant query of redpanda_cluster_partitions
+ partitionCount := graf.NewSingleStatPanel("Partitions")
+ partitionCount.Datasource = datasource
+ partitionCount.GridPos = graf.GridPos{
+ H: 6,
+ W: singleStatW,
+ X: 0,
+ Y: nodesUp.GridPos.H, // NOTE(review): height of Nodes Up (6), while the running offset y is 8 here — confirm
+ }
+ partitionCount.Targets = []graf.Target{{
+ Expr: "redpanda_cluster_partitions",
+ LegendFormat: "Partition count",
+ Instant: true,
+ }}
+ partitionCount.Transparent = true
+ panels = append(panels, partitionCount)
+ y += partitionCount.GridPos.H
+
+ // Latency of Kafka consume/produce requests (p95 - p99): one produce and one consume graph per percentile
+ _, kafkaExists := metricFamilies[`redpanda_kafka_request_latency_seconds`]
+ if kafkaExists {
+ width := (maxWidth - singleStatW) / percentilesNo // split the width right of the single stats evenly
+ for i, p := range percentiles {
+ pTarget := graf.Target{
+ Expr: fmt.Sprintf(`histogram_quantile(%.2f, sum(rate(redpanda_kafka_request_latency_seconds_bucket{instance=~"$node", redpanda_request="produce"}[$__rate_interval])) by (le, provider, region, instance, namespace, pod))`, p),
+ LegendFormat: "node: {{instance}}",
+ Format: "time_series",
+ Step: 10,
+ IntervalFactor: 2,
+ RefID: "A",
+ }
+ pTitle := fmt.Sprintf("Latency of Kafka produce requests (p%.0f) per broker", p*100)
+ producePanel := newGraphPanel(pTitle, pTarget, "s")
+ producePanel.Interval = "1m"
+ producePanel.Lines = true
+ producePanel.SteppedLine = true
+ producePanel.NullPointMode = "null as zero"
+ producePanel.Tooltip.ValueType = "individual"
+ producePanel.Tooltip.Sort = 0
+ producePanel.GridPos = graf.GridPos{
+ H: panelHeight,
+ W: width,
+ X: i*width + singleStatW,
+ Y: y,
+ }
+ cTarget := graf.Target{
+ Expr: fmt.Sprintf(`histogram_quantile(%.2f, sum(rate(redpanda_kafka_request_latency_seconds_bucket{instance=~"$node", redpanda_request="consume"}[$__rate_interval])) by (le, provider, region, instance, namespace, pod))`, p),
+ LegendFormat: "node: {{instance}}",
+ Format: "time_series",
+ Step: 10,
+ IntervalFactor: 2,
+ RefID: "A",
+ }
+ cTitle := fmt.Sprintf("Latency of Kafka consume requests (p%.0f) per broker", p*100)
+ consumePanel := newGraphPanel(cTitle, cTarget, "s")
+ consumePanel.Interval = "1m"
+ consumePanel.Lines = true
+ consumePanel.SteppedLine = true
+ consumePanel.NullPointMode = "null as zero"
+ consumePanel.Tooltip.ValueType = "individual"
+ consumePanel.Tooltip.Sort = 0
+ consumePanel.GridPos = graf.GridPos{
+ H: panelHeight,
+ W: width,
+ X: i*width + singleStatW,
+ Y: producePanel.GridPos.H, // NOTE(review): equals panelHeight rather than an offset derived from y — confirm
+ }
+ panels = append(panels, consumePanel, producePanel) // consume is appended before produce
+ }
+ y += panelHeight // advances y by a single panel row
+ }
+ width := maxWidth / 4
+
+ // Internal RPC Latency Section: the title occupies the left half of the row
+ rpcLatencyText := htmlHeader("Internal RPC Latency")
+ rpcLatencyTitle := graf.NewTextPanel(rpcLatencyText, "html")
+ rpcLatencyTitle.GridPos = graf.GridPos{H: 2, W: maxWidth / 2, X: 0, Y: y}
+ rpcLatencyTitle.Transparent = true
+ rpcFamily, rpcExists := metricFamilies[`redpanda_rpc_request_latency_seconds`]
+ if rpcExists { // the title and its graphs are only emitted when the RPC latency family exists
+ y += rpcLatencyTitle.GridPos.H
+ panels = append(panels, rpcLatencyTitle)
+ for i, p := range percentiles {
+ template := `histogram_quantile(%.2f, sum(rate(%s_bucket{instance=~"$node",redpanda_server="internal"}[$__rate_interval])) by (le, $aggr_criteria))`
+ expr := fmt.Sprintf(template, p, rpcFamily.GetName())
+ target := graf.Target{
+ Expr: expr,
+ LegendFormat: "node: {{instance}}",
+ Format: "time_series",
+ Step: 10,
+ IntervalFactor: 2,
+ RefID: "A",
+ }
+ title := fmt.Sprintf("%s (p%.0f)", rpcFamily.GetHelp(), p*100)
+ panel := newGraphPanel(title, target, "s")
+ panel.Interval = "1m"
+ panel.Lines = true
+ panel.SteppedLine = true
+ panel.NullPointMode = "null as zero"
+ panel.Tooltip.ValueType = "individual"
+ panel.Tooltip.Sort = 0
+ panel.GridPos = graf.GridPos{
+ H: panelHeight,
+ W: width,
+ X: i * width,
+ Y: y,
+ }
+ panels = append(panels, panel)
+ }
+ }
+
+ // Throughput section: the title is always appended, the graph only when the request-bytes family exists
+ throughputText := htmlHeader("Throughput")
+ throughputTitle := graf.NewTextPanel(throughputText, "html")
+ throughputTitle.GridPos = graf.GridPos{
+ H: 2,
+ W: maxWidth / 2,
+ X: rpcLatencyTitle.GridPos.W, // starts where the RPC latency title ends, on the same row
+ Y: rpcLatencyTitle.GridPos.Y,
+ }
+ throughputTitle.Transparent = true
+ panels = append(panels, throughputTitle)
+
+ reqBytesFamily, reqBytesExists := metricFamilies["redpanda_kafka_request_bytes_total"]
+ if reqBytesExists {
+ target := graf.Target{
+ Expr: `sum(rate(redpanda_kafka_request_bytes_total[$__rate_interval])) by (redpanda_request)`,
+ LegendFormat: "redpanda_request: {{redpanda_request}}",
+ Format: "time_series",
+ Step: 10,
+ IntervalFactor: 2,
+ }
+ panel := newGraphPanel("Rate - "+reqBytesFamily.GetHelp(), target, "Bps")
+ panel.Interval = "1m"
+ panel.Lines = true
+ panel.GridPos = graf.GridPos{
+ H: panelHeight,
+ W: width * 2,
+ X: maxWidth / 2,
+ Y: y,
+ }
+ panel.Title = "Throughput of Kafka produce/consume requests for the cluster" // set after construction; presumably supersedes the "Rate - ..." argument
+ panels = append(panels, panel)
+ }
+
+ return panels
+}
+
func metricGroup(metric string) string {
for _, group := range metricGroups {
if strings.Contains(metric, group) {
@@ -445,15 +636,14 @@ func fetchMetrics(
}
func newPercentilePanel(
- m *dto.MetricFamily, percentile float32,
+ m *dto.MetricFamily, percentile float32, isPublicMetrics bool,
) *graf.GraphPanel {
- expr := fmt.Sprintf(
- `histogram_quantile(%.2f, sum(rate(%s_bucket{instance=~"$node",shard=~"$node_shard"}[2m])) by (le, $aggr_criteria))`,
- percentile,
- m.GetName(),
- )
+ template := `histogram_quantile(%.2f, sum(rate(%s_bucket{instance=~"$node",shard=~"$node_shard"}[2m])) by (le, $aggr_criteria))`
+ if isPublicMetrics {
+ template = `histogram_quantile(%.2f, sum(rate(%s_bucket{instance=~"$node"}[$__rate_interval])) by (le, $aggr_criteria))`
+ }
target := graf.Target{
- Expr: expr,
+ Expr: fmt.Sprintf(template, percentile, m.GetName()),
LegendFormat: legendFormat(m),
Format: "time_series",
Step: 10,
@@ -467,16 +657,17 @@ func newPercentilePanel(
panel.NullPointMode = "null as zero"
panel.Tooltip.ValueType = "individual"
panel.Tooltip.Sort = 0
+ panel.Interval = "1m"
return panel
}
-func newCounterPanel(m *dto.MetricFamily) *graf.GraphPanel {
- expr := fmt.Sprintf(
- `sum(irate(%s{instance=~"$node",shard=~"$node_shard"}[2m])) by ($aggr_criteria)`,
- m.GetName(),
- )
+func newCounterPanel(m *dto.MetricFamily, isPublicMetrics bool) *graf.GraphPanel {
+ template := `sum(irate(%s{instance=~"$node",shard=~"$node_shard"}[2m])) by ($aggr_criteria)`
+ if isPublicMetrics {
+ template = `sum(rate(%s{instance=~"$node"}[$__rate_interval])) by ($aggr_criteria)`
+ }
target := graf.Target{
- Expr: expr,
+ Expr: fmt.Sprintf(template, m.GetName()),
LegendFormat: legendFormat(m),
Format: "time_series",
Step: 10,
@@ -485,19 +676,22 @@ func newCounterPanel(m *dto.MetricFamily) *graf.GraphPanel {
format := "ops"
if strings.Contains(m.GetName(), "bytes") {
format = "Bps"
+ } else if strings.Contains(m.GetName(), "redpanda_scheduler") {
+ format = "percentunit"
}
panel := newGraphPanel("Rate - "+m.GetHelp(), target, format)
panel.Lines = true
+ panel.Interval = "1m"
return panel
}
-func newGaugePanel(m *dto.MetricFamily) *graf.GraphPanel {
- expr := fmt.Sprintf(
- `sum(%s{instance=~"$node",shard=~"$node_shard"}) by ($aggr_criteria)`,
- m.GetName(),
- )
+func newGaugePanel(m *dto.MetricFamily, isPublicMetrics bool) *graf.GraphPanel {
+ template := `sum(%s{instance=~"$node",shard=~"$node_shard"}) by ($aggr_criteria)`
+ if isPublicMetrics {
+ template = `sum(%s{instance=~"$node"}) by ($aggr_criteria)`
+ }
target := graf.Target{
- Expr: expr,
+ Expr: fmt.Sprintf(template, m.GetName()),
LegendFormat: legendFormat(m),
Format: "time_series",
Step: 10,
diff --git a/src/go/rpk/pkg/cli/cmd/generate/grafana_test.go b/src/go/rpk/pkg/cli/cmd/generate/grafana_test.go
index d8e4fe07dd9c9..d96701a35d05a 100644
--- a/src/go/rpk/pkg/cli/cmd/generate/grafana_test.go
+++ b/src/go/rpk/pkg/cli/cmd/generate/grafana_test.go
@@ -65,7 +65,7 @@ vectorized_vectorized_internal_rpc_dispatch_handler_latency_bucket{le="20.000000
vectorized_memory_allocated_memory_bytes{shard="0",type="bytes"} 40837120
vectorized_memory_allocated_memory_bytes{shard="1",type="bytes"} 36986880
`
- expected := `{"title":"Redpanda","templating":{"list":[{"name":"node","datasource":"prometheus","label":"Node","type":"query","refresh":1,"options":[],"includeAll":true,"allFormat":"","allValue":".*","multi":true,"multiFormat":"","query":"label_values(instance)","current":{"text":"","value":null},"hide":0,"sort":1},{"name":"node_shard","datasource":"prometheus","label":"Shard","type":"query","refresh":1,"options":[],"includeAll":true,"allFormat":"","allValue":".*","multi":true,"multiFormat":"","query":"label_values(shard)","current":{"text":"","value":null},"hide":0,"sort":1},{"name":"aggr_criteria","datasource":"prometheus","label":"Aggregate by","type":"custom","refresh":1,"options":[{"text":"Cluster","value":"","selected":false},{"text":"Instance","value":"instance,","selected":false},{"text":"Instance, Shard","value":"instance,shard,","selected":false}],"includeAll":false,"allFormat":"","allValue":"","multi":false,"multiFormat":"","query":"Cluster : cluster,Instance : instance,Instance\\,Shard : instance\\,shard","current":{"text":"Cluster","value":""},"hide":0,"sort":1}]},"panels":[{"type":"text","id":1,"title":"","editable":true,"gridPos":{"h":2,"w":24,"x":0,"y":0},"transparent":true,"links":null,"span":1,"error":false,"content":"
Redpanda Summary
","mode":"html"},{"type":"singlestat","id":2,"title":"Nodes Up","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":2,"x":0,"y":2},"transparent":true,"span":1,"error":false,"targets":[{"refId":"","expr":"count by (app) (vectorized_application_uptime)","intervalFactor":1,"step":40,"legendFormat":"Nodes Up"}],"format":"none","prefix":"","postfix":"","maxDataPoints":100,"valueMaps":[{"value":"null","op":"=","text":"N/A"}],"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"rangeMaps":[{"from":"null","to":"null","text":"N/A"}],"mappingType":1,"nullPointMode":"connected","valueName":"current","valueFontSize":"200%","prefixFontSize":"50%","postfixFontSize":"50%","colorBackground":false,"colorValue":true,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"thresholds":"","sparkline":{"show":false,"full":false,"ymin":null,"ymax":null,"lineColor":"rgb(31, 120, 193)","fillColor":"rgba(31, 118, 189, 0.18)"},"gauge":{"show":false,"minValue":0,"maxValue":100,"thresholdMarkers":true,"thresholdLabels":false},"links":[],"interval":null,"timeFrom":null,"timeShift":null,"nullText":null,"cacheTimeout":null,"tableColumn":""},{"type":"singlestat","id":3,"title":"Partitions","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":2,"x":2,"y":8},"transparent":true,"span":1,"error":false,"targets":[{"refId":"","expr":"count(count by (topic,partition) (vectorized_storage_log_partition_size{namespace=\"kafka\"}))","legendFormat":"Partition count"}],"format":"none","prefix":"","postfix":"","maxDataPoints":100,"valueMaps":[{"value":"null","op":"=","text":"N/A"}],"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"rangeMaps":[{"from":"null","to":"null","text":"N/A"}],"mappingType":1,"nullPointMode":"connected","valueName":"current","valueFontSize":"200%","prefixFontSize":"50%","postfixFontSize":"50%","colorBackground":false,"colorValue":true,"colors":["#299c46","rgba(237, 129, 40, 
0.89)","#d44a3a"],"thresholds":"","sparkline":{"show":false,"full":false,"ymin":null,"ymax":null,"lineColor":"rgb(31, 120, 193)","fillColor":"rgba(31, 118, 189, 0.18)"},"gauge":{"show":false,"minValue":0,"maxValue":100,"thresholdMarkers":true,"thresholdLabels":false},"links":[],"interval":null,"timeFrom":null,"timeShift":null,"nullText":null,"cacheTimeout":null,"tableColumn":""},{"type":"text","id":5,"title":"","editable":true,"gridPos":{"h":2,"w":12,"x":12,"y":14},"transparent":true,"links":null,"span":1,"error":false,"content":"Throughput
","mode":"html"},{"type":"row","collapsed":true,"id":7,"title":"memory","editable":true,"gridPos":{"h":6,"w":24,"x":0,"y":20},"transparent":false,"links":null,"span":0,"error":false,"panels":[{"type":"graph","id":6,"title":"Rate - Allocated memory size in bytes","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":8,"x":0,"y":20},"transparent":false,"links":null,"renderer":"flot","span":4,"error":false,"targets":[{"refId":"","expr":"sum(irate(vectorized_memory_allocated_memory_bytes{instance=~\"$node\",shard=~\"$node_shard\"}[2m])) by ($aggr_criteria)","intervalFactor":2,"step":10,"legendFormat":"node: {{instance}}, shard: {{shard}}","format":"time_series"}],"xaxis":{"format":"","logBase":0,"show":true,"mode":"time"},"yaxes":[{"label":null,"show":true,"logBase":1,"min":0,"format":"Bps"},{"label":null,"show":true,"logBase":1,"min":0,"format":"short"}],"legend":{"show":true,"max":false,"min":false,"values":false,"avg":false,"current":false,"total":false},"fill":1,"linewidth":2,"nullPointMode":"null","thresholds":null,"lines":true,"bars":false,"tooltip":{"shared":true,"value_type":"cumulative","msResolution":true},"aliasColors":{},"steppedLine":false}]},{"type":"row","collapsed":true,"id":9,"title":"vectorized_internal_rpc","editable":true,"gridPos":{"h":6,"w":24,"x":0,"y":21},"transparent":false,"links":null,"span":0,"error":false,"panels":[{"type":"graph","id":8,"title":"Amount of memory consumed for requests processing","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":8,"x":0,"y":21},"transparent":false,"links":null,"renderer":"flot","span":4,"error":false,"targets":[{"refId":"","expr":"sum(vectorized_vectorized_internal_rpc_consumed_mem{instance=~\"$node\",shard=~\"$node_shard\"}) by ($aggr_criteria)","intervalFactor":2,"step":10,"legendFormat":"node: {{instance}}, shard: 
{{shard}}","format":"time_series"}],"xaxis":{"format":"","logBase":0,"show":true,"mode":"time"},"yaxes":[{"label":null,"show":true,"logBase":1,"min":0,"format":"short"},{"label":null,"show":true,"logBase":1,"min":0,"format":"short"}],"legend":{"show":true,"max":false,"min":false,"values":false,"avg":false,"current":false,"total":false},"fill":1,"linewidth":2,"nullPointMode":"null","thresholds":null,"lines":true,"bars":false,"tooltip":{"shared":true,"value_type":"cumulative","msResolution":true},"aliasColors":{},"steppedLine":true},{"type":"graph","id":10,"title":"Rate - Number of requests with corrupted headers","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":8,"x":8,"y":21},"transparent":false,"links":null,"renderer":"flot","span":4,"error":false,"targets":[{"refId":"","expr":"sum(irate(vectorized_vectorized_internal_rpc_corrupted_headers{instance=~\"$node\",shard=~\"$node_shard\"}[2m])) by ($aggr_criteria)","intervalFactor":2,"step":10,"legendFormat":"node: {{instance}}, shard: {{shard}}","format":"time_series"}],"xaxis":{"format":"","logBase":0,"show":true,"mode":"time"},"yaxes":[{"label":null,"show":true,"logBase":1,"min":0,"format":"ops"},{"label":null,"show":true,"logBase":1,"min":0,"format":"short"}],"legend":{"show":true,"max":false,"min":false,"values":false,"avg":false,"current":false,"total":false},"fill":1,"linewidth":2,"nullPointMode":"null","thresholds":null,"lines":true,"bars":false,"tooltip":{"shared":true,"value_type":"cumulative","msResolution":true},"aliasColors":{},"steppedLine":false},{"type":"graph","id":11,"title":"Latency of service handler dispatch (p95)","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":8,"x":16,"y":21},"transparent":false,"links":null,"renderer":"flot","span":4,"error":false,"targets":[{"refId":"A","expr":"histogram_quantile(0.95, sum(rate(vectorized_vectorized_internal_rpc_dispatch_handler_latency_bucket{instance=~\"$node\",shard=~\"$node_shard\"}[2m])) by (le, 
$aggr_criteria))","intervalFactor":2,"step":10,"legendFormat":"node: {{instance}}, shard: {{shard}}","format":"time_series"}],"xaxis":{"format":"","logBase":0,"show":true,"mode":"time"},"yaxes":[{"label":null,"show":true,"logBase":1,"min":0,"format":"µs"},{"label":null,"show":true,"logBase":1,"min":0,"format":"short"}],"legend":{"show":true,"max":false,"min":false,"values":false,"avg":false,"current":false,"total":false},"fill":1,"linewidth":2,"nullPointMode":"null as zero","thresholds":null,"lines":true,"bars":false,"tooltip":{"shared":true,"value_type":"individual","msResolution":true},"aliasColors":{},"steppedLine":true}]}],"editable":true,"timezone":"utc","refresh":"10s","time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"annotations":{"list":null},"links":null,"schemaVersion":12}`
+ expected := `{"title":"Redpanda","templating":{"list":[{"name":"node","datasource":"prometheus","label":"Node","type":"query","refresh":1,"options":[],"includeAll":true,"allFormat":"","allValue":".*","multi":true,"multiFormat":"","query":"label_values(instance)","current":{"text":"","value":null},"hide":0,"sort":1},{"name":"node_shard","datasource":"prometheus","label":"Shard","type":"query","refresh":1,"options":[],"includeAll":true,"allFormat":"","allValue":".*","multi":true,"multiFormat":"","query":"label_values(shard)","current":{"text":"","value":null},"hide":0,"sort":1},{"name":"aggr_criteria","datasource":"prometheus","label":"Aggregate by","type":"custom","refresh":1,"options":[{"text":"Cluster","value":"","selected":false},{"text":"Instance","value":"instance,","selected":false},{"text":"Instance, Shard","value":"instance,shard,","selected":false}],"includeAll":false,"allFormat":"","allValue":"","multi":false,"multiFormat":"","query":"Cluster : cluster,Instance : instance,Instance\\,Shard : instance\\,shard","current":{"text":"Cluster","value":""},"hide":0,"sort":1}]},"panels":[{"type":"text","id":1,"title":"","editable":true,"gridPos":{"h":2,"w":24,"x":0,"y":0},"transparent":true,"links":null,"span":1,"error":false,"content":"Redpanda Summary
","mode":"html"},{"type":"singlestat","id":2,"title":"Nodes Up","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":2,"x":0,"y":2},"transparent":true,"span":1,"error":false,"targets":[{"refId":"","expr":"count by (app) (vectorized_application_uptime)","intervalFactor":1,"step":40,"legendFormat":"Nodes Up"}],"format":"none","prefix":"","postfix":"","maxDataPoints":100,"valueMaps":[{"value":"null","op":"=","text":"N/A"}],"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"rangeMaps":[{"from":"null","to":"null","text":"N/A"}],"mappingType":1,"nullPointMode":"connected","valueName":"current","valueFontSize":"200%","prefixFontSize":"50%","postfixFontSize":"50%","colorBackground":false,"colorValue":true,"colors":["#299c46","rgba(237, 129, 40, 0.89)","#d44a3a"],"thresholds":"","sparkline":{"show":false,"full":false,"ymin":null,"ymax":null,"lineColor":"rgb(31, 120, 193)","fillColor":"rgba(31, 118, 189, 0.18)"},"gauge":{"show":false,"minValue":0,"maxValue":100,"thresholdMarkers":true,"thresholdLabels":false},"links":[],"interval":null,"timeFrom":null,"timeShift":null,"nullText":null,"cacheTimeout":null,"tableColumn":""},{"type":"singlestat","id":3,"title":"Partitions","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":2,"x":2,"y":8},"transparent":true,"span":1,"error":false,"targets":[{"refId":"","expr":"count(count by (topic,partition) (vectorized_storage_log_partition_size{namespace=\"kafka\"}))","legendFormat":"Partition count"}],"format":"none","prefix":"","postfix":"","maxDataPoints":100,"valueMaps":[{"value":"null","op":"=","text":"N/A"}],"mappingTypes":[{"name":"value to text","value":1},{"name":"range to text","value":2}],"rangeMaps":[{"from":"null","to":"null","text":"N/A"}],"mappingType":1,"nullPointMode":"connected","valueName":"current","valueFontSize":"200%","prefixFontSize":"50%","postfixFontSize":"50%","colorBackground":false,"colorValue":true,"colors":["#299c46","rgba(237, 129, 40, 
0.89)","#d44a3a"],"thresholds":"","sparkline":{"show":false,"full":false,"ymin":null,"ymax":null,"lineColor":"rgb(31, 120, 193)","fillColor":"rgba(31, 118, 189, 0.18)"},"gauge":{"show":false,"minValue":0,"maxValue":100,"thresholdMarkers":true,"thresholdLabels":false},"links":[],"interval":null,"timeFrom":null,"timeShift":null,"nullText":null,"cacheTimeout":null,"tableColumn":""},{"type":"text","id":5,"title":"","editable":true,"gridPos":{"h":2,"w":12,"x":12,"y":14},"transparent":true,"links":null,"span":1,"error":false,"content":"Throughput
","mode":"html"},{"type":"row","collapsed":true,"id":7,"title":"memory","editable":true,"gridPos":{"h":6,"w":24,"x":0,"y":20},"transparent":false,"links":null,"span":0,"error":false,"panels":[{"type":"graph","id":6,"interval":"1m","title":"Rate - Allocated memory size in bytes","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":8,"x":0,"y":20},"transparent":false,"links":null,"renderer":"flot","span":4,"error":false,"targets":[{"refId":"","expr":"sum(irate(vectorized_memory_allocated_memory_bytes{instance=~\"$node\",shard=~\"$node_shard\"}[2m])) by ($aggr_criteria)","intervalFactor":2,"step":10,"legendFormat":"node: {{instance}}, shard: {{shard}}","format":"time_series"}],"xaxis":{"format":"","logBase":0,"show":true,"mode":"time"},"yaxes":[{"label":null,"show":true,"logBase":1,"min":0,"format":"Bps"},{"label":null,"show":true,"logBase":1,"min":0,"format":"short"}],"legend":{"show":true,"max":false,"min":false,"values":false,"avg":false,"current":false,"total":false},"fill":1,"linewidth":2,"nullPointMode":"null","thresholds":null,"lines":true,"bars":false,"tooltip":{"shared":true,"value_type":"cumulative","msResolution":true},"aliasColors":{},"steppedLine":false}]},{"type":"row","collapsed":true,"id":9,"title":"vectorized_internal_rpc","editable":true,"gridPos":{"h":6,"w":24,"x":0,"y":21},"transparent":false,"links":null,"span":0,"error":false,"panels":[{"type":"graph","id":8,"title":"Amount of memory consumed for requests processing","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":8,"x":0,"y":21},"transparent":false,"links":null,"renderer":"flot","span":4,"error":false,"targets":[{"refId":"","expr":"sum(vectorized_vectorized_internal_rpc_consumed_mem{instance=~\"$node\",shard=~\"$node_shard\"}) by ($aggr_criteria)","intervalFactor":2,"step":10,"legendFormat":"node: {{instance}}, shard: 
{{shard}}","format":"time_series"}],"xaxis":{"format":"","logBase":0,"show":true,"mode":"time"},"yaxes":[{"label":null,"show":true,"logBase":1,"min":0,"format":"short"},{"label":null,"show":true,"logBase":1,"min":0,"format":"short"}],"legend":{"show":true,"max":false,"min":false,"values":false,"avg":false,"current":false,"total":false},"fill":1,"linewidth":2,"nullPointMode":"null","thresholds":null,"lines":true,"bars":false,"tooltip":{"shared":true,"value_type":"cumulative","msResolution":true},"aliasColors":{},"steppedLine":true},{"type":"graph","id":10,"interval":"1m","title":"Rate - Number of requests with corrupted headers","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":8,"x":8,"y":21},"transparent":false,"links":null,"renderer":"flot","span":4,"error":false,"targets":[{"refId":"","expr":"sum(irate(vectorized_vectorized_internal_rpc_corrupted_headers{instance=~\"$node\",shard=~\"$node_shard\"}[2m])) by ($aggr_criteria)","intervalFactor":2,"step":10,"legendFormat":"node: {{instance}}, shard: {{shard}}","format":"time_series"}],"xaxis":{"format":"","logBase":0,"show":true,"mode":"time"},"yaxes":[{"label":null,"show":true,"logBase":1,"min":0,"format":"ops"},{"label":null,"show":true,"logBase":1,"min":0,"format":"short"}],"legend":{"show":true,"max":false,"min":false,"values":false,"avg":false,"current":false,"total":false},"fill":1,"linewidth":2,"nullPointMode":"null","thresholds":null,"lines":true,"bars":false,"tooltip":{"shared":true,"value_type":"cumulative","msResolution":true},"aliasColors":{},"steppedLine":false},{"type":"graph","id":11,"interval":"1m","title":"Latency of service handler dispatch (p95)","datasource":"prometheus","editable":true,"gridPos":{"h":6,"w":8,"x":16,"y":21},"transparent":false,"links":null,"renderer":"flot","span":4,"error":false,"targets":[{"refId":"A","expr":"histogram_quantile(0.95, sum(rate(vectorized_vectorized_internal_rpc_dispatch_handler_latency_bucket{instance=~\"$node\",shard=~\"$node_shard\"}[2m])) by (le, 
$aggr_criteria))","intervalFactor":2,"step":10,"legendFormat":"node: {{instance}}, shard: {{shard}}","format":"time_series"}],"xaxis":{"format":"","logBase":0,"show":true,"mode":"time"},"yaxes":[{"label":null,"show":true,"logBase":1,"min":0,"format":"µs"},{"label":null,"show":true,"logBase":1,"min":0,"format":"short"}],"legend":{"show":true,"max":false,"min":false,"values":false,"avg":false,"current":false,"total":false},"fill":1,"linewidth":2,"nullPointMode":"null as zero","thresholds":null,"lines":true,"bars":false,"tooltip":{"shared":true,"value_type":"individual","msResolution":true},"aliasColors":{},"steppedLine":true}]}],"editable":true,"timezone":"utc","refresh":"10s","time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"annotations":{"list":null},"links":null,"schemaVersion":12}`
ts := httptest.NewServer(
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)