From 2de80e354d2af42d85bad5a2c8ef105e2720a028 Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Thu, 30 Jan 2025 12:38:15 -0500 Subject: [PATCH 01/42] Remove extra lines in app-o11y notes --- .../feature-application-observability/templates/_notes.tpl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/k8s-monitoring/charts/feature-application-observability/templates/_notes.tpl b/charts/k8s-monitoring/charts/feature-application-observability/templates/_notes.tpl index 8361257f8..6deeba2e2 100644 --- a/charts/k8s-monitoring/charts/feature-application-observability/templates/_notes.tpl +++ b/charts/k8s-monitoring/charts/feature-application-observability/templates/_notes.tpl @@ -17,10 +17,10 @@ Gather application data via {{ include "english_list" $receivers }} {{ $receiver Configure your applications to send telemetry data to: {{- if .Values.receivers.otlp.grpc.enabled }} * http://{{ .Collector.ServiceName }}.{{ .Collector.Namespace }}.svc.cluster.local:{{ .Values.receivers.otlp.grpc.port }} (OTLP gRPC) -{{ end }} +{{- end }} {{- if .Values.receivers.otlp.http.enabled }} * http://{{ .Collector.ServiceName }}.{{ .Collector.Namespace }}.svc.cluster.local:{{ .Values.receivers.otlp.http.port }} (OTLP HTTP) -{{ end }} +{{- end }} {{- if .Values.receivers.jaeger.grpc.enabled }} * http://{{ .Collector.ServiceName }}.{{ .Collector.Namespace }}.svc.cluster.local:{{ .Values.receivers.jaeger.grpc.port }} (Jaeger gRPC) {{- end }} @@ -35,7 +35,7 @@ Configure your applications to send telemetry data to: {{- end }} {{- if .Values.receivers.zipkin.enabled }} * http://{{ .Collector.ServiceName }}.{{ .Collector.Namespace }}.svc.cluster.local:{{ .Values.receivers.zipkin.port }} (Zipkin) -{{ end }} +{{- end }} {{- end }} {{- define "feature.applicationObservability.summary" -}} From fd44e575181b13838940d76afe9dfc9732b8d569 Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Thu, 30 Jan 2025 12:39:15 -0500 Subject: [PATCH 02/42] Added namespaces property The namespaces property is already supported by the ksm helm chart, but wasn't documented in the k8s chart --- .../k8s-monitoring/charts/feature-cluster-metrics/values.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml index bd3d14297..e21788fee 100644 --- a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml +++ b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml @@ -434,6 +434,10 @@ kube-state-metrics: # @section -- kube-state-metrics namespace: "" + # Comma-separated list(string) or yaml list of namespaces to be enabled for collecting resources. By default all namespaces are collected. + # @section -- kube-state-metrics + namespaces: "" + # -- Rule blocks to be added to the discovery.relabel component for kube-state-metrics. # These relabeling rules are applied pre-scrape against the targets from service discovery. # Before the scrape, any remaining target labels that start with __ (i.e. __meta_kubernetes*) are dropped. 
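As a usage illustration for the `namespaces` value documented in PATCH 02/42, here is a minimal sketch of a parent-chart values override, following the same structure as the meta-monitoring example updated later in this series (the namespace names are hypothetical; per the value's comment, a YAML list is accepted as well as a comma-separated string):

clusterMetrics:
  kube-state-metrics:
    enabled: true
    # Hypothetical namespaces: limit kube-state-metrics to resources in these namespaces only.
    # Leaving this empty ("") keeps the default behavior of collecting all namespaces.
    namespaces: monitoring,default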
From f1b94f089245926894c319b5e04430a609ded8de Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Thu, 30 Jan 2025 12:39:42 -0500 Subject: [PATCH 03/42] Drop debug logs by default --- .../feature-integrations/integrations/grafana-values.yaml | 5 +++-- .../feature-integrations/integrations/loki-values.yaml | 5 +++-- .../feature-integrations/integrations/mimir-values.yaml | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/grafana-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/grafana-values.yaml index b6472e4df..010368016 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/grafana-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/grafana-values.yaml @@ -69,7 +69,7 @@ logs: # -- The timestamp format to use for the log line, if not set the default timestamp which is the collection # will be used for the log line # @section -- Logs Settings - timestampFormat: "RFC3339Nano" + timestampFormat: RFC3339Nano # -- Whether the timestamp should be scrubbed from the log line # @section -- Logs Settings @@ -78,7 +78,8 @@ logs: # -- The log levels to drop. # Will automatically keep all log levels unless specified here. # @section -- Logs Settings - dropLogLevels: [] + dropLogLevels: + - debug # -- Line patterns (valid RE2 regular expression)to exclude from the logs. # @section -- Logs Settings diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml index 10c342cd9..c7d268e73 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml @@ -68,7 +68,7 @@ logs: # -- The timestamp format to use for the log line, if not set the default timestamp which is the collection # will be used for the log line # @section -- Logs Settings - timestampFormat: "RFC3339Nano" + timestampFormat: RFC3339Nano # -- Whether the timestamp should be scrubbed from the log line # @section -- Logs Settings @@ -77,7 +77,8 @@ logs: # -- The log levels to drop. # Will automatically keep all log levels unless specified here. # @section -- Logs Settings - dropLogLevels: [] + dropLogLevels: + - debug # -- Line patterns (valid RE2 regular expression)to exclude from the logs. # @section -- Logs Settings diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml index c67cd750c..3ccfb0719 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml @@ -68,7 +68,7 @@ logs: # -- The timestamp format to use for the log line, if not set the default timestamp which is the collection # will be used for the log line # @section -- Logs Settings - timestampFormat: "RFC3339Nano" + timestampFormat: RFC3339Nano # -- Whether the timestamp should be scrubbed from the log line # @section -- Logs Settings @@ -77,7 +77,8 @@ logs: # -- The log levels to drop. # Will automatically keep all log levels unless specified here. # @section -- Logs Settings - dropLogLevels: [] + dropLogLevels: + - debug # -- Line patterns (valid RE2 regular expression)to exclude from the logs. 
# @section -- Logs Settings From 27c4164b0283c46b2169c03293f56d82e7d95376 Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Mon, 3 Feb 2025 11:02:56 -0500 Subject: [PATCH 04/42] fixed scrubTimestamp bug Ensured that only a leading or trailing space is removed when the timestamp is scrubbed --- .../templates/_integration_grafana_logs.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_logs.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_logs.tpl index 3dae1d9fb..893aed6ff 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_logs.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_logs.tpl @@ -94,7 +94,7 @@ stage.match { {{- if .logs.tuning.scrubTimestamp }} // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } {{- end }} From a05ad4e31b8312eaef68d6bd46f36ccff78deb34 Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Mon, 3 Feb 2025 11:13:51 -0500 Subject: [PATCH 05/42] Update ts format --- .../feature-integrations/templates/_integration_loki_logs.tpl | 2 +- .../feature-integrations/templates/_integration_mimir_logs.tpl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_logs.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_logs.tpl index 43911eb29..c30a738a8 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_logs.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_logs.tpl @@ -105,7 +105,7 @@ stage.match { {{- if .logs.tuning.scrubTimestamp }} // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } {{- end }} diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_logs.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_logs.tpl index 835e818df..585a3ad11 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_logs.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_logs.tpl @@ -105,7 +105,7 @@ stage.match { {{- if .logs.tuning.scrubTimestamp }} // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } {{- end }} From 65c54d3b0ba3a39f0fa0894944794cfa7f2362e6 Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Mon, 3 Feb 2025 11:14:15 -0500 Subject: [PATCH 06/42] Documentation --- charts/k8s-monitoring/charts/feature-node-logs/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/k8s-monitoring/charts/feature-node-logs/values.yaml b/charts/k8s-monitoring/charts/feature-node-logs/values.yaml index ea43df0d9..a1296764c 100644 --- a/charts/k8s-monitoring/charts/feature-node-logs/values.yaml +++ b/charts/k8s-monitoring/charts/feature-node-logs/values.yaml @@ -24,7 +24,7 @@ journal: # @section -- Journal Logs formatAsJson: false - # -- The list of systemd units to keep scraped logs from. If empty, all units are scraped. 
+ # -- The list of systemd units to keep scraped logs from, this can be a valid RE2 regex. If empty, all units are scraped. # @section -- Journal Logs units: [] # - kubelet.service From 56b169a198df08416e26e84da900496ec409602f Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Mon, 3 Feb 2025 11:15:53 -0500 Subject: [PATCH 07/42] Set default allow list to null --- .../charts/feature-integrations/integrations/loki-values.yaml | 2 +- .../charts/feature-integrations/integrations/mimir-values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml index c7d268e73..f15d0f2ed 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml @@ -44,7 +44,7 @@ metrics: tuning: # -- Filter the list of metrics from Grafana Loki to the minimal set required for the Grafana Loki integration. # @section -- Metric Processing Settings - useDefaultAllowList: true + useDefaultAllowList: # -- Metrics to keep. Can use regular expressions. # @section -- Metric Processing Settings includeMetrics: [] diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml index 3ccfb0719..55a0a1379 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml @@ -44,7 +44,7 @@ metrics: tuning: # -- Filter the list of metrics from Grafana Mimir to the minimal set required for the Grafana Mimir integration. # @section -- Metric Processing Settings - useDefaultAllowList: true + useDefaultAllowList: # -- Metrics to keep. Can use regular expressions. 
# @section -- Metric Processing Settings includeMetrics: [] From 37b9bcd55d4da0921ab88b9eab946992fa335e95 Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Mon, 3 Feb 2025 11:16:09 -0500 Subject: [PATCH 08/42] Updated Meta-Monitoring Example --- .../docs/examples/meta-monitoring/values.yaml | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/values.yaml b/charts/k8s-monitoring/docs/examples/meta-monitoring/values.yaml index e4653ffbb..d01935d11 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/values.yaml +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/values.yaml @@ -107,24 +107,15 @@ clusterMetrics: enabled: false kube-state-metrics: enabled: true - namespaces: - - collectors - - logs - - metrics - - o11y - extraMetricProcessingRules: |- - rule { - action = "keep" - source_labels = ["namespace"] - regex = "collectors|logs|metrics|o11y" - } + namespaces: collectors,logs,metrics,o11y metricsTuning: useDefaultAllowList: false includeMetrics: [(.+)] node-exporter: enabled: true - useIntegrationAllowList: true deploy: true + metricsTuning: + useIntegrationAllowList: true windows-exporter: enabled: false deploy: false @@ -140,6 +131,7 @@ nodeLogs: podLogs: enabled: true + collector: alloy-singleton labelsToKeep: - app - app_kubernetes_io_name @@ -151,7 +143,6 @@ podLogs: - pod - service_name gatherMethod: kubernetesApi - collector: alloy-singleton namespaces: - collectors - logs @@ -165,6 +156,20 @@ applicationObservability: thriftHttp: enabled: true port: 14268 + processors: + k8sattributes: + metadata: + - k8s.namespace.name + - k8s.pod.name + - k8s.deployment.name + - k8s.statefulset.name + - k8s.daemonset.name + - k8s.cronjob.name + - k8s.job.name + - k8s.node.name + - k8s.pod.uid + - k8s.pod.start_time + - k8s.container.name # Collectors alloy-singleton: From 26723e53bd33ed70b146a51015e0a5f3490fadb7 Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Mon, 3 Feb 2025 11:16:17 -0500 Subject: [PATCH 09/42] Updates --- .../templates/_integration_loki_metrics.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_metrics.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_metrics.tpl index e72d2b7d9..a8a18e58e 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_metrics.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_metrics.tpl @@ -189,7 +189,7 @@ declare "loki_integration" { {{/* Inputs: integration (loki integration definition), Values (all values), Files (Files object) */}} {{- define "integrations.loki.include.metrics" }} {{- $defaultValues := "integrations/loki-values.yaml" | .Files.Get | fromYaml }} -{{- with mergeOverwrite $defaultValues (deepCopy .instance) }} +{{- with $defaultValues | merge (deepCopy .instance) }} {{- $metricAllowList := include "integrations.loki.allowList" (dict "instance" . 
"Files" $.Files) | fromYamlArray }} {{- $metricDenyList := .metrics.tuning.excludeMetrics }} {{- $labelSelectors := list }} From cc78a1c86199bb24ce5b4b5cefc5514f3a4cc025 Mon Sep 17 00:00:00 2001 From: Aaron Benton Date: Mon, 3 Feb 2025 12:12:19 -0500 Subject: [PATCH 10/42] Updated Tests and Rebuilt --- .../charts/feature-cluster-metrics/README.md | 6 +++ .../values.schema.json | 3 ++ .../docs/integrations/grafana.md | 2 +- .../docs/integrations/loki.md | 4 +- .../docs/integrations/mimir.md | 4 +- .../grafana-integration.schema.json | 5 +- .../definitions/loki-integration.schema.json | 7 ++- .../definitions/mimir-integration.schema.json | 7 ++- .../tests/grafana_logs_test.yaml | 12 +++-- .../tests/loki_logs_test.yaml | 12 +++-- .../tests/loki_metrics_test.yaml | 53 ++++++++++++++++++- .../tests/mimir_logs_test.yaml | 12 +++-- .../tests/mimir_metrics_test.yaml | 53 ++++++++++++++++++- .../feature-integrations/values.schema.json | 19 +++++-- .../charts/feature-node-logs/README.md | 2 +- .../integrations/grafana/alloy-logs.alloy | 8 ++- .../features/integrations/grafana/output.yaml | 8 ++- .../integrations/loki/alloy-logs.alloy | 8 ++- .../integrations/loki/alloy-metrics.alloy | 1 - .../features/integrations/loki/output.yaml | 9 +++- .../integrations/mimir/alloy-logs.alloy | 8 ++- .../integrations/mimir/alloy-metrics.alloy | 1 - .../features/integrations/mimir/output.yaml | 9 +++- .../docs/examples/meta-monitoring/README.md | 31 ++++++----- .../meta-monitoring/alloy-receiver.alloy | 2 +- .../meta-monitoring/alloy-singleton.alloy | 39 ++++++++------ .../docs/examples/meta-monitoring/output.yaml | 41 +++++++------- .../integration-grafana/.rendered/output.yaml | 8 ++- .../integration-loki/.rendered/output.yaml | 9 +++- 29 files changed, 294 insertions(+), 89 deletions(-) diff --git a/charts/k8s-monitoring/charts/feature-cluster-metrics/README.md b/charts/k8s-monitoring/charts/feature-cluster-metrics/README.md index f09c05277..4d21193d4 100644 --- a/charts/k8s-monitoring/charts/feature-cluster-metrics/README.md +++ b/charts/k8s-monitoring/charts/feature-cluster-metrics/README.md @@ -350,4 +350,10 @@ Be sure perform actual integration testing in a live environment in the main [k8 | windows-exporter.metricsTuning.useDefaultAllowList | bool | `true` | Filter the list of metrics from Windows Exporter to the minimal set required for Kubernetes Monitoring. | | windows-exporter.namespace | string | `""` | Namespace to locate Windows Exporter pods. If `deploy` is set to `true`, this will automatically be set to the namespace where this Helm chart is deployed. | | windows-exporter.scrapeInterval | string | `60s` | How frequently to scrape metrics from Windows Exporter. 
| + +### Other Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| kube-state-metrics.namespaces | string | `""` | | diff --git a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.schema.json b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.schema.json index 756f80bf9..719d58157 100644 --- a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.schema.json +++ b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.schema.json @@ -323,6 +323,9 @@ "namespace": { "type": "string" }, + "namespaces": { + "type": "string" + }, "nodeSelector": { "type": "object", "properties": { diff --git a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/grafana.md b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/grafana.md index 8f61c6d63..82c56fd1d 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/grafana.md +++ b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/grafana.md @@ -23,7 +23,7 @@ | Key | Type | Default | Description | |-----|------|---------|-------------| | logs.enabled | bool | `true` | Whether to enable special processing of Grafana pod logs. | -| logs.tuning.dropLogLevels | list | `[]` | The log levels to drop. Will automatically keep all log levels unless specified here. | +| logs.tuning.dropLogLevels | list | `["debug"]` | The log levels to drop. Will automatically keep all log levels unless specified here. | | logs.tuning.excludeLines | list | `[]` | Line patterns (valid RE2 regular expression)to exclude from the logs. | | logs.tuning.scrubTimestamp | bool | `true` | Whether the timestamp should be scrubbed from the log line | | logs.tuning.structuredMetadata | object | `{}` | The structured metadata mappings to set. To not set any structured metadata, set this to an empty object (e.g. `{}`) | diff --git a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/loki.md b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/loki.md index 46bf3411e..1bd2b6239 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/loki.md +++ b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/loki.md @@ -16,7 +16,7 @@ | Key | Type | Default | Description | |-----|------|---------|-------------| | logs.enabled | bool | `true` | Whether to enable special processing of Loki pod logs. | -| logs.tuning.dropLogLevels | list | `[]` | The log levels to drop. Will automatically keep all log levels unless specified here. | +| logs.tuning.dropLogLevels | list | `["debug"]` | The log levels to drop. Will automatically keep all log levels unless specified here. | | logs.tuning.excludeLines | list | `[]` | Line patterns (valid RE2 regular expression)to exclude from the logs. | | logs.tuning.scrubTimestamp | bool | `true` | Whether the timestamp should be scrubbed from the log line | | logs.tuning.structuredMetadata | object | `{}` | The structured metadata mappings to set. To not set any structured metadata, set this to an empty object (e.g. `{}`) | @@ -35,7 +35,7 @@ | metrics.maxCacheSize | string | `100000` | Sets the max_cache_size for prometheus.relabel component. This should be at least 2x-5x your largest scrape target or samples appended rate. ([docs](https://grafana.com/docs/alloy/latest/reference/components/prometheus.relabel/#arguments)) Overrides global.maxCacheSize | | metrics.tuning.excludeMetrics | list | `[]` | Metrics to drop. Can use regular expressions. 
| | metrics.tuning.includeMetrics | list | `[]` | Metrics to keep. Can use regular expressions. | -| metrics.tuning.useDefaultAllowList | bool | `true` | Filter the list of metrics from Grafana Loki to the minimal set required for the Grafana Loki integration. | +| metrics.tuning.useDefaultAllowList | string | `nil` | Filter the list of metrics from Grafana Loki to the minimal set required for the Grafana Loki integration. | ### Scrape Settings diff --git a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/mimir.md b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/mimir.md index 0bc74f30d..4eb484e22 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/mimir.md +++ b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/mimir.md @@ -15,7 +15,7 @@ | Key | Type | Default | Description | |-----|------|---------|-------------| | logs.enabled | bool | `true` | Whether to enable special processing of Mimir pod logs. | -| logs.tuning.dropLogLevels | list | `[]` | The log levels to drop. Will automatically keep all log levels unless specified here. | +| logs.tuning.dropLogLevels | list | `["debug"]` | The log levels to drop. Will automatically keep all log levels unless specified here. | | logs.tuning.excludeLines | list | `[]` | Line patterns (valid RE2 regular expression)to exclude from the logs. | | logs.tuning.scrubTimestamp | bool | `true` | Whether the timestamp should be scrubbed from the log line | | logs.tuning.structuredMetadata | object | `{}` | The structured metadata mappings to set. To not set any structured metadata, set this to an empty object (e.g. `{}`) | @@ -36,7 +36,7 @@ | metrics.maxCacheSize | string | `100000` | Sets the max_cache_size for prometheus.relabel component. This should be at least 2x-5x your largest scrape target or samples appended rate. ([docs](https://grafana.com/docs/alloy/latest/reference/components/prometheus.relabel/#arguments)) Overrides global.maxCacheSize | | metrics.tuning.excludeMetrics | list | `[]` | Metrics to drop. Can use regular expressions. | | metrics.tuning.includeMetrics | list | `[]` | Metrics to keep. Can use regular expressions. | -| metrics.tuning.useDefaultAllowList | bool | `true` | Filter the list of metrics from Grafana Mimir to the minimal set required for the Grafana Mimir integration. | +| metrics.tuning.useDefaultAllowList | string | `nil` | Filter the list of metrics from Grafana Mimir to the minimal set required for the Grafana Mimir integration. 
| ### General Settings diff --git a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/grafana-integration.schema.json b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/grafana-integration.schema.json index 1d0313bb2..7ff265eb0 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/grafana-integration.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/grafana-integration.schema.json @@ -25,7 +25,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" diff --git a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/loki-integration.schema.json b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/loki-integration.schema.json index e8f738f86..e419117b9 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/loki-integration.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/loki-integration.schema.json @@ -25,7 +25,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" @@ -68,7 +71,7 @@ "type": "array" }, "useDefaultAllowList": { - "type": "boolean" + "type": "null" } } } diff --git a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/mimir-integration.schema.json b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/mimir-integration.schema.json index 0d06db17f..5ec98e99f 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/mimir-integration.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/mimir-integration.schema.json @@ -22,7 +22,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" @@ -65,7 +68,7 @@ "type": "array" }, "useDefaultAllowList": { - "type": "boolean" + "type": "null" } } } diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_logs_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_logs_test.yaml index afd1ab9e7..c73ff5cab 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_logs_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_logs_test.yaml @@ -65,9 +65,15 @@ tests: } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } } } @@ -141,12 +147,12 @@ tests: path: data["logs.alloy"] # The pattern should look like this, but since the regex is escaped, it will be a bit different # stage.replace { - # expression = "( t=[^ ]+\\s+)" + # expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` # replace = "" # } pattern: |- \s+stage.replace { - \s+ expression = "\( t=\[\^ \]\+\\\\s\+\)" + \s+ expression = `\(\?:\^\|\\s\+\)\(t=\\d\{4\}-\\d\{2\}-\\d\{2\}T\\d\{2\}:\\d\{2\}:\\d\{2\}\\.\\d\+\[\^ \]\*\\s\+\)` \s+ replace = "" \s+} diff --git 
a/charts/k8s-monitoring/charts/feature-integrations/tests/loki_logs_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/loki_logs_test.yaml index 177a42844..b1537eb07 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/loki_logs_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/loki_logs_test.yaml @@ -72,9 +72,15 @@ tests: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } } @@ -151,12 +157,12 @@ tests: path: data["logs.alloy"] # The pattern should look like this, but since the regex is escaped, it will be a bit different # stage.replace { - # expression = "(ts=[^ ]+\\s+)" + # expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` # replace = "" # } pattern: |- \s+stage.replace { - \s+ expression = "\(ts=\[\^ \]\+\\\\s\+\)" + \s+ expression = `\(\?:\^\|\\s\+\)\(ts=\\d\{4\}-\\d\{2\}-\\d\{2\}T\\d\{2\}:\\d\{2\}:\\d\{2\}\\.\\d\+\[\^ \]\*\\s\+\)` \s+ replace = "" \s+} diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/loki_metrics_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/loki_metrics_test.yaml index d595ec1c0..e23882ddd 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/loki_metrics_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/loki_metrics_test.yaml @@ -264,7 +264,6 @@ tests: targets = loki_integration_discovery.loki.output job_label = "integrations/loki" clustering = true - keep_metrics = "up|scrape_samples_scraped|go_gc_cycles_total_gc_cycles_total|go_gc_duration_seconds|go_gc_duration_seconds_count|go_gc_duration_seconds_sum|go_gc_pauses_seconds_bucket|go_goroutines|go_memstats_heap_inuse_bytes|loki_azure_blob_request_duration_seconds_bucket|loki_azure_blob_request_duration_seconds_count|loki_bigtable_request_duration_seconds_bucket|loki_bigtable_request_duration_seconds_count|loki_bloom_blocks_cache_added_total|loki_bloom_blocks_cache_entries|loki_bloom_blocks_cache_evicted_total|loki_bloom_blocks_cache_fetched_total|loki_bloom_blocks_cache_usage_bytes|loki_bloom_chunks_indexed_total|loki_bloom_gateway_block_query_latency_seconds_bucket|loki_bloom_gateway_dequeue_duration_seconds_bucket|loki_bloom_gateway_filtered_chunks_sum|loki_bloom_gateway_filtered_series_sum|loki_bloom_gateway_inflight_tasks|loki_bloom_gateway_process_duration_seconds_bucket|loki_bloom_gateway_process_duration_seconds_count|loki_bloom_gateway_querier_chunks_filtered_total|loki_bloom_gateway_querier_chunks_skipped_total|loki_bloom_gateway_querier_chunks_total|loki_bloom_gateway_querier_series_filtered_total|loki_bloom_gateway_querier_series_skipped_total|loki_bloom_gateway_querier_series_total|loki_bloom_gateway_queue_duration_seconds_bucket|loki_bloom_gateway_queue_duration_seconds_count|loki_bloom_gateway_queue_duration_seconds_sum|loki_bloom_gateway_queue_length|loki_bloom_gateway_requested_chunks_sum|loki_bloom_gateway_requested_series_sum|loki_bloom_gateway_tasks_dequeued_bucket|loki_bloom_gateway_tasks_dequeued_total|loki_bloom_gateway_tasks_processed_total|loki_bloom_inserts_total|loki_bloom_recorder_chunks_total|loki_bloom_recorder_series_total|loki_bloom_size_bucket|loki_bloom_store_blocks_fetched_size_bytes_bucket|loki_bloom_store_blocks_fetched_sum|loki_bloom_store_downl
oad_queue_size_sum|loki_bloom_store_metas_fetched_bucket|loki_bloom_store_metas_fetched_size_bytes_bucket|loki_bloom_store_metas_fetched_sum|loki_bloom_tokens_total|loki_bloombuilder_blocks_created_total|loki_bloombuilder_blocks_reused_total|loki_bloombuilder_bytes_per_task_bucket|loki_bloombuilder_chunk_series_size_sum|loki_bloombuilder_metas_created_total|loki_bloombuilder_processing_task|loki_bloombuilder_series_per_task_bucket|loki_bloomplanner_blocks_deleted_total|loki_bloomplanner_connected_builders|loki_bloomplanner_inflight_tasks|loki_bloomplanner_metas_deleted_total|loki_bloomplanner_queue_length|loki_bloomplanner_retention_running|loki_bloomplanner_retention_time_seconds_bucket|loki_bloomplanner_tenant_tasks_completed|loki_bloomplanner_tenant_tasks_planned|loki_boltdb_shipper_compact_tables_operation_duration_seconds|loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds|loki_boltdb_shipper_compact_tables_operation_total|loki_boltdb_shipper_request_duration_seconds_bucket|loki_boltdb_shipper_request_duration_seconds_count|loki_boltdb_shipper_request_duration_seconds_sum|loki_boltdb_shipper_retention_marker_count_total|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum|loki_boltdb_shipper_retention_marker_table_processed_total|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum|loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time|loki_boltdb_shipper_retention_sweeper_marker_files_current|loki_build_info|loki_chunk_store_deduped_chunks_total|loki_chunk_store_index_entries_per_chunk_count|loki_chunk_store_index_entries_per_chunk_sum|loki_compactor_apply_retention_last_successful_run_timestamp_seconds|loki_compactor_apply_retention_operation_duration_seconds|loki_compactor_apply_retention_operation_total|loki_compactor_delete_requests_processed_total|loki_compactor_delete_requests_received_total|loki_compactor_deleted_lines|loki_compactor_load_pending_requests_attempts_total|loki_compactor_locked_table_successive_compaction_skips|loki_compactor_oldest_pending_delete_request_age_seconds|loki_compactor_pending_delete_requests_count|loki_consul_request_duration_seconds_bucket|loki_discarded_samples_total|loki_distributor_bytes_received_total|loki_distributor_ingester_append_failures_total|loki_distributor_lines_received_total|loki_distributor_structured_metadata_bytes_received_total|loki_dynamo_consumed_capacity_total|loki_dynamo_dropped_requests_total|loki_dynamo_failures_total|loki_dynamo_query_pages_count|loki_dynamo_request_duration_seconds_bucket|loki_dynamo_request_duration_seconds_count|loki_dynamo_throttled_total|loki_embeddedcache_entries|loki_embeddedcache_memory_bytes|loki_gcs_request_duration_seconds_bucket|loki_gcs_request_duration_seconds_count|loki_index_gateway_postfilter_chunks_sum|loki_index_gateway_prefilter_chunks_sum|loki_index_request_duration_seconds_bucket|loki_index_request_duration_seconds_count|loki_index_request_duration_seconds_sum|loki_ingester_chunk_age_seconds_bucket|loki_ingester_chunk_age_seconds_count|loki_ingester_chunk_age_seconds_sum|loki_ingester_chunk_bounds_hours_bucket|loki_ingester_chunk_bounds_hours_count|loki_ingester_chunk_bounds_hours_sum|loki_ingester
_chunk_entries_bucket|loki_ingester_chunk_entries_count|loki_ingester_chunk_entries_sum|loki_ingester_chunk_size_bytes_bucket|loki_ingester_chunk_utilization_bucket|loki_ingester_chunk_utilization_count|loki_ingester_chunk_utilization_sum|loki_ingester_chunks_flushed_total|loki_ingester_flush_queue_length|loki_ingester_memory_chunks|loki_ingester_memory_streams|loki_ingester_streams_created_total|loki_memcache_request_duration_seconds_bucket|loki_memcache_request_duration_seconds_count|loki_panic_total|loki_prometheus_rule_group_rules|loki_request_duration_seconds_bucket|loki_request_duration_seconds_count|loki_request_duration_seconds_sum|loki_ruler_wal_appender_ready|loki_ruler_wal_disk_size|loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds|loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds|loki_ruler_wal_prometheus_remote_storage_samples_pending|loki_ruler_wal_prometheus_remote_storage_samples_total|loki_ruler_wal_samples_appended_total|loki_ruler_wal_storage_created_series_total|loki_s3_request_duration_seconds_bucket|loki_s3_request_duration_seconds_count" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -285,3 +284,55 @@ tests: - matchRegex: path: data["metrics.alloy"] pattern: namespaces = \["k8smon"\] + + - it: should allow you to set the default allow list + set: + deployAsConfigMap: true + loki: + instances: + - name: loki + metrics: + tuning: + useDefaultAllowList: true + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|go_gc_cycles_total_gc_cycles_total.+\|loki_s3_request_duration_seconds_count" + + - it: should allow you to specific which metrics to include + set: + deployAsConfigMap: true + loki: + instances: + - name: loki + metrics: + tuning: + includeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|foo\|bar" + + - it: should allow you to specific which metrics to exclude + set: + deployAsConfigMap: true + loki: + instances: + - name: loki + metrics: + tuning: + excludeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: drop_metrics = "foo\|bar" diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_logs_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_logs_test.yaml index 721f8f4b1..d860e08ed 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_logs_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_logs_test.yaml @@ -72,9 +72,15 @@ tests: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } } @@ -151,12 +157,12 @@ tests: path: data["logs.alloy"] # The pattern should look like this, but since the regex is escaped, it will be a bit different # stage.replace { - # expression = "(ts=[^ ]+\\s+)" + # expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` # replace = "" # } pattern: |- \s+stage.replace { - \s+ expression = "\(ts=\[\^ \]\+\\\\s\+\)" + \s+ expression = 
`\(\?:\^\|\\s\+\)\(ts=\\d\{4\}-\\d\{2\}-\\d\{2\}T\\d\{2\}:\\d\{2\}:\\d\{2\}\\.\\d\+\[\^ \]\*\\s\+\)` \s+ replace = "" \s+} diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_metrics_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_metrics_test.yaml index 0136ae552..1fac4eb6e 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_metrics_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_metrics_test.yaml @@ -264,7 +264,6 @@ tests: targets = mimir_integration_discovery.mimir.output job_label = "integrations/mimir" clustering = true - keep_metrics = "up|scrape_samples_scraped|cortex_alertmanager_alerts|cortex_alertmanager_alerts_invalid_total|cortex_alertmanager_alerts_received_total|cortex_alertmanager_dispatcher_aggregation_groups|cortex_alertmanager_notification_latency_seconds_bucket|cortex_alertmanager_notification_latency_seconds_count|cortex_alertmanager_notification_latency_seconds_sum|cortex_alertmanager_notifications_failed_total|cortex_alertmanager_notifications_total|cortex_alertmanager_partial_state_merges_failed_total|cortex_alertmanager_partial_state_merges_total|cortex_alertmanager_ring_check_errors_total|cortex_alertmanager_silences|cortex_alertmanager_state_fetch_replica_state_failed_total|cortex_alertmanager_state_fetch_replica_state_total|cortex_alertmanager_state_initial_sync_completed_total|cortex_alertmanager_state_initial_sync_duration_seconds_bucket|cortex_alertmanager_state_initial_sync_duration_seconds_count|cortex_alertmanager_state_initial_sync_duration_seconds_sum|cortex_alertmanager_state_persist_failed_total|cortex_alertmanager_state_persist_total|cortex_alertmanager_state_replication_failed_total|cortex_alertmanager_state_replication_total|cortex_alertmanager_sync_configs_failed_total|cortex_alertmanager_sync_configs_total|cortex_alertmanager_tenants_discovered|cortex_alertmanager_tenants_owned|cortex_blockbuilder_consume_cycle_duration_seconds|cortex_blockbuilder_consumer_lag_records|cortex_blockbuilder_tsdb_compact_and_upload_failed_total|cortex_bucket_blocks_count|cortex_bucket_index_estimated_compaction_jobs|cortex_bucket_index_estimated_compaction_jobs_errors_total|cortex_bucket_index_last_successful_update_timestamp_seconds|cortex_bucket_store_block_drop_failures_total|cortex_bucket_store_block_drops_total|cortex_bucket_store_block_load_failures_total|cortex_bucket_store_block_loads_total|cortex_bucket_store_blocks_loaded|cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket|cortex_bucket_store_indexheader_lazy_load_duration_seconds_count|cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum|cortex_bucket_store_indexheader_lazy_load_total|cortex_bucket_store_indexheader_lazy_unload_total|cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum|cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum|cortex_bucket_store_series_blocks_queried_sum|cortex_bucket_store_series_data_size_fetched_bytes_sum|cortex_bucket_store_series_data_size_touched_bytes_sum|cortex_bucket_store_series_hash_cache_hits_total|cortex_bucket_store_series_hash_cache_requests_total|cortex_bucket_store_series_request_stage_duration_seconds_bucket|cortex_bucket_store_series_request_stage_duration_seconds_count|cortex_bucket_store_series_request_stage_duration_seconds_sum|cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds|cortex_bucket_stores_gate_duration_seconds_bucket|cortex_bucket_stores_gate_duration_seconds_count|cortex_buck
et_stores_gate_duration_seconds_sum|cortex_bucket_stores_tenants_synced|cortex_build_info|cortex_cache_memory_hits_total|cortex_cache_memory_requests_total|cortex_compactor_block_cleanup_failures_total|cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds|cortex_compactor_block_max_time_delta_seconds_bucket|cortex_compactor_block_max_time_delta_seconds_count|cortex_compactor_block_max_time_delta_seconds_sum|cortex_compactor_blocks_cleaned_total|cortex_compactor_blocks_marked_for_deletion_total|cortex_compactor_blocks_marked_for_no_compaction_total|cortex_compactor_disk_out_of_space_errors_total|cortex_compactor_group_compaction_runs_started_total|cortex_compactor_last_successful_run_timestamp_seconds|cortex_compactor_meta_sync_duration_seconds_bucket|cortex_compactor_meta_sync_duration_seconds_count|cortex_compactor_meta_sync_duration_seconds_sum|cortex_compactor_meta_sync_failures_total|cortex_compactor_meta_syncs_total|cortex_compactor_runs_completed_total|cortex_compactor_runs_failed_total|cortex_compactor_runs_started_total|cortex_compactor_tenants_discovered|cortex_compactor_tenants_processing_failed|cortex_compactor_tenants_processing_succeeded|cortex_compactor_tenants_skipped|cortex_config_hash|cortex_discarded_exemplars_total|cortex_discarded_requests_total|cortex_discarded_samples_total|cortex_distributor_deduped_samples_total|cortex_distributor_exemplars_in_total|cortex_distributor_inflight_push_requests|cortex_distributor_instance_limits|cortex_distributor_instance_rejected_requests_total|cortex_distributor_latest_seen_sample_timestamp_seconds|cortex_distributor_non_ha_samples_received_total|cortex_distributor_received_exemplars_total|cortex_distributor_received_requests_total|cortex_distributor_received_samples_total|cortex_distributor_replication_factor|cortex_distributor_requests_in_total|cortex_distributor_samples_in_total|cortex_inflight_requests|cortex_ingest_storage_reader_buffered_fetched_records|cortex_ingest_storage_reader_fetch_errors_total|cortex_ingest_storage_reader_fetches_total|cortex_ingest_storage_reader_missed_records_total|cortex_ingest_storage_reader_offset_commit_failures_total|cortex_ingest_storage_reader_offset_commit_requests_total|cortex_ingest_storage_reader_read_errors_total|cortex_ingest_storage_reader_receive_delay_seconds_count|cortex_ingest_storage_reader_receive_delay_seconds_sum|cortex_ingest_storage_reader_records_failed_total|cortex_ingest_storage_reader_records_total|cortex_ingest_storage_reader_requests_failed_total|cortex_ingest_storage_reader_requests_total|cortex_ingest_storage_strong_consistency_failures_total|cortex_ingest_storage_strong_consistency_requests_total|cortex_ingest_storage_writer_buffered_produce_bytes|cortex_ingest_storage_writer_buffered_produce_bytes_limit|cortex_ingester_active_native_histogram_buckets|cortex_ingester_active_native_histogram_buckets_custom_tracker|cortex_ingester_active_native_histogram_series|cortex_ingester_active_native_histogram_series_custom_tracker|cortex_ingester_active_series|cortex_ingester_active_series_custom_tracker|cortex_ingester_client_request_duration_seconds_bucket|cortex_ingester_client_request_duration_seconds_count|cortex_ingester_client_request_duration_seconds_sum|cortex_ingester_ingested_exemplars_total|cortex_ingester_ingested_samples_total|cortex_ingester_instance_limits|cortex_ingester_instance_rejected_requests_total|cortex_ingester_local_limits|cortex_ingester_memory_series|cortex_ingester_memory_series_created_total|cortex_ingester_memory_series_removed_total|c
ortex_ingester_memory_users|cortex_ingester_oldest_unshipped_block_timestamp_seconds|cortex_ingester_owned_series|cortex_ingester_queried_exemplars_bucket|cortex_ingester_queried_exemplars_count|cortex_ingester_queried_exemplars_sum|cortex_ingester_queried_samples_bucket|cortex_ingester_queried_samples_count|cortex_ingester_queried_samples_sum|cortex_ingester_queried_series_bucket|cortex_ingester_queried_series_count|cortex_ingester_queried_series_sum|cortex_ingester_shipper_last_successful_upload_timestamp_seconds|cortex_ingester_shipper_upload_failures_total|cortex_ingester_shipper_uploads_total|cortex_ingester_tsdb_checkpoint_creations_failed_total|cortex_ingester_tsdb_checkpoint_creations_total|cortex_ingester_tsdb_checkpoint_deletions_failed_total|cortex_ingester_tsdb_compaction_duration_seconds_bucket|cortex_ingester_tsdb_compaction_duration_seconds_count|cortex_ingester_tsdb_compaction_duration_seconds_sum|cortex_ingester_tsdb_compactions_failed_total|cortex_ingester_tsdb_compactions_total|cortex_ingester_tsdb_exemplar_exemplars_appended_total|cortex_ingester_tsdb_exemplar_exemplars_in_storage|cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds|cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage|cortex_ingester_tsdb_head_max_timestamp_seconds|cortex_ingester_tsdb_head_truncations_failed_total|cortex_ingester_tsdb_mmap_chunk_corruptions_total|cortex_ingester_tsdb_out_of_order_samples_appended_total|cortex_ingester_tsdb_storage_blocks_bytes|cortex_ingester_tsdb_symbol_table_size_bytes|cortex_ingester_tsdb_wal_corruptions_total|cortex_ingester_tsdb_wal_truncate_duration_seconds_count|cortex_ingester_tsdb_wal_truncate_duration_seconds_sum|cortex_ingester_tsdb_wal_truncations_failed_total|cortex_ingester_tsdb_wal_truncations_total|cortex_ingester_tsdb_wal_writes_failed_total|cortex_kv_request_duration_seconds_bucket|cortex_kv_request_duration_seconds_count|cortex_kv_request_duration_seconds_sum|cortex_lifecycler_read_only|cortex_limits_defaults|cortex_limits_overrides|cortex_partition_ring_partitions|cortex_prometheus_notifications_dropped_total|cortex_prometheus_notifications_errors_total|cortex_prometheus_notifications_queue_capacity|cortex_prometheus_notifications_queue_length|cortex_prometheus_notifications_sent_total|cortex_prometheus_rule_evaluation_duration_seconds_count|cortex_prometheus_rule_evaluation_duration_seconds_sum|cortex_prometheus_rule_evaluation_failures_total|cortex_prometheus_rule_evaluations_total|cortex_prometheus_rule_group_duration_seconds_count|cortex_prometheus_rule_group_duration_seconds_sum|cortex_prometheus_rule_group_iterations_missed_total|cortex_prometheus_rule_group_iterations_total|cortex_prometheus_rule_group_rules|cortex_querier_blocks_consistency_checks_failed_total|cortex_querier_blocks_consistency_checks_total|cortex_querier_request_duration_seconds_bucket|cortex_querier_request_duration_seconds_count|cortex_querier_request_duration_seconds_sum|cortex_querier_storegateway_instances_hit_per_query_bucket|cortex_querier_storegateway_instances_hit_per_query_count|cortex_querier_storegateway_instances_hit_per_query_sum|cortex_querier_storegateway_refetches_per_query_bucket|cortex_querier_storegateway_refetches_per_query_count|cortex_querier_storegateway_refetches_per_query_sum|cortex_query_frontend_queries_total|cortex_query_frontend_queue_duration_seconds_bucket|cortex_query_frontend_queue_duration_seconds_count|cortex_query_frontend_queue_duration_seconds_sum|cortex_query_frontend_queue_length|cortex_query_frontend_retries_bucket
|cortex_query_frontend_retries_count|cortex_query_frontend_retries_sum|cortex_query_scheduler_connected_querier_clients|cortex_query_scheduler_querier_inflight_requests|cortex_query_scheduler_queue_duration_seconds_bucket|cortex_query_scheduler_queue_duration_seconds_count|cortex_query_scheduler_queue_duration_seconds_sum|cortex_query_scheduler_queue_length|cortex_request_duration_seconds|cortex_request_duration_seconds_bucket|cortex_request_duration_seconds_count|cortex_request_duration_seconds_sum|cortex_ring_members|cortex_ruler_managers_total|cortex_ruler_queries_failed_total|cortex_ruler_queries_total|cortex_ruler_ring_check_errors_total|cortex_ruler_write_requests_failed_total|cortex_ruler_write_requests_total|cortex_runtime_config_hash|cortex_runtime_config_last_reload_successful|cortex_tcp_connections|cortex_tcp_connections_limit|go_memstats_heap_inuse_bytes|keda_scaler_errors|keda_scaler_metrics_value|kube_deployment_spec_replicas|kube_deployment_status_replicas_unavailable|kube_deployment_status_replicas_updated|kube_endpoint_address|kube_horizontalpodautoscaler_spec_target_metric|kube_horizontalpodautoscaler_status_condition|kube_pod_info|kube_statefulset_replicas|kube_statefulset_status_current_revision|kube_statefulset_status_replicas_current|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas_updated|kube_statefulset_status_update_revision|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_used_bytes|memberlist_client_cluster_members_count|memcached_limit_bytes|mimir_continuous_test_queries_failed_total|mimir_continuous_test_query_result_checks_failed_total|mimir_continuous_test_writes_failed_total|node_disk_read_bytes_total|node_disk_written_bytes_total|process_memory_map_areas|process_memory_map_areas_limit|prometheus_tsdb_compaction_duration_seconds_bucket|prometheus_tsdb_compaction_duration_seconds_count|prometheus_tsdb_compaction_duration_seconds_sum|prometheus_tsdb_compactions_total|rollout_operator_last_successful_group_reconcile_timestamp_seconds|thanos_cache_hits_total|thanos_cache_operation_duration_seconds_bucket|thanos_cache_operation_duration_seconds_count|thanos_cache_operation_duration_seconds_sum|thanos_cache_operation_failures_total|thanos_cache_operations_total|thanos_cache_requests_total|thanos_objstore_bucket_last_successful_upload_time|thanos_objstore_bucket_operation_duration_seconds_bucket|thanos_objstore_bucket_operation_duration_seconds_count|thanos_objstore_bucket_operation_duration_seconds_sum|thanos_objstore_bucket_operation_failures_total|thanos_objstore_bucket_operations_total|thanos_store_index_cache_hits_total|thanos_store_index_cache_requests_total" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -285,3 +284,55 @@ tests: - matchRegex: path: data["metrics.alloy"] pattern: namespaces = \["k8smon"\] + + - it: should allow you to set the default allow list + set: + deployAsConfigMap: true + mimir: + instances: + - name: mimir + metrics: + tuning: + useDefaultAllowList: true + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|cortex_alertmanager_alerts.+\|thanos_store_index_cache_requests_total" + + - it: should allow you to specific which metrics to include + set: + deployAsConfigMap: true + mimir: + instances: + - name: mimir + metrics: + tuning: + includeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: 
keep_metrics = "up\|scrape_samples_scraped\|foo\|bar" + + - it: should allow you to specific which metrics to exclude + set: + deployAsConfigMap: true + mimir: + instances: + - name: mimir + metrics: + tuning: + excludeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: drop_metrics = "foo\|bar" diff --git a/charts/k8s-monitoring/charts/feature-integrations/values.schema.json b/charts/k8s-monitoring/charts/feature-integrations/values.schema.json index 02b811183..a9466374c 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/values.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/values.schema.json @@ -332,7 +332,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" @@ -430,7 +433,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" @@ -473,7 +479,7 @@ "type": "array" }, "useDefaultAllowList": { - "type": "boolean" + "type": "null" } } } @@ -515,7 +521,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" @@ -558,7 +567,7 @@ "type": "array" }, "useDefaultAllowList": { - "type": "boolean" + "type": "null" } } } diff --git a/charts/k8s-monitoring/charts/feature-node-logs/README.md b/charts/k8s-monitoring/charts/feature-node-logs/README.md index de8e19c4a..3c8ee0c35 100644 --- a/charts/k8s-monitoring/charts/feature-node-logs/README.md +++ b/charts/k8s-monitoring/charts/feature-node-logs/README.md @@ -62,7 +62,7 @@ Be sure perform actual integration testing in a live environment in the main [k8 | journal.jobLabel | string | `"integrations/kubernetes/journal"` | The value for the job label for journal logs. | | journal.maxAge | string | `"8h"` | The path to the journal logs on the worker node. | | journal.path | string | `"/var/log/journal"` | The path to the journal logs on the worker node. | -| journal.units | list | `[]` | The list of systemd units to keep scraped logs from. If empty, all units are scraped. | +| journal.units | list | `[]` | The list of systemd units to keep scraped logs from, this can be a valid RE2 regex. If empty, all units are scraped. | ### General settings diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy index 5d945b97f..35cd6ad1f 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy @@ -226,9 +226,15 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } } // Only keep the labels that are defined in the `keepLabels` list. 
diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml index fe86f9b6b..b27b49a78 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml @@ -602,9 +602,15 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } } // Only keep the labels that are defined in the `keepLabels` list. diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy index 799b7727b..3d105ffe5 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy @@ -233,9 +233,15 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-metrics.alloy index ab69722b5..d5aa6d98f 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-metrics.alloy @@ -293,7 +293,6 @@ declare "loki_integration" { targets = loki_integration_discovery.loki.output job_label = "integrations/loki" clustering = true - keep_metrics = 
"up|scrape_samples_scraped|go_gc_cycles_total_gc_cycles_total|go_gc_duration_seconds|go_gc_duration_seconds_count|go_gc_duration_seconds_sum|go_gc_pauses_seconds_bucket|go_goroutines|go_memstats_heap_inuse_bytes|loki_azure_blob_request_duration_seconds_bucket|loki_azure_blob_request_duration_seconds_count|loki_bigtable_request_duration_seconds_bucket|loki_bigtable_request_duration_seconds_count|loki_bloom_blocks_cache_added_total|loki_bloom_blocks_cache_entries|loki_bloom_blocks_cache_evicted_total|loki_bloom_blocks_cache_fetched_total|loki_bloom_blocks_cache_usage_bytes|loki_bloom_chunks_indexed_total|loki_bloom_gateway_block_query_latency_seconds_bucket|loki_bloom_gateway_dequeue_duration_seconds_bucket|loki_bloom_gateway_filtered_chunks_sum|loki_bloom_gateway_filtered_series_sum|loki_bloom_gateway_inflight_tasks|loki_bloom_gateway_process_duration_seconds_bucket|loki_bloom_gateway_process_duration_seconds_count|loki_bloom_gateway_querier_chunks_filtered_total|loki_bloom_gateway_querier_chunks_skipped_total|loki_bloom_gateway_querier_chunks_total|loki_bloom_gateway_querier_series_filtered_total|loki_bloom_gateway_querier_series_skipped_total|loki_bloom_gateway_querier_series_total|loki_bloom_gateway_queue_duration_seconds_bucket|loki_bloom_gateway_queue_duration_seconds_count|loki_bloom_gateway_queue_duration_seconds_sum|loki_bloom_gateway_queue_length|loki_bloom_gateway_requested_chunks_sum|loki_bloom_gateway_requested_series_sum|loki_bloom_gateway_tasks_dequeued_bucket|loki_bloom_gateway_tasks_dequeued_total|loki_bloom_gateway_tasks_processed_total|loki_bloom_inserts_total|loki_bloom_recorder_chunks_total|loki_bloom_recorder_series_total|loki_bloom_size_bucket|loki_bloom_store_blocks_fetched_size_bytes_bucket|loki_bloom_store_blocks_fetched_sum|loki_bloom_store_download_queue_size_sum|loki_bloom_store_metas_fetched_bucket|loki_bloom_store_metas_fetched_size_bytes_bucket|loki_bloom_store_metas_fetched_sum|loki_bloom_tokens_total|loki_bloombuilder_blocks_created_total|loki_bloombuilder_blocks_reused_total|loki_bloombuilder_bytes_per_task_bucket|loki_bloombuilder_chunk_series_size_sum|loki_bloombuilder_metas_created_total|loki_bloombuilder_processing_task|loki_bloombuilder_series_per_task_bucket|loki_bloomplanner_blocks_deleted_total|loki_bloomplanner_connected_builders|loki_bloomplanner_inflight_tasks|loki_bloomplanner_metas_deleted_total|loki_bloomplanner_queue_length|loki_bloomplanner_retention_running|loki_bloomplanner_retention_time_seconds_bucket|loki_bloomplanner_tenant_tasks_completed|loki_bloomplanner_tenant_tasks_planned|loki_boltdb_shipper_compact_tables_operation_duration_seconds|loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds|loki_boltdb_shipper_compact_tables_operation_total|loki_boltdb_shipper_request_duration_seconds_bucket|loki_boltdb_shipper_request_duration_seconds_count|loki_boltdb_shipper_request_duration_seconds_sum|loki_boltdb_shipper_retention_marker_count_total|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum|loki_boltdb_shipper_retention_marker_table_processed_total|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum|loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_t
ime|loki_boltdb_shipper_retention_sweeper_marker_files_current|loki_build_info|loki_chunk_store_deduped_chunks_total|loki_chunk_store_index_entries_per_chunk_count|loki_chunk_store_index_entries_per_chunk_sum|loki_compactor_apply_retention_last_successful_run_timestamp_seconds|loki_compactor_apply_retention_operation_duration_seconds|loki_compactor_apply_retention_operation_total|loki_compactor_delete_requests_processed_total|loki_compactor_delete_requests_received_total|loki_compactor_deleted_lines|loki_compactor_load_pending_requests_attempts_total|loki_compactor_locked_table_successive_compaction_skips|loki_compactor_oldest_pending_delete_request_age_seconds|loki_compactor_pending_delete_requests_count|loki_consul_request_duration_seconds_bucket|loki_discarded_samples_total|loki_distributor_bytes_received_total|loki_distributor_ingester_append_failures_total|loki_distributor_lines_received_total|loki_distributor_structured_metadata_bytes_received_total|loki_dynamo_consumed_capacity_total|loki_dynamo_dropped_requests_total|loki_dynamo_failures_total|loki_dynamo_query_pages_count|loki_dynamo_request_duration_seconds_bucket|loki_dynamo_request_duration_seconds_count|loki_dynamo_throttled_total|loki_embeddedcache_entries|loki_embeddedcache_memory_bytes|loki_gcs_request_duration_seconds_bucket|loki_gcs_request_duration_seconds_count|loki_index_gateway_postfilter_chunks_sum|loki_index_gateway_prefilter_chunks_sum|loki_index_request_duration_seconds_bucket|loki_index_request_duration_seconds_count|loki_index_request_duration_seconds_sum|loki_ingester_chunk_age_seconds_bucket|loki_ingester_chunk_age_seconds_count|loki_ingester_chunk_age_seconds_sum|loki_ingester_chunk_bounds_hours_bucket|loki_ingester_chunk_bounds_hours_count|loki_ingester_chunk_bounds_hours_sum|loki_ingester_chunk_entries_bucket|loki_ingester_chunk_entries_count|loki_ingester_chunk_entries_sum|loki_ingester_chunk_size_bytes_bucket|loki_ingester_chunk_utilization_bucket|loki_ingester_chunk_utilization_count|loki_ingester_chunk_utilization_sum|loki_ingester_chunks_flushed_total|loki_ingester_flush_queue_length|loki_ingester_memory_chunks|loki_ingester_memory_streams|loki_ingester_streams_created_total|loki_memcache_request_duration_seconds_bucket|loki_memcache_request_duration_seconds_count|loki_panic_total|loki_prometheus_rule_group_rules|loki_request_duration_seconds_bucket|loki_request_duration_seconds_count|loki_request_duration_seconds_sum|loki_ruler_wal_appender_ready|loki_ruler_wal_disk_size|loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds|loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds|loki_ruler_wal_prometheus_remote_storage_samples_pending|loki_ruler_wal_prometheus_remote_storage_samples_total|loki_ruler_wal_samples_appended_total|loki_ruler_wal_storage_created_series_total|loki_s3_request_duration_seconds_bucket|loki_s3_request_duration_seconds_count" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml index c97ef1457..376a46aa6 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml @@ -334,7 +334,6 @@ data: targets = loki_integration_discovery.loki.output job_label = "integrations/loki" clustering = true - keep_metrics = 
"up|scrape_samples_scraped|go_gc_cycles_total_gc_cycles_total|go_gc_duration_seconds|go_gc_duration_seconds_count|go_gc_duration_seconds_sum|go_gc_pauses_seconds_bucket|go_goroutines|go_memstats_heap_inuse_bytes|loki_azure_blob_request_duration_seconds_bucket|loki_azure_blob_request_duration_seconds_count|loki_bigtable_request_duration_seconds_bucket|loki_bigtable_request_duration_seconds_count|loki_bloom_blocks_cache_added_total|loki_bloom_blocks_cache_entries|loki_bloom_blocks_cache_evicted_total|loki_bloom_blocks_cache_fetched_total|loki_bloom_blocks_cache_usage_bytes|loki_bloom_chunks_indexed_total|loki_bloom_gateway_block_query_latency_seconds_bucket|loki_bloom_gateway_dequeue_duration_seconds_bucket|loki_bloom_gateway_filtered_chunks_sum|loki_bloom_gateway_filtered_series_sum|loki_bloom_gateway_inflight_tasks|loki_bloom_gateway_process_duration_seconds_bucket|loki_bloom_gateway_process_duration_seconds_count|loki_bloom_gateway_querier_chunks_filtered_total|loki_bloom_gateway_querier_chunks_skipped_total|loki_bloom_gateway_querier_chunks_total|loki_bloom_gateway_querier_series_filtered_total|loki_bloom_gateway_querier_series_skipped_total|loki_bloom_gateway_querier_series_total|loki_bloom_gateway_queue_duration_seconds_bucket|loki_bloom_gateway_queue_duration_seconds_count|loki_bloom_gateway_queue_duration_seconds_sum|loki_bloom_gateway_queue_length|loki_bloom_gateway_requested_chunks_sum|loki_bloom_gateway_requested_series_sum|loki_bloom_gateway_tasks_dequeued_bucket|loki_bloom_gateway_tasks_dequeued_total|loki_bloom_gateway_tasks_processed_total|loki_bloom_inserts_total|loki_bloom_recorder_chunks_total|loki_bloom_recorder_series_total|loki_bloom_size_bucket|loki_bloom_store_blocks_fetched_size_bytes_bucket|loki_bloom_store_blocks_fetched_sum|loki_bloom_store_download_queue_size_sum|loki_bloom_store_metas_fetched_bucket|loki_bloom_store_metas_fetched_size_bytes_bucket|loki_bloom_store_metas_fetched_sum|loki_bloom_tokens_total|loki_bloombuilder_blocks_created_total|loki_bloombuilder_blocks_reused_total|loki_bloombuilder_bytes_per_task_bucket|loki_bloombuilder_chunk_series_size_sum|loki_bloombuilder_metas_created_total|loki_bloombuilder_processing_task|loki_bloombuilder_series_per_task_bucket|loki_bloomplanner_blocks_deleted_total|loki_bloomplanner_connected_builders|loki_bloomplanner_inflight_tasks|loki_bloomplanner_metas_deleted_total|loki_bloomplanner_queue_length|loki_bloomplanner_retention_running|loki_bloomplanner_retention_time_seconds_bucket|loki_bloomplanner_tenant_tasks_completed|loki_bloomplanner_tenant_tasks_planned|loki_boltdb_shipper_compact_tables_operation_duration_seconds|loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds|loki_boltdb_shipper_compact_tables_operation_total|loki_boltdb_shipper_request_duration_seconds_bucket|loki_boltdb_shipper_request_duration_seconds_count|loki_boltdb_shipper_request_duration_seconds_sum|loki_boltdb_shipper_retention_marker_count_total|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum|loki_boltdb_shipper_retention_marker_table_processed_total|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum|loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_t
ime|loki_boltdb_shipper_retention_sweeper_marker_files_current|loki_build_info|loki_chunk_store_deduped_chunks_total|loki_chunk_store_index_entries_per_chunk_count|loki_chunk_store_index_entries_per_chunk_sum|loki_compactor_apply_retention_last_successful_run_timestamp_seconds|loki_compactor_apply_retention_operation_duration_seconds|loki_compactor_apply_retention_operation_total|loki_compactor_delete_requests_processed_total|loki_compactor_delete_requests_received_total|loki_compactor_deleted_lines|loki_compactor_load_pending_requests_attempts_total|loki_compactor_locked_table_successive_compaction_skips|loki_compactor_oldest_pending_delete_request_age_seconds|loki_compactor_pending_delete_requests_count|loki_consul_request_duration_seconds_bucket|loki_discarded_samples_total|loki_distributor_bytes_received_total|loki_distributor_ingester_append_failures_total|loki_distributor_lines_received_total|loki_distributor_structured_metadata_bytes_received_total|loki_dynamo_consumed_capacity_total|loki_dynamo_dropped_requests_total|loki_dynamo_failures_total|loki_dynamo_query_pages_count|loki_dynamo_request_duration_seconds_bucket|loki_dynamo_request_duration_seconds_count|loki_dynamo_throttled_total|loki_embeddedcache_entries|loki_embeddedcache_memory_bytes|loki_gcs_request_duration_seconds_bucket|loki_gcs_request_duration_seconds_count|loki_index_gateway_postfilter_chunks_sum|loki_index_gateway_prefilter_chunks_sum|loki_index_request_duration_seconds_bucket|loki_index_request_duration_seconds_count|loki_index_request_duration_seconds_sum|loki_ingester_chunk_age_seconds_bucket|loki_ingester_chunk_age_seconds_count|loki_ingester_chunk_age_seconds_sum|loki_ingester_chunk_bounds_hours_bucket|loki_ingester_chunk_bounds_hours_count|loki_ingester_chunk_bounds_hours_sum|loki_ingester_chunk_entries_bucket|loki_ingester_chunk_entries_count|loki_ingester_chunk_entries_sum|loki_ingester_chunk_size_bytes_bucket|loki_ingester_chunk_utilization_bucket|loki_ingester_chunk_utilization_count|loki_ingester_chunk_utilization_sum|loki_ingester_chunks_flushed_total|loki_ingester_flush_queue_length|loki_ingester_memory_chunks|loki_ingester_memory_streams|loki_ingester_streams_created_total|loki_memcache_request_duration_seconds_bucket|loki_memcache_request_duration_seconds_count|loki_panic_total|loki_prometheus_rule_group_rules|loki_request_duration_seconds_bucket|loki_request_duration_seconds_count|loki_request_duration_seconds_sum|loki_ruler_wal_appender_ready|loki_ruler_wal_disk_size|loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds|loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds|loki_ruler_wal_prometheus_remote_storage_samples_pending|loki_ruler_wal_prometheus_remote_storage_samples_total|loki_ruler_wal_samples_appended_total|loki_ruler_wal_storage_created_series_total|loki_s3_request_duration_seconds_bucket|loki_s3_request_duration_seconds_count" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -644,9 +643,15 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy 
b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy index 3c67e92be..6c0d79344 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy @@ -233,9 +233,15 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-metrics.alloy index 3867f6bb5..5486e782f 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-metrics.alloy @@ -293,7 +293,6 @@ declare "mimir_integration" { targets = mimir_integration_discovery.mimir.output job_label = "integrations/mimir" clustering = true - keep_metrics = "up|scrape_samples_scraped|cortex_alertmanager_alerts|cortex_alertmanager_alerts_invalid_total|cortex_alertmanager_alerts_received_total|cortex_alertmanager_dispatcher_aggregation_groups|cortex_alertmanager_notification_latency_seconds_bucket|cortex_alertmanager_notification_latency_seconds_count|cortex_alertmanager_notification_latency_seconds_sum|cortex_alertmanager_notifications_failed_total|cortex_alertmanager_notifications_total|cortex_alertmanager_partial_state_merges_failed_total|cortex_alertmanager_partial_state_merges_total|cortex_alertmanager_ring_check_errors_total|cortex_alertmanager_silences|cortex_alertmanager_state_fetch_replica_state_failed_total|cortex_alertmanager_state_fetch_replica_state_total|cortex_alertmanager_state_initial_sync_completed_total|cortex_alertmanager_state_initial_sync_duration_seconds_bucket|cortex_alertmanager_state_initial_sync_duration_seconds_count|cortex_alertmanager_state_initial_sync_duration_seconds_sum|cortex_alertmanager_state_persist_failed_total|cortex_alertmanager_state_persist_total|cortex_alertmanager_state_replication_failed_total|cortex_alertmanager_state_replication_total|cortex_alertmanager_sync_configs_failed_total|cortex_alertmanager_sync_configs_total|cortex_alertmanager_tenants_discovered|cortex_alertmanager_tenants_owned|cortex_blockbuilder_consume_cycle_duration_seconds|cortex_blockbuilder_consumer_lag_records|cortex_blockbuilder_tsdb_compact_and_upload_failed_total|cortex_bucket_blocks_count|cortex_bucket_index_estimated_compaction_jobs|cortex_bucket_index_estimated_compaction_jobs_errors_total|cortex_bucket_index_last_successful_update_timestamp_seconds|cortex_bucket_store_block_drop_failures_total|cortex_bucket_store_block_drops_total|cortex_bucket_store_block_load_failures_total|cortex_bucket_store_block_loads_total|cortex_bucket_store_blocks_loaded|cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket|cortex_bucket_store_indexheader_lazy_load_duration_seconds_count|cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum|cortex_bucket_store_indexheader_lazy_load_total|cortex_bucket_store_indexheader_lazy_unload_total|cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum|cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum|cortex_bucket_store_series_blocks_queried_sum
|cortex_bucket_store_series_data_size_fetched_bytes_sum|cortex_bucket_store_series_data_size_touched_bytes_sum|cortex_bucket_store_series_hash_cache_hits_total|cortex_bucket_store_series_hash_cache_requests_total|cortex_bucket_store_series_request_stage_duration_seconds_bucket|cortex_bucket_store_series_request_stage_duration_seconds_count|cortex_bucket_store_series_request_stage_duration_seconds_sum|cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds|cortex_bucket_stores_gate_duration_seconds_bucket|cortex_bucket_stores_gate_duration_seconds_count|cortex_bucket_stores_gate_duration_seconds_sum|cortex_bucket_stores_tenants_synced|cortex_build_info|cortex_cache_memory_hits_total|cortex_cache_memory_requests_total|cortex_compactor_block_cleanup_failures_total|cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds|cortex_compactor_block_max_time_delta_seconds_bucket|cortex_compactor_block_max_time_delta_seconds_count|cortex_compactor_block_max_time_delta_seconds_sum|cortex_compactor_blocks_cleaned_total|cortex_compactor_blocks_marked_for_deletion_total|cortex_compactor_blocks_marked_for_no_compaction_total|cortex_compactor_disk_out_of_space_errors_total|cortex_compactor_group_compaction_runs_started_total|cortex_compactor_last_successful_run_timestamp_seconds|cortex_compactor_meta_sync_duration_seconds_bucket|cortex_compactor_meta_sync_duration_seconds_count|cortex_compactor_meta_sync_duration_seconds_sum|cortex_compactor_meta_sync_failures_total|cortex_compactor_meta_syncs_total|cortex_compactor_runs_completed_total|cortex_compactor_runs_failed_total|cortex_compactor_runs_started_total|cortex_compactor_tenants_discovered|cortex_compactor_tenants_processing_failed|cortex_compactor_tenants_processing_succeeded|cortex_compactor_tenants_skipped|cortex_config_hash|cortex_discarded_exemplars_total|cortex_discarded_requests_total|cortex_discarded_samples_total|cortex_distributor_deduped_samples_total|cortex_distributor_exemplars_in_total|cortex_distributor_inflight_push_requests|cortex_distributor_instance_limits|cortex_distributor_instance_rejected_requests_total|cortex_distributor_latest_seen_sample_timestamp_seconds|cortex_distributor_non_ha_samples_received_total|cortex_distributor_received_exemplars_total|cortex_distributor_received_requests_total|cortex_distributor_received_samples_total|cortex_distributor_replication_factor|cortex_distributor_requests_in_total|cortex_distributor_samples_in_total|cortex_inflight_requests|cortex_ingest_storage_reader_buffered_fetched_records|cortex_ingest_storage_reader_fetch_errors_total|cortex_ingest_storage_reader_fetches_total|cortex_ingest_storage_reader_missed_records_total|cortex_ingest_storage_reader_offset_commit_failures_total|cortex_ingest_storage_reader_offset_commit_requests_total|cortex_ingest_storage_reader_read_errors_total|cortex_ingest_storage_reader_receive_delay_seconds_count|cortex_ingest_storage_reader_receive_delay_seconds_sum|cortex_ingest_storage_reader_records_failed_total|cortex_ingest_storage_reader_records_total|cortex_ingest_storage_reader_requests_failed_total|cortex_ingest_storage_reader_requests_total|cortex_ingest_storage_strong_consistency_failures_total|cortex_ingest_storage_strong_consistency_requests_total|cortex_ingest_storage_writer_buffered_produce_bytes|cortex_ingest_storage_writer_buffered_produce_bytes_limit|cortex_ingester_active_native_histogram_buckets|cortex_ingester_active_native_histogram_buckets_custom_tracker|cortex_ingester_active_native_histogram_series|cortex_ingester_active_nat
ive_histogram_series_custom_tracker|cortex_ingester_active_series|cortex_ingester_active_series_custom_tracker|cortex_ingester_client_request_duration_seconds_bucket|cortex_ingester_client_request_duration_seconds_count|cortex_ingester_client_request_duration_seconds_sum|cortex_ingester_ingested_exemplars_total|cortex_ingester_ingested_samples_total|cortex_ingester_instance_limits|cortex_ingester_instance_rejected_requests_total|cortex_ingester_local_limits|cortex_ingester_memory_series|cortex_ingester_memory_series_created_total|cortex_ingester_memory_series_removed_total|cortex_ingester_memory_users|cortex_ingester_oldest_unshipped_block_timestamp_seconds|cortex_ingester_owned_series|cortex_ingester_queried_exemplars_bucket|cortex_ingester_queried_exemplars_count|cortex_ingester_queried_exemplars_sum|cortex_ingester_queried_samples_bucket|cortex_ingester_queried_samples_count|cortex_ingester_queried_samples_sum|cortex_ingester_queried_series_bucket|cortex_ingester_queried_series_count|cortex_ingester_queried_series_sum|cortex_ingester_shipper_last_successful_upload_timestamp_seconds|cortex_ingester_shipper_upload_failures_total|cortex_ingester_shipper_uploads_total|cortex_ingester_tsdb_checkpoint_creations_failed_total|cortex_ingester_tsdb_checkpoint_creations_total|cortex_ingester_tsdb_checkpoint_deletions_failed_total|cortex_ingester_tsdb_compaction_duration_seconds_bucket|cortex_ingester_tsdb_compaction_duration_seconds_count|cortex_ingester_tsdb_compaction_duration_seconds_sum|cortex_ingester_tsdb_compactions_failed_total|cortex_ingester_tsdb_compactions_total|cortex_ingester_tsdb_exemplar_exemplars_appended_total|cortex_ingester_tsdb_exemplar_exemplars_in_storage|cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds|cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage|cortex_ingester_tsdb_head_max_timestamp_seconds|cortex_ingester_tsdb_head_truncations_failed_total|cortex_ingester_tsdb_mmap_chunk_corruptions_total|cortex_ingester_tsdb_out_of_order_samples_appended_total|cortex_ingester_tsdb_storage_blocks_bytes|cortex_ingester_tsdb_symbol_table_size_bytes|cortex_ingester_tsdb_wal_corruptions_total|cortex_ingester_tsdb_wal_truncate_duration_seconds_count|cortex_ingester_tsdb_wal_truncate_duration_seconds_sum|cortex_ingester_tsdb_wal_truncations_failed_total|cortex_ingester_tsdb_wal_truncations_total|cortex_ingester_tsdb_wal_writes_failed_total|cortex_kv_request_duration_seconds_bucket|cortex_kv_request_duration_seconds_count|cortex_kv_request_duration_seconds_sum|cortex_lifecycler_read_only|cortex_limits_defaults|cortex_limits_overrides|cortex_partition_ring_partitions|cortex_prometheus_notifications_dropped_total|cortex_prometheus_notifications_errors_total|cortex_prometheus_notifications_queue_capacity|cortex_prometheus_notifications_queue_length|cortex_prometheus_notifications_sent_total|cortex_prometheus_rule_evaluation_duration_seconds_count|cortex_prometheus_rule_evaluation_duration_seconds_sum|cortex_prometheus_rule_evaluation_failures_total|cortex_prometheus_rule_evaluations_total|cortex_prometheus_rule_group_duration_seconds_count|cortex_prometheus_rule_group_duration_seconds_sum|cortex_prometheus_rule_group_iterations_missed_total|cortex_prometheus_rule_group_iterations_total|cortex_prometheus_rule_group_rules|cortex_querier_blocks_consistency_checks_failed_total|cortex_querier_blocks_consistency_checks_total|cortex_querier_request_duration_seconds_bucket|cortex_querier_request_duration_seconds_count|cortex_querier_request_duration_seconds_sum|cortex_queri
er_storegateway_instances_hit_per_query_bucket|cortex_querier_storegateway_instances_hit_per_query_count|cortex_querier_storegateway_instances_hit_per_query_sum|cortex_querier_storegateway_refetches_per_query_bucket|cortex_querier_storegateway_refetches_per_query_count|cortex_querier_storegateway_refetches_per_query_sum|cortex_query_frontend_queries_total|cortex_query_frontend_queue_duration_seconds_bucket|cortex_query_frontend_queue_duration_seconds_count|cortex_query_frontend_queue_duration_seconds_sum|cortex_query_frontend_queue_length|cortex_query_frontend_retries_bucket|cortex_query_frontend_retries_count|cortex_query_frontend_retries_sum|cortex_query_scheduler_connected_querier_clients|cortex_query_scheduler_querier_inflight_requests|cortex_query_scheduler_queue_duration_seconds_bucket|cortex_query_scheduler_queue_duration_seconds_count|cortex_query_scheduler_queue_duration_seconds_sum|cortex_query_scheduler_queue_length|cortex_request_duration_seconds|cortex_request_duration_seconds_bucket|cortex_request_duration_seconds_count|cortex_request_duration_seconds_sum|cortex_ring_members|cortex_ruler_managers_total|cortex_ruler_queries_failed_total|cortex_ruler_queries_total|cortex_ruler_ring_check_errors_total|cortex_ruler_write_requests_failed_total|cortex_ruler_write_requests_total|cortex_runtime_config_hash|cortex_runtime_config_last_reload_successful|cortex_tcp_connections|cortex_tcp_connections_limit|go_memstats_heap_inuse_bytes|keda_scaler_errors|keda_scaler_metrics_value|kube_deployment_spec_replicas|kube_deployment_status_replicas_unavailable|kube_deployment_status_replicas_updated|kube_endpoint_address|kube_horizontalpodautoscaler_spec_target_metric|kube_horizontalpodautoscaler_status_condition|kube_pod_info|kube_statefulset_replicas|kube_statefulset_status_current_revision|kube_statefulset_status_replicas_current|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas_updated|kube_statefulset_status_update_revision|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_used_bytes|memberlist_client_cluster_members_count|memcached_limit_bytes|mimir_continuous_test_queries_failed_total|mimir_continuous_test_query_result_checks_failed_total|mimir_continuous_test_writes_failed_total|node_disk_read_bytes_total|node_disk_written_bytes_total|process_memory_map_areas|process_memory_map_areas_limit|prometheus_tsdb_compaction_duration_seconds_bucket|prometheus_tsdb_compaction_duration_seconds_count|prometheus_tsdb_compaction_duration_seconds_sum|prometheus_tsdb_compactions_total|rollout_operator_last_successful_group_reconcile_timestamp_seconds|thanos_cache_hits_total|thanos_cache_operation_duration_seconds_bucket|thanos_cache_operation_duration_seconds_count|thanos_cache_operation_duration_seconds_sum|thanos_cache_operation_failures_total|thanos_cache_operations_total|thanos_cache_requests_total|thanos_objstore_bucket_last_successful_upload_time|thanos_objstore_bucket_operation_duration_seconds_bucket|thanos_objstore_bucket_operation_duration_seconds_count|thanos_objstore_bucket_operation_duration_seconds_sum|thanos_objstore_bucket_operation_failures_total|thanos_objstore_bucket_operations_total|thanos_store_index_cache_hits_total|thanos_store_index_cache_requests_total" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml index 03e0a948a..c63ef0736 100644 --- 
a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml @@ -334,7 +334,6 @@ data: targets = mimir_integration_discovery.mimir.output job_label = "integrations/mimir" clustering = true - keep_metrics = "up|scrape_samples_scraped|cortex_alertmanager_alerts|cortex_alertmanager_alerts_invalid_total|cortex_alertmanager_alerts_received_total|cortex_alertmanager_dispatcher_aggregation_groups|cortex_alertmanager_notification_latency_seconds_bucket|cortex_alertmanager_notification_latency_seconds_count|cortex_alertmanager_notification_latency_seconds_sum|cortex_alertmanager_notifications_failed_total|cortex_alertmanager_notifications_total|cortex_alertmanager_partial_state_merges_failed_total|cortex_alertmanager_partial_state_merges_total|cortex_alertmanager_ring_check_errors_total|cortex_alertmanager_silences|cortex_alertmanager_state_fetch_replica_state_failed_total|cortex_alertmanager_state_fetch_replica_state_total|cortex_alertmanager_state_initial_sync_completed_total|cortex_alertmanager_state_initial_sync_duration_seconds_bucket|cortex_alertmanager_state_initial_sync_duration_seconds_count|cortex_alertmanager_state_initial_sync_duration_seconds_sum|cortex_alertmanager_state_persist_failed_total|cortex_alertmanager_state_persist_total|cortex_alertmanager_state_replication_failed_total|cortex_alertmanager_state_replication_total|cortex_alertmanager_sync_configs_failed_total|cortex_alertmanager_sync_configs_total|cortex_alertmanager_tenants_discovered|cortex_alertmanager_tenants_owned|cortex_blockbuilder_consume_cycle_duration_seconds|cortex_blockbuilder_consumer_lag_records|cortex_blockbuilder_tsdb_compact_and_upload_failed_total|cortex_bucket_blocks_count|cortex_bucket_index_estimated_compaction_jobs|cortex_bucket_index_estimated_compaction_jobs_errors_total|cortex_bucket_index_last_successful_update_timestamp_seconds|cortex_bucket_store_block_drop_failures_total|cortex_bucket_store_block_drops_total|cortex_bucket_store_block_load_failures_total|cortex_bucket_store_block_loads_total|cortex_bucket_store_blocks_loaded|cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket|cortex_bucket_store_indexheader_lazy_load_duration_seconds_count|cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum|cortex_bucket_store_indexheader_lazy_load_total|cortex_bucket_store_indexheader_lazy_unload_total|cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum|cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum|cortex_bucket_store_series_blocks_queried_sum|cortex_bucket_store_series_data_size_fetched_bytes_sum|cortex_bucket_store_series_data_size_touched_bytes_sum|cortex_bucket_store_series_hash_cache_hits_total|cortex_bucket_store_series_hash_cache_requests_total|cortex_bucket_store_series_request_stage_duration_seconds_bucket|cortex_bucket_store_series_request_stage_duration_seconds_count|cortex_bucket_store_series_request_stage_duration_seconds_sum|cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds|cortex_bucket_stores_gate_duration_seconds_bucket|cortex_bucket_stores_gate_duration_seconds_count|cortex_bucket_stores_gate_duration_seconds_sum|cortex_bucket_stores_tenants_synced|cortex_build_info|cortex_cache_memory_hits_total|cortex_cache_memory_requests_total|cortex_compactor_block_cleanup_failures_total|cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds|cortex_compactor_block_max_time_delta_seconds_bucket|cortex_compactor_bloc
k_max_time_delta_seconds_count|cortex_compactor_block_max_time_delta_seconds_sum|cortex_compactor_blocks_cleaned_total|cortex_compactor_blocks_marked_for_deletion_total|cortex_compactor_blocks_marked_for_no_compaction_total|cortex_compactor_disk_out_of_space_errors_total|cortex_compactor_group_compaction_runs_started_total|cortex_compactor_last_successful_run_timestamp_seconds|cortex_compactor_meta_sync_duration_seconds_bucket|cortex_compactor_meta_sync_duration_seconds_count|cortex_compactor_meta_sync_duration_seconds_sum|cortex_compactor_meta_sync_failures_total|cortex_compactor_meta_syncs_total|cortex_compactor_runs_completed_total|cortex_compactor_runs_failed_total|cortex_compactor_runs_started_total|cortex_compactor_tenants_discovered|cortex_compactor_tenants_processing_failed|cortex_compactor_tenants_processing_succeeded|cortex_compactor_tenants_skipped|cortex_config_hash|cortex_discarded_exemplars_total|cortex_discarded_requests_total|cortex_discarded_samples_total|cortex_distributor_deduped_samples_total|cortex_distributor_exemplars_in_total|cortex_distributor_inflight_push_requests|cortex_distributor_instance_limits|cortex_distributor_instance_rejected_requests_total|cortex_distributor_latest_seen_sample_timestamp_seconds|cortex_distributor_non_ha_samples_received_total|cortex_distributor_received_exemplars_total|cortex_distributor_received_requests_total|cortex_distributor_received_samples_total|cortex_distributor_replication_factor|cortex_distributor_requests_in_total|cortex_distributor_samples_in_total|cortex_inflight_requests|cortex_ingest_storage_reader_buffered_fetched_records|cortex_ingest_storage_reader_fetch_errors_total|cortex_ingest_storage_reader_fetches_total|cortex_ingest_storage_reader_missed_records_total|cortex_ingest_storage_reader_offset_commit_failures_total|cortex_ingest_storage_reader_offset_commit_requests_total|cortex_ingest_storage_reader_read_errors_total|cortex_ingest_storage_reader_receive_delay_seconds_count|cortex_ingest_storage_reader_receive_delay_seconds_sum|cortex_ingest_storage_reader_records_failed_total|cortex_ingest_storage_reader_records_total|cortex_ingest_storage_reader_requests_failed_total|cortex_ingest_storage_reader_requests_total|cortex_ingest_storage_strong_consistency_failures_total|cortex_ingest_storage_strong_consistency_requests_total|cortex_ingest_storage_writer_buffered_produce_bytes|cortex_ingest_storage_writer_buffered_produce_bytes_limit|cortex_ingester_active_native_histogram_buckets|cortex_ingester_active_native_histogram_buckets_custom_tracker|cortex_ingester_active_native_histogram_series|cortex_ingester_active_native_histogram_series_custom_tracker|cortex_ingester_active_series|cortex_ingester_active_series_custom_tracker|cortex_ingester_client_request_duration_seconds_bucket|cortex_ingester_client_request_duration_seconds_count|cortex_ingester_client_request_duration_seconds_sum|cortex_ingester_ingested_exemplars_total|cortex_ingester_ingested_samples_total|cortex_ingester_instance_limits|cortex_ingester_instance_rejected_requests_total|cortex_ingester_local_limits|cortex_ingester_memory_series|cortex_ingester_memory_series_created_total|cortex_ingester_memory_series_removed_total|cortex_ingester_memory_users|cortex_ingester_oldest_unshipped_block_timestamp_seconds|cortex_ingester_owned_series|cortex_ingester_queried_exemplars_bucket|cortex_ingester_queried_exemplars_count|cortex_ingester_queried_exemplars_sum|cortex_ingester_queried_samples_bucket|cortex_ingester_queried_samples_count|cortex_ingester_queried_samples_sum
|cortex_ingester_queried_series_bucket|cortex_ingester_queried_series_count|cortex_ingester_queried_series_sum|cortex_ingester_shipper_last_successful_upload_timestamp_seconds|cortex_ingester_shipper_upload_failures_total|cortex_ingester_shipper_uploads_total|cortex_ingester_tsdb_checkpoint_creations_failed_total|cortex_ingester_tsdb_checkpoint_creations_total|cortex_ingester_tsdb_checkpoint_deletions_failed_total|cortex_ingester_tsdb_compaction_duration_seconds_bucket|cortex_ingester_tsdb_compaction_duration_seconds_count|cortex_ingester_tsdb_compaction_duration_seconds_sum|cortex_ingester_tsdb_compactions_failed_total|cortex_ingester_tsdb_compactions_total|cortex_ingester_tsdb_exemplar_exemplars_appended_total|cortex_ingester_tsdb_exemplar_exemplars_in_storage|cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds|cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage|cortex_ingester_tsdb_head_max_timestamp_seconds|cortex_ingester_tsdb_head_truncations_failed_total|cortex_ingester_tsdb_mmap_chunk_corruptions_total|cortex_ingester_tsdb_out_of_order_samples_appended_total|cortex_ingester_tsdb_storage_blocks_bytes|cortex_ingester_tsdb_symbol_table_size_bytes|cortex_ingester_tsdb_wal_corruptions_total|cortex_ingester_tsdb_wal_truncate_duration_seconds_count|cortex_ingester_tsdb_wal_truncate_duration_seconds_sum|cortex_ingester_tsdb_wal_truncations_failed_total|cortex_ingester_tsdb_wal_truncations_total|cortex_ingester_tsdb_wal_writes_failed_total|cortex_kv_request_duration_seconds_bucket|cortex_kv_request_duration_seconds_count|cortex_kv_request_duration_seconds_sum|cortex_lifecycler_read_only|cortex_limits_defaults|cortex_limits_overrides|cortex_partition_ring_partitions|cortex_prometheus_notifications_dropped_total|cortex_prometheus_notifications_errors_total|cortex_prometheus_notifications_queue_capacity|cortex_prometheus_notifications_queue_length|cortex_prometheus_notifications_sent_total|cortex_prometheus_rule_evaluation_duration_seconds_count|cortex_prometheus_rule_evaluation_duration_seconds_sum|cortex_prometheus_rule_evaluation_failures_total|cortex_prometheus_rule_evaluations_total|cortex_prometheus_rule_group_duration_seconds_count|cortex_prometheus_rule_group_duration_seconds_sum|cortex_prometheus_rule_group_iterations_missed_total|cortex_prometheus_rule_group_iterations_total|cortex_prometheus_rule_group_rules|cortex_querier_blocks_consistency_checks_failed_total|cortex_querier_blocks_consistency_checks_total|cortex_querier_request_duration_seconds_bucket|cortex_querier_request_duration_seconds_count|cortex_querier_request_duration_seconds_sum|cortex_querier_storegateway_instances_hit_per_query_bucket|cortex_querier_storegateway_instances_hit_per_query_count|cortex_querier_storegateway_instances_hit_per_query_sum|cortex_querier_storegateway_refetches_per_query_bucket|cortex_querier_storegateway_refetches_per_query_count|cortex_querier_storegateway_refetches_per_query_sum|cortex_query_frontend_queries_total|cortex_query_frontend_queue_duration_seconds_bucket|cortex_query_frontend_queue_duration_seconds_count|cortex_query_frontend_queue_duration_seconds_sum|cortex_query_frontend_queue_length|cortex_query_frontend_retries_bucket|cortex_query_frontend_retries_count|cortex_query_frontend_retries_sum|cortex_query_scheduler_connected_querier_clients|cortex_query_scheduler_querier_inflight_requests|cortex_query_scheduler_queue_duration_seconds_bucket|cortex_query_scheduler_queue_duration_seconds_count|cortex_query_scheduler_queue_duration_seconds_sum|cortex_query_schedule
r_queue_length|cortex_request_duration_seconds|cortex_request_duration_seconds_bucket|cortex_request_duration_seconds_count|cortex_request_duration_seconds_sum|cortex_ring_members|cortex_ruler_managers_total|cortex_ruler_queries_failed_total|cortex_ruler_queries_total|cortex_ruler_ring_check_errors_total|cortex_ruler_write_requests_failed_total|cortex_ruler_write_requests_total|cortex_runtime_config_hash|cortex_runtime_config_last_reload_successful|cortex_tcp_connections|cortex_tcp_connections_limit|go_memstats_heap_inuse_bytes|keda_scaler_errors|keda_scaler_metrics_value|kube_deployment_spec_replicas|kube_deployment_status_replicas_unavailable|kube_deployment_status_replicas_updated|kube_endpoint_address|kube_horizontalpodautoscaler_spec_target_metric|kube_horizontalpodautoscaler_status_condition|kube_pod_info|kube_statefulset_replicas|kube_statefulset_status_current_revision|kube_statefulset_status_replicas_current|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas_updated|kube_statefulset_status_update_revision|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_used_bytes|memberlist_client_cluster_members_count|memcached_limit_bytes|mimir_continuous_test_queries_failed_total|mimir_continuous_test_query_result_checks_failed_total|mimir_continuous_test_writes_failed_total|node_disk_read_bytes_total|node_disk_written_bytes_total|process_memory_map_areas|process_memory_map_areas_limit|prometheus_tsdb_compaction_duration_seconds_bucket|prometheus_tsdb_compaction_duration_seconds_count|prometheus_tsdb_compaction_duration_seconds_sum|prometheus_tsdb_compactions_total|rollout_operator_last_successful_group_reconcile_timestamp_seconds|thanos_cache_hits_total|thanos_cache_operation_duration_seconds_bucket|thanos_cache_operation_duration_seconds_count|thanos_cache_operation_duration_seconds_sum|thanos_cache_operation_failures_total|thanos_cache_operations_total|thanos_cache_requests_total|thanos_objstore_bucket_last_successful_upload_time|thanos_objstore_bucket_operation_duration_seconds_bucket|thanos_objstore_bucket_operation_duration_seconds_count|thanos_objstore_bucket_operation_duration_seconds_sum|thanos_objstore_bucket_operation_failures_total|thanos_objstore_bucket_operations_total|thanos_store_index_cache_hits_total|thanos_store_index_cache_requests_total" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -644,9 +643,15 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/README.md b/charts/k8s-monitoring/docs/examples/meta-monitoring/README.md index 677fd0b7f..9e5f2c96b 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/README.md +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/README.md @@ -116,24 +116,15 @@ clusterMetrics: enabled: false kube-state-metrics: enabled: true - namespaces: - - collectors - - logs - - metrics - - o11y - extraMetricProcessingRules: |- - rule { - action = "keep" - source_labels = ["namespace"] - regex = "collectors|logs|metrics|o11y" - } + namespaces: collectors,logs,metrics,o11y metricsTuning: useDefaultAllowList: false includeMetrics: [(.+)] node-exporter: enabled: true - useIntegrationAllowList: true deploy: true 
+ metricsTuning: + useIntegrationAllowList: true windows-exporter: enabled: false deploy: false @@ -149,6 +140,7 @@ nodeLogs: podLogs: enabled: true + collector: alloy-singleton labelsToKeep: - app - app_kubernetes_io_name @@ -160,7 +152,6 @@ podLogs: - pod - service_name gatherMethod: kubernetesApi - collector: alloy-singleton namespaces: - collectors - logs @@ -174,6 +165,20 @@ applicationObservability: thriftHttp: enabled: true port: 14268 + processors: + k8sattributes: + metadata: + - k8s.namespace.name + - k8s.pod.name + - k8s.deployment.name + - k8s.statefulset.name + - k8s.daemonset.name + - k8s.cronjob.name + - k8s.job.name + - k8s.node.name + - k8s.pod.uid + - k8s.pod.start_time + - k8s.container.name # Collectors alloy-singleton: diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-receiver.alloy b/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-receiver.alloy index a95227f1c..e224304d4 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-receiver.alloy +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-receiver.alloy @@ -161,7 +161,7 @@ declare "application_observability" { // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { - metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] + metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time","k8s.container.name"] } pod_association { source { diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-singleton.alloy b/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-singleton.alloy index 92c24651f..3a92b6260 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-singleton.alloy +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-singleton.alloy @@ -242,17 +242,6 @@ declare "cluster_metrics" { scheme = "http" scrape_interval = "60s" max_cache_size = 100000 - forward_to = [prometheus.relabel.kube_state_metrics.receiver] - } - - prometheus.relabel "kube_state_metrics" { - max_cache_size = 100000 - - rule { - action = "keep" - source_labels = ["namespace"] - regex = "collectors|logs|metrics|o11y" - } forward_to = argument.metrics_destinations.value } @@ -287,7 +276,7 @@ declare "cluster_metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" clustering = true - keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes" + keep_metrics = 
"up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes|node_arp_entries|node_boot_time_seconds|node_context_switches_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filefd_maximum|node_filesystem_avail_bytes|node_filesystem_device_error|node_filesystem_files|node_filesystem_files_free|node_filesystem_readonly|node_filesystem_size_bytes|node_intr_total|node_load1|node_load15|node_load5|node_md_disks|node_md_disks_required|node_memory_Active_anon_bytes|node_memory_Active_bytes|node_memory_Active_file_bytes|node_memory_AnonHugePages_bytes|node_memory_AnonPages_bytes|node_memory_Bounce_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_CommitLimit_bytes|node_memory_Committed_AS_bytes|node_memory_DirectMap1G_bytes|node_memory_DirectMap2M_bytes|node_memory_DirectMap4k_bytes|node_memory_Dirty_bytes|node_memory_HugePages_Free|node_memory_HugePages_Rsvd|node_memory_HugePages_Surp|node_memory_HugePages_Total|node_memory_Hugepagesize_bytes|node_memory_Inactive_anon_bytes|node_memory_Inactive_bytes|node_memory_Inactive_file_bytes|node_memory_Mapped_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_Shmem_bytes|node_memory_ShmemHugePages_bytes|node_memory_Slab_bytes|node_memory_SReclaimable_bytes|node_memory_SUnreclaim_bytes|node_memory_SwapTotal_bytes|node_memory_VmallocChunk_bytes|node_memory_VmallocTotal_bytes|node_memory_VmallocUsed_bytes|node_memory_Writeback_bytes|node_memory_WritebackTmp_bytes|node_netstat_Icmp_InErrors|node_netstat_Icmp_InMsgs|node_netstat_Icmp_OutMsgs|node_netstat_Icmp6_InErrors|node_netstat_Icmp6_InMsgs|node_netstat_Icmp6_OutMsgs|node_netstat_IpExt_InOctets|node_netstat_IpExt_OutOctets|node_netstat_Tcp_InErrs|node_netstat_Tcp_InSegs|node_netstat_Tcp_OutRsts|node_netstat_Tcp_OutSegs|node_netstat_Tcp_RetransSegs|node_netstat_TcpExt_ListenDrops|node_netstat_TcpExt_ListenOverflows|node_netstat_TcpExt_TCPSynRetrans|node_netstat_Udp_InDatagrams|node_netstat_Udp_InErrors|node_netstat_Udp_NoPorts|node_netstat_Udp_OutDatagrams|node_netstat_Udp_RcvbufErrors|node_netstat_Udp_SndbufErrors|node_netstat_Udp6_InDatagrams|node_netstat_Udp6_InErrors|node_netstat_Udp6_NoPorts|node_netstat_Udp6_OutDatagrams|node_netstat_Udp6_RcvbufErrors|node_netstat_Udp6_SndbufErrors|node_netstat_UdpLite_InErrors|node_network_carrier|node_network_info|node_network_mtu_bytes|node_network_receive_compressed_total|node_network_receive_errs_total|node_network_receive_fifo_total|node_network_receive_multicast_total|node_network_receive_packets_total|node_network_speed_bytes|node_network_transmit_compressed_total|node_network_transmit_errs_total|node_network_transmit_fifo_total|node_network_transmit_multicast_total|node_network_transmit_packets_total|node_network_transmit_queue_length|node_network_up|node_nf_conntrack_entries|node_nf_conntrack_entries_limit|node_os_info|node_procs_running|node_sockstat_FRAG_inuse|node_sockstat_FRAG6_inuse|node_sockstat_RAW_inuse|node_sockstat_RAW6_inuse|node_sockstat_sockets_used|node_sockstat_TCP_alloc|node_sockstat_TCP_inus
e|node_sockstat_TCP_mem|node_sockstat_TCP_mem_bytes|node_sockstat_TCP_orphan|node_sockstat_TCP_tw|node_sockstat_TCP6_inuse|node_sockstat_UDP_inuse|node_sockstat_UDP_mem|node_sockstat_UDP_mem_bytes|node_sockstat_UDP6_inuse|node_sockstat_UDPLITE_inuse|node_sockstat_UDPLITE6_inuse|node_softnet_dropped_total|node_softnet_processed_total|node_softnet_times_squeezed_total|node_systemd_service_restart_total|node_systemd_unit_state|node_textfile_scrape_error|node_time_zone_offset_seconds|node_timex_estimated_error_seconds|node_timex_maxerror_seconds|node_timex_offset_seconds|node_timex_sync_status|node_uname_info|node_vmstat_oom_kill|node_vmstat_pgfault|node_vmstat_pgmajfault|node_vmstat_pgpgin|node_vmstat_pgpgout|node_vmstat_pswpin|node_vmstat_pswpout|process_max_fds|process_open_fds" scheme = "http" scrape_interval = "60s" max_cache_size = 100000 @@ -624,7 +613,7 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // set the structured metadata values @@ -636,6 +625,12 @@ declare "pod_logs" { "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } } // Integration: Loki @@ -667,7 +662,7 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // clean up the caller to remove the line @@ -685,6 +680,12 @@ declare "pod_logs" { "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } @@ -717,7 +718,7 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // clean up the caller to remove the line @@ -735,6 +736,12 @@ declare "pod_logs" { "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } @@ -1531,7 +1538,6 @@ declare "loki_integration" { targets = loki_integration_discovery.loki.output job_label = "integrations/loki" clustering = true - keep_metrics = 
"up|scrape_samples_scraped|go_gc_cycles_total_gc_cycles_total|go_gc_duration_seconds|go_gc_duration_seconds_count|go_gc_duration_seconds_sum|go_gc_pauses_seconds_bucket|go_goroutines|go_memstats_heap_inuse_bytes|loki_azure_blob_request_duration_seconds_bucket|loki_azure_blob_request_duration_seconds_count|loki_bigtable_request_duration_seconds_bucket|loki_bigtable_request_duration_seconds_count|loki_bloom_blocks_cache_added_total|loki_bloom_blocks_cache_entries|loki_bloom_blocks_cache_evicted_total|loki_bloom_blocks_cache_fetched_total|loki_bloom_blocks_cache_usage_bytes|loki_bloom_chunks_indexed_total|loki_bloom_gateway_block_query_latency_seconds_bucket|loki_bloom_gateway_dequeue_duration_seconds_bucket|loki_bloom_gateway_filtered_chunks_sum|loki_bloom_gateway_filtered_series_sum|loki_bloom_gateway_inflight_tasks|loki_bloom_gateway_process_duration_seconds_bucket|loki_bloom_gateway_process_duration_seconds_count|loki_bloom_gateway_querier_chunks_filtered_total|loki_bloom_gateway_querier_chunks_skipped_total|loki_bloom_gateway_querier_chunks_total|loki_bloom_gateway_querier_series_filtered_total|loki_bloom_gateway_querier_series_skipped_total|loki_bloom_gateway_querier_series_total|loki_bloom_gateway_queue_duration_seconds_bucket|loki_bloom_gateway_queue_duration_seconds_count|loki_bloom_gateway_queue_duration_seconds_sum|loki_bloom_gateway_queue_length|loki_bloom_gateway_requested_chunks_sum|loki_bloom_gateway_requested_series_sum|loki_bloom_gateway_tasks_dequeued_bucket|loki_bloom_gateway_tasks_dequeued_total|loki_bloom_gateway_tasks_processed_total|loki_bloom_inserts_total|loki_bloom_recorder_chunks_total|loki_bloom_recorder_series_total|loki_bloom_size_bucket|loki_bloom_store_blocks_fetched_size_bytes_bucket|loki_bloom_store_blocks_fetched_sum|loki_bloom_store_download_queue_size_sum|loki_bloom_store_metas_fetched_bucket|loki_bloom_store_metas_fetched_size_bytes_bucket|loki_bloom_store_metas_fetched_sum|loki_bloom_tokens_total|loki_bloombuilder_blocks_created_total|loki_bloombuilder_blocks_reused_total|loki_bloombuilder_bytes_per_task_bucket|loki_bloombuilder_chunk_series_size_sum|loki_bloombuilder_metas_created_total|loki_bloombuilder_processing_task|loki_bloombuilder_series_per_task_bucket|loki_bloomplanner_blocks_deleted_total|loki_bloomplanner_connected_builders|loki_bloomplanner_inflight_tasks|loki_bloomplanner_metas_deleted_total|loki_bloomplanner_queue_length|loki_bloomplanner_retention_running|loki_bloomplanner_retention_time_seconds_bucket|loki_bloomplanner_tenant_tasks_completed|loki_bloomplanner_tenant_tasks_planned|loki_boltdb_shipper_compact_tables_operation_duration_seconds|loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds|loki_boltdb_shipper_compact_tables_operation_total|loki_boltdb_shipper_request_duration_seconds_bucket|loki_boltdb_shipper_request_duration_seconds_count|loki_boltdb_shipper_request_duration_seconds_sum|loki_boltdb_shipper_retention_marker_count_total|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum|loki_boltdb_shipper_retention_marker_table_processed_total|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum|loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_t
ime|loki_boltdb_shipper_retention_sweeper_marker_files_current|loki_build_info|loki_chunk_store_deduped_chunks_total|loki_chunk_store_index_entries_per_chunk_count|loki_chunk_store_index_entries_per_chunk_sum|loki_compactor_apply_retention_last_successful_run_timestamp_seconds|loki_compactor_apply_retention_operation_duration_seconds|loki_compactor_apply_retention_operation_total|loki_compactor_delete_requests_processed_total|loki_compactor_delete_requests_received_total|loki_compactor_deleted_lines|loki_compactor_load_pending_requests_attempts_total|loki_compactor_locked_table_successive_compaction_skips|loki_compactor_oldest_pending_delete_request_age_seconds|loki_compactor_pending_delete_requests_count|loki_consul_request_duration_seconds_bucket|loki_discarded_samples_total|loki_distributor_bytes_received_total|loki_distributor_ingester_append_failures_total|loki_distributor_lines_received_total|loki_distributor_structured_metadata_bytes_received_total|loki_dynamo_consumed_capacity_total|loki_dynamo_dropped_requests_total|loki_dynamo_failures_total|loki_dynamo_query_pages_count|loki_dynamo_request_duration_seconds_bucket|loki_dynamo_request_duration_seconds_count|loki_dynamo_throttled_total|loki_embeddedcache_entries|loki_embeddedcache_memory_bytes|loki_gcs_request_duration_seconds_bucket|loki_gcs_request_duration_seconds_count|loki_index_gateway_postfilter_chunks_sum|loki_index_gateway_prefilter_chunks_sum|loki_index_request_duration_seconds_bucket|loki_index_request_duration_seconds_count|loki_index_request_duration_seconds_sum|loki_ingester_chunk_age_seconds_bucket|loki_ingester_chunk_age_seconds_count|loki_ingester_chunk_age_seconds_sum|loki_ingester_chunk_bounds_hours_bucket|loki_ingester_chunk_bounds_hours_count|loki_ingester_chunk_bounds_hours_sum|loki_ingester_chunk_entries_bucket|loki_ingester_chunk_entries_count|loki_ingester_chunk_entries_sum|loki_ingester_chunk_size_bytes_bucket|loki_ingester_chunk_utilization_bucket|loki_ingester_chunk_utilization_count|loki_ingester_chunk_utilization_sum|loki_ingester_chunks_flushed_total|loki_ingester_flush_queue_length|loki_ingester_memory_chunks|loki_ingester_memory_streams|loki_ingester_streams_created_total|loki_memcache_request_duration_seconds_bucket|loki_memcache_request_duration_seconds_count|loki_panic_total|loki_prometheus_rule_group_rules|loki_request_duration_seconds_bucket|loki_request_duration_seconds_count|loki_request_duration_seconds_sum|loki_ruler_wal_appender_ready|loki_ruler_wal_disk_size|loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds|loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds|loki_ruler_wal_prometheus_remote_storage_samples_pending|loki_ruler_wal_prometheus_remote_storage_samples_total|loki_ruler_wal_samples_appended_total|loki_ruler_wal_storage_created_series_total|loki_s3_request_duration_seconds_bucket|loki_s3_request_duration_seconds_count" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -1788,7 +1794,6 @@ declare "mimir_integration" { targets = mimir_integration_discovery.mimir.output job_label = "integrations/mimir" clustering = true - keep_metrics = 
"up|scrape_samples_scraped|cortex_alertmanager_alerts|cortex_alertmanager_alerts_invalid_total|cortex_alertmanager_alerts_received_total|cortex_alertmanager_dispatcher_aggregation_groups|cortex_alertmanager_notification_latency_seconds_bucket|cortex_alertmanager_notification_latency_seconds_count|cortex_alertmanager_notification_latency_seconds_sum|cortex_alertmanager_notifications_failed_total|cortex_alertmanager_notifications_total|cortex_alertmanager_partial_state_merges_failed_total|cortex_alertmanager_partial_state_merges_total|cortex_alertmanager_ring_check_errors_total|cortex_alertmanager_silences|cortex_alertmanager_state_fetch_replica_state_failed_total|cortex_alertmanager_state_fetch_replica_state_total|cortex_alertmanager_state_initial_sync_completed_total|cortex_alertmanager_state_initial_sync_duration_seconds_bucket|cortex_alertmanager_state_initial_sync_duration_seconds_count|cortex_alertmanager_state_initial_sync_duration_seconds_sum|cortex_alertmanager_state_persist_failed_total|cortex_alertmanager_state_persist_total|cortex_alertmanager_state_replication_failed_total|cortex_alertmanager_state_replication_total|cortex_alertmanager_sync_configs_failed_total|cortex_alertmanager_sync_configs_total|cortex_alertmanager_tenants_discovered|cortex_alertmanager_tenants_owned|cortex_blockbuilder_consume_cycle_duration_seconds|cortex_blockbuilder_consumer_lag_records|cortex_blockbuilder_tsdb_compact_and_upload_failed_total|cortex_bucket_blocks_count|cortex_bucket_index_estimated_compaction_jobs|cortex_bucket_index_estimated_compaction_jobs_errors_total|cortex_bucket_index_last_successful_update_timestamp_seconds|cortex_bucket_store_block_drop_failures_total|cortex_bucket_store_block_drops_total|cortex_bucket_store_block_load_failures_total|cortex_bucket_store_block_loads_total|cortex_bucket_store_blocks_loaded|cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket|cortex_bucket_store_indexheader_lazy_load_duration_seconds_count|cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum|cortex_bucket_store_indexheader_lazy_load_total|cortex_bucket_store_indexheader_lazy_unload_total|cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum|cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum|cortex_bucket_store_series_blocks_queried_sum|cortex_bucket_store_series_data_size_fetched_bytes_sum|cortex_bucket_store_series_data_size_touched_bytes_sum|cortex_bucket_store_series_hash_cache_hits_total|cortex_bucket_store_series_hash_cache_requests_total|cortex_bucket_store_series_request_stage_duration_seconds_bucket|cortex_bucket_store_series_request_stage_duration_seconds_count|cortex_bucket_store_series_request_stage_duration_seconds_sum|cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds|cortex_bucket_stores_gate_duration_seconds_bucket|cortex_bucket_stores_gate_duration_seconds_count|cortex_bucket_stores_gate_duration_seconds_sum|cortex_bucket_stores_tenants_synced|cortex_build_info|cortex_cache_memory_hits_total|cortex_cache_memory_requests_total|cortex_compactor_block_cleanup_failures_total|cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds|cortex_compactor_block_max_time_delta_seconds_bucket|cortex_compactor_block_max_time_delta_seconds_count|cortex_compactor_block_max_time_delta_seconds_sum|cortex_compactor_blocks_cleaned_total|cortex_compactor_blocks_marked_for_deletion_total|cortex_compactor_blocks_marked_for_no_compaction_total|cortex_compactor_disk_out_of_space_errors_total|cortex_compactor_group_compaction
_runs_started_total|cortex_compactor_last_successful_run_timestamp_seconds|cortex_compactor_meta_sync_duration_seconds_bucket|cortex_compactor_meta_sync_duration_seconds_count|cortex_compactor_meta_sync_duration_seconds_sum|cortex_compactor_meta_sync_failures_total|cortex_compactor_meta_syncs_total|cortex_compactor_runs_completed_total|cortex_compactor_runs_failed_total|cortex_compactor_runs_started_total|cortex_compactor_tenants_discovered|cortex_compactor_tenants_processing_failed|cortex_compactor_tenants_processing_succeeded|cortex_compactor_tenants_skipped|cortex_config_hash|cortex_discarded_exemplars_total|cortex_discarded_requests_total|cortex_discarded_samples_total|cortex_distributor_deduped_samples_total|cortex_distributor_exemplars_in_total|cortex_distributor_inflight_push_requests|cortex_distributor_instance_limits|cortex_distributor_instance_rejected_requests_total|cortex_distributor_latest_seen_sample_timestamp_seconds|cortex_distributor_non_ha_samples_received_total|cortex_distributor_received_exemplars_total|cortex_distributor_received_requests_total|cortex_distributor_received_samples_total|cortex_distributor_replication_factor|cortex_distributor_requests_in_total|cortex_distributor_samples_in_total|cortex_inflight_requests|cortex_ingest_storage_reader_buffered_fetched_records|cortex_ingest_storage_reader_fetch_errors_total|cortex_ingest_storage_reader_fetches_total|cortex_ingest_storage_reader_missed_records_total|cortex_ingest_storage_reader_offset_commit_failures_total|cortex_ingest_storage_reader_offset_commit_requests_total|cortex_ingest_storage_reader_read_errors_total|cortex_ingest_storage_reader_receive_delay_seconds_count|cortex_ingest_storage_reader_receive_delay_seconds_sum|cortex_ingest_storage_reader_records_failed_total|cortex_ingest_storage_reader_records_total|cortex_ingest_storage_reader_requests_failed_total|cortex_ingest_storage_reader_requests_total|cortex_ingest_storage_strong_consistency_failures_total|cortex_ingest_storage_strong_consistency_requests_total|cortex_ingest_storage_writer_buffered_produce_bytes|cortex_ingest_storage_writer_buffered_produce_bytes_limit|cortex_ingester_active_native_histogram_buckets|cortex_ingester_active_native_histogram_buckets_custom_tracker|cortex_ingester_active_native_histogram_series|cortex_ingester_active_native_histogram_series_custom_tracker|cortex_ingester_active_series|cortex_ingester_active_series_custom_tracker|cortex_ingester_client_request_duration_seconds_bucket|cortex_ingester_client_request_duration_seconds_count|cortex_ingester_client_request_duration_seconds_sum|cortex_ingester_ingested_exemplars_total|cortex_ingester_ingested_samples_total|cortex_ingester_instance_limits|cortex_ingester_instance_rejected_requests_total|cortex_ingester_local_limits|cortex_ingester_memory_series|cortex_ingester_memory_series_created_total|cortex_ingester_memory_series_removed_total|cortex_ingester_memory_users|cortex_ingester_oldest_unshipped_block_timestamp_seconds|cortex_ingester_owned_series|cortex_ingester_queried_exemplars_bucket|cortex_ingester_queried_exemplars_count|cortex_ingester_queried_exemplars_sum|cortex_ingester_queried_samples_bucket|cortex_ingester_queried_samples_count|cortex_ingester_queried_samples_sum|cortex_ingester_queried_series_bucket|cortex_ingester_queried_series_count|cortex_ingester_queried_series_sum|cortex_ingester_shipper_last_successful_upload_timestamp_seconds|cortex_ingester_shipper_upload_failures_total|cortex_ingester_shipper_uploads_total|cortex_ingester_tsdb_checkpoint_creations_fai
led_total|cortex_ingester_tsdb_checkpoint_creations_total|cortex_ingester_tsdb_checkpoint_deletions_failed_total|cortex_ingester_tsdb_compaction_duration_seconds_bucket|cortex_ingester_tsdb_compaction_duration_seconds_count|cortex_ingester_tsdb_compaction_duration_seconds_sum|cortex_ingester_tsdb_compactions_failed_total|cortex_ingester_tsdb_compactions_total|cortex_ingester_tsdb_exemplar_exemplars_appended_total|cortex_ingester_tsdb_exemplar_exemplars_in_storage|cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds|cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage|cortex_ingester_tsdb_head_max_timestamp_seconds|cortex_ingester_tsdb_head_truncations_failed_total|cortex_ingester_tsdb_mmap_chunk_corruptions_total|cortex_ingester_tsdb_out_of_order_samples_appended_total|cortex_ingester_tsdb_storage_blocks_bytes|cortex_ingester_tsdb_symbol_table_size_bytes|cortex_ingester_tsdb_wal_corruptions_total|cortex_ingester_tsdb_wal_truncate_duration_seconds_count|cortex_ingester_tsdb_wal_truncate_duration_seconds_sum|cortex_ingester_tsdb_wal_truncations_failed_total|cortex_ingester_tsdb_wal_truncations_total|cortex_ingester_tsdb_wal_writes_failed_total|cortex_kv_request_duration_seconds_bucket|cortex_kv_request_duration_seconds_count|cortex_kv_request_duration_seconds_sum|cortex_lifecycler_read_only|cortex_limits_defaults|cortex_limits_overrides|cortex_partition_ring_partitions|cortex_prometheus_notifications_dropped_total|cortex_prometheus_notifications_errors_total|cortex_prometheus_notifications_queue_capacity|cortex_prometheus_notifications_queue_length|cortex_prometheus_notifications_sent_total|cortex_prometheus_rule_evaluation_duration_seconds_count|cortex_prometheus_rule_evaluation_duration_seconds_sum|cortex_prometheus_rule_evaluation_failures_total|cortex_prometheus_rule_evaluations_total|cortex_prometheus_rule_group_duration_seconds_count|cortex_prometheus_rule_group_duration_seconds_sum|cortex_prometheus_rule_group_iterations_missed_total|cortex_prometheus_rule_group_iterations_total|cortex_prometheus_rule_group_rules|cortex_querier_blocks_consistency_checks_failed_total|cortex_querier_blocks_consistency_checks_total|cortex_querier_request_duration_seconds_bucket|cortex_querier_request_duration_seconds_count|cortex_querier_request_duration_seconds_sum|cortex_querier_storegateway_instances_hit_per_query_bucket|cortex_querier_storegateway_instances_hit_per_query_count|cortex_querier_storegateway_instances_hit_per_query_sum|cortex_querier_storegateway_refetches_per_query_bucket|cortex_querier_storegateway_refetches_per_query_count|cortex_querier_storegateway_refetches_per_query_sum|cortex_query_frontend_queries_total|cortex_query_frontend_queue_duration_seconds_bucket|cortex_query_frontend_queue_duration_seconds_count|cortex_query_frontend_queue_duration_seconds_sum|cortex_query_frontend_queue_length|cortex_query_frontend_retries_bucket|cortex_query_frontend_retries_count|cortex_query_frontend_retries_sum|cortex_query_scheduler_connected_querier_clients|cortex_query_scheduler_querier_inflight_requests|cortex_query_scheduler_queue_duration_seconds_bucket|cortex_query_scheduler_queue_duration_seconds_count|cortex_query_scheduler_queue_duration_seconds_sum|cortex_query_scheduler_queue_length|cortex_request_duration_seconds|cortex_request_duration_seconds_bucket|cortex_request_duration_seconds_count|cortex_request_duration_seconds_sum|cortex_ring_members|cortex_ruler_managers_total|cortex_ruler_queries_failed_total|cortex_ruler_queries_total|cortex_ruler_ring_check_errors_total
|cortex_ruler_write_requests_failed_total|cortex_ruler_write_requests_total|cortex_runtime_config_hash|cortex_runtime_config_last_reload_successful|cortex_tcp_connections|cortex_tcp_connections_limit|go_memstats_heap_inuse_bytes|keda_scaler_errors|keda_scaler_metrics_value|kube_deployment_spec_replicas|kube_deployment_status_replicas_unavailable|kube_deployment_status_replicas_updated|kube_endpoint_address|kube_horizontalpodautoscaler_spec_target_metric|kube_horizontalpodautoscaler_status_condition|kube_pod_info|kube_statefulset_replicas|kube_statefulset_status_current_revision|kube_statefulset_status_replicas_current|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas_updated|kube_statefulset_status_update_revision|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_used_bytes|memberlist_client_cluster_members_count|memcached_limit_bytes|mimir_continuous_test_queries_failed_total|mimir_continuous_test_query_result_checks_failed_total|mimir_continuous_test_writes_failed_total|node_disk_read_bytes_total|node_disk_written_bytes_total|process_memory_map_areas|process_memory_map_areas_limit|prometheus_tsdb_compaction_duration_seconds_bucket|prometheus_tsdb_compaction_duration_seconds_count|prometheus_tsdb_compaction_duration_seconds_sum|prometheus_tsdb_compactions_total|rollout_operator_last_successful_group_reconcile_timestamp_seconds|thanos_cache_hits_total|thanos_cache_operation_duration_seconds_bucket|thanos_cache_operation_duration_seconds_count|thanos_cache_operation_duration_seconds_sum|thanos_cache_operation_failures_total|thanos_cache_operations_total|thanos_cache_requests_total|thanos_objstore_bucket_last_successful_upload_time|thanos_objstore_bucket_operation_duration_seconds_bucket|thanos_objstore_bucket_operation_duration_seconds_count|thanos_objstore_bucket_operation_duration_seconds_sum|thanos_objstore_bucket_operation_failures_total|thanos_objstore_bucket_operations_total|thanos_store_index_cache_hits_total|thanos_store_index_cache_requests_total" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml b/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml index 3237bceea..fdeca04c5 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml @@ -317,17 +317,6 @@ data: scheme = "http" scrape_interval = "60s" max_cache_size = 100000 - forward_to = [prometheus.relabel.kube_state_metrics.receiver] - } - - prometheus.relabel "kube_state_metrics" { - max_cache_size = 100000 - - rule { - action = "keep" - source_labels = ["namespace"] - regex = "collectors|logs|metrics|o11y" - } forward_to = argument.metrics_destinations.value } @@ -362,7 +351,7 @@ data: targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" clustering = true - keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes" + keep_metrics = 
"up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes|node_arp_entries|node_boot_time_seconds|node_context_switches_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filefd_maximum|node_filesystem_avail_bytes|node_filesystem_device_error|node_filesystem_files|node_filesystem_files_free|node_filesystem_readonly|node_filesystem_size_bytes|node_intr_total|node_load1|node_load15|node_load5|node_md_disks|node_md_disks_required|node_memory_Active_anon_bytes|node_memory_Active_bytes|node_memory_Active_file_bytes|node_memory_AnonHugePages_bytes|node_memory_AnonPages_bytes|node_memory_Bounce_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_CommitLimit_bytes|node_memory_Committed_AS_bytes|node_memory_DirectMap1G_bytes|node_memory_DirectMap2M_bytes|node_memory_DirectMap4k_bytes|node_memory_Dirty_bytes|node_memory_HugePages_Free|node_memory_HugePages_Rsvd|node_memory_HugePages_Surp|node_memory_HugePages_Total|node_memory_Hugepagesize_bytes|node_memory_Inactive_anon_bytes|node_memory_Inactive_bytes|node_memory_Inactive_file_bytes|node_memory_Mapped_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_Shmem_bytes|node_memory_ShmemHugePages_bytes|node_memory_Slab_bytes|node_memory_SReclaimable_bytes|node_memory_SUnreclaim_bytes|node_memory_SwapTotal_bytes|node_memory_VmallocChunk_bytes|node_memory_VmallocTotal_bytes|node_memory_VmallocUsed_bytes|node_memory_Writeback_bytes|node_memory_WritebackTmp_bytes|node_netstat_Icmp_InErrors|node_netstat_Icmp_InMsgs|node_netstat_Icmp_OutMsgs|node_netstat_Icmp6_InErrors|node_netstat_Icmp6_InMsgs|node_netstat_Icmp6_OutMsgs|node_netstat_IpExt_InOctets|node_netstat_IpExt_OutOctets|node_netstat_Tcp_InErrs|node_netstat_Tcp_InSegs|node_netstat_Tcp_OutRsts|node_netstat_Tcp_OutSegs|node_netstat_Tcp_RetransSegs|node_netstat_TcpExt_ListenDrops|node_netstat_TcpExt_ListenOverflows|node_netstat_TcpExt_TCPSynRetrans|node_netstat_Udp_InDatagrams|node_netstat_Udp_InErrors|node_netstat_Udp_NoPorts|node_netstat_Udp_OutDatagrams|node_netstat_Udp_RcvbufErrors|node_netstat_Udp_SndbufErrors|node_netstat_Udp6_InDatagrams|node_netstat_Udp6_InErrors|node_netstat_Udp6_NoPorts|node_netstat_Udp6_OutDatagrams|node_netstat_Udp6_RcvbufErrors|node_netstat_Udp6_SndbufErrors|node_netstat_UdpLite_InErrors|node_network_carrier|node_network_info|node_network_mtu_bytes|node_network_receive_compressed_total|node_network_receive_errs_total|node_network_receive_fifo_total|node_network_receive_multicast_total|node_network_receive_packets_total|node_network_speed_bytes|node_network_transmit_compressed_total|node_network_transmit_errs_total|node_network_transmit_fifo_total|node_network_transmit_multicast_total|node_network_transmit_packets_total|node_network_transmit_queue_length|node_network_up|node_nf_conntrack_entries|node_nf_conntrack_entries_limit|node_os_info|node_procs_running|node_sockstat_FRAG_inuse|node_sockstat_FRAG6_inuse|node_sockstat_RAW_inuse|node_sockstat_RAW6_inuse|node_sockstat_sockets_used|node_sockstat_TCP_alloc|node_sockstat_TCP_inus
e|node_sockstat_TCP_mem|node_sockstat_TCP_mem_bytes|node_sockstat_TCP_orphan|node_sockstat_TCP_tw|node_sockstat_TCP6_inuse|node_sockstat_UDP_inuse|node_sockstat_UDP_mem|node_sockstat_UDP_mem_bytes|node_sockstat_UDP6_inuse|node_sockstat_UDPLITE_inuse|node_sockstat_UDPLITE6_inuse|node_softnet_dropped_total|node_softnet_processed_total|node_softnet_times_squeezed_total|node_systemd_service_restart_total|node_systemd_unit_state|node_textfile_scrape_error|node_time_zone_offset_seconds|node_timex_estimated_error_seconds|node_timex_maxerror_seconds|node_timex_offset_seconds|node_timex_sync_status|node_uname_info|node_vmstat_oom_kill|node_vmstat_pgfault|node_vmstat_pgmajfault|node_vmstat_pgpgin|node_vmstat_pgpgout|node_vmstat_pswpin|node_vmstat_pswpout|process_max_fds|process_open_fds" scheme = "http" scrape_interval = "60s" max_cache_size = 100000 @@ -699,7 +688,7 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // set the structured metadata values @@ -711,6 +700,12 @@ data: "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } } // Integration: Loki @@ -742,7 +737,7 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // clean up the caller to remove the line @@ -760,6 +755,12 @@ data: "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } @@ -792,7 +793,7 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // clean up the caller to remove the line @@ -810,6 +811,12 @@ data: "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } @@ -1606,7 +1613,6 @@ data: targets = loki_integration_discovery.loki.output job_label = "integrations/loki" clustering = true - keep_metrics = 
"up|scrape_samples_scraped|go_gc_cycles_total_gc_cycles_total|go_gc_duration_seconds|go_gc_duration_seconds_count|go_gc_duration_seconds_sum|go_gc_pauses_seconds_bucket|go_goroutines|go_memstats_heap_inuse_bytes|loki_azure_blob_request_duration_seconds_bucket|loki_azure_blob_request_duration_seconds_count|loki_bigtable_request_duration_seconds_bucket|loki_bigtable_request_duration_seconds_count|loki_bloom_blocks_cache_added_total|loki_bloom_blocks_cache_entries|loki_bloom_blocks_cache_evicted_total|loki_bloom_blocks_cache_fetched_total|loki_bloom_blocks_cache_usage_bytes|loki_bloom_chunks_indexed_total|loki_bloom_gateway_block_query_latency_seconds_bucket|loki_bloom_gateway_dequeue_duration_seconds_bucket|loki_bloom_gateway_filtered_chunks_sum|loki_bloom_gateway_filtered_series_sum|loki_bloom_gateway_inflight_tasks|loki_bloom_gateway_process_duration_seconds_bucket|loki_bloom_gateway_process_duration_seconds_count|loki_bloom_gateway_querier_chunks_filtered_total|loki_bloom_gateway_querier_chunks_skipped_total|loki_bloom_gateway_querier_chunks_total|loki_bloom_gateway_querier_series_filtered_total|loki_bloom_gateway_querier_series_skipped_total|loki_bloom_gateway_querier_series_total|loki_bloom_gateway_queue_duration_seconds_bucket|loki_bloom_gateway_queue_duration_seconds_count|loki_bloom_gateway_queue_duration_seconds_sum|loki_bloom_gateway_queue_length|loki_bloom_gateway_requested_chunks_sum|loki_bloom_gateway_requested_series_sum|loki_bloom_gateway_tasks_dequeued_bucket|loki_bloom_gateway_tasks_dequeued_total|loki_bloom_gateway_tasks_processed_total|loki_bloom_inserts_total|loki_bloom_recorder_chunks_total|loki_bloom_recorder_series_total|loki_bloom_size_bucket|loki_bloom_store_blocks_fetched_size_bytes_bucket|loki_bloom_store_blocks_fetched_sum|loki_bloom_store_download_queue_size_sum|loki_bloom_store_metas_fetched_bucket|loki_bloom_store_metas_fetched_size_bytes_bucket|loki_bloom_store_metas_fetched_sum|loki_bloom_tokens_total|loki_bloombuilder_blocks_created_total|loki_bloombuilder_blocks_reused_total|loki_bloombuilder_bytes_per_task_bucket|loki_bloombuilder_chunk_series_size_sum|loki_bloombuilder_metas_created_total|loki_bloombuilder_processing_task|loki_bloombuilder_series_per_task_bucket|loki_bloomplanner_blocks_deleted_total|loki_bloomplanner_connected_builders|loki_bloomplanner_inflight_tasks|loki_bloomplanner_metas_deleted_total|loki_bloomplanner_queue_length|loki_bloomplanner_retention_running|loki_bloomplanner_retention_time_seconds_bucket|loki_bloomplanner_tenant_tasks_completed|loki_bloomplanner_tenant_tasks_planned|loki_boltdb_shipper_compact_tables_operation_duration_seconds|loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds|loki_boltdb_shipper_compact_tables_operation_total|loki_boltdb_shipper_request_duration_seconds_bucket|loki_boltdb_shipper_request_duration_seconds_count|loki_boltdb_shipper_request_duration_seconds_sum|loki_boltdb_shipper_retention_marker_count_total|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum|loki_boltdb_shipper_retention_marker_table_processed_total|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum|loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_t
ime|loki_boltdb_shipper_retention_sweeper_marker_files_current|loki_build_info|loki_chunk_store_deduped_chunks_total|loki_chunk_store_index_entries_per_chunk_count|loki_chunk_store_index_entries_per_chunk_sum|loki_compactor_apply_retention_last_successful_run_timestamp_seconds|loki_compactor_apply_retention_operation_duration_seconds|loki_compactor_apply_retention_operation_total|loki_compactor_delete_requests_processed_total|loki_compactor_delete_requests_received_total|loki_compactor_deleted_lines|loki_compactor_load_pending_requests_attempts_total|loki_compactor_locked_table_successive_compaction_skips|loki_compactor_oldest_pending_delete_request_age_seconds|loki_compactor_pending_delete_requests_count|loki_consul_request_duration_seconds_bucket|loki_discarded_samples_total|loki_distributor_bytes_received_total|loki_distributor_ingester_append_failures_total|loki_distributor_lines_received_total|loki_distributor_structured_metadata_bytes_received_total|loki_dynamo_consumed_capacity_total|loki_dynamo_dropped_requests_total|loki_dynamo_failures_total|loki_dynamo_query_pages_count|loki_dynamo_request_duration_seconds_bucket|loki_dynamo_request_duration_seconds_count|loki_dynamo_throttled_total|loki_embeddedcache_entries|loki_embeddedcache_memory_bytes|loki_gcs_request_duration_seconds_bucket|loki_gcs_request_duration_seconds_count|loki_index_gateway_postfilter_chunks_sum|loki_index_gateway_prefilter_chunks_sum|loki_index_request_duration_seconds_bucket|loki_index_request_duration_seconds_count|loki_index_request_duration_seconds_sum|loki_ingester_chunk_age_seconds_bucket|loki_ingester_chunk_age_seconds_count|loki_ingester_chunk_age_seconds_sum|loki_ingester_chunk_bounds_hours_bucket|loki_ingester_chunk_bounds_hours_count|loki_ingester_chunk_bounds_hours_sum|loki_ingester_chunk_entries_bucket|loki_ingester_chunk_entries_count|loki_ingester_chunk_entries_sum|loki_ingester_chunk_size_bytes_bucket|loki_ingester_chunk_utilization_bucket|loki_ingester_chunk_utilization_count|loki_ingester_chunk_utilization_sum|loki_ingester_chunks_flushed_total|loki_ingester_flush_queue_length|loki_ingester_memory_chunks|loki_ingester_memory_streams|loki_ingester_streams_created_total|loki_memcache_request_duration_seconds_bucket|loki_memcache_request_duration_seconds_count|loki_panic_total|loki_prometheus_rule_group_rules|loki_request_duration_seconds_bucket|loki_request_duration_seconds_count|loki_request_duration_seconds_sum|loki_ruler_wal_appender_ready|loki_ruler_wal_disk_size|loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds|loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds|loki_ruler_wal_prometheus_remote_storage_samples_pending|loki_ruler_wal_prometheus_remote_storage_samples_total|loki_ruler_wal_samples_appended_total|loki_ruler_wal_storage_created_series_total|loki_s3_request_duration_seconds_bucket|loki_s3_request_duration_seconds_count" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -1863,7 +1869,6 @@ data: targets = mimir_integration_discovery.mimir.output job_label = "integrations/mimir" clustering = true - keep_metrics = 
"up|scrape_samples_scraped|cortex_alertmanager_alerts|cortex_alertmanager_alerts_invalid_total|cortex_alertmanager_alerts_received_total|cortex_alertmanager_dispatcher_aggregation_groups|cortex_alertmanager_notification_latency_seconds_bucket|cortex_alertmanager_notification_latency_seconds_count|cortex_alertmanager_notification_latency_seconds_sum|cortex_alertmanager_notifications_failed_total|cortex_alertmanager_notifications_total|cortex_alertmanager_partial_state_merges_failed_total|cortex_alertmanager_partial_state_merges_total|cortex_alertmanager_ring_check_errors_total|cortex_alertmanager_silences|cortex_alertmanager_state_fetch_replica_state_failed_total|cortex_alertmanager_state_fetch_replica_state_total|cortex_alertmanager_state_initial_sync_completed_total|cortex_alertmanager_state_initial_sync_duration_seconds_bucket|cortex_alertmanager_state_initial_sync_duration_seconds_count|cortex_alertmanager_state_initial_sync_duration_seconds_sum|cortex_alertmanager_state_persist_failed_total|cortex_alertmanager_state_persist_total|cortex_alertmanager_state_replication_failed_total|cortex_alertmanager_state_replication_total|cortex_alertmanager_sync_configs_failed_total|cortex_alertmanager_sync_configs_total|cortex_alertmanager_tenants_discovered|cortex_alertmanager_tenants_owned|cortex_blockbuilder_consume_cycle_duration_seconds|cortex_blockbuilder_consumer_lag_records|cortex_blockbuilder_tsdb_compact_and_upload_failed_total|cortex_bucket_blocks_count|cortex_bucket_index_estimated_compaction_jobs|cortex_bucket_index_estimated_compaction_jobs_errors_total|cortex_bucket_index_last_successful_update_timestamp_seconds|cortex_bucket_store_block_drop_failures_total|cortex_bucket_store_block_drops_total|cortex_bucket_store_block_load_failures_total|cortex_bucket_store_block_loads_total|cortex_bucket_store_blocks_loaded|cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket|cortex_bucket_store_indexheader_lazy_load_duration_seconds_count|cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum|cortex_bucket_store_indexheader_lazy_load_total|cortex_bucket_store_indexheader_lazy_unload_total|cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum|cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum|cortex_bucket_store_series_blocks_queried_sum|cortex_bucket_store_series_data_size_fetched_bytes_sum|cortex_bucket_store_series_data_size_touched_bytes_sum|cortex_bucket_store_series_hash_cache_hits_total|cortex_bucket_store_series_hash_cache_requests_total|cortex_bucket_store_series_request_stage_duration_seconds_bucket|cortex_bucket_store_series_request_stage_duration_seconds_count|cortex_bucket_store_series_request_stage_duration_seconds_sum|cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds|cortex_bucket_stores_gate_duration_seconds_bucket|cortex_bucket_stores_gate_duration_seconds_count|cortex_bucket_stores_gate_duration_seconds_sum|cortex_bucket_stores_tenants_synced|cortex_build_info|cortex_cache_memory_hits_total|cortex_cache_memory_requests_total|cortex_compactor_block_cleanup_failures_total|cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds|cortex_compactor_block_max_time_delta_seconds_bucket|cortex_compactor_block_max_time_delta_seconds_count|cortex_compactor_block_max_time_delta_seconds_sum|cortex_compactor_blocks_cleaned_total|cortex_compactor_blocks_marked_for_deletion_total|cortex_compactor_blocks_marked_for_no_compaction_total|cortex_compactor_disk_out_of_space_errors_total|cortex_compactor_group_compaction
_runs_started_total|cortex_compactor_last_successful_run_timestamp_seconds|cortex_compactor_meta_sync_duration_seconds_bucket|cortex_compactor_meta_sync_duration_seconds_count|cortex_compactor_meta_sync_duration_seconds_sum|cortex_compactor_meta_sync_failures_total|cortex_compactor_meta_syncs_total|cortex_compactor_runs_completed_total|cortex_compactor_runs_failed_total|cortex_compactor_runs_started_total|cortex_compactor_tenants_discovered|cortex_compactor_tenants_processing_failed|cortex_compactor_tenants_processing_succeeded|cortex_compactor_tenants_skipped|cortex_config_hash|cortex_discarded_exemplars_total|cortex_discarded_requests_total|cortex_discarded_samples_total|cortex_distributor_deduped_samples_total|cortex_distributor_exemplars_in_total|cortex_distributor_inflight_push_requests|cortex_distributor_instance_limits|cortex_distributor_instance_rejected_requests_total|cortex_distributor_latest_seen_sample_timestamp_seconds|cortex_distributor_non_ha_samples_received_total|cortex_distributor_received_exemplars_total|cortex_distributor_received_requests_total|cortex_distributor_received_samples_total|cortex_distributor_replication_factor|cortex_distributor_requests_in_total|cortex_distributor_samples_in_total|cortex_inflight_requests|cortex_ingest_storage_reader_buffered_fetched_records|cortex_ingest_storage_reader_fetch_errors_total|cortex_ingest_storage_reader_fetches_total|cortex_ingest_storage_reader_missed_records_total|cortex_ingest_storage_reader_offset_commit_failures_total|cortex_ingest_storage_reader_offset_commit_requests_total|cortex_ingest_storage_reader_read_errors_total|cortex_ingest_storage_reader_receive_delay_seconds_count|cortex_ingest_storage_reader_receive_delay_seconds_sum|cortex_ingest_storage_reader_records_failed_total|cortex_ingest_storage_reader_records_total|cortex_ingest_storage_reader_requests_failed_total|cortex_ingest_storage_reader_requests_total|cortex_ingest_storage_strong_consistency_failures_total|cortex_ingest_storage_strong_consistency_requests_total|cortex_ingest_storage_writer_buffered_produce_bytes|cortex_ingest_storage_writer_buffered_produce_bytes_limit|cortex_ingester_active_native_histogram_buckets|cortex_ingester_active_native_histogram_buckets_custom_tracker|cortex_ingester_active_native_histogram_series|cortex_ingester_active_native_histogram_series_custom_tracker|cortex_ingester_active_series|cortex_ingester_active_series_custom_tracker|cortex_ingester_client_request_duration_seconds_bucket|cortex_ingester_client_request_duration_seconds_count|cortex_ingester_client_request_duration_seconds_sum|cortex_ingester_ingested_exemplars_total|cortex_ingester_ingested_samples_total|cortex_ingester_instance_limits|cortex_ingester_instance_rejected_requests_total|cortex_ingester_local_limits|cortex_ingester_memory_series|cortex_ingester_memory_series_created_total|cortex_ingester_memory_series_removed_total|cortex_ingester_memory_users|cortex_ingester_oldest_unshipped_block_timestamp_seconds|cortex_ingester_owned_series|cortex_ingester_queried_exemplars_bucket|cortex_ingester_queried_exemplars_count|cortex_ingester_queried_exemplars_sum|cortex_ingester_queried_samples_bucket|cortex_ingester_queried_samples_count|cortex_ingester_queried_samples_sum|cortex_ingester_queried_series_bucket|cortex_ingester_queried_series_count|cortex_ingester_queried_series_sum|cortex_ingester_shipper_last_successful_upload_timestamp_seconds|cortex_ingester_shipper_upload_failures_total|cortex_ingester_shipper_uploads_total|cortex_ingester_tsdb_checkpoint_creations_fai
led_total|cortex_ingester_tsdb_checkpoint_creations_total|cortex_ingester_tsdb_checkpoint_deletions_failed_total|cortex_ingester_tsdb_compaction_duration_seconds_bucket|cortex_ingester_tsdb_compaction_duration_seconds_count|cortex_ingester_tsdb_compaction_duration_seconds_sum|cortex_ingester_tsdb_compactions_failed_total|cortex_ingester_tsdb_compactions_total|cortex_ingester_tsdb_exemplar_exemplars_appended_total|cortex_ingester_tsdb_exemplar_exemplars_in_storage|cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds|cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage|cortex_ingester_tsdb_head_max_timestamp_seconds|cortex_ingester_tsdb_head_truncations_failed_total|cortex_ingester_tsdb_mmap_chunk_corruptions_total|cortex_ingester_tsdb_out_of_order_samples_appended_total|cortex_ingester_tsdb_storage_blocks_bytes|cortex_ingester_tsdb_symbol_table_size_bytes|cortex_ingester_tsdb_wal_corruptions_total|cortex_ingester_tsdb_wal_truncate_duration_seconds_count|cortex_ingester_tsdb_wal_truncate_duration_seconds_sum|cortex_ingester_tsdb_wal_truncations_failed_total|cortex_ingester_tsdb_wal_truncations_total|cortex_ingester_tsdb_wal_writes_failed_total|cortex_kv_request_duration_seconds_bucket|cortex_kv_request_duration_seconds_count|cortex_kv_request_duration_seconds_sum|cortex_lifecycler_read_only|cortex_limits_defaults|cortex_limits_overrides|cortex_partition_ring_partitions|cortex_prometheus_notifications_dropped_total|cortex_prometheus_notifications_errors_total|cortex_prometheus_notifications_queue_capacity|cortex_prometheus_notifications_queue_length|cortex_prometheus_notifications_sent_total|cortex_prometheus_rule_evaluation_duration_seconds_count|cortex_prometheus_rule_evaluation_duration_seconds_sum|cortex_prometheus_rule_evaluation_failures_total|cortex_prometheus_rule_evaluations_total|cortex_prometheus_rule_group_duration_seconds_count|cortex_prometheus_rule_group_duration_seconds_sum|cortex_prometheus_rule_group_iterations_missed_total|cortex_prometheus_rule_group_iterations_total|cortex_prometheus_rule_group_rules|cortex_querier_blocks_consistency_checks_failed_total|cortex_querier_blocks_consistency_checks_total|cortex_querier_request_duration_seconds_bucket|cortex_querier_request_duration_seconds_count|cortex_querier_request_duration_seconds_sum|cortex_querier_storegateway_instances_hit_per_query_bucket|cortex_querier_storegateway_instances_hit_per_query_count|cortex_querier_storegateway_instances_hit_per_query_sum|cortex_querier_storegateway_refetches_per_query_bucket|cortex_querier_storegateway_refetches_per_query_count|cortex_querier_storegateway_refetches_per_query_sum|cortex_query_frontend_queries_total|cortex_query_frontend_queue_duration_seconds_bucket|cortex_query_frontend_queue_duration_seconds_count|cortex_query_frontend_queue_duration_seconds_sum|cortex_query_frontend_queue_length|cortex_query_frontend_retries_bucket|cortex_query_frontend_retries_count|cortex_query_frontend_retries_sum|cortex_query_scheduler_connected_querier_clients|cortex_query_scheduler_querier_inflight_requests|cortex_query_scheduler_queue_duration_seconds_bucket|cortex_query_scheduler_queue_duration_seconds_count|cortex_query_scheduler_queue_duration_seconds_sum|cortex_query_scheduler_queue_length|cortex_request_duration_seconds|cortex_request_duration_seconds_bucket|cortex_request_duration_seconds_count|cortex_request_duration_seconds_sum|cortex_ring_members|cortex_ruler_managers_total|cortex_ruler_queries_failed_total|cortex_ruler_queries_total|cortex_ruler_ring_check_errors_total
|cortex_ruler_write_requests_failed_total|cortex_ruler_write_requests_total|cortex_runtime_config_hash|cortex_runtime_config_last_reload_successful|cortex_tcp_connections|cortex_tcp_connections_limit|go_memstats_heap_inuse_bytes|keda_scaler_errors|keda_scaler_metrics_value|kube_deployment_spec_replicas|kube_deployment_status_replicas_unavailable|kube_deployment_status_replicas_updated|kube_endpoint_address|kube_horizontalpodautoscaler_spec_target_metric|kube_horizontalpodautoscaler_status_condition|kube_pod_info|kube_statefulset_replicas|kube_statefulset_status_current_revision|kube_statefulset_status_replicas_current|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas_updated|kube_statefulset_status_update_revision|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_used_bytes|memberlist_client_cluster_members_count|memcached_limit_bytes|mimir_continuous_test_queries_failed_total|mimir_continuous_test_query_result_checks_failed_total|mimir_continuous_test_writes_failed_total|node_disk_read_bytes_total|node_disk_written_bytes_total|process_memory_map_areas|process_memory_map_areas_limit|prometheus_tsdb_compaction_duration_seconds_bucket|prometheus_tsdb_compaction_duration_seconds_count|prometheus_tsdb_compaction_duration_seconds_sum|prometheus_tsdb_compactions_total|rollout_operator_last_successful_group_reconcile_timestamp_seconds|thanos_cache_hits_total|thanos_cache_operation_duration_seconds_bucket|thanos_cache_operation_duration_seconds_count|thanos_cache_operation_duration_seconds_sum|thanos_cache_operation_failures_total|thanos_cache_operations_total|thanos_cache_requests_total|thanos_objstore_bucket_last_successful_upload_time|thanos_objstore_bucket_operation_duration_seconds_bucket|thanos_objstore_bucket_operation_duration_seconds_count|thanos_objstore_bucket_operation_duration_seconds_sum|thanos_objstore_bucket_operation_failures_total|thanos_objstore_bucket_operations_total|thanos_store_index_cache_hits_total|thanos_store_index_cache_requests_total" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -2104,7 +2109,7 @@ data: // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { - metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] + metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time","k8s.container.name"] } pod_association { source { diff --git a/charts/k8s-monitoring/tests/integration/integration-grafana/.rendered/output.yaml b/charts/k8s-monitoring/tests/integration/integration-grafana/.rendered/output.yaml index 907248fb1..9d6d5d460 100644 --- a/charts/k8s-monitoring/tests/integration/integration-grafana/.rendered/output.yaml +++ b/charts/k8s-monitoring/tests/integration/integration-grafana/.rendered/output.yaml @@ -1135,9 +1135,15 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } } // Only keep the labels that are defined in the `keepLabels` list. 
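Note on the log-pipeline changes shown above: the rendered outputs now include a stage.drop block because the integration values files default dropLogLevels to [debug], and the timestamp-stripping stage.replace regex is tightened from a loose "(ts=[^ ]+\s+)" match to an anchored RFC3339-style pattern. A minimal, self-contained sketch of the same drop behaviour in Grafana Alloy configuration follows; it is illustrative only and not taken from the chart (the component names and the Loki URL are placeholders, and it assumes logfmt-formatted log lines):

    loki.write "default" {
      endpoint {
        // placeholder URL, not part of the chart
        url = "http://loki.example.internal:3100/loki/api/v1/push"
      }
    }

    loki.process "drop_debug_example" {
      forward_to = [loki.write.default.receiver]

      // extract the "level" field from logfmt lines such as `level=debug msg="..."`
      stage.logfmt {
        mapping = { "level" = "" }
      }

      // drop entries whose extracted level matches "debug" (case-insensitive)
      // and count them under the given reason, mirroring the
      // "<integration>-drop-log-level" counters added in this patch series
      stage.drop {
        source              = "level"
        expression          = "(?i)(debug)"
        drop_counter_reason = "drop-log-level"
      }
    }

With this sketch, a line that parses to level=info passes through to the destination unchanged, while a level=debug line is dropped before it is written to Loki.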
diff --git a/charts/k8s-monitoring/tests/integration/integration-loki/.rendered/output.yaml b/charts/k8s-monitoring/tests/integration/integration-loki/.rendered/output.yaml index c3d0f3841..009349244 100644 --- a/charts/k8s-monitoring/tests/integration/integration-loki/.rendered/output.yaml +++ b/charts/k8s-monitoring/tests/integration/integration-loki/.rendered/output.yaml @@ -683,7 +683,6 @@ data: targets = loki_integration_discovery.loki.output job_label = "integrations/loki" clustering = true - keep_metrics = "up|scrape_samples_scraped|go_gc_cycles_total_gc_cycles_total|go_gc_duration_seconds|go_gc_duration_seconds_count|go_gc_duration_seconds_sum|go_gc_pauses_seconds_bucket|go_goroutines|go_memstats_heap_inuse_bytes|loki_azure_blob_request_duration_seconds_bucket|loki_azure_blob_request_duration_seconds_count|loki_bigtable_request_duration_seconds_bucket|loki_bigtable_request_duration_seconds_count|loki_bloom_blocks_cache_added_total|loki_bloom_blocks_cache_entries|loki_bloom_blocks_cache_evicted_total|loki_bloom_blocks_cache_fetched_total|loki_bloom_blocks_cache_usage_bytes|loki_bloom_chunks_indexed_total|loki_bloom_gateway_block_query_latency_seconds_bucket|loki_bloom_gateway_dequeue_duration_seconds_bucket|loki_bloom_gateway_filtered_chunks_sum|loki_bloom_gateway_filtered_series_sum|loki_bloom_gateway_inflight_tasks|loki_bloom_gateway_process_duration_seconds_bucket|loki_bloom_gateway_process_duration_seconds_count|loki_bloom_gateway_querier_chunks_filtered_total|loki_bloom_gateway_querier_chunks_skipped_total|loki_bloom_gateway_querier_chunks_total|loki_bloom_gateway_querier_series_filtered_total|loki_bloom_gateway_querier_series_skipped_total|loki_bloom_gateway_querier_series_total|loki_bloom_gateway_queue_duration_seconds_bucket|loki_bloom_gateway_queue_duration_seconds_count|loki_bloom_gateway_queue_duration_seconds_sum|loki_bloom_gateway_queue_length|loki_bloom_gateway_requested_chunks_sum|loki_bloom_gateway_requested_series_sum|loki_bloom_gateway_tasks_dequeued_bucket|loki_bloom_gateway_tasks_dequeued_total|loki_bloom_gateway_tasks_processed_total|loki_bloom_inserts_total|loki_bloom_recorder_chunks_total|loki_bloom_recorder_series_total|loki_bloom_size_bucket|loki_bloom_store_blocks_fetched_size_bytes_bucket|loki_bloom_store_blocks_fetched_sum|loki_bloom_store_download_queue_size_sum|loki_bloom_store_metas_fetched_bucket|loki_bloom_store_metas_fetched_size_bytes_bucket|loki_bloom_store_metas_fetched_sum|loki_bloom_tokens_total|loki_bloombuilder_blocks_created_total|loki_bloombuilder_blocks_reused_total|loki_bloombuilder_bytes_per_task_bucket|loki_bloombuilder_chunk_series_size_sum|loki_bloombuilder_metas_created_total|loki_bloombuilder_processing_task|loki_bloombuilder_series_per_task_bucket|loki_bloomplanner_blocks_deleted_total|loki_bloomplanner_connected_builders|loki_bloomplanner_inflight_tasks|loki_bloomplanner_metas_deleted_total|loki_bloomplanner_queue_length|loki_bloomplanner_retention_running|loki_bloomplanner_retention_time_seconds_bucket|loki_bloomplanner_tenant_tasks_completed|loki_bloomplanner_tenant_tasks_planned|loki_boltdb_shipper_compact_tables_operation_duration_seconds|loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds|loki_boltdb_shipper_compact_tables_operation_total|loki_boltdb_shipper_request_duration_seconds_bucket|loki_boltdb_shipper_request_duration_seconds_count|loki_boltdb_shipper_request_duration_seconds_sum|loki_boltdb_shipper_retention_marker_count_total|loki_boltdb_shipper_retention_marker_table_processed_duration
_seconds_bucket|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum|loki_boltdb_shipper_retention_marker_table_processed_total|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum|loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time|loki_boltdb_shipper_retention_sweeper_marker_files_current|loki_build_info|loki_chunk_store_deduped_chunks_total|loki_chunk_store_index_entries_per_chunk_count|loki_chunk_store_index_entries_per_chunk_sum|loki_compactor_apply_retention_last_successful_run_timestamp_seconds|loki_compactor_apply_retention_operation_duration_seconds|loki_compactor_apply_retention_operation_total|loki_compactor_delete_requests_processed_total|loki_compactor_delete_requests_received_total|loki_compactor_deleted_lines|loki_compactor_load_pending_requests_attempts_total|loki_compactor_locked_table_successive_compaction_skips|loki_compactor_oldest_pending_delete_request_age_seconds|loki_compactor_pending_delete_requests_count|loki_consul_request_duration_seconds_bucket|loki_discarded_samples_total|loki_distributor_bytes_received_total|loki_distributor_ingester_append_failures_total|loki_distributor_lines_received_total|loki_distributor_structured_metadata_bytes_received_total|loki_dynamo_consumed_capacity_total|loki_dynamo_dropped_requests_total|loki_dynamo_failures_total|loki_dynamo_query_pages_count|loki_dynamo_request_duration_seconds_bucket|loki_dynamo_request_duration_seconds_count|loki_dynamo_throttled_total|loki_embeddedcache_entries|loki_embeddedcache_memory_bytes|loki_gcs_request_duration_seconds_bucket|loki_gcs_request_duration_seconds_count|loki_index_gateway_postfilter_chunks_sum|loki_index_gateway_prefilter_chunks_sum|loki_index_request_duration_seconds_bucket|loki_index_request_duration_seconds_count|loki_index_request_duration_seconds_sum|loki_ingester_chunk_age_seconds_bucket|loki_ingester_chunk_age_seconds_count|loki_ingester_chunk_age_seconds_sum|loki_ingester_chunk_bounds_hours_bucket|loki_ingester_chunk_bounds_hours_count|loki_ingester_chunk_bounds_hours_sum|loki_ingester_chunk_entries_bucket|loki_ingester_chunk_entries_count|loki_ingester_chunk_entries_sum|loki_ingester_chunk_size_bytes_bucket|loki_ingester_chunk_utilization_bucket|loki_ingester_chunk_utilization_count|loki_ingester_chunk_utilization_sum|loki_ingester_chunks_flushed_total|loki_ingester_flush_queue_length|loki_ingester_memory_chunks|loki_ingester_memory_streams|loki_ingester_streams_created_total|loki_memcache_request_duration_seconds_bucket|loki_memcache_request_duration_seconds_count|loki_panic_total|loki_prometheus_rule_group_rules|loki_request_duration_seconds_bucket|loki_request_duration_seconds_count|loki_request_duration_seconds_sum|loki_ruler_wal_appender_ready|loki_ruler_wal_disk_size|loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds|loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds|loki_ruler_wal_prometheus_remote_storage_samples_pending|loki_ruler_wal_prometheus_remote_storage_samples_total|loki_ruler_wal_samples_appended_total|loki_ruler_wal_storage_created_series_total|loki_s3_request_duration_seconds_bucket|loki_s3_request_duration_seconds_count" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -1177,9 
+1176,15 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } From 41b7494836efcfad5aeab145f362ace01d352222 Mon Sep 17 00:00:00 2001 From: Aaron Date: Mon, 3 Feb 2025 15:29:27 -0500 Subject: [PATCH 11/42] Update charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml Co-authored-by: Pete Wall --- .../k8s-monitoring/charts/feature-cluster-metrics/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml index e21788fee..71f54c44a 100644 --- a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml +++ b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml @@ -434,7 +434,7 @@ kube-state-metrics: # @section -- kube-state-metrics namespace: "" - # Comma-separated list(string) or yaml list of namespaces to be enabled for collecting resources. By default all namespaces are collected. + # -- Comma-separated list(string) or yaml list of namespaces to be enabled for collecting resources. By default all namespaces are collected. # @section -- kube-state-metrics namespaces: "" From 3cd9e02566d7ae778409b17c9b90d96178872dcc Mon Sep 17 00:00:00 2001 From: Pete Wall Date: Tue, 4 Feb 2025 09:46:24 -0600 Subject: [PATCH 12/42] Fix a few things: (#1200) * Fix a few things: * Pod Logs annotations and labels * Turn off authentication for Grafana in integration tests * Fix validation messages and check for pod logs Signed-off-by: Pete Wall * Fix test Signed-off-by: Pete Wall --------- Signed-off-by: Pete Wall --- .../templates/_module.alloy.tpl | 2 +- .../templates/_collector_validation.tpl | 2 +- .../templates/_collector_validation.tpl | 8 +- .../templates/_common_pod_discovery.alloy.tpl | 19 +- .../feature-pod-logs/tests/default_test.yaml | 60 ++-- .../auth/bearer-token/alloy-logs.alloy | 54 ++-- .../auth/bearer-token/alloy-metrics.alloy | 8 +- .../auth/bearer-token/alloy-receiver.alloy | 25 +- .../examples/auth/bearer-token/output.yaml | 93 +++--- .../auth/embedded-secrets/alloy-logs.alloy | 54 ++-- .../auth/embedded-secrets/alloy-metrics.alloy | 8 +- .../embedded-secrets/alloy-receiver.alloy | 23 +- .../auth/embedded-secrets/output.yaml | 91 +++--- .../auth/external-secrets/alloy-logs.alloy | 54 ++-- .../auth/external-secrets/alloy-metrics.alloy | 8 +- .../external-secrets/alloy-receiver.alloy | 25 +- .../auth/external-secrets/output.yaml | 93 +++--- .../examples/auth/oauth2/alloy-logs.alloy | 55 ++-- .../examples/auth/oauth2/alloy-metrics.alloy | 61 ++-- .../auth/oauth2/alloy-singleton.alloy | 2 - .../docs/examples/auth/oauth2/output.yaml | 124 ++++---- .../examples/auth/sigv4/alloy-metrics.alloy | 54 ++-- .../docs/examples/auth/sigv4/output.yaml | 60 ++-- .../collector-storage/alloy-logs.alloy | 54 ++-- .../collector-storage/alloy-metrics.alloy | 54 ++-- .../examples/collector-storage/output.yaml | 114 ++++--- .../otlp-endpoint/alloy-logs.alloy | 54 ++-- .../otlp-endpoint/alloy-metrics.alloy | 54 ++-- .../destinations/otlp-endpoint/output.yaml | 114 ++++--- .../extra-configuration/alloy-metrics.alloy | 1 - .../examples/extra-configuration/output.yaml | 3 +- .../examples/extra-rules/alloy-logs.alloy | 58 ++-- 
.../examples/extra-rules/alloy-metrics.alloy | 57 ++-- .../extra-rules/alloy-singleton.alloy | 2 - .../docs/examples/extra-rules/output.yaml | 123 ++++---- .../default/alloy-metrics.alloy | 2 - .../default/output.yaml | 8 +- .../prom-annotations/alloy-metrics.alloy | 2 - .../prom-annotations/output.yaml | 8 +- .../default/alloy-receiver.alloy | 24 +- .../default/output.yaml | 30 +- .../alloy-metrics.alloy | 2 - .../alloy-receiver.alloy | 23 +- .../beyla-metrics-and-traces/output.yaml | 31 +- .../beyla-metrics/alloy-metrics.alloy | 2 - .../beyla-metrics/output.yaml | 8 +- .../default/alloy-singleton.alloy | 1 - .../cluster-events/default/output.yaml | 1 - .../control-plane-monitoring/alloy-logs.alloy | 54 ++-- .../alloy-metrics.alloy | 94 +++--- .../alloy-singleton.alloy | 2 - .../control-plane-monitoring/output.yaml | 156 +++++----- .../default/alloy-metrics.alloy | 72 +++-- .../cluster-metrics/default/output.yaml | 78 +++-- .../integrations/alloy/alloy-metrics.alloy | 26 +- .../features/integrations/alloy/output.yaml | 32 +- .../cert-manager/alloy-metrics.alloy | 10 +- .../integrations/cert-manager/output.yaml | 16 +- .../integrations/etcd/alloy-metrics.alloy | 10 +- .../features/integrations/etcd/output.yaml | 16 +- .../integrations/grafana/alloy-logs.alloy | 58 ++-- .../integrations/grafana/alloy-metrics.alloy | 26 +- .../features/integrations/grafana/output.yaml | 90 +++--- .../integrations/loki/alloy-logs.alloy | 68 ++--- .../integrations/loki/alloy-metrics.alloy | 26 +- .../features/integrations/loki/output.yaml | 101 +++---- .../integrations/mimir/alloy-logs.alloy | 68 ++--- .../integrations/mimir/alloy-metrics.alloy | 26 +- .../features/integrations/mimir/output.yaml | 101 +++---- .../integrations/mysql/alloy-logs.alloy | 64 ++-- .../integrations/mysql/alloy-metrics.alloy | 18 +- .../features/integrations/mysql/output.yaml | 88 +++--- .../node-logs/default/alloy-logs.alloy | 1 - .../features/node-logs/default/output.yaml | 1 - .../pod-logs/default/alloy-logs.alloy | 54 ++-- .../features/pod-logs/default/output.yaml | 54 ++-- .../profiling/default/alloy-profiles.alloy | 95 +++--- .../features/profiling/default/output.yaml | 95 +++--- .../default/alloy-metrics.alloy | 8 +- .../default/output.yaml | 14 +- .../istio-service-mesh/alloy-metrics.alloy | 55 ++-- .../examples/istio-service-mesh/output.yaml | 61 ++-- .../meta-monitoring/alloy-receiver.alloy | 25 +- .../meta-monitoring/alloy-singleton.alloy | 253 ++++++++-------- .../docs/examples/meta-monitoring/output.yaml | 286 +++++++++--------- .../metrics-tuning/alloy-metrics.alloy | 55 ++-- .../docs/examples/metrics-tuning/output.yaml | 61 ++-- .../platforms/azure-aks/alloy-logs.alloy | 54 ++-- .../platforms/azure-aks/alloy-metrics.alloy | 53 ++-- .../platforms/azure-aks/alloy-singleton.alloy | 2 - .../examples/platforms/azure-aks/output.yaml | 115 ++++--- .../platforms/eks-fargate/alloy-logs.alloy | 48 +-- .../platforms/eks-fargate/alloy-metrics.alloy | 41 ++- .../eks-fargate/alloy-singleton.alloy | 2 - .../platforms/eks-fargate/output.yaml | 97 +++--- .../platforms/gke-autopilot/alloy-logs.alloy | 54 ++-- .../gke-autopilot/alloy-metrics.alloy | 41 ++- .../gke-autopilot/alloy-singleton.alloy | 2 - .../platforms/gke-autopilot/output.yaml | 103 +++---- .../platforms/openshift/alloy-logs.alloy | 54 ++-- .../platforms/openshift/alloy-metrics.alloy | 63 ++-- .../platforms/openshift/alloy-singleton.alloy | 2 - .../examples/platforms/openshift/output.yaml | 125 ++++---- .../alloy-logs.alloy | 62 ++-- .../alloy-metrics.alloy | 54 ++-- 
.../alloy-receiver.alloy | 23 +- .../pod-labels-and-annotations/output.yaml | 145 ++++----- .../globally/alloy-logs.alloy | 54 ++-- .../globally/alloy-metrics.alloy | 54 ++-- .../globally/output.yaml | 114 ++++--- .../individual/alloy-metrics.alloy | 55 ++-- .../individual/output.yaml | 61 ++-- .../docs/examples/proxies/alloy-logs.alloy | 54 ++-- .../docs/examples/proxies/alloy-metrics.alloy | 53 ++-- .../examples/proxies/alloy-profiles.alloy | 95 +++--- .../examples/proxies/alloy-receiver.alloy | 23 +- .../examples/proxies/alloy-singleton.alloy | 2 - .../docs/examples/proxies/output.yaml | 233 +++++++------- .../examples/remote-config/alloy-logs.alloy | 3 - .../remote-config/alloy-metrics.alloy | 3 - .../docs/examples/remote-config/output.yaml | 6 - .../autoscaling/alloy-metrics.alloy | 54 ++-- .../scalability/autoscaling/output.yaml | 60 ++-- .../alloy-metrics.alloy | 54 ++-- .../sharded-kube-state-metrics/output.yaml | 60 ++-- .../examples/tolerations/alloy-logs.alloy | 54 ++-- .../examples/tolerations/alloy-metrics.alloy | 54 ++-- .../docs/examples/tolerations/output.yaml | 114 ++++--- .../templates/alloy-config.yaml | 30 +- .../templates/features/_feature_pod_logs.tpl | 2 +- .../features/_feature_self_reporting.tpl | 15 +- .../tests/cluster_events_test.yaml | 1 - .../.rendered/output.yaml | 8 +- .../deployments/grafana.yaml | 4 + .../integration/auth/.rendered/output.yaml | 72 +++-- .../integration/auth/deployments/grafana.yaml | 4 + .../.rendered/output.yaml | 31 +- .../deployments/grafana.yaml | 4 + .../cluster-monitoring/.rendered/output.yaml | 133 ++++---- .../deployments/grafana.yaml | 4 + .../.rendered/output.yaml | 147 +++++---- .../deployments/grafana.yaml | 4 + .../.rendered/output.yaml | 16 +- .../deployments/grafana.yaml | 4 + .../integration-grafana/.rendered/output.yaml | 144 +++++---- .../deployments/grafana.yaml | 3 + .../integration-loki/.rendered/output.yaml | 155 +++++----- .../integration-loki/deployments/grafana.yaml | 4 + .../integration-mysql/.rendered/output.yaml | 80 +++-- .../deployments/grafana.yaml | 4 + .../istio-service-mesh/.rendered/output.yaml | 116 ++++--- .../deployments/grafana.yaml | 4 + .../profiling/.rendered/output.yaml | 95 +++--- .../profiling/deployments/grafana.yaml | 4 + .../.rendered/output.yaml | 61 ++-- .../deployments/grafana.yaml | 4 + .../.rendered/output.yaml | 61 ++-- .../deployments/grafana.yaml | 4 + .../split-destinations/.rendered/output.yaml | 114 ++++--- .../deployments/grafana.yaml | 4 + .../integration/statsd/.rendered/output.yaml | 3 +- .../statsd/deployments/grafana.yaml | 4 + .../deployments/grafana.yaml | 4 + .../eks-with-windows/.rendered/output.yaml | 141 ++++----- .../gke-autopilot/.rendered/output.yaml | 100 +++--- .../k8s-monitoring/.rendered/output.yaml | 158 +++++----- .../otlp-gateway/.rendered/output.yaml | 32 +- .../remote-config/.rendered/output.yaml | 6 - 168 files changed, 4046 insertions(+), 4369 deletions(-) diff --git a/charts/k8s-monitoring/charts/feature-application-observability/templates/_module.alloy.tpl b/charts/k8s-monitoring/charts/feature-application-observability/templates/_module.alloy.tpl index a692dee90..41d471aad 100644 --- a/charts/k8s-monitoring/charts/feature-application-observability/templates/_module.alloy.tpl +++ b/charts/k8s-monitoring/charts/feature-application-observability/templates/_module.alloy.tpl @@ -27,7 +27,7 @@ declare "application_observability" { {{- end }} {{- end }} - // {{ $component.description }} + // {{ $component.description | trim }} {{- include (printf 
"feature.applicationObservability.%s.alloy" $component.component) $args | indent 2 }} {{- end }} } diff --git a/charts/k8s-monitoring/charts/feature-node-logs/templates/_collector_validation.tpl b/charts/k8s-monitoring/charts/feature-node-logs/templates/_collector_validation.tpl index 84bb24688..a854ab32e 100644 --- a/charts/k8s-monitoring/charts/feature-node-logs/templates/_collector_validation.tpl +++ b/charts/k8s-monitoring/charts/feature-node-logs/templates/_collector_validation.tpl @@ -8,6 +8,6 @@ {{- fail (printf "Node Logs feature requires Alloy to mount /var/log.\nPlease set:\n%s:\n alloy:\n mounts:\n varlog: true" .CollectorName) }} {{- end -}} {{- if .Collector.alloy.clustering.enabled }} - {{- fail (printf "Node Logs feature requires Alloy to not be in clustering mode.\nPlease set:\n%s:\n alloy:\n clustering:\n enabled: true" .CollectorName) }} + {{- fail (printf "Node Logs feature requires Alloy clustering to be disabled.\nPlease set:\n%s:\n alloy:\n clustering:\n enabled: false" .CollectorName) }} {{- end -}} {{- end -}} diff --git a/charts/k8s-monitoring/charts/feature-pod-logs/templates/_collector_validation.tpl b/charts/k8s-monitoring/charts/feature-pod-logs/templates/_collector_validation.tpl index 2f852e2aa..23b754926 100644 --- a/charts/k8s-monitoring/charts/feature-pod-logs/templates/_collector_validation.tpl +++ b/charts/k8s-monitoring/charts/feature-pod-logs/templates/_collector_validation.tpl @@ -9,14 +9,18 @@ {{- fail (printf "Pod Logs feature requires Alloy to mount /var/log when using the \"volumes\" gather method.\nPlease set:\n%s:\n alloy:\n mounts:\n varlog: true" .CollectorName) }} {{- end -}} {{- if .Collector.alloy.clustering.enabled }} - {{- fail (printf "Pod Logs feature requires Alloy to not be in clustering mode when using the \"volumes\" gather method.\nPlease set:\n%s:\n alloy:\n clustering:\n enabled: true" .CollectorName) }} + {{- fail (printf "Pod Logs feature requires Alloy clustering to be disabled when using the \"volumes\" gather method.\nPlease set:\n%s:\n alloy:\n clustering:\n enabled: false" .CollectorName) }} {{- end -}} {{- else if eq .Values.gatherMethod "kubernetesApi" }} {{- if .Collector.alloy.mounts.varlog }} {{- fail (printf "Pod Logs feature should not mount /var/log when using the \"kubernetesApi\" gather method.\nPlease set:\n%s:\n alloy:\n mounts:\n varlog: false" .CollectorName) }} {{- end -}} {{- if not .Collector.alloy.clustering.enabled }} - {{- fail (printf "Pod Logs feature requires Alloy to be in clustering mode when using the \"kubernetesApi\" gather method.\nPlease set:\n%s:\n alloy:\n clustering:\n enabled: true" .CollectorName) }} + {{- if eq .Collector.controller.type "daemonset" }} + {{- fail (printf "Pod Logs feature requires Alloy DaemonSet to be in clustering mode when using the \"kubernetesApi\" gather method.\nPlease set:\n%s:\n alloy:\n clustering:\n enabled: true" .CollectorName) }} + {{- else if gt (.Collector.controller.replicas | int) 1 }} + {{- fail (printf "Pod Logs feature requires Alloy with multiple replicas to be in clustering mode when using the \"kubernetesApi\" gather method.\nPlease set:\n%s:\n alloy:\n clustering:\n enabled: true" .CollectorName) }} + {{- end -}} {{- end -}} {{- end -}} {{- end -}} diff --git a/charts/k8s-monitoring/charts/feature-pod-logs/templates/_common_pod_discovery.alloy.tpl b/charts/k8s-monitoring/charts/feature-pod-logs/templates/_common_pod_discovery.alloy.tpl index b78673ae4..4488a55d1 100644 --- 
a/charts/k8s-monitoring/charts/feature-pod-logs/templates/_common_pod_discovery.alloy.tpl +++ b/charts/k8s-monitoring/charts/feature-pod-logs/templates/_common_pod_discovery.alloy.tpl @@ -40,13 +40,6 @@ discovery.relabel "filtered_pods" { target_label = "tmp_container_runtime" } - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["{{ include "pod_annotation" .Values.annotations.job }}"] - regex = "(.+)" - target_label = "job" - } - // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { @@ -102,6 +95,18 @@ discovery.relabel "filtered_pods" { target_label = "deployment_environment" } +{{- range $label, $k8sAnnotation := .Values.annotations }} + rule { + source_labels = ["{{ include "pod_annotation" $k8sAnnotation }}"] + target_label = {{ $label | quote }} + } +{{- end }} +{{- range $label, $k8sLabels := .Values.labels }} + rule { + source_labels = ["{{ include "pod_label" $k8sLabels }}"] + target_label = {{ $label | quote }} + } +{{- end }} {{- if .Values.extraDiscoveryRules }} {{ .Values.extraDiscoveryRules | indent 2 }} diff --git a/charts/k8s-monitoring/charts/feature-pod-logs/tests/default_test.yaml b/charts/k8s-monitoring/charts/feature-pod-logs/tests/default_test.yaml index 128c80a77..e516ee281 100644 --- a/charts/k8s-monitoring/charts/feature-pod-logs/tests/default_test.yaml +++ b/charts/k8s-monitoring/charts/feature-pod-logs/tests/default_test.yaml @@ -51,13 +51,6 @@ tests: target_label = "tmp_container_runtime" } - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { @@ -112,6 +105,14 @@ tests: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } discovery.kubernetes "pods" { @@ -243,13 +244,6 @@ tests: target_label = "tmp_container_runtime" } - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { @@ -304,6 +298,14 @@ tests: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } discovery.kubernetes "pods" { @@ -433,13 +435,6 @@ tests: target_label = "tmp_container_runtime" } - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = 
["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { @@ -494,6 +489,14 @@ tests: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } discovery.kubernetes "pods" { @@ -631,13 +634,6 @@ tests: target_label = "tmp_container_runtime" } - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { @@ -692,6 +688,14 @@ tests: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } discovery.kubernetes "pods" { diff --git a/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-logs.alloy index f160114fb..82f237c5b 100644 --- a/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-logs.alloy @@ -16,13 +16,12 @@ loki.write "loki" { "k8s_cluster_name" = "bearer-token-example-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -47,7 +46,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -56,28 +55,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -99,14 +91,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -118,8 +110,16 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -127,10 +127,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -139,22 +139,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -163,12 +163,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -176,7 +176,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -186,12 +186,12 @@ declare "pod_logs" { "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
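The hard-coded job rule has been replaced by relabel rules generated from the pod-logs feature's `annotations` and `labels` value maps, so any pod annotation or label can be projected onto a log label. A minimal values sketch, assuming the feature is configured under the parent chart's `podLogs` block (an assumption; only the map shape is confirmed by the template and the rendered defaults), with both destination labels also appearing in the `keepLabels` list used by `stage.label_keep` just below:

  podLogs:
    annotations:
      job: k8s.grafana.com/logs.job                    # job label taken from this pod annotation
    labels:
      app_kubernetes_io_name: app.kubernetes.io/name   # label taken from this pod label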
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-metrics.alloy index b592aa6fc..b6ae91511 100644 --- a/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-metrics.alloy @@ -52,13 +52,12 @@ remote.kubernetes.secret "prometheus" { name = "prometheus-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Prometheus Operator Objects declare "prometheus_operator_objects" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + // Prometheus Operator PodMonitor objects prometheus.operator.podmonitors "pod_monitors" { clustering { @@ -69,7 +68,7 @@ declare "prometheus_operator_objects" { } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator Probe objects prometheus.operator.probes "pod_monitors" { clustering { @@ -80,7 +79,7 @@ declare "prometheus_operator_objects" { } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator ServiceMonitor objects prometheus.operator.servicemonitors "service_monitors" { clustering { @@ -97,7 +96,6 @@ prometheus_operator_objects "feature" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-receiver.alloy b/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-receiver.alloy index b646dedbe..b5fa58458 100644 --- a/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-receiver.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/bearer-token/alloy-receiver.alloy @@ -136,7 +136,6 @@ remote.kubernetes.secret "tempo" { name = "my-tempo-secret" namespace = "tempo" } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -151,14 +150,14 @@ declare "application_observability" { comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // Jaeger Receiver + // Jaeger Receiver otelcol.receiver.jaeger "receiver" { protocols { thrift_http { endpoint = "0.0.0.0:14268" } } - + debug_metrics { disable_high_cardinality_metrics = true } @@ -167,13 +166,13 @@ declare "application_observability" { } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -181,7 +180,7 @@ declare "application_observability" { } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -191,7 +190,7 @@ declare "application_observability" { from = "connection" } } - + output { metrics = 
[otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -199,16 +198,16 @@ declare "application_observability" { } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -219,7 +218,7 @@ declare "application_observability" { "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -227,12 +226,12 @@ declare "application_observability" { } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/auth/bearer-token/output.yaml b/charts/k8s-monitoring/docs/examples/auth/bearer-token/output.yaml index 45b30ecb2..6cb7d9d3a 100644 --- a/charts/k8s-monitoring/docs/examples/auth/bearer-token/output.yaml +++ b/charts/k8s-monitoring/docs/examples/auth/bearer-token/output.yaml @@ -119,13 +119,12 @@ data: name = "prometheus-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Prometheus Operator Objects declare "prometheus_operator_objects" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + // Prometheus Operator PodMonitor objects prometheus.operator.podmonitors "pod_monitors" { clustering { @@ -136,7 +135,7 @@ data: } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator Probe objects prometheus.operator.probes "pod_monitors" { clustering { @@ -147,7 +146,7 @@ data: } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator ServiceMonitor objects prometheus.operator.servicemonitors "service_monitors" { clustering { @@ -164,7 +163,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -207,11 +205,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -247,13 +243,12 @@ data: "k8s_cluster_name" = "bearer-token-example-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -278,7 +273,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -287,28 +282,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the 
pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -330,14 +318,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -349,8 +337,16 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -358,10 +354,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -370,22 +366,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -394,12 +390,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -407,7 +403,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -417,12 +413,12 @@ data: "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } @@ -578,7 +574,6 @@ data: name = "my-tempo-secret" namespace = "tempo" } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -593,14 +588,14 @@ data: comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // Jaeger Receiver + // Jaeger Receiver otelcol.receiver.jaeger "receiver" { protocols { thrift_http { endpoint = "0.0.0.0:14268" } } - + debug_metrics { disable_high_cardinality_metrics = true } @@ -609,13 +604,13 @@ data: } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -623,7 +618,7 @@ data: } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -633,7 +628,7 @@ data: from = "connection" } } - + output { metrics = [otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -641,16 +636,16 @@ data: } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -661,7 +656,7 @@ data: "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -669,12 +664,12 @@ data: } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-logs.alloy index 69b8cad1c..79e7c9294 100644 --- a/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-logs.alloy @@ -16,13 +16,12 @@ loki.write "loki" { "k8s_cluster_name" = "embedded-secrets-example-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -47,7 +46,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -56,28 +55,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - 
source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -99,14 +91,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -118,8 +110,16 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -127,10 +127,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -139,22 +139,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -163,12 +163,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -176,7 +176,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -186,12 +186,12 @@ declare "pod_logs" { "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
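With the revised collector validation earlier in this patch, the "kubernetesApi" gather method only requires clustering when the collector runs as a DaemonSet or with more than one replica; a single-replica controller now passes without it, and /var/log must not be mounted. A minimal sketch of a passing configuration, assuming the feature sits under `podLogs` and the collector is the chart's `alloy-logs` instance (both names are assumptions based on the chart's conventions):

  podLogs:
    gatherMethod: kubernetesApi
  alloy-logs:
    alloy:
      mounts:
        varlog: false      # the kubernetesApi gather method must not mount /var/log
      clustering:
        enabled: true      # required for a DaemonSet collector or when replicas > 1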
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-metrics.alloy index 7ce1b777d..e575c2284 100644 --- a/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-metrics.alloy @@ -51,13 +51,12 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - // Feature: Prometheus Operator Objects declare "prometheus_operator_objects" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + // Prometheus Operator PodMonitor objects prometheus.operator.podmonitors "pod_monitors" { clustering { @@ -68,7 +67,7 @@ declare "prometheus_operator_objects" { } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator Probe objects prometheus.operator.probes "pod_monitors" { clustering { @@ -79,7 +78,7 @@ declare "prometheus_operator_objects" { } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator ServiceMonitor objects prometheus.operator.servicemonitors "service_monitors" { clustering { @@ -96,7 +95,6 @@ prometheus_operator_objects "feature" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-receiver.alloy b/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-receiver.alloy index 249ffc9ba..59203f07b 100644 --- a/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-receiver.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/alloy-receiver.alloy @@ -124,7 +124,6 @@ otelcol.exporter.otlp "tempo" { } } } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -139,7 +138,7 @@ declare "application_observability" { comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // OTLP Receiver + // OTLP Receiver otelcol.receiver.otlp "receiver" { grpc { endpoint = "0.0.0.0:4317" @@ -154,13 +153,13 @@ declare "application_observability" { } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -168,7 +167,7 @@ declare "application_observability" { } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -178,7 +177,7 @@ declare "application_observability" { from = "connection" } } - + output { metrics = [otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -186,16 +185,16 @@ declare "application_observability" { } } - // Host Info 
Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -206,7 +205,7 @@ declare "application_observability" { "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -214,12 +213,12 @@ declare "application_observability" { } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/output.yaml b/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/output.yaml index 7458134dc..d918fe6ea 100644 --- a/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/output.yaml +++ b/charts/k8s-monitoring/docs/examples/auth/embedded-secrets/output.yaml @@ -108,13 +108,12 @@ data: max_keepalive_time = "8h" } } - // Feature: Prometheus Operator Objects declare "prometheus_operator_objects" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + // Prometheus Operator PodMonitor objects prometheus.operator.podmonitors "pod_monitors" { clustering { @@ -125,7 +124,7 @@ data: } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator Probe objects prometheus.operator.probes "pod_monitors" { clustering { @@ -136,7 +135,7 @@ data: } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator ServiceMonitor objects prometheus.operator.servicemonitors "service_monitors" { clustering { @@ -153,7 +152,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -196,11 +194,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -236,13 +232,12 @@ data: "k8s_cluster_name" = "embedded-secrets-example-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -267,7 +262,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -276,28 +271,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all 
annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -319,14 +307,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -338,8 +326,16 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -347,10 +343,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -359,22 +355,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -383,12 +379,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -396,7 +392,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -406,12 +402,12 @@ data: "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } @@ -555,7 +551,6 @@ data: } } } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -570,7 +565,7 @@ data: comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // OTLP Receiver + // OTLP Receiver otelcol.receiver.otlp "receiver" { grpc { endpoint = "0.0.0.0:4317" @@ -585,13 +580,13 @@ data: } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -599,7 +594,7 @@ data: } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -609,7 +604,7 @@ data: from = "connection" } } - + output { metrics = [otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -617,16 +612,16 @@ data: } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -637,7 +632,7 @@ data: "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -645,12 +640,12 @@ data: } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-logs.alloy index b92f112ab..423391ffd 100644 --- a/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-logs.alloy @@ -28,13 +28,12 @@ remote.kubernetes.secret "loki" { name = "my-monitoring-secret" namespace = "monitoring" } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -59,7 +58,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -68,28 +67,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make 
all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -111,14 +103,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -130,8 +122,16 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -139,10 +139,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -151,22 +151,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -175,12 +175,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -188,7 +188,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -198,12 +198,12 @@ declare "pod_logs" { "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-metrics.alloy index 6f4fd226c..aa1ebfa58 100644 --- a/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-metrics.alloy @@ -59,13 +59,12 @@ remote.kubernetes.secret "prometheus" { name = "my-monitoring-secret" namespace = "monitoring" } - // Feature: Prometheus Operator Objects declare "prometheus_operator_objects" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + // Prometheus Operator PodMonitor objects prometheus.operator.podmonitors "pod_monitors" { clustering { @@ -76,7 +75,7 @@ declare "prometheus_operator_objects" { } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator Probe objects prometheus.operator.probes "pod_monitors" { clustering { @@ -87,7 +86,7 @@ declare "prometheus_operator_objects" { } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator ServiceMonitor objects prometheus.operator.servicemonitors "service_monitors" { clustering { @@ -104,7 +103,6 @@ prometheus_operator_objects "feature" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-receiver.alloy b/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-receiver.alloy index 7d623688d..e004ce60f 100644 --- a/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-receiver.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/external-secrets/alloy-receiver.alloy @@ -156,7 +156,6 @@ remote.kubernetes.secret "tempo" { name = "my-tempo-secret" namespace = "tempo" } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -171,14 +170,14 @@ declare "application_observability" { comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // Jaeger Receiver + // Jaeger Receiver otelcol.receiver.jaeger "receiver" { protocols { grpc { endpoint = "0.0.0.0:14250" } } - + debug_metrics { disable_high_cardinality_metrics = true } @@ -187,13 +186,13 @@ declare "application_observability" { } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -201,7 +200,7 @@ declare "application_observability" { } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -211,7 +210,7 @@ declare "application_observability" { from = "connection" } } - + output { metrics = 
[otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -219,16 +218,16 @@ declare "application_observability" { } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -239,7 +238,7 @@ declare "application_observability" { "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -247,12 +246,12 @@ declare "application_observability" { } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/auth/external-secrets/output.yaml b/charts/k8s-monitoring/docs/examples/auth/external-secrets/output.yaml index 98ee30118..eb4a84e3e 100644 --- a/charts/k8s-monitoring/docs/examples/auth/external-secrets/output.yaml +++ b/charts/k8s-monitoring/docs/examples/auth/external-secrets/output.yaml @@ -116,13 +116,12 @@ data: name = "my-monitoring-secret" namespace = "monitoring" } - // Feature: Prometheus Operator Objects declare "prometheus_operator_objects" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + // Prometheus Operator PodMonitor objects prometheus.operator.podmonitors "pod_monitors" { clustering { @@ -133,7 +132,7 @@ data: } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator Probe objects prometheus.operator.probes "pod_monitors" { clustering { @@ -144,7 +143,7 @@ data: } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator ServiceMonitor objects prometheus.operator.servicemonitors "service_monitors" { clustering { @@ -161,7 +160,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -204,11 +202,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -256,13 +252,12 @@ data: name = "my-monitoring-secret" namespace = "monitoring" } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -287,7 +282,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -296,28 +291,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to 
the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -339,14 +327,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -358,8 +346,16 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -367,10 +363,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -379,22 +375,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -403,12 +399,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -416,7 +412,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -426,12 +422,12 @@ data: "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } @@ -607,7 +603,6 @@ data: name = "my-tempo-secret" namespace = "tempo" } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -622,14 +617,14 @@ data: comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // Jaeger Receiver + // Jaeger Receiver otelcol.receiver.jaeger "receiver" { protocols { grpc { endpoint = "0.0.0.0:14250" } } - + debug_metrics { disable_high_cardinality_metrics = true } @@ -638,13 +633,13 @@ data: } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -652,7 +647,7 @@ data: } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -662,7 +657,7 @@ data: from = "connection" } } - + output { metrics = [otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -670,16 +665,16 @@ data: } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -690,7 +685,7 @@ data: "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -698,12 +693,12 @@ data: } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-logs.alloy index 93bdb97f9..4263308eb 100644 --- a/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-logs.alloy @@ -73,7 +73,6 @@ remote.kubernetes.secret "otel_endpoint" { name = "otel-endpoint-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Node Logs declare "node_logs" { argument "logs_destinations" { @@ -226,13 +225,12 @@ node_logs "feature" { otelcol.receiver.loki.otel_endpoint.receiver, ] } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -257,7 +255,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -266,28 +264,21 @@ declare "pod_logs" { 
replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -309,14 +300,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -328,8 +319,16 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -337,10 +336,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -349,22 +348,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -373,12 +372,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -386,7 +385,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -396,12 +395,12 @@ declare "pod_logs" { "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-metrics.alloy index f1152c687..6453a3e7c 100644 --- a/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-metrics.alloy @@ -73,7 +73,6 @@ remote.kubernetes.secret "otel_endpoint" { name = "otel-endpoint-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Annotation Autodiscovery declare "annotation_autodiscovery" { argument "metrics_destinations" { @@ -282,22 +281,21 @@ annotation_autodiscovery "feature" { otelcol.receiver.prometheus.otel_endpoint.receiver, ] } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -305,8 +303,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -314,8 +312,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -324,7 +322,7 @@ declare "cluster_metrics" { max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -396,17 +394,17 @@ declare "cluster_metrics" { replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -415,7 +413,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -425,17 +423,17 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -444,7 +442,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = 
node_exporter.kubernetes.targets.output rule { @@ -453,7 +451,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -463,8 +461,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -475,7 +473,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -484,7 +482,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -494,7 +492,7 @@ declare "cluster_metrics" { } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -503,20 +501,19 @@ declare "cluster_metrics" { action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ otelcol.receiver.prometheus.otel_endpoint.receiver, ] } - // Feature: Prometheus Operator Objects declare "prometheus_operator_objects" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + // Prometheus Operator PodMonitor objects prometheus.operator.podmonitors "pod_monitors" { clustering { @@ -527,7 +524,7 @@ declare "prometheus_operator_objects" { } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator Probe objects prometheus.operator.probes "pod_monitors" { clustering { @@ -538,7 +535,7 @@ declare "prometheus_operator_objects" { } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator ServiceMonitor objects prometheus.operator.servicemonitors "service_monitors" { clustering { diff --git a/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-singleton.alloy b/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-singleton.alloy index 3b74863de..f03d62304 100644 --- a/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-singleton.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/oauth2/alloy-singleton.alloy @@ -73,7 +73,6 @@ remote.kubernetes.secret "otel_endpoint" { name = "otel-endpoint-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Cluster Events declare "cluster_events" { argument "logs_destinations" { @@ -151,7 +150,6 @@ cluster_events "feature" { otelcol.receiver.loki.otel_endpoint.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/auth/oauth2/output.yaml b/charts/k8s-monitoring/docs/examples/auth/oauth2/output.yaml index 0e99a194b..d10b33a92 100644 --- a/charts/k8s-monitoring/docs/examples/auth/oauth2/output.yaml +++ b/charts/k8s-monitoring/docs/examples/auth/oauth2/output.yaml @@ -213,7 +213,6 @@ data: name = "otel-endpoint-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Annotation Autodiscovery declare "annotation_autodiscovery" { argument "metrics_destinations" { @@ -422,22 +421,21 @@ data: otelcol.receiver.prometheus.otel_endpoint.receiver, ] } - // Feature: Cluster Metrics declare "cluster_metrics" { argument 
"metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -445,8 +443,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -454,8 +452,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -464,7 +462,7 @@ data: max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -536,17 +534,17 @@ data: replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -555,7 +553,7 @@ data: "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -565,17 +563,17 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -584,7 +582,7 @@ data: "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -593,7 +591,7 @@ data: target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -603,8 +601,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -615,7 +613,7 @@ data: label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -624,7 +622,7 @@ data: target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -634,7 +632,7 @@ data: } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -643,20 +641,19 @@ data: action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ otelcol.receiver.prometheus.otel_endpoint.receiver, ] 
} - // Feature: Prometheus Operator Objects declare "prometheus_operator_objects" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + // Prometheus Operator PodMonitor objects prometheus.operator.podmonitors "pod_monitors" { clustering { @@ -667,7 +664,7 @@ data: } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator Probe objects prometheus.operator.probes "pod_monitors" { clustering { @@ -678,7 +675,7 @@ data: } forward_to = argument.metrics_destinations.value } - + // Prometheus Operator ServiceMonitor objects prometheus.operator.servicemonitors "service_monitors" { clustering { @@ -779,7 +776,6 @@ data: name = "otel-endpoint-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Cluster Events declare "cluster_events" { argument "logs_destinations" { @@ -857,7 +853,6 @@ data: otelcol.receiver.loki.otel_endpoint.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -900,11 +895,9 @@ data: otelcol.receiver.prometheus.otel_endpoint.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -1000,7 +993,6 @@ data: name = "otel-endpoint-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Node Logs declare "node_logs" { argument "logs_destinations" { @@ -1153,13 +1145,12 @@ data: otelcol.receiver.loki.otel_endpoint.receiver, ] } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -1184,7 +1175,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -1193,28 +1184,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -1236,14 +1220,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -1255,8 +1239,16 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -1264,10 +1256,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -1276,22 +1268,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -1300,12 +1292,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -1313,7 +1305,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -1323,12 +1315,12 @@ data: "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/auth/sigv4/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/auth/sigv4/alloy-metrics.alloy index bd8347b08..3d3273dab 100644 --- a/charts/k8s-monitoring/docs/examples/auth/sigv4/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/auth/sigv4/alloy-metrics.alloy @@ -56,22 +56,21 @@ remote.kubernetes.secret "prometheus" { name = "prometheus-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -79,8 +78,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -88,8 +87,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -98,7 +97,7 @@ declare "cluster_metrics" { max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -170,17 +169,17 @@ declare "cluster_metrics" { replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -189,7 +188,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -199,17 +198,17 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -218,7 +217,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -227,7 +226,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -237,8 +236,8 @@ 
declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -249,7 +248,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -258,7 +257,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -268,7 +267,7 @@ declare "cluster_metrics" { } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -277,14 +276,13 @@ declare "cluster_metrics" { action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/auth/sigv4/output.yaml b/charts/k8s-monitoring/docs/examples/auth/sigv4/output.yaml index 842121725..94fc16d3f 100644 --- a/charts/k8s-monitoring/docs/examples/auth/sigv4/output.yaml +++ b/charts/k8s-monitoring/docs/examples/auth/sigv4/output.yaml @@ -165,22 +165,21 @@ data: name = "prometheus-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -188,8 +187,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -197,8 +196,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -207,7 +206,7 @@ data: max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -279,17 +278,17 @@ data: replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -298,7 +297,7 @@ data: "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -308,17 +307,17 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - 
+ } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -327,7 +326,7 @@ data: "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -336,7 +335,7 @@ data: target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -346,8 +345,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -358,7 +357,7 @@ data: label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -367,7 +366,7 @@ data: target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -377,7 +376,7 @@ data: } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -386,14 +385,13 @@ data: action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -436,11 +434,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/collector-storage/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/collector-storage/alloy-logs.alloy index ad3d3c83f..ce3be3afc 100644 --- a/charts/k8s-monitoring/docs/examples/collector-storage/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/collector-storage/alloy-logs.alloy @@ -15,13 +15,12 @@ loki.write "loki" { "k8s_cluster_name" = "collector-storage-example-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -46,7 +45,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -55,28 +54,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the 
pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -98,14 +90,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -117,8 +109,16 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -126,10 +126,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -138,22 +138,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -162,12 +162,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -175,7 +175,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -185,12 +185,12 @@ declare "pod_logs" { "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/collector-storage/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/collector-storage/alloy-metrics.alloy index 9f5e230ad..02163c406 100644 --- a/charts/k8s-monitoring/docs/examples/collector-storage/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/collector-storage/alloy-metrics.alloy @@ -46,22 +46,21 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -69,8 +68,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -78,8 +77,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -88,7 +87,7 @@ declare "cluster_metrics" { max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -160,17 +159,17 @@ declare "cluster_metrics" { replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -179,7 +178,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -189,17 +188,17 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -208,7 +207,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -217,7 +216,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -227,8 +226,8 @@ declare 
"cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -239,7 +238,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -248,7 +247,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -258,7 +257,7 @@ declare "cluster_metrics" { } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -267,14 +266,13 @@ declare "cluster_metrics" { action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/collector-storage/output.yaml b/charts/k8s-monitoring/docs/examples/collector-storage/output.yaml index 009dad13a..c4a8967ca 100644 --- a/charts/k8s-monitoring/docs/examples/collector-storage/output.yaml +++ b/charts/k8s-monitoring/docs/examples/collector-storage/output.yaml @@ -160,22 +160,21 @@ data: max_keepalive_time = "8h" } } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -183,8 +182,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -192,8 +191,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -202,7 +201,7 @@ data: max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -274,17 +273,17 @@ data: replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -293,7 +292,7 @@ data: "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -303,17 +302,17 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + 
remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -322,7 +321,7 @@ data: "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -331,7 +330,7 @@ data: target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -341,8 +340,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -353,7 +352,7 @@ data: label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -362,7 +361,7 @@ data: target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -372,7 +371,7 @@ data: } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -381,14 +380,13 @@ data: action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -431,11 +429,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -469,13 +465,12 @@ data: "k8s_cluster_name" = "collector-storage-example-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -500,7 +495,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -509,28 +504,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -552,14 +540,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -571,8 +559,16 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -580,10 +576,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -592,22 +588,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -616,12 +612,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -629,7 +625,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -639,12 +635,12 @@ data: "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/alloy-logs.alloy index a7b32da85..7dcdf30e9 100644 --- a/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/alloy-logs.alloy @@ -68,13 +68,12 @@ remote.kubernetes.secret "otlp_gateway" { name = "otlp-gateway-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -99,7 +98,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -108,28 +107,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -151,14 +143,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -170,8 +162,16 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -179,10 +179,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -191,22 +191,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -215,12 +215,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -228,7 +228,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -238,12 +238,12 @@ declare "pod_logs" { "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/alloy-metrics.alloy index 3974cb1e9..f3ab32b22 100644 --- a/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/alloy-metrics.alloy @@ -68,22 +68,21 @@ remote.kubernetes.secret "otlp_gateway" { name = "otlp-gateway-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -91,8 +90,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -100,8 +99,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -110,7 +109,7 @@ declare "cluster_metrics" { max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -182,17 +181,17 @@ declare "cluster_metrics" { replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -201,7 +200,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -211,17 +210,17 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -230,7 +229,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -239,7 +238,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = 
discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -249,8 +248,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -261,7 +260,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -270,7 +269,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -280,7 +279,7 @@ declare "cluster_metrics" { } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -289,14 +288,13 @@ declare "cluster_metrics" { action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ otelcol.receiver.prometheus.otlp_gateway.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/output.yaml b/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/output.yaml index c05a056a6..bd0cb4fde 100644 --- a/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/output.yaml +++ b/charts/k8s-monitoring/docs/examples/destinations/otlp-endpoint/output.yaml @@ -193,22 +193,21 @@ data: name = "otlp-gateway-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -216,8 +215,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -225,8 +224,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -235,7 +234,7 @@ data: max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -307,17 +306,17 @@ data: replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -326,7 +325,7 @@ data: "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = 
kube_state_metrics.kubernetes.targets.output clustering = true @@ -336,17 +335,17 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -355,7 +354,7 @@ data: "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -364,7 +363,7 @@ data: target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -374,8 +373,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -386,7 +385,7 @@ data: label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -395,7 +394,7 @@ data: target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -405,7 +404,7 @@ data: } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -414,14 +413,13 @@ data: action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ otelcol.receiver.prometheus.otlp_gateway.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -464,11 +462,9 @@ data: otelcol.receiver.prometheus.otlp_gateway.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -555,13 +551,12 @@ data: name = "otlp-gateway-k8smon-k8s-monitoring" namespace = "default" } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -586,7 +581,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -595,28 +590,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = 
"__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -638,14 +626,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -657,8 +645,16 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -666,10 +662,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -678,22 +674,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -702,12 +698,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -715,7 +711,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -725,12 +721,12 @@ data: "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/extra-configuration/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/extra-configuration/alloy-metrics.alloy index 4bb73ef85..741012114 100644 --- a/charts/k8s-monitoring/docs/examples/extra-configuration/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/extra-configuration/alloy-metrics.alloy @@ -46,7 +46,6 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/extra-configuration/output.yaml b/charts/k8s-monitoring/docs/examples/extra-configuration/output.yaml index d65385439..a5755f7b6 100644 --- a/charts/k8s-monitoring/docs/examples/extra-configuration/output.yaml +++ b/charts/k8s-monitoring/docs/examples/extra-configuration/output.yaml @@ -71,7 +71,6 @@ data: max_keepalive_time = "8h" } } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -132,7 +131,9 @@ data: targets = discovery.kubernetes.animal_service.targets forward_to = [prometheus.remote_write.prometheus.receiver] } + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/extra-rules/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/extra-rules/alloy-logs.alloy index e22f0d243..a0290a5e9 100644 --- a/charts/k8s-monitoring/docs/examples/extra-rules/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/extra-rules/alloy-logs.alloy @@ -17,13 +17,12 @@ loki.write "loki" { region = env("REGION"), } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -48,7 +47,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -57,28 +56,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -100,14 +92,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -119,13 +111,21 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } rule { source_labels = ["__meta_kubernetes_namespace"] regex = "production" action = "keep" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -133,10 +133,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -145,22 +145,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -169,12 +169,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -182,7 +182,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -192,7 +192,7 @@ declare "pod_logs" { "tmp_container_runtime", ] } - + stage.static_labels { values = { site = "lab2", @@ -206,19 +206,19 @@ declare "pod_logs" { count = "", } } - + stage.labels { values = { sku = "", count = "", } } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/extra-rules/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/extra-rules/alloy-metrics.alloy index 99d7b4df9..e645791fc 100644 --- a/charts/k8s-monitoring/docs/examples/extra-rules/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/extra-rules/alloy-metrics.alloy @@ -55,22 +55,21 @@ prometheus.remote_write "prometheus" { region = env("REGION"), } } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -78,8 +77,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -87,8 +86,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -97,7 +96,7 @@ declare "cluster_metrics" { max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -169,17 +168,17 @@ declare "cluster_metrics" { replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -188,7 +187,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -199,27 +198,27 @@ declare "cluster_metrics" { max_cache_size = 100000 forward_to = [prometheus.relabel.kube_state_metrics.receiver] } - + prometheus.relabel "kube_state_metrics" { max_cache_size = 100000 - + rule { source_labels = ["namespace"] regex = "production" action = "keep" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -228,7 +227,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -237,7 +236,7 @@ declare "cluster_metrics" { target_label = 
"instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -247,8 +246,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -259,7 +258,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -268,7 +267,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -278,7 +277,7 @@ declare "cluster_metrics" { } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -287,7 +286,7 @@ declare "cluster_metrics" { action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ diff --git a/charts/k8s-monitoring/docs/examples/extra-rules/alloy-singleton.alloy b/charts/k8s-monitoring/docs/examples/extra-rules/alloy-singleton.alloy index 47c8d661a..187b96c1d 100644 --- a/charts/k8s-monitoring/docs/examples/extra-rules/alloy-singleton.alloy +++ b/charts/k8s-monitoring/docs/examples/extra-rules/alloy-singleton.alloy @@ -74,7 +74,6 @@ loki.write "loki" { region = env("REGION"), } } - // Feature: Cluster Events declare "cluster_events" { argument "logs_destinations" { @@ -153,7 +152,6 @@ cluster_events "feature" { loki.write.loki.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/extra-rules/output.yaml b/charts/k8s-monitoring/docs/examples/extra-rules/output.yaml index 2e23161f5..ae5f03fa2 100644 --- a/charts/k8s-monitoring/docs/examples/extra-rules/output.yaml +++ b/charts/k8s-monitoring/docs/examples/extra-rules/output.yaml @@ -185,22 +185,21 @@ data: region = env("REGION"), } } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -208,8 +207,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -217,8 +216,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -227,7 +226,7 @@ data: max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -299,17 +298,17 @@ data: replacement = "" } forward_to = argument.metrics_destinations.value - } 
- + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -318,7 +317,7 @@ data: "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -329,27 +328,27 @@ data: max_cache_size = 100000 forward_to = [prometheus.relabel.kube_state_metrics.receiver] } - + prometheus.relabel "kube_state_metrics" { max_cache_size = 100000 - + rule { source_labels = ["namespace"] regex = "production" action = "keep" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -358,7 +357,7 @@ data: "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -367,7 +366,7 @@ data: target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -377,8 +376,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -389,7 +388,7 @@ data: label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -398,7 +397,7 @@ data: target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -408,7 +407,7 @@ data: } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -417,7 +416,7 @@ data: action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ @@ -509,7 +508,6 @@ data: region = env("REGION"), } } - // Feature: Cluster Events declare "cluster_events" { argument "logs_destinations" { @@ -588,7 +586,6 @@ data: loki.write.loki.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -631,11 +628,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -672,13 +667,12 @@ data: region = env("REGION"), } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -703,7 +697,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" 
@@ -712,28 +706,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -755,14 +742,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -774,13 +761,21 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } rule { source_labels = ["__meta_kubernetes_namespace"] regex = "production" action = "keep" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -788,10 +783,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -800,22 +795,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -824,12 +819,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -837,7 +832,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. 
@@ -847,7 +842,7 @@ data: "tmp_container_runtime", ] } - + stage.static_labels { values = { site = "lab2", @@ -861,19 +856,19 @@ data: count = "", } } - + stage.labels { values = { sku = "", count = "", } } - + // Only keep the labels that are defined in the `keepLabels` list. stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/default/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/default/alloy-metrics.alloy index 623b43c78..942bf49df 100644 --- a/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/default/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/default/alloy-metrics.alloy @@ -46,7 +46,6 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - // Feature: Annotation Autodiscovery declare "annotation_autodiscovery" { argument "metrics_destinations" { @@ -255,7 +254,6 @@ annotation_autodiscovery "feature" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/default/output.yaml b/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/default/output.yaml index a0138c1fa..5b2624544 100644 --- a/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/default/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/default/output.yaml @@ -71,7 +71,6 @@ data: max_keepalive_time = "8h" } } - // Feature: Annotation Autodiscovery declare "annotation_autodiscovery" { argument "metrics_destinations" { @@ -280,7 +279,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -323,11 +321,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/prom-annotations/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/prom-annotations/alloy-metrics.alloy index 023961e2e..a7a494178 100644 --- a/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/prom-annotations/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/prom-annotations/alloy-metrics.alloy @@ -46,7 +46,6 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - // Feature: Annotation Autodiscovery declare "annotation_autodiscovery" { argument "metrics_destinations" { @@ -255,7 +254,6 @@ annotation_autodiscovery "feature" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git 
a/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/prom-annotations/output.yaml b/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/prom-annotations/output.yaml index 5ad48b9a7..2935ef1dc 100644 --- a/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/prom-annotations/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/annotation-autodiscovery/prom-annotations/output.yaml @@ -71,7 +71,6 @@ data: max_keepalive_time = "8h" } } - // Feature: Annotation Autodiscovery declare "annotation_autodiscovery" { argument "metrics_destinations" { @@ -280,7 +279,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -323,11 +321,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/features/application-observability/default/alloy-receiver.alloy b/charts/k8s-monitoring/docs/examples/features/application-observability/default/alloy-receiver.alloy index 76b9929c7..05a91a2bb 100644 --- a/charts/k8s-monitoring/docs/examples/features/application-observability/default/alloy-receiver.alloy +++ b/charts/k8s-monitoring/docs/examples/features/application-observability/default/alloy-receiver.alloy @@ -58,7 +58,6 @@ otelcol.exporter.otlp "otlp_gateway" { } } } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -73,7 +72,7 @@ declare "application_observability" { comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // OTLP Receiver + // OTLP Receiver otelcol.receiver.otlp "receiver" { http { endpoint = "0.0.0.0:4318" @@ -88,13 +87,13 @@ declare "application_observability" { } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -102,7 +101,7 @@ declare "application_observability" { } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -112,7 +111,7 @@ declare "application_observability" { from = "connection" } } - + output { metrics = [otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -120,16 +119,16 @@ declare "application_observability" { } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -140,7 +139,7 @@ declare "application_observability" { "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { 
metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -148,12 +147,12 @@ declare "application_observability" { } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value @@ -172,7 +171,6 @@ application_observability "feature" { otelcol.processor.attributes.otlp_gateway.input, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/application-observability/default/output.yaml b/charts/k8s-monitoring/docs/examples/features/application-observability/default/output.yaml index 8f13342c5..6df365306 100644 --- a/charts/k8s-monitoring/docs/examples/features/application-observability/default/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/application-observability/default/output.yaml @@ -83,7 +83,6 @@ data: } } } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -98,7 +97,7 @@ data: comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // OTLP Receiver + // OTLP Receiver otelcol.receiver.otlp "receiver" { http { endpoint = "0.0.0.0:4318" @@ -113,13 +112,13 @@ data: } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -127,7 +126,7 @@ data: } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -137,7 +136,7 @@ data: from = "connection" } } - + output { metrics = [otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -145,16 +144,16 @@ data: } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -165,7 +164,7 @@ data: "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -173,12 +172,12 @@ data: } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value @@ -197,7 +196,6 @@ data: otelcol.processor.attributes.otlp_gateway.input, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -240,11 +238,9 @@ data: otelcol.receiver.prometheus.otlp_gateway.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # 
TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/alloy-metrics.alloy index 0649a588f..d8572a1ee 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/alloy-metrics.alloy @@ -58,7 +58,6 @@ otelcol.exporter.otlp "otlp_gateway" { } } } - // Feature: Auto-Instrumentation declare "auto_instrumentation" { argument "metrics_destinations" { @@ -112,7 +111,6 @@ auto_instrumentation "feature" { otelcol.receiver.prometheus.otlp_gateway.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/alloy-receiver.alloy b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/alloy-receiver.alloy index 1374a7bc8..578c564f9 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/alloy-receiver.alloy +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/alloy-receiver.alloy @@ -58,7 +58,6 @@ otelcol.exporter.otlp "otlp_gateway" { } } } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -73,7 +72,7 @@ declare "application_observability" { comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // OTLP Receiver + // OTLP Receiver otelcol.receiver.otlp "receiver" { grpc { endpoint = "0.0.0.0:4317" @@ -88,13 +87,13 @@ declare "application_observability" { } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -102,7 +101,7 @@ declare "application_observability" { } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -112,7 +111,7 @@ declare "application_observability" { from = "connection" } } - + output { metrics = [otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -120,16 +119,16 @@ declare "application_observability" { } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -140,7 +139,7 @@ declare "application_observability" { "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ 
-148,12 +147,12 @@ declare "application_observability" { } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/output.yaml b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/output.yaml index 9223b1e88..7cf67ff27 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics-and-traces/output.yaml @@ -115,7 +115,6 @@ data: } } } - // Feature: Auto-Instrumentation declare "auto_instrumentation" { argument "metrics_destinations" { @@ -169,7 +168,6 @@ data: otelcol.receiver.prometheus.otlp_gateway.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -212,11 +210,9 @@ data: otelcol.receiver.prometheus.otlp_gateway.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -293,7 +289,6 @@ data: } } } - // Feature: Application Observability declare "application_observability" { argument "metrics_destinations" { @@ -308,7 +303,7 @@ data: comment = "Must be a list of trace destinations where collected trace should be forwarded to" } - // OTLP Receiver + // OTLP Receiver otelcol.receiver.otlp "receiver" { grpc { endpoint = "0.0.0.0:4317" @@ -323,13 +318,13 @@ data: } } - // Resource Detection Processor + // Resource Detection Processor otelcol.processor.resourcedetection "default" { detectors = ["env", "system"] system { hostname_sources = ["os"] } - + output { metrics = [otelcol.processor.k8sattributes.default.input] logs = [otelcol.processor.k8sattributes.default.input] @@ -337,7 +332,7 @@ data: } } - // K8s Attributes Processor + // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] @@ -347,7 +342,7 @@ data: from = "connection" } } - + output { metrics = [otelcol.processor.transform.default.input] logs = [otelcol.processor.transform.default.input] @@ -355,16 +350,16 @@ data: } } - // Host Info Connector + // Host Info Connector otelcol.connector.host_info "default" { host_identifiers = [ "k8s.node.name" ] - + output { metrics = [otelcol.processor.batch.default.input] } } - // Transform Processor + // Transform Processor otelcol.processor.transform "default" { error_mode = "ignore" log_statements { @@ -375,7 +370,7 @@ data: "set(attributes[\"loki.resource.labels\"], \"cluster, namespace, job, pod\")", ] } - + output { metrics = [otelcol.processor.batch.default.input] logs = [otelcol.processor.batch.default.input] @@ -383,12 +378,12 @@ data: } } - // Batch Processor + // Batch Processor otelcol.processor.batch "default" { send_batch_size = 16384 send_batch_max_size = 0 timeout = "2s" - + output { metrics = argument.metrics_destinations.value logs = argument.logs_destinations.value 
diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/alloy-metrics.alloy index a44502d1b..b7798fd6c 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/alloy-metrics.alloy @@ -46,7 +46,6 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - // Feature: Auto-Instrumentation declare "auto_instrumentation" { argument "metrics_destinations" { @@ -100,7 +99,6 @@ auto_instrumentation "feature" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/output.yaml b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/output.yaml index 46a85aa2a..7b8f34ae6 100644 --- a/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/auto-instrumentation/beyla-metrics/output.yaml @@ -87,7 +87,6 @@ data: max_keepalive_time = "8h" } } - // Feature: Auto-Instrumentation declare "auto_instrumentation" { argument "metrics_destinations" { @@ -141,7 +140,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -184,11 +182,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-events/default/alloy-singleton.alloy b/charts/k8s-monitoring/docs/examples/features/cluster-events/default/alloy-singleton.alloy index 6852a232b..c36c08769 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-events/default/alloy-singleton.alloy +++ b/charts/k8s-monitoring/docs/examples/features/cluster-events/default/alloy-singleton.alloy @@ -15,7 +15,6 @@ loki.write "loki" { "k8s_cluster_name" = "cluster-events-cluster", } } - // Feature: Cluster Events declare "cluster_events" { argument "logs_destinations" { diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-events/default/output.yaml b/charts/k8s-monitoring/docs/examples/features/cluster-events/default/output.yaml index df4158fa0..c1232d46b 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-events/default/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/cluster-events/default/output.yaml @@ -40,7 +40,6 @@ data: "k8s_cluster_name" = "cluster-events-cluster", } } - // Feature: Cluster Events declare "cluster_events" { argument "logs_destinations" { diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-logs.alloy index 1dd052837..40ca32ed5 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-logs.alloy +++ 
b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-logs.alloy @@ -15,13 +15,12 @@ loki.write "loki" { "k8s_cluster_name" = "cluster-metrics-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -46,7 +45,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -55,28 +54,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -98,14 +90,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -117,8 +109,16 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -126,10 +126,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -138,22 +138,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -162,12 +162,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} 
- + // Set the extract stream value as a label stage.labels { values = { @@ -175,7 +175,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -185,12 +185,12 @@ declare "pod_logs" { "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-metrics.alloy index c6dcc71ec..4c30619f5 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-metrics.alloy @@ -46,22 +46,21 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -69,8 +68,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -78,8 +77,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -88,7 +87,7 @@ declare "cluster_metrics" { max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -160,16 +159,16 @@ declare "cluster_metrics" { replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + kubernetes.apiserver "scrape" { clustering = true job_label = "integrations/kubernetes/kube-apiserver" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "kube_controller_manager" { role = "pod" namespaces { @@ -180,7 +179,7 @@ declare "cluster_metrics" { label = "component=kube-controller-manager" } } - + discovery.relabel "kube_controller_manager" { targets = discovery.kubernetes.kube_controller_manager.targets rule { @@ -189,7 +188,7 @@ declare "cluster_metrics" { target_label = "__address__" } } - + prometheus.scrape "kube_controller_manager" { targets = discovery.relabel.kube_controller_manager.output job_name = "kube-controller-manager" @@ -203,16 +202,16 @@ 
declare "cluster_metrics" { enabled = true } forward_to = argument.metrics_destinations.value - } - + } + kubernetes.kube_dns "scrape" { clustering = true job_label = "integrations/kubernetes/kube-dns" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "kube_proxy" { role = "pod" namespaces { @@ -223,7 +222,7 @@ declare "cluster_metrics" { label = "k8s-app=kube-proxy" } } - + discovery.relabel "kube_proxy" { targets = discovery.kubernetes.kube_proxy.targets rule { @@ -232,7 +231,7 @@ declare "cluster_metrics" { target_label = "__address__" } } - + prometheus.scrape "kube_proxy" { targets = discovery.relabel.kube_proxy.output job_name = "integrations/kubernetes/kube-proxy" @@ -242,8 +241,8 @@ declare "cluster_metrics" { enabled = true } forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "kube_scheduler" { role = "pod" namespaces { @@ -254,7 +253,7 @@ declare "cluster_metrics" { label = "component=kube-scheduler" } } - + discovery.relabel "kube_scheduler" { targets = discovery.kubernetes.kube_scheduler.targets rule { @@ -263,7 +262,7 @@ declare "cluster_metrics" { target_label = "__address__" } } - + prometheus.scrape "kube_scheduler" { targets = discovery.relabel.kube_scheduler.output job_name = "kube-scheduler" @@ -277,17 +276,17 @@ declare "cluster_metrics" { enabled = true } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -296,7 +295,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -306,17 +305,17 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -325,7 +324,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -334,7 +333,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -344,8 +343,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -356,7 +355,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -365,7 +364,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -375,7 +374,7 @@ declare 
"cluster_metrics" { } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -384,33 +383,32 @@ declare "cluster_metrics" { action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ prometheus.remote_write.prometheus.receiver, ] } - declare "etcd_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "etcd" { name = "k8smon-alloy-module-databases" namespace = "default" } - + import.string "etcd" { content = remote.kubernetes.configmap.etcd.data["kv_etcd_metrics.alloy"] } - + etcd.kubernetes "k8s_controlplane_etcd" { label_selectors = ["app.kubernetes.io/component=etcd"] port_name = "metrics" } - + etcd.scrape "k8s_controlplane_etcd" { targets = etcd.kubernetes.k8s_controlplane_etcd.output job_label = "integrations/etcd" diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-singleton.alloy b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-singleton.alloy index 75e95be26..ce7462032 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-singleton.alloy +++ b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-singleton.alloy @@ -63,7 +63,6 @@ loki.write "loki" { "k8s_cluster_name" = "cluster-metrics-cluster", } } - // Feature: Cluster Events declare "cluster_events" { argument "logs_destinations" { @@ -141,7 +140,6 @@ cluster_events "feature" { loki.write.loki.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml index e189f64da..ddd2dcfd3 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml @@ -176,22 +176,21 @@ data: max_keepalive_time = "8h" } } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -199,8 +198,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -208,8 +207,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -218,7 +217,7 @@ data: max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container 
labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -290,16 +289,16 @@ data: replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + kubernetes.apiserver "scrape" { clustering = true job_label = "integrations/kubernetes/kube-apiserver" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "kube_controller_manager" { role = "pod" namespaces { @@ -310,7 +309,7 @@ data: label = "component=kube-controller-manager" } } - + discovery.relabel "kube_controller_manager" { targets = discovery.kubernetes.kube_controller_manager.targets rule { @@ -319,7 +318,7 @@ data: target_label = "__address__" } } - + prometheus.scrape "kube_controller_manager" { targets = discovery.relabel.kube_controller_manager.output job_name = "kube-controller-manager" @@ -333,16 +332,16 @@ data: enabled = true } forward_to = argument.metrics_destinations.value - } - + } + kubernetes.kube_dns "scrape" { clustering = true job_label = "integrations/kubernetes/kube-dns" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "kube_proxy" { role = "pod" namespaces { @@ -353,7 +352,7 @@ data: label = "k8s-app=kube-proxy" } } - + discovery.relabel "kube_proxy" { targets = discovery.kubernetes.kube_proxy.targets rule { @@ -362,7 +361,7 @@ data: target_label = "__address__" } } - + prometheus.scrape "kube_proxy" { targets = discovery.relabel.kube_proxy.output job_name = "integrations/kubernetes/kube-proxy" @@ -372,8 +371,8 @@ data: enabled = true } forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "kube_scheduler" { role = "pod" namespaces { @@ -384,7 +383,7 @@ data: label = "component=kube-scheduler" } } - + discovery.relabel "kube_scheduler" { targets = discovery.kubernetes.kube_scheduler.targets rule { @@ -393,7 +392,7 @@ data: target_label = "__address__" } } - + prometheus.scrape "kube_scheduler" { targets = discovery.relabel.kube_scheduler.output job_name = "kube-scheduler" @@ -407,17 +406,17 @@ data: enabled = true } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -426,7 +425,7 @@ data: "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -436,17 +435,17 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -455,7 +454,7 @@ data: "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -464,7 +463,7 @@ data: target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -474,8 +473,8 @@ data: scrape_interval = "60s" 
max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -486,7 +485,7 @@ data: label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -495,7 +494,7 @@ data: target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -505,7 +504,7 @@ data: } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -514,33 +513,32 @@ data: action = "keep" } forward_to = argument.metrics_destinations.value - } + } } cluster_metrics "feature" { metrics_destinations = [ prometheus.remote_write.prometheus.receiver, ] } - declare "etcd_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "etcd" { name = "k8smon-alloy-module-databases" namespace = "default" } - + import.string "etcd" { content = remote.kubernetes.configmap.etcd.data["kv_etcd_metrics.alloy"] } - + etcd.kubernetes "k8s_controlplane_etcd" { label_selectors = ["app.kubernetes.io/component=etcd"] port_name = "metrics" } - + etcd.scrape "k8s_controlplane_etcd" { targets = etcd.kubernetes.k8s_controlplane_etcd.output job_label = "integrations/etcd" @@ -629,7 +627,6 @@ data: "k8s_cluster_name" = "cluster-metrics-cluster", } } - // Feature: Cluster Events declare "cluster_events" { argument "logs_destinations" { @@ -707,7 +704,6 @@ data: loki.write.loki.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -750,11 +746,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -790,13 +784,12 @@ data: "k8s_cluster_name" = "cluster-metrics-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -821,7 +814,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -830,28 +823,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. 
if not set, loki will automatically try to populate a default. // see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -873,14 +859,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -892,8 +878,16 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -901,10 +895,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -913,22 +907,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -937,12 +931,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -950,7 +944,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -960,12 +954,12 @@ data: "tmp_container_runtime", ] } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/alloy-metrics.alloy index 038896f41..9e8dc51ce 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/alloy-metrics.alloy @@ -46,22 +46,21 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -69,8 +68,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -78,8 +77,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -88,7 +87,7 @@ declare "cluster_metrics" { max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -160,17 +159,17 @@ declare "cluster_metrics" { replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -179,7 +178,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -189,17 +188,17 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -208,7 +207,7 @@ declare "cluster_metrics" { "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -217,7 +216,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = 
"integrations/node_exporter" @@ -227,8 +226,8 @@ declare "cluster_metrics" { scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -239,7 +238,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -248,7 +247,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -258,7 +257,7 @@ declare "cluster_metrics" { } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -267,8 +266,8 @@ declare "cluster_metrics" { action = "keep" } forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "kepler" { role = "pod" namespaces { @@ -279,7 +278,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=kepler" } } - + discovery.relabel "kepler" { targets = discovery.kubernetes.kepler.targets rule { @@ -288,7 +287,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "kepler" { targets = discovery.relabel.kepler.output job_name = "integrations/kepler" @@ -299,7 +298,7 @@ declare "cluster_metrics" { } forward_to = [prometheus.relabel.kepler.receiver] } - + prometheus.relabel "kepler" { max_cache_size = 100000 rule { @@ -308,8 +307,8 @@ declare "cluster_metrics" { action = "keep" } forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "opencost" { role = "pod" namespaces { @@ -320,7 +319,7 @@ declare "cluster_metrics" { label = "app.kubernetes.io/name=opencost" } } - + discovery.relabel "opencost" { targets = discovery.kubernetes.opencost.targets rule { @@ -329,7 +328,7 @@ declare "cluster_metrics" { target_label = "instance" } } - + prometheus.scrape "opencost" { targets = discovery.relabel.opencost.output job_name = "integrations/opencost" @@ -340,7 +339,7 @@ declare "cluster_metrics" { } forward_to = [prometheus.relabel.opencost.receiver] } - + prometheus.relabel "opencost" { max_cache_size = 100000 rule { @@ -356,7 +355,6 @@ cluster_metrics "feature" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/output.yaml b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/output.yaml index adcfca775..bef11f1a6 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/output.yaml @@ -172,22 +172,21 @@ data: max_keepalive_time = "8h" } } - // Feature: Cluster Metrics declare "cluster_metrics" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "kubernetes" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kubernetes" { content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] - } - + } + kubernetes.kubelet "scrape" { clustering = true job_label = "integrations/kubernetes/kubelet" @@ -195,8 +194,8 @@ data: scrape_interval = 
"60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" @@ -204,8 +203,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + kubernetes.cadvisor "scrape" { clustering = true job_label = "integrations/kubernetes/cadvisor" @@ -214,7 +213,7 @@ data: max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] } - + prometheus.relabel "cadvisor" { max_cache_size = 100000 // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 @@ -286,17 +285,17 @@ data: replacement = "" } forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "kube_state_metrics" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "kube_state_metrics" { content = remote.kubernetes.configmap.kube_state_metrics.data["kube-state-metrics_metrics.alloy"] } - + kube_state_metrics.kubernetes "targets" { namespaces = ["default"] port_name = "http" @@ -305,7 +304,7 @@ data: "release=k8smon", ] } - + kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true @@ -315,17 +314,17 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + remote.kubernetes.configmap "node_exporter" { name = "k8smon-alloy-module-system" namespace = "default" } - + import.string "node_exporter" { content = remote.kubernetes.configmap.node_exporter.data["node-exporter_metrics.alloy"] } - + node_exporter.kubernetes "targets" { namespaces = ["default"] port_name = "metrics" @@ -334,7 +333,7 @@ data: "release=k8smon", ] } - + discovery.relabel "node_exporter" { targets = node_exporter.kubernetes.targets.output rule { @@ -343,7 +342,7 @@ data: target_label = "instance" } } - + node_exporter.scrape "metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" @@ -353,8 +352,8 @@ data: scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "windows_exporter_pods" { role = "pod" namespaces { @@ -365,7 +364,7 @@ data: label = "app.kubernetes.io/name=windows-exporter,release=k8smon" } } - + discovery.relabel "windows_exporter" { targets = discovery.kubernetes.windows_exporter_pods.targets rule { @@ -374,7 +373,7 @@ data: target_label = "instance" } } - + prometheus.scrape "windows_exporter" { job_name = "integrations/windows-exporter" targets = discovery.relabel.windows_exporter.output @@ -384,7 +383,7 @@ data: } forward_to = [prometheus.relabel.windows_exporter.receiver] } - + prometheus.relabel "windows_exporter" { max_cache_size = 100000 rule { @@ -393,8 +392,8 @@ data: action = "keep" } forward_to = argument.metrics_destinations.value - } - + } + discovery.kubernetes "kepler" { role = "pod" namespaces { @@ -405,7 +404,7 @@ data: label = "app.kubernetes.io/name=kepler" } } - + discovery.relabel "kepler" { targets = discovery.kubernetes.kepler.targets rule { @@ -414,7 +413,7 @@ data: target_label = "instance" } } - + prometheus.scrape "kepler" { targets = discovery.relabel.kepler.output job_name = "integrations/kepler" @@ -425,7 +424,7 @@ data: } forward_to = [prometheus.relabel.kepler.receiver] } - + prometheus.relabel "kepler" { max_cache_size = 100000 rule { @@ -434,8 +433,8 @@ data: action = "keep" } forward_to = 
argument.metrics_destinations.value - } - + } + discovery.kubernetes "opencost" { role = "pod" namespaces { @@ -446,7 +445,7 @@ data: label = "app.kubernetes.io/name=opencost" } } - + discovery.relabel "opencost" { targets = discovery.kubernetes.opencost.targets rule { @@ -455,7 +454,7 @@ data: target_label = "instance" } } - + prometheus.scrape "opencost" { targets = discovery.relabel.opencost.output job_name = "integrations/opencost" @@ -466,7 +465,7 @@ data: } forward_to = [prometheus.relabel.opencost.receiver] } - + prometheus.relabel "opencost" { max_cache_size = 100000 rule { @@ -482,7 +481,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -525,11 +523,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/alloy/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/alloy/alloy-metrics.alloy index 7d1895873..f8b5a6946 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/alloy/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/alloy/alloy-metrics.alloy @@ -46,7 +46,6 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - declare "alloy_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" @@ -105,35 +104,35 @@ declare "alloy_integration" { action = "keep" } - - + + rule { source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" } - + rule { source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" } - + rule { source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" } - + // set the workload to the controller kind and name rule { action = "lowercase" source_labels = ["__meta_kubernetes_pod_controller_kind"] target_label = "workload_type" } - + rule { source_labels = ["__meta_kubernetes_pod_controller_name"] target_label = "workload" } - + // remove the hash from the ReplicaSet rule { source_labels = [ @@ -144,7 +143,7 @@ declare "alloy_integration" { regex = "replicaset/(.+)-.+$" target_label = "workload" } - + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" rule { action = "replace" @@ -158,7 +157,7 @@ declare "alloy_integration" { replacement = "$1" target_label = "app" } - + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" rule { action = "replace" @@ -171,7 +170,7 @@ declare "alloy_integration" { replacement = "$1" target_label = "component" } - + // set a source label rule { action = "replace" @@ -320,12 +319,12 @@ declare "alloy_integration" { } } } - + alloy_integration_discovery "alloy" { port_name = "http-metrics" label_selectors = ["app.kubernetes.io/name=alloy-metrics"] } - + alloy_integration_scrape "alloy" { targets = alloy_integration_discovery.alloy.output job_label = "integrations/alloy" @@ -341,7 +340,6 @@ alloy_integration "integration" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting 
prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/alloy/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/alloy/output.yaml index 0d33ec3c4..7a4a64274 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/alloy/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/alloy/output.yaml @@ -71,7 +71,6 @@ data: max_keepalive_time = "8h" } } - declare "alloy_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" @@ -130,35 +129,35 @@ data: action = "keep" } - - + + rule { source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" } - + rule { source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" } - + rule { source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" } - + // set the workload to the controller kind and name rule { action = "lowercase" source_labels = ["__meta_kubernetes_pod_controller_kind"] target_label = "workload_type" } - + rule { source_labels = ["__meta_kubernetes_pod_controller_name"] target_label = "workload" } - + // remove the hash from the ReplicaSet rule { source_labels = [ @@ -169,7 +168,7 @@ data: regex = "replicaset/(.+)-.+$" target_label = "workload" } - + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" rule { action = "replace" @@ -183,7 +182,7 @@ data: replacement = "$1" target_label = "app" } - + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" rule { action = "replace" @@ -196,7 +195,7 @@ data: replacement = "$1" target_label = "component" } - + // set a source label rule { action = "replace" @@ -345,12 +344,12 @@ data: } } } - + alloy_integration_discovery "alloy" { port_name = "http-metrics" label_selectors = ["app.kubernetes.io/name=alloy-metrics"] } - + alloy_integration_scrape "alloy" { targets = alloy_integration_discovery.alloy.output job_label = "integrations/alloy" @@ -366,7 +365,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -409,11 +407,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/alloy-metrics.alloy index 3c8d45574..a43115c3c 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/alloy-metrics.alloy @@ -46,26 +46,25 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - declare "cert_manager_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "cert_manager" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string 
"cert_manager" { content = remote.kubernetes.configmap.cert_manager.data["cert-manager_metrics.alloy"] } - + cert_manager.kubernetes "cert_manager" { label_selectors = ["app.kubernetes.io/name=cert-manager"] port_name = "http-metrics" } - + cert_manager.scrape "cert_manager" { targets = cert_manager.kubernetes.cert_manager.output clustering = true @@ -80,7 +79,6 @@ cert_manager_integration "integration" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/output.yaml index aa2017c52..764bc4180 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/output.yaml @@ -71,26 +71,25 @@ data: max_keepalive_time = "8h" } } - declare "cert_manager_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "cert_manager" { name = "k8smon-alloy-module-kubernetes" namespace = "default" } - + import.string "cert_manager" { content = remote.kubernetes.configmap.cert_manager.data["cert-manager_metrics.alloy"] } - + cert_manager.kubernetes "cert_manager" { label_selectors = ["app.kubernetes.io/name=cert-manager"] port_name = "http-metrics" } - + cert_manager.scrape "cert_manager" { targets = cert_manager.kubernetes.cert_manager.output clustering = true @@ -105,7 +104,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -148,11 +146,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/etcd/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/etcd/alloy-metrics.alloy index 6f6a1ec58..2b268d57d 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/etcd/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/etcd/alloy-metrics.alloy @@ -46,26 +46,25 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - declare "etcd_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "etcd" { name = "k8smon-alloy-module-databases" namespace = "default" } - + import.string "etcd" { content = remote.kubernetes.configmap.etcd.data["kv_etcd_metrics.alloy"] } - + etcd.kubernetes "etcd" { label_selectors = ["app.kubernetes.io/component=etcd"] port_name = "metrics" } - + etcd.scrape "etcd" { targets = etcd.kubernetes.etcd.output job_label = "integrations/etcd" @@ -80,7 +79,6 @@ etcd_integration "integration" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git 
a/charts/k8s-monitoring/docs/examples/features/integrations/etcd/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/etcd/output.yaml index d410ae0fa..d3668ed68 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/etcd/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/etcd/output.yaml @@ -71,26 +71,25 @@ data: max_keepalive_time = "8h" } } - declare "etcd_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + remote.kubernetes.configmap "etcd" { name = "k8smon-alloy-module-databases" namespace = "default" } - + import.string "etcd" { content = remote.kubernetes.configmap.etcd.data["kv_etcd_metrics.alloy"] } - + etcd.kubernetes "etcd" { label_selectors = ["app.kubernetes.io/component=etcd"] port_name = "metrics" } - + etcd.scrape "etcd" { targets = etcd.kubernetes.etcd.output job_label = "integrations/etcd" @@ -105,7 +104,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -148,11 +146,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy index 35cd6ad1f..59cc181b7 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy @@ -15,13 +15,12 @@ loki.write "loki" { "k8s_cluster_name" = "grafana-integration-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -46,7 +45,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -55,28 +54,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -98,14 +90,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -117,6 +109,14 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } rule { source_labels = ["__meta_kubernetes_namespace","__meta_kubernetes_pod_label_app_kubernetes_io_name"] separator = ";" @@ -132,7 +132,7 @@ declare "pod_logs" { replacement = "grafana" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -140,10 +140,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -152,22 +152,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -176,12 +176,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -189,7 +189,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -202,7 +202,7 @@ declare "pod_logs" { // Integration: Loki stage.match { selector = "{job=\"integrations/grafana\",instance=\"grafana\",namespace=~\"o11y\"}" - + // extract some of the fields from the log line stage.logfmt { mapping = { @@ -212,7 +212,7 @@ declare "pod_logs" { "type" = "", } } - + // set the level as a label stage.labels { values = { @@ -236,12 +236,12 @@ declare "pod_logs" { drop_counter_reason = "grafana-drop-log-level" } } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-metrics.alloy index 2b3469bb4..14c396767 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-metrics.alloy @@ -46,7 +46,6 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - declare "grafana_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" @@ -105,35 +104,35 @@ declare "grafana_integration" { action = "keep" } - - + + rule { source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" } - + rule { source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" } - + rule { source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" } - + // set the workload to the controller kind and name rule { action = "lowercase" source_labels = ["__meta_kubernetes_pod_controller_kind"] target_label = "workload_type" } - + rule { source_labels = ["__meta_kubernetes_pod_controller_name"] target_label = "workload" } - + // remove the hash from the ReplicaSet rule { source_labels = [ @@ -144,7 +143,7 @@ declare "grafana_integration" { regex = "replicaset/(.+)-.+$" target_label = "workload" } - + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" rule { action = "replace" @@ -158,7 +157,7 @@ declare "grafana_integration" { replacement = "$1" target_label = "app" } - + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" rule { action = "replace" @@ -171,7 +170,7 @@ declare "grafana_integration" { replacement = "$1" target_label = "component" } - + // set a source label rule { action = "replace" @@ -248,13 +247,13 @@ declare "grafana_integration" { } } } - + grafana_integration_discovery "grafana" { namespaces = ["o11y"] label_selectors = ["app.kubernetes.io/name=grafana"] port_name = "grafana" } - + grafana_integration_scrape "grafana" { targets = grafana_integration_discovery.grafana.output job_label = "integrations/grafana" @@ -269,7 +268,6 @@ grafana_integration "integration" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml index b27b49a78..9b067a9f8 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml @@ -87,7 +87,6 @@ data: max_keepalive_time = "8h" } } - declare "grafana_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" @@ -146,35 +145,35 @@ data: action = "keep" } - - + + rule { source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" } - + rule { 
source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" } - + rule { source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" } - + // set the workload to the controller kind and name rule { action = "lowercase" source_labels = ["__meta_kubernetes_pod_controller_kind"] target_label = "workload_type" } - + rule { source_labels = ["__meta_kubernetes_pod_controller_name"] target_label = "workload" } - + // remove the hash from the ReplicaSet rule { source_labels = [ @@ -185,7 +184,7 @@ data: regex = "replicaset/(.+)-.+$" target_label = "workload" } - + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" rule { action = "replace" @@ -199,7 +198,7 @@ data: replacement = "$1" target_label = "app" } - + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" rule { action = "replace" @@ -212,7 +211,7 @@ data: replacement = "$1" target_label = "component" } - + // set a source label rule { action = "replace" @@ -289,13 +288,13 @@ data: } } } - + grafana_integration_discovery "grafana" { namespaces = ["o11y"] label_selectors = ["app.kubernetes.io/name=grafana"] port_name = "grafana" } - + grafana_integration_scrape "grafana" { targets = grafana_integration_discovery.grafana.output job_label = "integrations/grafana" @@ -310,7 +309,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -353,11 +351,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -391,13 +387,12 @@ data: "k8s_cluster_name" = "grafana-integration-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -422,7 +417,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -431,28 +426,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -474,14 +462,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -493,6 +481,14 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } rule { source_labels = ["__meta_kubernetes_namespace","__meta_kubernetes_pod_label_app_kubernetes_io_name"] separator = ";" @@ -508,7 +504,7 @@ data: replacement = "grafana" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -516,10 +512,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -528,22 +524,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -552,12 +548,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -565,7 +561,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -578,7 +574,7 @@ data: // Integration: Loki stage.match { selector = "{job=\"integrations/grafana\",instance=\"grafana\",namespace=~\"o11y\"}" - + // extract some of the fields from the log line stage.logfmt { mapping = { @@ -588,7 +584,7 @@ data: "type" = "", } } - + // set the level as a label stage.labels { values = { @@ -612,12 +608,12 @@ data: drop_counter_reason = "grafana-drop-log-level" } } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy index 3d105ffe5..296e405fd 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy @@ -15,13 +15,12 @@ loki.write "loki" { "k8s_cluster_name" = "loki-integration-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -46,7 +45,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -55,28 +54,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -98,14 +90,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -117,6 +109,14 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } // add static label of integration="loki" and instance="name" to pods that match the selector so they can be identified in the loki.process stages rule { source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] @@ -141,7 +141,7 @@ declare "pod_logs" { replacement = "$1/$2" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -149,10 +149,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -161,22 +161,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -185,12 +185,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -198,7 +198,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -211,7 +211,7 @@ declare "pod_logs" { // Integration: Loki stage.match { selector = "{integration=\"loki\",instance=\"loki\"}" - + // extract some of the fields from the log line stage.logfmt { mapping = { @@ -219,7 +219,7 @@ declare "pod_logs" { "level" = "", } } - + // set the level as a label stage.labels { values = { @@ -233,23 +233,17 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` + expression = "(ts=[^ ]+\\s+)" replace = "" } - // drop certain log levels - stage.drop { - source = "level" - expression = "(?i)(debug)" - drop_counter_reason = "loki-drop-log-level" - } - + } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-metrics.alloy index d5aa6d98f..6f89412a2 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-metrics.alloy @@ -46,7 +46,6 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - declare "loki_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" @@ -112,35 +111,35 @@ declare "loki_integration" { target_label = "job" } - - + + rule { source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" } - + rule { source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" } - + rule { source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" } - + // set the workload to the controller kind and name rule { action = "lowercase" source_labels = ["__meta_kubernetes_pod_controller_kind"] target_label = "workload_type" } - + rule { source_labels = ["__meta_kubernetes_pod_controller_name"] target_label = "workload" } - + // remove the hash from the ReplicaSet rule { source_labels = [ @@ -151,7 +150,7 @@ declare "loki_integration" { regex = "replicaset/(.+)-.+$" target_label = "workload" } - + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" rule { action = "replace" @@ -165,7 +164,7 @@ declare "loki_integration" { replacement = "$1" target_label = "app" } - + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" rule { action = "replace" @@ -178,7 +177,7 @@ declare "loki_integration" { replacement = "$1" target_label = "component" } - + // set a source label rule { action = "replace" @@ -282,13 +281,13 @@ declare "loki_integration" { } } } - + loki_integration_discovery "loki" { namespaces = [] label_selectors = ["app.kubernetes.io/name=loki"] port_name = "http-metrics" } - + loki_integration_scrape "loki" { targets = loki_integration_discovery.loki.output job_label = "integrations/loki" @@ -303,7 +302,6 @@ loki_integration "integration" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml index 376a46aa6..f743b294e 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml @@ -87,7 +87,6 @@ data: max_keepalive_time = "8h" } } - declare "loki_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" @@ -153,35 +152,35 @@ data: target_label = "job" } - - + + rule { source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" } - + rule { source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" } - + 
rule { source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" } - + // set the workload to the controller kind and name rule { action = "lowercase" source_labels = ["__meta_kubernetes_pod_controller_kind"] target_label = "workload_type" } - + rule { source_labels = ["__meta_kubernetes_pod_controller_name"] target_label = "workload" } - + // remove the hash from the ReplicaSet rule { source_labels = [ @@ -192,7 +191,7 @@ data: regex = "replicaset/(.+)-.+$" target_label = "workload" } - + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" rule { action = "replace" @@ -206,7 +205,7 @@ data: replacement = "$1" target_label = "app" } - + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" rule { action = "replace" @@ -219,7 +218,7 @@ data: replacement = "$1" target_label = "component" } - + // set a source label rule { action = "replace" @@ -323,17 +322,18 @@ data: } } } - + loki_integration_discovery "loki" { namespaces = [] label_selectors = ["app.kubernetes.io/name=loki"] port_name = "http-metrics" } - + loki_integration_scrape "loki" { targets = loki_integration_discovery.loki.output job_label = "integrations/loki" clustering = true + keep_metrics = "up|scrape_samples_scraped|go_gc_cycles_total_gc_cycles_total|go_gc_duration_seconds|go_gc_duration_seconds_count|go_gc_duration_seconds_sum|go_gc_pauses_seconds_bucket|go_goroutines|go_memstats_heap_inuse_bytes|loki_azure_blob_request_duration_seconds_bucket|loki_azure_blob_request_duration_seconds_count|loki_bigtable_request_duration_seconds_bucket|loki_bigtable_request_duration_seconds_count|loki_bloom_blocks_cache_added_total|loki_bloom_blocks_cache_entries|loki_bloom_blocks_cache_evicted_total|loki_bloom_blocks_cache_fetched_total|loki_bloom_blocks_cache_usage_bytes|loki_bloom_chunks_indexed_total|loki_bloom_gateway_block_query_latency_seconds_bucket|loki_bloom_gateway_dequeue_duration_seconds_bucket|loki_bloom_gateway_filtered_chunks_sum|loki_bloom_gateway_filtered_series_sum|loki_bloom_gateway_inflight_tasks|loki_bloom_gateway_process_duration_seconds_bucket|loki_bloom_gateway_process_duration_seconds_count|loki_bloom_gateway_querier_chunks_filtered_total|loki_bloom_gateway_querier_chunks_skipped_total|loki_bloom_gateway_querier_chunks_total|loki_bloom_gateway_querier_series_filtered_total|loki_bloom_gateway_querier_series_skipped_total|loki_bloom_gateway_querier_series_total|loki_bloom_gateway_queue_duration_seconds_bucket|loki_bloom_gateway_queue_duration_seconds_count|loki_bloom_gateway_queue_duration_seconds_sum|loki_bloom_gateway_queue_length|loki_bloom_gateway_requested_chunks_sum|loki_bloom_gateway_requested_series_sum|loki_bloom_gateway_tasks_dequeued_bucket|loki_bloom_gateway_tasks_dequeued_total|loki_bloom_gateway_tasks_processed_total|loki_bloom_inserts_total|loki_bloom_recorder_chunks_total|loki_bloom_recorder_series_total|loki_bloom_size_bucket|loki_bloom_store_blocks_fetched_size_bytes_bucket|loki_bloom_store_blocks_fetched_sum|loki_bloom_store_download_queue_size_sum|loki_bloom_store_metas_fetched_bucket|loki_bloom_store_metas_fetched_size_bytes_bucket|loki_bloom_store_metas_fetched_sum|loki_bloom_tokens_total|loki_bloombuilder_blocks_created_total|loki_bloombuilder_blocks_reused_total|loki_bloombuilder_bytes_per_task_bucket|loki_bloombuilder_chunk_series_size_sum|loki_bloombuilder_metas_created_total|loki_bloombuilder_processing_task|loki_bloombuilder_series_per_task_bucket|loki
_bloomplanner_blocks_deleted_total|loki_bloomplanner_connected_builders|loki_bloomplanner_inflight_tasks|loki_bloomplanner_metas_deleted_total|loki_bloomplanner_queue_length|loki_bloomplanner_retention_running|loki_bloomplanner_retention_time_seconds_bucket|loki_bloomplanner_tenant_tasks_completed|loki_bloomplanner_tenant_tasks_planned|loki_boltdb_shipper_compact_tables_operation_duration_seconds|loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds|loki_boltdb_shipper_compact_tables_operation_total|loki_boltdb_shipper_request_duration_seconds_bucket|loki_boltdb_shipper_request_duration_seconds_count|loki_boltdb_shipper_request_duration_seconds_sum|loki_boltdb_shipper_retention_marker_count_total|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count|loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum|loki_boltdb_shipper_retention_marker_table_processed_total|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count|loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum|loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time|loki_boltdb_shipper_retention_sweeper_marker_files_current|loki_build_info|loki_chunk_store_deduped_chunks_total|loki_chunk_store_index_entries_per_chunk_count|loki_chunk_store_index_entries_per_chunk_sum|loki_compactor_apply_retention_last_successful_run_timestamp_seconds|loki_compactor_apply_retention_operation_duration_seconds|loki_compactor_apply_retention_operation_total|loki_compactor_delete_requests_processed_total|loki_compactor_delete_requests_received_total|loki_compactor_deleted_lines|loki_compactor_load_pending_requests_attempts_total|loki_compactor_locked_table_successive_compaction_skips|loki_compactor_oldest_pending_delete_request_age_seconds|loki_compactor_pending_delete_requests_count|loki_consul_request_duration_seconds_bucket|loki_discarded_samples_total|loki_distributor_bytes_received_total|loki_distributor_ingester_append_failures_total|loki_distributor_lines_received_total|loki_distributor_structured_metadata_bytes_received_total|loki_dynamo_consumed_capacity_total|loki_dynamo_dropped_requests_total|loki_dynamo_failures_total|loki_dynamo_query_pages_count|loki_dynamo_request_duration_seconds_bucket|loki_dynamo_request_duration_seconds_count|loki_dynamo_throttled_total|loki_embeddedcache_entries|loki_embeddedcache_memory_bytes|loki_gcs_request_duration_seconds_bucket|loki_gcs_request_duration_seconds_count|loki_index_gateway_postfilter_chunks_sum|loki_index_gateway_prefilter_chunks_sum|loki_index_request_duration_seconds_bucket|loki_index_request_duration_seconds_count|loki_index_request_duration_seconds_sum|loki_ingester_chunk_age_seconds_bucket|loki_ingester_chunk_age_seconds_count|loki_ingester_chunk_age_seconds_sum|loki_ingester_chunk_bounds_hours_bucket|loki_ingester_chunk_bounds_hours_count|loki_ingester_chunk_bounds_hours_sum|loki_ingester_chunk_entries_bucket|loki_ingester_chunk_entries_count|loki_ingester_chunk_entries_sum|loki_ingester_chunk_size_bytes_bucket|loki_ingester_chunk_utilization_bucket|loki_ingester_chunk_utilization_count|loki_ingester_chunk_utilization_sum|loki_ingester_chunks_flushed_total|loki_ingester_flush_queue_length|loki_ingester_memory_chunks|loki_ingester_memory_streams|loki_ingester_streams_created_total|loki_memcache_request_duration_seconds_
bucket|loki_memcache_request_duration_seconds_count|loki_panic_total|loki_prometheus_rule_group_rules|loki_request_duration_seconds_bucket|loki_request_duration_seconds_count|loki_request_duration_seconds_sum|loki_ruler_wal_appender_ready|loki_ruler_wal_disk_size|loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds|loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds|loki_ruler_wal_prometheus_remote_storage_samples_pending|loki_ruler_wal_prometheus_remote_storage_samples_total|loki_ruler_wal_samples_appended_total|loki_ruler_wal_storage_created_series_total|loki_s3_request_duration_seconds_bucket|loki_s3_request_duration_seconds_count" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -344,7 +344,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -387,11 +386,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -425,13 +422,12 @@ data: "k8s_cluster_name" = "loki-integration-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -456,7 +452,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -465,28 +461,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -508,14 +497,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -527,6 +516,14 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } // add static label of integration="loki" and instance="name" to pods that match the selector so they can be identified in the loki.process stages rule { source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] @@ -551,7 +548,7 @@ data: replacement = "$1/$2" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -559,10 +556,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -571,22 +568,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -595,12 +592,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -608,7 +605,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -621,7 +618,7 @@ data: // Integration: Loki stage.match { selector = "{integration=\"loki\",instance=\"loki\"}" - + // extract some of the fields from the log line stage.logfmt { mapping = { @@ -629,7 +626,7 @@ data: "level" = "", } } - + // set the level as a label stage.labels { values = { @@ -643,23 +640,17 @@ data: } // remove the timestamp from the log line stage.replace { - expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` + expression = "(ts=[^ ]+\\s+)" replace = "" } - // drop certain log levels - stage.drop { - source = "level" - expression = "(?i)(debug)" - drop_counter_reason = "loki-drop-log-level" - } - + } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy index 6c0d79344..a70b3e654 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy @@ -15,13 +15,12 @@ loki.write "loki" { "k8s_cluster_name" = "mimir-integration-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -46,7 +45,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -55,28 +54,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -98,14 +90,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -117,6 +109,14 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } // add static label of integration="mimir" and instance="name" to pods that match the selector so they can be identified in the mimir.process stages rule { source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] @@ -141,7 +141,7 @@ declare "pod_logs" { replacement = "$1/$2" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -149,10 +149,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -161,22 +161,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -185,12 +185,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -198,7 +198,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -211,7 +211,7 @@ declare "pod_logs" { // Integration: Mimir stage.match { selector = "{integration=\"mimir\",instance=\"mimir\"}" - + // extract some of the fields from the log line stage.logfmt { mapping = { @@ -219,7 +219,7 @@ declare "pod_logs" { "level" = "", } } - + // set the level as a label stage.labels { values = { @@ -233,23 +233,17 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` + expression = "(ts=[^ ]+\\s+)" replace = "" } - // drop certain log levels - stage.drop { - source = "level" - expression = "(?i)(debug)" - drop_counter_reason = "mimir-drop-log-level" - } - + } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-metrics.alloy index 5486e782f..4fe41d0c1 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-metrics.alloy @@ -46,7 +46,6 @@ prometheus.remote_write "prometheus" { max_keepalive_time = "8h" } } - declare "mimir_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" @@ -112,35 +111,35 @@ declare "mimir_integration" { target_label = "job" } - - + + rule { source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" } - + rule { source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" } - + rule { source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" } - + // set the workload to the controller kind and name rule { action = "lowercase" source_labels = ["__meta_kubernetes_pod_controller_kind"] target_label = "workload_type" } - + rule { source_labels = ["__meta_kubernetes_pod_controller_name"] target_label = "workload" } - + // remove the hash from the ReplicaSet rule { source_labels = [ @@ -151,7 +150,7 @@ declare "mimir_integration" { regex = "replicaset/(.+)-.+$" target_label = "workload" } - + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" rule { action = "replace" @@ -165,7 +164,7 @@ declare "mimir_integration" { replacement = "$1" target_label = "app" } - + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" rule { action = "replace" @@ -178,7 +177,7 @@ declare "mimir_integration" { replacement = "$1" target_label = "component" } - + // set a source label rule { action = "replace" @@ -282,13 +281,13 @@ declare "mimir_integration" { } } } - + mimir_integration_discovery "mimir" { namespaces = [] label_selectors = ["app.kubernetes.io/name=mimir"] port_name = "http-metrics" } - + mimir_integration_scrape "mimir" { targets = mimir_integration_discovery.mimir.output job_label = "integrations/mimir" @@ -303,7 +302,6 @@ mimir_integration "integration" { prometheus.remote_write.prometheus.receiver, ] } - // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml index c63ef0736..714ee97a2 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml @@ -87,7 +87,6 @@ data: max_keepalive_time = "8h" } } - declare "mimir_integration" { argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" @@ -153,35 +152,35 @@ data: target_label = "job" } - - + + rule { source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" } - + rule { source_labels = ["__meta_kubernetes_pod_name"] 
target_label = "pod" } - + rule { source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" } - + // set the workload to the controller kind and name rule { action = "lowercase" source_labels = ["__meta_kubernetes_pod_controller_kind"] target_label = "workload_type" } - + rule { source_labels = ["__meta_kubernetes_pod_controller_name"] target_label = "workload" } - + // remove the hash from the ReplicaSet rule { source_labels = [ @@ -192,7 +191,7 @@ data: regex = "replicaset/(.+)-.+$" target_label = "workload" } - + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" rule { action = "replace" @@ -206,7 +205,7 @@ data: replacement = "$1" target_label = "app" } - + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" rule { action = "replace" @@ -219,7 +218,7 @@ data: replacement = "$1" target_label = "component" } - + // set a source label rule { action = "replace" @@ -323,17 +322,18 @@ data: } } } - + mimir_integration_discovery "mimir" { namespaces = [] label_selectors = ["app.kubernetes.io/name=mimir"] port_name = "http-metrics" } - + mimir_integration_scrape "mimir" { targets = mimir_integration_discovery.mimir.output job_label = "integrations/mimir" clustering = true + keep_metrics = "up|scrape_samples_scraped|cortex_alertmanager_alerts|cortex_alertmanager_alerts_invalid_total|cortex_alertmanager_alerts_received_total|cortex_alertmanager_dispatcher_aggregation_groups|cortex_alertmanager_notification_latency_seconds_bucket|cortex_alertmanager_notification_latency_seconds_count|cortex_alertmanager_notification_latency_seconds_sum|cortex_alertmanager_notifications_failed_total|cortex_alertmanager_notifications_total|cortex_alertmanager_partial_state_merges_failed_total|cortex_alertmanager_partial_state_merges_total|cortex_alertmanager_ring_check_errors_total|cortex_alertmanager_silences|cortex_alertmanager_state_fetch_replica_state_failed_total|cortex_alertmanager_state_fetch_replica_state_total|cortex_alertmanager_state_initial_sync_completed_total|cortex_alertmanager_state_initial_sync_duration_seconds_bucket|cortex_alertmanager_state_initial_sync_duration_seconds_count|cortex_alertmanager_state_initial_sync_duration_seconds_sum|cortex_alertmanager_state_persist_failed_total|cortex_alertmanager_state_persist_total|cortex_alertmanager_state_replication_failed_total|cortex_alertmanager_state_replication_total|cortex_alertmanager_sync_configs_failed_total|cortex_alertmanager_sync_configs_total|cortex_alertmanager_tenants_discovered|cortex_alertmanager_tenants_owned|cortex_blockbuilder_consume_cycle_duration_seconds|cortex_blockbuilder_consumer_lag_records|cortex_blockbuilder_tsdb_compact_and_upload_failed_total|cortex_bucket_blocks_count|cortex_bucket_index_estimated_compaction_jobs|cortex_bucket_index_estimated_compaction_jobs_errors_total|cortex_bucket_index_last_successful_update_timestamp_seconds|cortex_bucket_store_block_drop_failures_total|cortex_bucket_store_block_drops_total|cortex_bucket_store_block_load_failures_total|cortex_bucket_store_block_loads_total|cortex_bucket_store_blocks_loaded|cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket|cortex_bucket_store_indexheader_lazy_load_duration_seconds_count|cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum|cortex_bucket_store_indexheader_lazy_load_total|cortex_bucket_store_indexheader_lazy_unload_total|cortex_bucket_store_series_batch_preloading_load_duration_seconds_
sum|cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum|cortex_bucket_store_series_blocks_queried_sum|cortex_bucket_store_series_data_size_fetched_bytes_sum|cortex_bucket_store_series_data_size_touched_bytes_sum|cortex_bucket_store_series_hash_cache_hits_total|cortex_bucket_store_series_hash_cache_requests_total|cortex_bucket_store_series_request_stage_duration_seconds_bucket|cortex_bucket_store_series_request_stage_duration_seconds_count|cortex_bucket_store_series_request_stage_duration_seconds_sum|cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds|cortex_bucket_stores_gate_duration_seconds_bucket|cortex_bucket_stores_gate_duration_seconds_count|cortex_bucket_stores_gate_duration_seconds_sum|cortex_bucket_stores_tenants_synced|cortex_build_info|cortex_cache_memory_hits_total|cortex_cache_memory_requests_total|cortex_compactor_block_cleanup_failures_total|cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds|cortex_compactor_block_max_time_delta_seconds_bucket|cortex_compactor_block_max_time_delta_seconds_count|cortex_compactor_block_max_time_delta_seconds_sum|cortex_compactor_blocks_cleaned_total|cortex_compactor_blocks_marked_for_deletion_total|cortex_compactor_blocks_marked_for_no_compaction_total|cortex_compactor_disk_out_of_space_errors_total|cortex_compactor_group_compaction_runs_started_total|cortex_compactor_last_successful_run_timestamp_seconds|cortex_compactor_meta_sync_duration_seconds_bucket|cortex_compactor_meta_sync_duration_seconds_count|cortex_compactor_meta_sync_duration_seconds_sum|cortex_compactor_meta_sync_failures_total|cortex_compactor_meta_syncs_total|cortex_compactor_runs_completed_total|cortex_compactor_runs_failed_total|cortex_compactor_runs_started_total|cortex_compactor_tenants_discovered|cortex_compactor_tenants_processing_failed|cortex_compactor_tenants_processing_succeeded|cortex_compactor_tenants_skipped|cortex_config_hash|cortex_discarded_exemplars_total|cortex_discarded_requests_total|cortex_discarded_samples_total|cortex_distributor_deduped_samples_total|cortex_distributor_exemplars_in_total|cortex_distributor_inflight_push_requests|cortex_distributor_instance_limits|cortex_distributor_instance_rejected_requests_total|cortex_distributor_latest_seen_sample_timestamp_seconds|cortex_distributor_non_ha_samples_received_total|cortex_distributor_received_exemplars_total|cortex_distributor_received_requests_total|cortex_distributor_received_samples_total|cortex_distributor_replication_factor|cortex_distributor_requests_in_total|cortex_distributor_samples_in_total|cortex_inflight_requests|cortex_ingest_storage_reader_buffered_fetched_records|cortex_ingest_storage_reader_fetch_errors_total|cortex_ingest_storage_reader_fetches_total|cortex_ingest_storage_reader_missed_records_total|cortex_ingest_storage_reader_offset_commit_failures_total|cortex_ingest_storage_reader_offset_commit_requests_total|cortex_ingest_storage_reader_read_errors_total|cortex_ingest_storage_reader_receive_delay_seconds_count|cortex_ingest_storage_reader_receive_delay_seconds_sum|cortex_ingest_storage_reader_records_failed_total|cortex_ingest_storage_reader_records_total|cortex_ingest_storage_reader_requests_failed_total|cortex_ingest_storage_reader_requests_total|cortex_ingest_storage_strong_consistency_failures_total|cortex_ingest_storage_strong_consistency_requests_total|cortex_ingest_storage_writer_buffered_produce_bytes|cortex_ingest_storage_writer_buffered_produce_bytes_limit|cortex_ingester_active_native_histogram_buckets|cortex_ingester_a
ctive_native_histogram_buckets_custom_tracker|cortex_ingester_active_native_histogram_series|cortex_ingester_active_native_histogram_series_custom_tracker|cortex_ingester_active_series|cortex_ingester_active_series_custom_tracker|cortex_ingester_client_request_duration_seconds_bucket|cortex_ingester_client_request_duration_seconds_count|cortex_ingester_client_request_duration_seconds_sum|cortex_ingester_ingested_exemplars_total|cortex_ingester_ingested_samples_total|cortex_ingester_instance_limits|cortex_ingester_instance_rejected_requests_total|cortex_ingester_local_limits|cortex_ingester_memory_series|cortex_ingester_memory_series_created_total|cortex_ingester_memory_series_removed_total|cortex_ingester_memory_users|cortex_ingester_oldest_unshipped_block_timestamp_seconds|cortex_ingester_owned_series|cortex_ingester_queried_exemplars_bucket|cortex_ingester_queried_exemplars_count|cortex_ingester_queried_exemplars_sum|cortex_ingester_queried_samples_bucket|cortex_ingester_queried_samples_count|cortex_ingester_queried_samples_sum|cortex_ingester_queried_series_bucket|cortex_ingester_queried_series_count|cortex_ingester_queried_series_sum|cortex_ingester_shipper_last_successful_upload_timestamp_seconds|cortex_ingester_shipper_upload_failures_total|cortex_ingester_shipper_uploads_total|cortex_ingester_tsdb_checkpoint_creations_failed_total|cortex_ingester_tsdb_checkpoint_creations_total|cortex_ingester_tsdb_checkpoint_deletions_failed_total|cortex_ingester_tsdb_compaction_duration_seconds_bucket|cortex_ingester_tsdb_compaction_duration_seconds_count|cortex_ingester_tsdb_compaction_duration_seconds_sum|cortex_ingester_tsdb_compactions_failed_total|cortex_ingester_tsdb_compactions_total|cortex_ingester_tsdb_exemplar_exemplars_appended_total|cortex_ingester_tsdb_exemplar_exemplars_in_storage|cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds|cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage|cortex_ingester_tsdb_head_max_timestamp_seconds|cortex_ingester_tsdb_head_truncations_failed_total|cortex_ingester_tsdb_mmap_chunk_corruptions_total|cortex_ingester_tsdb_out_of_order_samples_appended_total|cortex_ingester_tsdb_storage_blocks_bytes|cortex_ingester_tsdb_symbol_table_size_bytes|cortex_ingester_tsdb_wal_corruptions_total|cortex_ingester_tsdb_wal_truncate_duration_seconds_count|cortex_ingester_tsdb_wal_truncate_duration_seconds_sum|cortex_ingester_tsdb_wal_truncations_failed_total|cortex_ingester_tsdb_wal_truncations_total|cortex_ingester_tsdb_wal_writes_failed_total|cortex_kv_request_duration_seconds_bucket|cortex_kv_request_duration_seconds_count|cortex_kv_request_duration_seconds_sum|cortex_lifecycler_read_only|cortex_limits_defaults|cortex_limits_overrides|cortex_partition_ring_partitions|cortex_prometheus_notifications_dropped_total|cortex_prometheus_notifications_errors_total|cortex_prometheus_notifications_queue_capacity|cortex_prometheus_notifications_queue_length|cortex_prometheus_notifications_sent_total|cortex_prometheus_rule_evaluation_duration_seconds_count|cortex_prometheus_rule_evaluation_duration_seconds_sum|cortex_prometheus_rule_evaluation_failures_total|cortex_prometheus_rule_evaluations_total|cortex_prometheus_rule_group_duration_seconds_count|cortex_prometheus_rule_group_duration_seconds_sum|cortex_prometheus_rule_group_iterations_missed_total|cortex_prometheus_rule_group_iterations_total|cortex_prometheus_rule_group_rules|cortex_querier_blocks_consistency_checks_failed_total|cortex_querier_blocks_consistency_checks_total|cortex_querier_request_duratio
n_seconds_bucket|cortex_querier_request_duration_seconds_count|cortex_querier_request_duration_seconds_sum|cortex_querier_storegateway_instances_hit_per_query_bucket|cortex_querier_storegateway_instances_hit_per_query_count|cortex_querier_storegateway_instances_hit_per_query_sum|cortex_querier_storegateway_refetches_per_query_bucket|cortex_querier_storegateway_refetches_per_query_count|cortex_querier_storegateway_refetches_per_query_sum|cortex_query_frontend_queries_total|cortex_query_frontend_queue_duration_seconds_bucket|cortex_query_frontend_queue_duration_seconds_count|cortex_query_frontend_queue_duration_seconds_sum|cortex_query_frontend_queue_length|cortex_query_frontend_retries_bucket|cortex_query_frontend_retries_count|cortex_query_frontend_retries_sum|cortex_query_scheduler_connected_querier_clients|cortex_query_scheduler_querier_inflight_requests|cortex_query_scheduler_queue_duration_seconds_bucket|cortex_query_scheduler_queue_duration_seconds_count|cortex_query_scheduler_queue_duration_seconds_sum|cortex_query_scheduler_queue_length|cortex_request_duration_seconds|cortex_request_duration_seconds_bucket|cortex_request_duration_seconds_count|cortex_request_duration_seconds_sum|cortex_ring_members|cortex_ruler_managers_total|cortex_ruler_queries_failed_total|cortex_ruler_queries_total|cortex_ruler_ring_check_errors_total|cortex_ruler_write_requests_failed_total|cortex_ruler_write_requests_total|cortex_runtime_config_hash|cortex_runtime_config_last_reload_successful|cortex_tcp_connections|cortex_tcp_connections_limit|go_memstats_heap_inuse_bytes|keda_scaler_errors|keda_scaler_metrics_value|kube_deployment_spec_replicas|kube_deployment_status_replicas_unavailable|kube_deployment_status_replicas_updated|kube_endpoint_address|kube_horizontalpodautoscaler_spec_target_metric|kube_horizontalpodautoscaler_status_condition|kube_pod_info|kube_statefulset_replicas|kube_statefulset_status_current_revision|kube_statefulset_status_replicas_current|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas_updated|kube_statefulset_status_update_revision|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_used_bytes|memberlist_client_cluster_members_count|memcached_limit_bytes|mimir_continuous_test_queries_failed_total|mimir_continuous_test_query_result_checks_failed_total|mimir_continuous_test_writes_failed_total|node_disk_read_bytes_total|node_disk_written_bytes_total|process_memory_map_areas|process_memory_map_areas_limit|prometheus_tsdb_compaction_duration_seconds_bucket|prometheus_tsdb_compaction_duration_seconds_count|prometheus_tsdb_compaction_duration_seconds_sum|prometheus_tsdb_compactions_total|rollout_operator_last_successful_group_reconcile_timestamp_seconds|thanos_cache_hits_total|thanos_cache_operation_duration_seconds_bucket|thanos_cache_operation_duration_seconds_count|thanos_cache_operation_duration_seconds_sum|thanos_cache_operation_failures_total|thanos_cache_operations_total|thanos_cache_requests_total|thanos_objstore_bucket_last_successful_upload_time|thanos_objstore_bucket_operation_duration_seconds_bucket|thanos_objstore_bucket_operation_duration_seconds_count|thanos_objstore_bucket_operation_duration_seconds_sum|thanos_objstore_bucket_operation_failures_total|thanos_objstore_bucket_operations_total|thanos_store_index_cache_hits_total|thanos_store_index_cache_requests_total" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -344,7 +344,6 @@ data: prometheus.remote_write.prometheus.receiver, ] } - // Self 
Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] @@ -387,11 +386,9 @@ data: prometheus.remote_write.prometheus.receiver, ] } - - - - + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart # TYPE grafana_kubernetes_monitoring_build_info gauge grafana_kubernetes_monitoring_build_info{version="2.0.6", namespace="default"} 1 @@ -425,13 +422,12 @@ data: "k8s_cluster_name" = "mimir-integration-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -456,7 +452,7 @@ data: replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -465,28 +461,21 @@ data: replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -508,14 +497,14 @@ data: replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -527,6 +516,14 @@ data: source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } // add static label of integration="mimir" and instance="name" to pods that match the selector so they can be identified in the mimir.process stages rule { source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] @@ -551,7 +548,7 @@ data: replacement = "$1/$2" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -559,10 +556,10 @@ data: field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -571,22 +568,22 @@ data: target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -595,12 +592,12 @@ data: } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -608,7 +605,7 @@ data: } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -621,7 +618,7 @@ data: // Integration: Mimir stage.match { selector = "{integration=\"mimir\",instance=\"mimir\"}" - + // extract some of the fields from the log line stage.logfmt { mapping = { @@ -629,7 +626,7 @@ data: "level" = "", } } - + // set the level as a label stage.labels { values = { @@ -643,23 +640,17 @@ data: } // remove the timestamp from the log line stage.replace { - expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` + expression = "(ts=[^ ]+\\s+)" replace = "" } - // drop certain log levels - stage.drop { - source = "level" - expression = "(?i)(debug)" - drop_counter_reason = "mimir-drop-log-level" - } - + } - + // Only keep the labels that are defined in the `keepLabels` list. 
stage.label_keep { values = ["app_kubernetes_io_name","container","instance","job","level","namespace","pod","service_name","service_namespace","deployment_environment","deployment_environment_name","integration"] } - + forward_to = argument.logs_destinations.value } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mysql/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/mysql/alloy-logs.alloy index 9f1d38ed9..6cee613c1 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/mysql/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mysql/alloy-logs.alloy @@ -15,13 +15,12 @@ loki.write "loki" { "k8s_cluster_name" = "mysql-integration-cluster", } } - // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { comment = "Must be a list of log destinations where collected logs should be forwarded to" } - + discovery.relabel "filtered_pods" { targets = discovery.kubernetes.pods.targets rule { @@ -46,7 +45,7 @@ declare "pod_logs" { replacement = "$1" target_label = "job" } - + // set the container runtime as a label rule { action = "replace" @@ -55,28 +54,21 @@ declare "pod_logs" { replacement = "$1" target_label = "tmp_container_runtime" } - - // set the job label from the k8s.grafana.com/logs.job annotation if it exists - rule { - source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] - regex = "(.+)" - target_label = "job" - } - + // make all labels on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" } - + // make all annotations on the pod available to the pipeline as labels, // they are omitted before write to loki via stage.label_keep unless explicitly set rule { action = "labelmap" regex = "__meta_kubernetes_pod_annotation_(.+)" } - + // explicitly set service_name. if not set, loki will automatically try to populate a default. 
// see https://grafana.com/docs/loki/latest/get-started/labels/#default-labels-for-all-users // @@ -98,14 +90,14 @@ declare "pod_logs" { replacement = "$1" target_label = "service_name" } - + // set service_namespace rule { action = "replace" source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_service_namespace"] target_label = "service_namespace" } - + // set deployment_environment and deployment_environment_name rule { action = "replace" @@ -117,6 +109,14 @@ declare "pod_logs" { source_labels = ["__meta_kubernetes_pod_annotation_resource_opentelemetry_io_deployment_environment"] target_label = "deployment_environment" } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_job"] + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"] + target_label = "app_kubernetes_io_name" + } rule { source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance"] separator = ";" @@ -146,7 +146,7 @@ declare "pod_logs" { replacement = "prod-db" } } - + discovery.kubernetes "pods" { role = "pod" selectors { @@ -154,10 +154,10 @@ declare "pod_logs" { field = "spec.nodeName=" + sys.env("HOSTNAME") } } - + discovery.relabel "filtered_pods_with_paths" { targets = discovery.relabel.filtered_pods.output - + rule { source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] separator = "/" @@ -166,22 +166,22 @@ declare "pod_logs" { target_label = "__path__" } } - + local.file_match "pod_logs" { path_targets = discovery.relabel.filtered_pods_with_paths.output } - + loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } - + loki.process "pod_logs" { stage.match { selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" // the cri processing stage extracts the following k/v pairs: log, stream, time, flags stage.cri {} - + // Set the extract flags and stream values as labels stage.labels { values = { @@ -190,12 +190,12 @@ declare "pod_logs" { } } } - + stage.match { selector = "{tmp_container_runtime=\"docker\"}" // the docker processing stage extracts the following k/v pairs: log, stream, time stage.docker {} - + // Set the extract stream value as a label stage.labels { values = { @@ -203,7 +203,7 @@ declare "pod_logs" { } } } - + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary // container runtime label as it is no longer needed. @@ -216,11 +216,11 @@ declare "pod_logs" { // Integration: MySQL stage.match { selector = "{integration=\"mysql\"}" - + stage.regex { expression = `(?P.+) (?P[\d]+) \[(?P