diff --git a/charts/k8s-monitoring/charts/feature-application-observability/templates/_notes.tpl b/charts/k8s-monitoring/charts/feature-application-observability/templates/_notes.tpl index 8361257f8f..6deeba2e2b 100644 --- a/charts/k8s-monitoring/charts/feature-application-observability/templates/_notes.tpl +++ b/charts/k8s-monitoring/charts/feature-application-observability/templates/_notes.tpl @@ -17,10 +17,10 @@ Gather application data via {{ include "english_list" $receivers }} {{ $receiver Configure your applications to send telemetry data to: {{- if .Values.receivers.otlp.grpc.enabled }} * http://{{ .Collector.ServiceName }}.{{ .Collector.Namespace }}.svc.cluster.local:{{ .Values.receivers.otlp.grpc.port }} (OTLP gRPC) -{{ end }} +{{- end }} {{- if .Values.receivers.otlp.http.enabled }} * http://{{ .Collector.ServiceName }}.{{ .Collector.Namespace }}.svc.cluster.local:{{ .Values.receivers.otlp.http.port }} (OTLP HTTP) -{{ end }} +{{- end }} {{- if .Values.receivers.jaeger.grpc.enabled }} * http://{{ .Collector.ServiceName }}.{{ .Collector.Namespace }}.svc.cluster.local:{{ .Values.receivers.jaeger.grpc.port }} (Jaeger gRPC) {{- end }} @@ -35,7 +35,7 @@ Configure your applications to send telemetry data to: {{- end }} {{- if .Values.receivers.zipkin.enabled }} * http://{{ .Collector.ServiceName }}.{{ .Collector.Namespace }}.svc.cluster.local:{{ .Values.receivers.zipkin.port }} (Zipkin) -{{ end }} +{{- end }} {{- end }} {{- define "feature.applicationObservability.summary" -}} diff --git a/charts/k8s-monitoring/charts/feature-cluster-metrics/README.md b/charts/k8s-monitoring/charts/feature-cluster-metrics/README.md index df97708617..084fb6a4cf 100644 --- a/charts/k8s-monitoring/charts/feature-cluster-metrics/README.md +++ b/charts/k8s-monitoring/charts/feature-cluster-metrics/README.md @@ -203,6 +203,7 @@ Be sure perform actual integration testing in a live environment in the main [k8 | kube-state-metrics.metricsTuning.includeMetrics | list | `[]` | Metrics to keep. Can use regular expressions. | | kube-state-metrics.metricsTuning.useDefaultAllowList | bool | `true` | Filter the list of metrics from Kube State Metrics to a useful, minimal set. | | kube-state-metrics.namespace | string | `""` | Namespace to locate kube-state-metrics pods. If `deploy` is set to `true`, this will automatically be set to the namespace where this Helm chart is deployed. | +| kube-state-metrics.namespaces | string | `""` | Comma-separated list(string) or yaml list of namespaces to be enabled for collecting resources. By default all namespaces are collected. | | kube-state-metrics.scrapeInterval | string | `60s` | How frequently to scrape kube-state-metrics metrics. | | kube-state-metrics.service.portName | string | `"http"` | The port name used by kube-state-metrics. | | kube-state-metrics.service.scheme | string | `"http"` | The scrape scheme used by kube-state-metrics. | diff --git a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.schema.json b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.schema.json index 756f80bf95..719d581576 100644 --- a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.schema.json +++ b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.schema.json @@ -323,6 +323,9 @@ "namespace": { "type": "string" }, + "namespaces": { + "type": "string" + }, "nodeSelector": { "type": "object", "properties": { diff --git a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml index c4e326f732..eb778d652a 100644 --- a/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml +++ b/charts/k8s-monitoring/charts/feature-cluster-metrics/values.yaml @@ -434,6 +434,10 @@ kube-state-metrics: # @section -- kube-state-metrics namespace: "" + # -- Comma-separated list(string) or yaml list of namespaces to be enabled for collecting resources. By default all namespaces are collected. + # @section -- kube-state-metrics + namespaces: "" + # -- Rule blocks to be added to the discovery.relabel component for kube-state-metrics. # These relabeling rules are applied pre-scrape against the targets from service discovery. # Before the scrape, any remaining target labels that start with __ (i.e. __meta_kubernetes*) are dropped. diff --git a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/grafana.md b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/grafana.md index 8f61c6d635..82c56fd1d5 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/grafana.md +++ b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/grafana.md @@ -23,7 +23,7 @@ | Key | Type | Default | Description | |-----|------|---------|-------------| | logs.enabled | bool | `true` | Whether to enable special processing of Grafana pod logs. | -| logs.tuning.dropLogLevels | list | `[]` | The log levels to drop. Will automatically keep all log levels unless specified here. | +| logs.tuning.dropLogLevels | list | `["debug"]` | The log levels to drop. Will automatically keep all log levels unless specified here. | | logs.tuning.excludeLines | list | `[]` | Line patterns (valid RE2 regular expression)to exclude from the logs. | | logs.tuning.scrubTimestamp | bool | `true` | Whether the timestamp should be scrubbed from the log line | | logs.tuning.structuredMetadata | object | `{}` | The structured metadata mappings to set. To not set any structured metadata, set this to an empty object (e.g. `{}`) | diff --git a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/loki.md b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/loki.md index 46bf3411e2..810aa78483 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/loki.md +++ b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/loki.md @@ -16,7 +16,7 @@ | Key | Type | Default | Description | |-----|------|---------|-------------| | logs.enabled | bool | `true` | Whether to enable special processing of Loki pod logs. | -| logs.tuning.dropLogLevels | list | `[]` | The log levels to drop. Will automatically keep all log levels unless specified here. | +| logs.tuning.dropLogLevels | list | `["debug"]` | The log levels to drop. Will automatically keep all log levels unless specified here. | | logs.tuning.excludeLines | list | `[]` | Line patterns (valid RE2 regular expression)to exclude from the logs. | | logs.tuning.scrubTimestamp | bool | `true` | Whether the timestamp should be scrubbed from the log line | | logs.tuning.structuredMetadata | object | `{}` | The structured metadata mappings to set. To not set any structured metadata, set this to an empty object (e.g. `{}`) | diff --git a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/mimir.md b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/mimir.md index 0bc74f30dd..781bfb0ed0 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/mimir.md +++ b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/mimir.md @@ -15,7 +15,7 @@ | Key | Type | Default | Description | |-----|------|---------|-------------| | logs.enabled | bool | `true` | Whether to enable special processing of Mimir pod logs. | -| logs.tuning.dropLogLevels | list | `[]` | The log levels to drop. Will automatically keep all log levels unless specified here. | +| logs.tuning.dropLogLevels | list | `["debug"]` | The log levels to drop. Will automatically keep all log levels unless specified here. | | logs.tuning.excludeLines | list | `[]` | Line patterns (valid RE2 regular expression)to exclude from the logs. | | logs.tuning.scrubTimestamp | bool | `true` | Whether the timestamp should be scrubbed from the log line | | logs.tuning.structuredMetadata | object | `{}` | The structured metadata mappings to set. To not set any structured metadata, set this to an empty object (e.g. `{}`) | diff --git a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/tempo.md b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/tempo.md index 094b17e0d1..9b838790c1 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/tempo.md +++ b/charts/k8s-monitoring/charts/feature-integrations/docs/integrations/tempo.md @@ -16,7 +16,7 @@ | Key | Type | Default | Description | |-----|------|---------|-------------| | logs.enabled | bool | `true` | Whether to enable special processing of Tempo pod logs. | -| logs.tuning.dropLogLevels | list | `[]` | The log levels to drop. Will automatically keep all log levels unless specified here. | +| logs.tuning.dropLogLevels | list | `["debug"]` | The log levels to drop. Will automatically keep all log levels unless specified here. | | logs.tuning.excludeLines | list | `[]` | Line patterns (valid RE2 regular expression)to exclude from the logs. | | logs.tuning.scrubTimestamp | bool | `true` | Whether the timestamp should be scrubbed from the log line | | logs.tuning.structuredMetadata | object | `{}` | The structured metadata mappings to set. To not set any structured metadata, set this to an empty object (e.g. `{}`) | diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/grafana-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/grafana-values.yaml index b6472e4df8..0103680163 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/grafana-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/grafana-values.yaml @@ -69,7 +69,7 @@ logs: # -- The timestamp format to use for the log line, if not set the default timestamp which is the collection # will be used for the log line # @section -- Logs Settings - timestampFormat: "RFC3339Nano" + timestampFormat: RFC3339Nano # -- Whether the timestamp should be scrubbed from the log line # @section -- Logs Settings @@ -78,7 +78,8 @@ logs: # -- The log levels to drop. # Will automatically keep all log levels unless specified here. # @section -- Logs Settings - dropLogLevels: [] + dropLogLevels: + - debug # -- Line patterns (valid RE2 regular expression)to exclude from the logs. # @section -- Logs Settings diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml index 10c342cd9e..c7d268e732 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/loki-values.yaml @@ -68,7 +68,7 @@ logs: # -- The timestamp format to use for the log line, if not set the default timestamp which is the collection # will be used for the log line # @section -- Logs Settings - timestampFormat: "RFC3339Nano" + timestampFormat: RFC3339Nano # -- Whether the timestamp should be scrubbed from the log line # @section -- Logs Settings @@ -77,7 +77,8 @@ logs: # -- The log levels to drop. # Will automatically keep all log levels unless specified here. # @section -- Logs Settings - dropLogLevels: [] + dropLogLevels: + - debug # -- Line patterns (valid RE2 regular expression)to exclude from the logs. # @section -- Logs Settings diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml index c67cd750ce..3ccfb07199 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/mimir-values.yaml @@ -68,7 +68,7 @@ logs: # -- The timestamp format to use for the log line, if not set the default timestamp which is the collection # will be used for the log line # @section -- Logs Settings - timestampFormat: "RFC3339Nano" + timestampFormat: RFC3339Nano # -- Whether the timestamp should be scrubbed from the log line # @section -- Logs Settings @@ -77,7 +77,8 @@ logs: # -- The log levels to drop. # Will automatically keep all log levels unless specified here. # @section -- Logs Settings - dropLogLevels: [] + dropLogLevels: + - debug # -- Line patterns (valid RE2 regular expression)to exclude from the logs. # @section -- Logs Settings diff --git a/charts/k8s-monitoring/charts/feature-integrations/integrations/tempo-values.yaml b/charts/k8s-monitoring/charts/feature-integrations/integrations/tempo-values.yaml index 5a98a96640..68e1d5c58e 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/integrations/tempo-values.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/integrations/tempo-values.yaml @@ -68,7 +68,7 @@ logs: # -- The timestamp format to use for the log line, if not set the default timestamp which is the collection # will be used for the log line # @section -- Logs Settings - timestampFormat: "RFC3339Nano" + timestampFormat: RFC3339Nano # -- Whether the timestamp should be scrubbed from the log line # @section -- Logs Settings @@ -77,7 +77,8 @@ logs: # -- The log levels to drop. # Will automatically keep all log levels unless specified here. # @section -- Logs Settings - dropLogLevels: [] + dropLogLevels: + - debug # -- Line patterns (valid RE2 regular expression)to exclude from the logs. # @section -- Logs Settings diff --git a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/grafana-integration.schema.json b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/grafana-integration.schema.json index 1d0313bb20..7ff265eb0c 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/grafana-integration.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/grafana-integration.schema.json @@ -25,7 +25,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" diff --git a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/loki-integration.schema.json b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/loki-integration.schema.json index e8f738f862..22df0865ca 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/loki-integration.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/loki-integration.schema.json @@ -25,7 +25,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" diff --git a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/mimir-integration.schema.json b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/mimir-integration.schema.json index 0d06db17f3..b75810eddf 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/mimir-integration.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/mimir-integration.schema.json @@ -22,7 +22,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" diff --git a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/tempo-integration.schema.json b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/tempo-integration.schema.json index c0e48a4716..a86ff1dfa7 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/tempo-integration.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/schema-mods/definitions/tempo-integration.schema.json @@ -25,7 +25,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_logs.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_logs.tpl index 3dae1d9fbd..9ec5fe31e4 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_logs.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_logs.tpl @@ -3,8 +3,8 @@ {{- $defaultValues := "integrations/grafana-values.yaml" | .Files.Get | fromYaml }} {{- $logsEnabled := false }} {{- range $instance := .Values.grafana.instances }} - {{- with merge $instance $defaultValues (dict "type" "integration.grafana") }} - {{- $logsEnabled = or $logsEnabled $instance.logs.enabled }} + {{- with merge (deepCopy $defaultValues) (deepCopy $instance) (dict "type" "integration.grafana") }} + {{- $logsEnabled = or $logsEnabled .logs.enabled }} {{- end }} {{- end }} {{- $logsEnabled -}} @@ -13,7 +13,7 @@ {{- define "integrations.grafana.logs.discoveryRules" }} {{- $defaultValues := "integrations/grafana-values.yaml" | .Files.Get | fromYaml }} {{- range $instance := $.Values.grafana.instances }} - {{- with mergeOverwrite $defaultValues (deepCopy $instance) }} + {{- with $defaultValues | merge (deepCopy $instance) }} {{- if .logs.enabled }} {{- $labelList := list }} {{- $valueList := list }} @@ -52,9 +52,9 @@ rule { {{- define "integrations.grafana.logs.processingStage" }} {{- if eq (include "integrations.grafana.type.logs" .) "true" }} {{- $defaultValues := "integrations/grafana-values.yaml" | .Files.Get | fromYaml }} -// Integration: Loki +// Integration: Grafana {{- range $instance := $.Values.grafana.instances }} - {{- with mergeOverwrite $defaultValues (deepCopy $instance) }} + {{- with $defaultValues | merge (deepCopy $instance) }} {{- if .logs.enabled }} stage.match { {{- if $instance.namespaces }} @@ -66,10 +66,8 @@ stage.match { // extract some of the fields from the log line stage.logfmt { mapping = { - "timestamp" = "t", + "ts" = "t", "level" = "", - "logger" = "", - "type" = "", {{- range $key, $value := .logs.tuning.structuredMetadata }} {{ $key | quote }} = {{ if $value }}{{ $value | quote }}{{ else }}{{ $key | quote }}{{ end }}, {{- end }} @@ -86,7 +84,7 @@ stage.match { {{- if .logs.tuning.timestampFormat }} // reset the timestamp to the extracted value stage.timestamp { - source = "timestamp" + source = "ts" format = {{ .logs.tuning.timestampFormat | quote }} } {{- end }} @@ -94,14 +92,23 @@ stage.match { {{- if .logs.tuning.scrubTimestamp }} // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` + replace = "" + } + {{- end }} + + {{- if hasKey .logs.tuning.structuredMetadata "caller" }} + // clean up the caller to remove the line + stage.replace { + source = "caller" + expression = "(:[0-9]+$)" replace = "" } {{- end }} {{- /* the stage.structured_metadata block needs to be conditionalized because the support for enabling structured metadata can be disabled */ -}} {{- /* through the grafana limits_conifg on a per-tenant basis, even if there are no values defined or there are values defined but it is disabled */ -}} - {{- /* in Loki, the write will fail. */ -}} + {{- /* in Grafana, the write will fail. */ -}} {{- if gt (len .logs.tuning.structuredMetadata) 0 }} // set the structured metadata values stage.structured_metadata { @@ -130,6 +137,7 @@ stage.match { drop_counter_reason = "grafana-exclude-line" } {{- end }} + } {{- end }} {{- end }} diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_metrics.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_metrics.tpl index 56c04a3a11..42b3bcff3e 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_metrics.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_grafana_metrics.tpl @@ -4,8 +4,11 @@ {{/* Inputs: instance (grafana integration instance) Files (Files object) */}} {{- define "integrations.grafana.allowList" }} {{- $allowList := list -}} +{{- if .instance.metrics.tuning.useDefaultAllowList -}} +{{- $allowList = concat $allowList (list "up" "scrape_samples_scraped") (.Files.Get "default-allow-lists/grafana.yaml" | fromYamlArray) -}} +{{- end -}} {{- if .instance.metrics.tuning.includeMetrics -}} -{{- $allowList = concat $allowList .instance.metrics.tuning.includeMetrics -}} +{{- $allowList = concat $allowList (list "up" "scrape_samples_scraped") .instance.metrics.tuning.includeMetrics -}} {{- end -}} {{ $allowList | uniq | toYaml }} {{- end -}} @@ -89,7 +92,7 @@ declare "grafana_integration" { } argument "job_label" { - comment = "The job label to add for all Loki metrics (default: integrations/grafana)" + comment = "The job label to add for all Grafana metrics (default: integrations/grafana)" optional = true } @@ -137,9 +140,27 @@ declare "grafana_integration" { // drop metrics that match the drop_metrics regex rule { source_labels = ["__name__"] - regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + regex = coalesce(argument.drop_metrics.value, "") action = "drop" } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // the grafana-mixin expects the instance label to be the node name + rule { + source_labels = ["node"] + target_label = "instance" + replacement = "$1" + } + rule { + action = "labeldrop" + regex = "node" + } } } {{- range $instance := $.Values.grafana.instances }} @@ -151,10 +172,10 @@ declare "grafana_integration" { {{/* Instantiates the grafana integration */}} {{/* Inputs: integration (grafana integration definition), Values (all values), Files (Files object) */}} {{- define "integrations.grafana.include.metrics" }} -{{- $defaultValues := "integrations/grafana-values.yaml" | .Files.Get | fromYaml }} -{{- with mergeOverwrite $defaultValues (deepCopy .instance) }} +{{- $defaultValues := fromYaml (.Files.Get "integrations/grafana-values.yaml") }} +{{- with mergeOverwrite $defaultValues .instance (dict "type" "integration.grafana") }} {{- $metricAllowList := include "integrations.grafana.allowList" (dict "instance" . "Files" $.Files) | fromYamlArray }} -{{- $metricDenyList := .excludeMetrics }} +{{- $metricDenyList := .metrics.tuning.excludeMetrics }} {{- $labelSelectors := list }} {{- range $k, $v := .labelSelectors }} {{- if kindIs "slice" $v }} @@ -174,7 +195,7 @@ grafana_integration_discovery {{ include "helper.alloy_name" .name | quote }} { grafana_integration_scrape {{ include "helper.alloy_name" .name | quote }} { targets = grafana_integration_discovery.{{ include "helper.alloy_name" .name }}.output - job_label = {{ .jobLabel | quote }} + job_label = "integrations/grafana" clustering = true {{- if $metricAllowList }} keep_metrics = {{ $metricAllowList | join "|" | quote }} diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_logs.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_logs.tpl index 43911eb293..1f5278c520 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_logs.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_logs.tpl @@ -3,8 +3,8 @@ {{- $defaultValues := "integrations/loki-values.yaml" | .Files.Get | fromYaml }} {{- $logsEnabled := false }} {{- range $instance := .Values.loki.instances }} - {{- with merge $instance $defaultValues (dict "type" "integration.loki") }} - {{- $logsEnabled = or $logsEnabled $instance.logs.enabled }} + {{- with merge (deepCopy $instance) (deepCopy $defaultValues) (dict "type" "integration.loki") }} + {{- $logsEnabled = or $logsEnabled .logs.enabled }} {{- end }} {{- end }} {{- $logsEnabled -}} @@ -105,7 +105,7 @@ stage.match { {{- if .logs.tuning.scrubTimestamp }} // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } {{- end }} diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_metrics.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_metrics.tpl index e72d2b7d90..2e6a7586b1 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_metrics.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_loki_metrics.tpl @@ -189,7 +189,7 @@ declare "loki_integration" { {{/* Inputs: integration (loki integration definition), Values (all values), Files (Files object) */}} {{- define "integrations.loki.include.metrics" }} {{- $defaultValues := "integrations/loki-values.yaml" | .Files.Get | fromYaml }} -{{- with mergeOverwrite $defaultValues (deepCopy .instance) }} +{{- with mergeOverwrite $defaultValues .instance (dict "type" "integration.loki") }} {{- $metricAllowList := include "integrations.loki.allowList" (dict "instance" . "Files" $.Files) | fromYamlArray }} {{- $metricDenyList := .metrics.tuning.excludeMetrics }} {{- $labelSelectors := list }} diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_logs.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_logs.tpl index 835e818df0..81cbf2de5b 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_logs.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_logs.tpl @@ -3,8 +3,8 @@ {{- $defaultValues := "integrations/mimir-values.yaml" | .Files.Get | fromYaml }} {{- $logsEnabled := false }} {{- range $instance := .Values.mimir.instances }} - {{- with merge $instance $defaultValues (dict "type" "integration.mimir") }} - {{- $logsEnabled = or $logsEnabled $instance.logs.enabled }} + {{- with merge (deepCopy $defaultValues) (deepCopy $instance) (dict "type" "integration.mimir") }} + {{- $logsEnabled = or $logsEnabled .logs.enabled }} {{- end }} {{- end }} {{- $logsEnabled -}} @@ -105,7 +105,7 @@ stage.match { {{- if .logs.tuning.scrubTimestamp }} // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } {{- end }} diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_metrics.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_metrics.tpl index 873e77da84..84a9ae0483 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_metrics.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_mimir_metrics.tpl @@ -188,8 +188,8 @@ declare "mimir_integration" { {{/* Instantiates the mimir integration */}} {{/* Inputs: integration (mimir integration definition), Values (all values), Files (Files object) */}} {{- define "integrations.mimir.include.metrics" }} -{{- $defaultValues := "integrations/mimir-values.yaml" | .Files.Get | fromYaml }} -{{- with $defaultValues | merge (deepCopy .instance) }} +{{- $defaultValues := fromYaml (.Files.Get "integrations/mimir-values.yaml") }} +{{- with mergeOverwrite $defaultValues .instance (dict "type" "integration.mimir") }} {{- $metricAllowList := include "integrations.mimir.allowList" (dict "instance" . "Files" $.Files) | fromYamlArray }} {{- $metricDenyList := .metrics.tuning.excludeMetrics }} {{- $labelSelectors := list }} diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_tempo_logs.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_tempo_logs.tpl index 53d6e17557..f2bd80aa45 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_tempo_logs.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_tempo_logs.tpl @@ -3,8 +3,8 @@ {{- $defaultValues := "integrations/tempo-values.yaml" | .Files.Get | fromYaml }} {{- $logsEnabled := false }} {{- range $instance := .Values.tempo.instances }} - {{- with merge $instance $defaultValues (dict "type" "integration.tempo") }} - {{- $logsEnabled = or $logsEnabled $instance.logs.enabled }} + {{- with merge (deepCopy $defaultValues) (deepCopy $instance) (dict "type" "integration.tempo") }} + {{- $logsEnabled = or $logsEnabled .logs.enabled }} {{- end }} {{- end }} {{- $logsEnabled -}} @@ -65,7 +65,7 @@ rule { {{- define "integrations.tempo.logs.processingStage" }} {{- if eq (include "integrations.tempo.type.logs" .) "true" }} {{- $defaultValues := "integrations/tempo-values.yaml" | .Files.Get | fromYaml }} -// Integration: tempo +// Integration: Tempo {{- range $instance := $.Values.tempo.instances }} {{- with $defaultValues | merge (deepCopy $instance) }} {{- if .logs.enabled }} @@ -121,7 +121,7 @@ stage.match { {{- /* the stage.structured_metadata block needs to be conditionalized because the support for enabling structured metadata can be disabled */ -}} {{- /* through the tempo limits_conifg on a per-tenant basis, even if there are no values defined or there are values defined but it is disabled */ -}} - {{- /* in tempo, the write will fail. */ -}} + {{- /* in Tempo, the write will fail. */ -}} {{- if gt (len .logs.tuning.structuredMetadata) 0 }} // set the structured metadata values stage.structured_metadata { diff --git a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_tempo_metrics.tpl b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_tempo_metrics.tpl index 2ed743c04f..a77ac1ea7f 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_tempo_metrics.tpl +++ b/charts/k8s-monitoring/charts/feature-integrations/templates/_integration_tempo_metrics.tpl @@ -99,7 +99,7 @@ declare "tempo_integration" { } argument "job_label" { - comment = "The job label to add for all tempo metrics (default: integrations/tempo)" + comment = "The job label to add for all Tempo metrics (default: integrations/tempo)" optional = true } @@ -188,8 +188,8 @@ declare "tempo_integration" { {{/* Instantiates the tempo integration */}} {{/* Inputs: integration (tempo integration definition), Values (all values), Files (Files object) */}} {{- define "integrations.tempo.include.metrics" }} -{{- $defaultValues := "integrations/tempo-values.yaml" | .Files.Get | fromYaml }} -{{- with $defaultValues | merge (deepCopy .instance) }} +{{- $defaultValues := fromYaml (.Files.Get "integrations/tempo-values.yaml") }} +{{- with mergeOverwrite $defaultValues .instance (dict "type" "integration.tempo") }} {{- $metricAllowList := include "integrations.tempo.allowList" (dict "instance" . "Files" $.Files) | fromYamlArray }} {{- $metricDenyList := .metrics.tuning.excludeMetrics }} {{- $labelSelectors := list }} diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/alloy_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/alloy_test.yaml index 1ac7028382..93e606c0fe 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/alloy_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/alloy_test.yaml @@ -1,5 +1,5 @@ # yamllint disable rule:document-start rule:line-length rule:trailing-spaces rule:empty-lines -suite: Test Alloy integration +suite: Test Alloy Integration - Metrics templates: - configmap.yaml tests: diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/cert-manager_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/cert-manager_test.yaml index c51d32a0fc..8d5ac90a50 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/cert-manager_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/cert-manager_test.yaml @@ -1,5 +1,5 @@ # yamllint disable rule:document-start rule:line-length rule:trailing-spaces -suite: Test cert-manager integration +suite: Test cert-manager Integration - Metrics templates: - configmap.yaml tests: @@ -21,19 +21,19 @@ tests: argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + import.git "cert_manager" { repository = "https://github.com/grafana/alloy-modules.git" revision = "main" path = "modules/kubernetes/cert-manager/metrics.alloy" pull_frequency = "15m" } - + cert_manager.kubernetes "cert_manager" { label_selectors = ["app.kubernetes.io/name=cert-manager"] port_name = "http-metrics" } - + cert_manager.scrape "cert_manager" { targets = cert_manager.kubernetes.cert_manager.output clustering = true @@ -63,20 +63,20 @@ tests: argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + import.git "cert_manager" { repository = "https://github.com/grafana/alloy-modules.git" revision = "main" path = "modules/kubernetes/cert-manager/metrics.alloy" pull_frequency = "15m" } - + cert_manager.kubernetes "my_cert_manager" { namespaces = ["kube-system"] label_selectors = ["app.kubernetes.io/name=my-cert-manager"] port_name = "http-metrics" } - + cert_manager.scrape "my_cert_manager" { targets = cert_manager.kubernetes.my_cert_manager.output clustering = true diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/etcd_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/etcd_test.yaml index 618ed29d42..9fd0890e98 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/etcd_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/etcd_test.yaml @@ -1,5 +1,5 @@ # yamllint disable rule:document-start rule:line-length rule:trailing-spaces -suite: Test etcd integration +suite: Test etcd Integration - Metrics templates: - configmap.yaml tests: @@ -21,19 +21,19 @@ tests: argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + import.git "etcd" { repository = "https://github.com/grafana/alloy-modules.git" revision = "main" path = "modules/databases/kv/etcd/metrics.alloy" pull_frequency = "15m" } - + etcd.kubernetes "etcd" { label_selectors = ["app.kubernetes.io/component=etcd"] port_name = "metrics" } - + etcd.scrape "etcd" { targets = etcd.kubernetes.etcd.output job_label = "integrations/etcd" @@ -63,20 +63,20 @@ tests: argument "metrics_destinations" { comment = "Must be a list of metric destinations where collected metrics should be forwarded to" } - + import.git "etcd" { repository = "https://github.com/grafana/alloy-modules.git" revision = "main" path = "modules/databases/kv/etcd/metrics.alloy" pull_frequency = "15m" } - + etcd.kubernetes "my_etcd" { namespaces = ["kube-system"] label_selectors = ["app.kubernetes.io/component=my-etcd"] port_name = "metrics" } - + etcd.scrape "my_etcd" { targets = etcd.kubernetes.my_etcd.output job_label = "integrations/etcd" diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_logs_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_logs_test.yaml index afd1ab9e75..9a8fca89ee 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_logs_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_logs_test.yaml @@ -38,17 +38,15 @@ tests: // Processing stages loki.process "test" { - // Integration: Loki + // Integration: Grafana stage.match { selector = "{job=\"integrations/grafana\",instance=\"grafana\"}" // extract some of the fields from the log line stage.logfmt { mapping = { - "timestamp" = "t", + "ts" = "t", "level" = "", - "logger" = "", - "type" = "", } } @@ -60,14 +58,21 @@ tests: } // reset the timestamp to the extracted value stage.timestamp { - source = "timestamp" + source = "ts" format = "RFC3339Nano" } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } + } } @@ -119,7 +124,7 @@ tests: path: data["logs.alloy"] pattern: |- \s+stage.timestamp { - \s+ source = "timestamp" + \s+ source = "ts" \s+ format = "RFC3339" \s+} @@ -141,12 +146,12 @@ tests: path: data["logs.alloy"] # The pattern should look like this, but since the regex is escaped, it will be a bit different # stage.replace { - # expression = "( t=[^ ]+\\s+)" + # expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` # replace = "" # } pattern: |- \s+stage.replace { - \s+ expression = "\( t=\[\^ \]\+\\\\s\+\)" + \s+ expression = `\(\?:\^\|\\s\+\)\(t=\\d\{4\}-\\d\{2\}-\\d\{2\}T\\d\{2\}:\\d\{2\}:\\d\{2\}\\.\\d\+\[\^ \]\*\\s\+\)` \s+ replace = "" \s+} diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_metrics_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_metrics_test.yaml index b9c1f5a8dc..32f0d5a322 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_metrics_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/grafana_metrics_test.yaml @@ -166,10 +166,10 @@ tests: } argument "job_label" { - comment = "The job label to add for all Loki metrics (default: integrations/grafana)" + comment = "The job label to add for all Grafana metrics (default: integrations/grafana)" optional = true } - + argument "keep_metrics" { comment = "A regular expression of metrics to keep (default: see below)" optional = true @@ -214,9 +214,27 @@ tests: // drop metrics that match the drop_metrics regex rule { source_labels = ["__name__"] - regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + regex = coalesce(argument.drop_metrics.value, "") action = "drop" } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // the grafana-mixin expects the instance label to be the node name + rule { + source_labels = ["node"] + target_label = "instance" + replacement = "$1" + } + rule { + action = "labeldrop" + regex = "node" + } } } @@ -236,7 +254,6 @@ tests: } } - - it: should allow you to restrict the namespaces for metrics set: deployAsConfigMap: true @@ -253,3 +270,39 @@ tests: - matchRegex: path: data["metrics.alloy"] pattern: namespaces = \["k8smon"\] + + - it: should allow you to specific which metrics to include + set: + deployAsConfigMap: true + grafana: + instances: + - name: grafana + metrics: + tuning: + includeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|foo\|bar" + + - it: should allow you to specific which metrics to exclude + set: + deployAsConfigMap: true + grafana: + instances: + - name: grafana + metrics: + tuning: + excludeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: drop_metrics = "foo\|bar" diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/loki_logs_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/loki_logs_test.yaml index 177a428441..5c039ca9a9 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/loki_logs_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/loki_logs_test.yaml @@ -1,6 +1,6 @@ --- # yamllint disable rule:document-start rule:line-length rule:trailing-spaces rule:empty-lines -suite: Test Loki integration +suite: Test Loki Integration - Logs templates: - configmap.yaml tests: @@ -72,9 +72,15 @@ tests: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } } @@ -151,12 +157,12 @@ tests: path: data["logs.alloy"] # The pattern should look like this, but since the regex is escaped, it will be a bit different # stage.replace { - # expression = "(ts=[^ ]+\\s+)" + # expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` # replace = "" # } pattern: |- \s+stage.replace { - \s+ expression = "\(ts=\[\^ \]\+\\\\s\+\)" + \s+ expression = `\(\?:\^\|\\s\+\)\(ts=\\d\{4\}-\\d\{2\}-\\d\{2\}T\\d\{2\}:\\d\{2\}:\\d\{2\}\\.\\d\+\[\^ \]\*\\s\+\)` \s+ replace = "" \s+} diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/loki_metrics_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/loki_metrics_test.yaml index d595ec1c09..c1ea1345e9 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/loki_metrics_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/loki_metrics_test.yaml @@ -1,6 +1,6 @@ --- # yamllint disable rule:document-start rule:line-length rule:trailing-spaces rule:empty-lines -suite: Test Loki integration +suite: Test Loki Integration - Metrics templates: - configmap.yaml tests: @@ -285,3 +285,57 @@ tests: - matchRegex: path: data["metrics.alloy"] pattern: namespaces = \["k8smon"\] + + - it: should allow you to set the default allow list + set: + deployAsConfigMap: true + loki: + instances: + - name: loki + metrics: + tuning: + useDefaultAllowList: true + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|go_gc_cycles_total_gc_cycles_total.+\|loki_s3_request_duration_seconds_count" + + - it: should allow you to specific which metrics to include + set: + deployAsConfigMap: true + loki: + instances: + - name: loki + metrics: + tuning: + useDefaultAllowList: false + includeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|foo\|bar" + + - it: should allow you to specific which metrics to exclude + set: + deployAsConfigMap: true + loki: + instances: + - name: loki + metrics: + tuning: + useDefaultAllowList: false + excludeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: drop_metrics = "foo\|bar" diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_logs_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_logs_test.yaml index 721f8f4b15..baeead6366 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_logs_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_logs_test.yaml @@ -1,6 +1,6 @@ --- # yamllint disable rule:document-start rule:line-length rule:trailing-spaces rule:empty-lines -suite: Test Mimir integration +suite: Test Mimir Integration - Logs templates: - configmap.yaml tests: @@ -72,9 +72,15 @@ tests: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } } @@ -151,12 +157,12 @@ tests: path: data["logs.alloy"] # The pattern should look like this, but since the regex is escaped, it will be a bit different # stage.replace { - # expression = "(ts=[^ ]+\\s+)" + # expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` # replace = "" # } pattern: |- \s+stage.replace { - \s+ expression = "\(ts=\[\^ \]\+\\\\s\+\)" + \s+ expression = `\(\?:\^\|\\s\+\)\(ts=\\d\{4\}-\\d\{2\}-\\d\{2\}T\\d\{2\}:\\d\{2\}:\\d\{2\}\\.\\d\+\[\^ \]\*\\s\+\)` \s+ replace = "" \s+} diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_metrics_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_metrics_test.yaml index 0136ae5520..3439889b62 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_metrics_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/mimir_metrics_test.yaml @@ -1,6 +1,6 @@ --- # yamllint disable rule:document-start rule:line-length rule:trailing-spaces rule:empty-lines -suite: Test Mimir integration +suite: Test Mimir Integration - Metrics templates: - configmap.yaml tests: @@ -285,3 +285,57 @@ tests: - matchRegex: path: data["metrics.alloy"] pattern: namespaces = \["k8smon"\] + + - it: should allow you to set the default allow list + set: + deployAsConfigMap: true + mimir: + instances: + - name: mimir + metrics: + tuning: + useDefaultAllowList: true + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|cortex_alertmanager_alerts.+\|thanos_store_index_cache_requests_total" + + - it: should allow you to specific which metrics to include + set: + deployAsConfigMap: true + mimir: + instances: + - name: mimir + metrics: + tuning: + useDefaultAllowList: false + includeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|foo\|bar" + + - it: should allow you to specific which metrics to exclude + set: + deployAsConfigMap: true + mimir: + instances: + - name: mimir + metrics: + tuning: + useDefaultAllowList: false + excludeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: drop_metrics = "foo\|bar" diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/mysql_logs_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/mysql_logs_test.yaml index 40b4909a75..f0ffee1c38 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/mysql_logs_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/mysql_logs_test.yaml @@ -1,6 +1,6 @@ --- # yamllint disable rule:document-start rule:line-length rule:trailing-spaces rule:empty-lines -suite: Test MySQL integration +suite: Test MySQL Integration - Logs templates: - configmap.yaml tests: diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/mysql_metrics_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/mysql_metrics_test.yaml index ee4b99720a..d26b95b929 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/mysql_metrics_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/mysql_metrics_test.yaml @@ -1,5 +1,5 @@ # yamllint disable rule:document-start rule:line-length rule:trailing-spaces -suite: Test MySQL integration +suite: Test MySQL Integration - Metrics templates: - configmap.yaml - mysql-secret.yaml diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/tempo_logs_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/tempo_logs_test.yaml index 5bd0f3b88a..b56eb82576 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/tempo_logs_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/tempo_logs_test.yaml @@ -1,6 +1,6 @@ --- # yamllint disable rule:document-start rule:line-length rule:trailing-spaces rule:empty-lines -suite: Test Tempo integration +suite: Test Tempo Integration - Logs templates: - configmap.yaml tests: @@ -47,7 +47,7 @@ tests: // Processing stages loki.process "test" { - // Integration: tempo + // Integration: Tempo stage.match { selector = "{integration=\"tempo\",instance=\"tempo\"}" @@ -75,6 +75,12 @@ tests: expression = "(ts=[^ ]+\\s+)" replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "tempo-drop-log-level" + } } } diff --git a/charts/k8s-monitoring/charts/feature-integrations/tests/tempo_metrics_test.yaml b/charts/k8s-monitoring/charts/feature-integrations/tests/tempo_metrics_test.yaml index ead109862e..a07da2b796 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/tests/tempo_metrics_test.yaml +++ b/charts/k8s-monitoring/charts/feature-integrations/tests/tempo_metrics_test.yaml @@ -1,6 +1,6 @@ --- # yamllint disable rule:document-start rule:line-length rule:trailing-spaces rule:empty-lines -suite: Test Tempo integration +suite: Test Tempo Integration - Metrics templates: - configmap.yaml tests: @@ -173,7 +173,7 @@ tests: } argument "job_label" { - comment = "The job label to add for all tempo metrics (default: integrations/tempo)" + comment = "The job label to add for all Tempo metrics (default: integrations/tempo)" optional = true } @@ -285,3 +285,74 @@ tests: - matchRegex: path: data["metrics.alloy"] pattern: namespaces = \["k8smon"\] + + - it: should allow you to disable the default allow list + set: + deployAsConfigMap: true + tempo: + instances: + - name: tempo + metrics: + tuning: + useDefaultAllowList: false + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + # The pattern should look like this, but since the regex is escaped, it will be a bit different + # tempo_integration_scrape "tempo" { + # targets = tempo_integration_discovery.tempo.output + # job_label = "integrations/tempo" + # clustering = true + # scrape_interval = "60s" + # max_cache_size = 100000 + # forward_to = argument.metrics_destinations.value + # } + pattern: |- + \s*tempo_integration_scrape "tempo" { + \s* targets = tempo_integration_discovery.tempo.output + \s* job_label = "integrations/tempo" + \s* clustering = true + \s* scrape_interval = "60s" + \s* max_cache_size = 100000 + \s* forward_to = argument.metrics_destinations.value + \s*} + + - it: should allow you to specific which metrics to include + set: + deployAsConfigMap: true + tempo: + instances: + - name: tempo + metrics: + tuning: + useDefaultAllowList: false + includeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: keep_metrics = "up\|scrape_samples_scraped\|foo\|bar" + + - it: should allow you to specific which metrics to exclude + set: + deployAsConfigMap: true + tempo: + instances: + - name: tempo + metrics: + tuning: + useDefaultAllowList: false + excludeMetrics: + - foo + - bar + asserts: + - isKind: + of: ConfigMap + - matchRegex: + path: data["metrics.alloy"] + pattern: drop_metrics = "foo\|bar" diff --git a/charts/k8s-monitoring/charts/feature-integrations/values.schema.json b/charts/k8s-monitoring/charts/feature-integrations/values.schema.json index 7502c5c2ef..d1fc2d948b 100644 --- a/charts/k8s-monitoring/charts/feature-integrations/values.schema.json +++ b/charts/k8s-monitoring/charts/feature-integrations/values.schema.json @@ -340,7 +340,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" @@ -438,7 +441,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" @@ -523,7 +529,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" @@ -743,7 +752,10 @@ "type": "object", "properties": { "dropLogLevels": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "excludeLines": { "type": "array" diff --git a/charts/k8s-monitoring/charts/feature-node-logs/README.md b/charts/k8s-monitoring/charts/feature-node-logs/README.md index de8e19c4a5..3c8ee0c353 100644 --- a/charts/k8s-monitoring/charts/feature-node-logs/README.md +++ b/charts/k8s-monitoring/charts/feature-node-logs/README.md @@ -62,7 +62,7 @@ Be sure perform actual integration testing in a live environment in the main [k8 | journal.jobLabel | string | `"integrations/kubernetes/journal"` | The value for the job label for journal logs. | | journal.maxAge | string | `"8h"` | The path to the journal logs on the worker node. | | journal.path | string | `"/var/log/journal"` | The path to the journal logs on the worker node. | -| journal.units | list | `[]` | The list of systemd units to keep scraped logs from. If empty, all units are scraped. | +| journal.units | list | `[]` | The list of systemd units to keep scraped logs from, this can be a valid RE2 regex. If empty, all units are scraped. | ### General settings diff --git a/charts/k8s-monitoring/charts/feature-node-logs/values.yaml b/charts/k8s-monitoring/charts/feature-node-logs/values.yaml index ea43df0d9b..a1296764cd 100644 --- a/charts/k8s-monitoring/charts/feature-node-logs/values.yaml +++ b/charts/k8s-monitoring/charts/feature-node-logs/values.yaml @@ -24,7 +24,7 @@ journal: # @section -- Journal Logs formatAsJson: false - # -- The list of systemd units to keep scraped logs from. If empty, all units are scraped. + # -- The list of systemd units to keep scraped logs from, this can be a valid RE2 regex. If empty, all units are scraped. # @section -- Journal Logs units: [] # - kubelet.service diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy index a15eafdcdf..73bd57dcf7 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-logs.alloy @@ -201,17 +201,15 @@ declare "pod_logs" { "tmp_container_runtime", ] } - // Integration: Loki + // Integration: Grafana stage.match { selector = "{job=\"integrations/grafana\",instance=\"grafana\",namespace=~\"o11y\"}" // extract some of the fields from the log line stage.logfmt { mapping = { - "timestamp" = "t", + "ts" = "t", "level" = "", - "logger" = "", - "type" = "", } } @@ -223,14 +221,21 @@ declare "pod_logs" { } // reset the timestamp to the extracted value stage.timestamp { - source = "timestamp" + source = "ts" format = "RFC3339Nano" } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } + } // Only keep the labels that are defined in the `keepLabels` list. diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-metrics.alloy index 14c396767e..4cd57b5d95 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/alloy-metrics.alloy @@ -194,7 +194,7 @@ declare "grafana_integration" { } argument "job_label" { - comment = "The job label to add for all Loki metrics (default: integrations/grafana)" + comment = "The job label to add for all Grafana metrics (default: integrations/grafana)" optional = true } @@ -242,9 +242,27 @@ declare "grafana_integration" { // drop metrics that match the drop_metrics regex rule { source_labels = ["__name__"] - regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + regex = coalesce(argument.drop_metrics.value, "") action = "drop" } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // the grafana-mixin expects the instance label to be the node name + rule { + source_labels = ["node"] + target_label = "instance" + replacement = "$1" + } + rule { + action = "labeldrop" + regex = "node" + } } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml index 1ec3562233..2a58f2a385 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/grafana/output.yaml @@ -235,7 +235,7 @@ data: } argument "job_label" { - comment = "The job label to add for all Loki metrics (default: integrations/grafana)" + comment = "The job label to add for all Grafana metrics (default: integrations/grafana)" optional = true } @@ -283,9 +283,27 @@ data: // drop metrics that match the drop_metrics regex rule { source_labels = ["__name__"] - regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + regex = coalesce(argument.drop_metrics.value, "") action = "drop" } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // the grafana-mixin expects the instance label to be the node name + rule { + source_labels = ["node"] + target_label = "instance" + replacement = "$1" + } + rule { + action = "labeldrop" + regex = "node" + } } } @@ -573,17 +591,15 @@ data: "tmp_container_runtime", ] } - // Integration: Loki + // Integration: Grafana stage.match { selector = "{job=\"integrations/grafana\",instance=\"grafana\",namespace=~\"o11y\"}" // extract some of the fields from the log line stage.logfmt { mapping = { - "timestamp" = "t", + "ts" = "t", "level" = "", - "logger" = "", - "type" = "", } } @@ -595,14 +611,21 @@ data: } // reset the timestamp to the extracted value stage.timestamp { - source = "timestamp" + source = "ts" format = "RFC3339Nano" } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } + } // Only keep the labels that are defined in the `keepLabels` list. diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy index cce42c061b..4d954019fc 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/loki/alloy-logs.alloy @@ -235,9 +235,15 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml index 85280ff377..a83407d067 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/loki/output.yaml @@ -642,9 +642,15 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy index 5cdace100f..3a3ae5d994 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/alloy-logs.alloy @@ -235,9 +235,15 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml index 1fd8eab5c5..258fc563b0 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/mimir/output.yaml @@ -642,9 +642,15 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/README.md b/charts/k8s-monitoring/docs/examples/meta-monitoring/README.md index 677fd0b7f5..9e5f2c96bb 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/README.md +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/README.md @@ -116,24 +116,15 @@ clusterMetrics: enabled: false kube-state-metrics: enabled: true - namespaces: - - collectors - - logs - - metrics - - o11y - extraMetricProcessingRules: |- - rule { - action = "keep" - source_labels = ["namespace"] - regex = "collectors|logs|metrics|o11y" - } + namespaces: collectors,logs,metrics,o11y metricsTuning: useDefaultAllowList: false includeMetrics: [(.+)] node-exporter: enabled: true - useIntegrationAllowList: true deploy: true + metricsTuning: + useIntegrationAllowList: true windows-exporter: enabled: false deploy: false @@ -149,6 +140,7 @@ nodeLogs: podLogs: enabled: true + collector: alloy-singleton labelsToKeep: - app - app_kubernetes_io_name @@ -160,7 +152,6 @@ podLogs: - pod - service_name gatherMethod: kubernetesApi - collector: alloy-singleton namespaces: - collectors - logs @@ -174,6 +165,20 @@ applicationObservability: thriftHttp: enabled: true port: 14268 + processors: + k8sattributes: + metadata: + - k8s.namespace.name + - k8s.pod.name + - k8s.deployment.name + - k8s.statefulset.name + - k8s.daemonset.name + - k8s.cronjob.name + - k8s.job.name + - k8s.node.name + - k8s.pod.uid + - k8s.pod.start_time + - k8s.container.name # Collectors alloy-singleton: diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-receiver.alloy b/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-receiver.alloy index ca011ebb59..1ce5bbe0ed 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-receiver.alloy +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-receiver.alloy @@ -160,7 +160,7 @@ declare "application_observability" { // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { - metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] + metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time","k8s.container.name"] } pod_association { source { diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-singleton.alloy b/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-singleton.alloy index f9163cdbd3..9cff8f0d44 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-singleton.alloy +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/alloy-singleton.alloy @@ -241,17 +241,6 @@ declare "cluster_metrics" { scheme = "http" scrape_interval = "60s" max_cache_size = 100000 - forward_to = [prometheus.relabel.kube_state_metrics.receiver] - } - - prometheus.relabel "kube_state_metrics" { - max_cache_size = 100000 - - rule { - action = "keep" - source_labels = ["namespace"] - regex = "collectors|logs|metrics|o11y" - } forward_to = argument.metrics_destinations.value } @@ -286,7 +275,7 @@ declare "cluster_metrics" { targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" clustering = true - keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes" + keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes|node_arp_entries|node_boot_time_seconds|node_context_switches_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filefd_maximum|node_filesystem_avail_bytes|node_filesystem_device_error|node_filesystem_files|node_filesystem_files_free|node_filesystem_readonly|node_filesystem_size_bytes|node_intr_total|node_load1|node_load15|node_load5|node_md_disks|node_md_disks_required|node_memory_Active_anon_bytes|node_memory_Active_bytes|node_memory_Active_file_bytes|node_memory_AnonHugePages_bytes|node_memory_AnonPages_bytes|node_memory_Bounce_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_CommitLimit_bytes|node_memory_Committed_AS_bytes|node_memory_DirectMap1G_bytes|node_memory_DirectMap2M_bytes|node_memory_DirectMap4k_bytes|node_memory_Dirty_bytes|node_memory_HugePages_Free|node_memory_HugePages_Rsvd|node_memory_HugePages_Surp|node_memory_HugePages_Total|node_memory_Hugepagesize_bytes|node_memory_Inactive_anon_bytes|node_memory_Inactive_bytes|node_memory_Inactive_file_bytes|node_memory_Mapped_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_Shmem_bytes|node_memory_ShmemHugePages_bytes|node_memory_Slab_bytes|node_memory_SReclaimable_bytes|node_memory_SUnreclaim_bytes|node_memory_SwapTotal_bytes|node_memory_VmallocChunk_bytes|node_memory_VmallocTotal_bytes|node_memory_VmallocUsed_bytes|node_memory_Writeback_bytes|node_memory_WritebackTmp_bytes|node_netstat_Icmp_InErrors|node_netstat_Icmp_InMsgs|node_netstat_Icmp_OutMsgs|node_netstat_Icmp6_InErrors|node_netstat_Icmp6_InMsgs|node_netstat_Icmp6_OutMsgs|node_netstat_IpExt_InOctets|node_netstat_IpExt_OutOctets|node_netstat_Tcp_InErrs|node_netstat_Tcp_InSegs|node_netstat_Tcp_OutRsts|node_netstat_Tcp_OutSegs|node_netstat_Tcp_RetransSegs|node_netstat_TcpExt_ListenDrops|node_netstat_TcpExt_ListenOverflows|node_netstat_TcpExt_TCPSynRetrans|node_netstat_Udp_InDatagrams|node_netstat_Udp_InErrors|node_netstat_Udp_NoPorts|node_netstat_Udp_OutDatagrams|node_netstat_Udp_RcvbufErrors|node_netstat_Udp_SndbufErrors|node_netstat_Udp6_InDatagrams|node_netstat_Udp6_InErrors|node_netstat_Udp6_NoPorts|node_netstat_Udp6_OutDatagrams|node_netstat_Udp6_RcvbufErrors|node_netstat_Udp6_SndbufErrors|node_netstat_UdpLite_InErrors|node_network_carrier|node_network_info|node_network_mtu_bytes|node_network_receive_compressed_total|node_network_receive_errs_total|node_network_receive_fifo_total|node_network_receive_multicast_total|node_network_receive_packets_total|node_network_speed_bytes|node_network_transmit_compressed_total|node_network_transmit_errs_total|node_network_transmit_fifo_total|node_network_transmit_multicast_total|node_network_transmit_packets_total|node_network_transmit_queue_length|node_network_up|node_nf_conntrack_entries|node_nf_conntrack_entries_limit|node_os_info|node_procs_running|node_sockstat_FRAG_inuse|node_sockstat_FRAG6_inuse|node_sockstat_RAW_inuse|node_sockstat_RAW6_inuse|node_sockstat_sockets_used|node_sockstat_TCP_alloc|node_sockstat_TCP_inuse|node_sockstat_TCP_mem|node_sockstat_TCP_mem_bytes|node_sockstat_TCP_orphan|node_sockstat_TCP_tw|node_sockstat_TCP6_inuse|node_sockstat_UDP_inuse|node_sockstat_UDP_mem|node_sockstat_UDP_mem_bytes|node_sockstat_UDP6_inuse|node_sockstat_UDPLITE_inuse|node_sockstat_UDPLITE6_inuse|node_softnet_dropped_total|node_softnet_processed_total|node_softnet_times_squeezed_total|node_systemd_service_restart_total|node_systemd_unit_state|node_textfile_scrape_error|node_time_zone_offset_seconds|node_timex_estimated_error_seconds|node_timex_maxerror_seconds|node_timex_offset_seconds|node_timex_sync_status|node_uname_info|node_vmstat_oom_kill|node_vmstat_pgfault|node_vmstat_pgmajfault|node_vmstat_pgpgin|node_vmstat_pgpgout|node_vmstat_pswpin|node_vmstat_pswpout|process_max_fds|process_open_fds" scheme = "http" scrape_interval = "60s" max_cache_size = 100000 @@ -593,21 +582,15 @@ declare "pod_logs" { "tmp_container_runtime", ] } - // Integration: Loki + // Integration: Grafana stage.match { selector = "{job=\"integrations/grafana\",instance=\"grafana\",namespace=~\"o11y\"}" // extract some of the fields from the log line stage.logfmt { mapping = { - "timestamp" = "t", + "ts" = "t", "level" = "", - "logger" = "", - "type" = "", - "caller" = "caller", - "org_id" = "org_id", - "tenant" = "tenant", - "user" = "user", } } @@ -619,23 +602,21 @@ declare "pod_logs" { } // reset the timestamp to the extracted value stage.timestamp { - source = "timestamp" + source = "ts" format = "RFC3339Nano" } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } - // set the structured metadata values - stage.structured_metadata { - values = { - "caller" = "caller", - "org_id" = "org_id", - "tenant" = "tenant", - "user" = "user", - } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" } + } // Integration: Loki @@ -667,7 +648,7 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // clean up the caller to remove the line @@ -685,6 +666,12 @@ declare "pod_logs" { "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } @@ -717,7 +704,7 @@ declare "pod_logs" { } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // clean up the caller to remove the line @@ -735,6 +722,12 @@ declare "pod_logs" { "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } @@ -1026,14 +1019,14 @@ declare "alloy_integration" { } alloy_integration_discovery "alloy_in_logs" { - port_name = "grafana" + port_name = "http-metrics" namespaces = ["logs"] label_selectors = ["app.kubernetes.io/name=alloy-singleton"] } alloy_integration_scrape "alloy_in_logs" { targets = alloy_integration_discovery.alloy_in_logs.output - job_label = "integrations/grafana" + job_label = "integrations/alloy" clustering = true keep_metrics = "up|scrape_samples_scraped|alloy_build_info|alloy_component_controller_running_components|alloy_component_dependencies_wait_seconds|alloy_component_dependencies_wait_seconds_bucket|alloy_component_evaluation_seconds|alloy_component_evaluation_seconds_bucket|alloy_component_evaluation_seconds_count|alloy_component_evaluation_seconds_sum|alloy_component_evaluation_slow_seconds|alloy_config_hash|alloy_resources_machine_rx_bytes_total|alloy_resources_machine_tx_bytes_total|alloy_resources_process_cpu_seconds_total|alloy_resources_process_resident_memory_bytes|alloy_tcp_connections|alloy_wal_samples_appended_total|alloy_wal_storage_active_series|cluster_node_gossip_health_score|cluster_node_gossip_proto_version|cluster_node_gossip_received_events_total|cluster_node_info|cluster_node_lamport_time|cluster_node_peers|cluster_node_update_observers|cluster_transport_rx_bytes_total|cluster_transport_rx_packet_queue_length|cluster_transport_rx_packets_failed_total|cluster_transport_rx_packets_total|cluster_transport_stream_rx_bytes_total|cluster_transport_stream_rx_packets_failed_total|cluster_transport_stream_rx_packets_total|cluster_transport_stream_tx_bytes_total|cluster_transport_stream_tx_packets_failed_total|cluster_transport_stream_tx_packets_total|cluster_transport_streams|cluster_transport_tx_bytes_total|cluster_transport_tx_packet_queue_length|cluster_transport_tx_packets_failed_total|cluster_transport_tx_packets_total|otelcol_exporter_send_failed_spans_total|otelcol_exporter_sent_spans_total|go_gc_duration_seconds_count|go_goroutines|go_memstats_heap_inuse_bytes|loki_process_dropped_lines_total|loki_write_batch_retries_total|loki_write_dropped_bytes_total|loki_write_dropped_entries_total|loki_write_encoded_bytes_total|loki_write_mutated_bytes_total|loki_write_mutated_entries_total|loki_write_request_duration_seconds_bucket|loki_write_sent_bytes_total|loki_write_sent_entries_total|process_cpu_seconds_total|process_start_time_seconds|otelcol_processor_batch_batch_send_size_bucket|otelcol_processor_batch_metadata_cardinality|otelcol_processor_batch_timeout_trigger_send_total|prometheus_remote_storage_bytes_total|prometheus_remote_storage_enqueue_retries_total|prometheus_remote_storage_highest_timestamp_in_seconds|prometheus_remote_storage_metadata_bytes_total|prometheus_remote_storage_queue_highest_sent_timestamp_seconds|prometheus_remote_storage_samples_dropped_total|prometheus_remote_storage_samples_failed_total|prometheus_remote_storage_samples_pending|prometheus_remote_storage_samples_retried_total|prometheus_remote_storage_samples_total|prometheus_remote_storage_sent_batch_duration_seconds_bucket|prometheus_remote_storage_sent_batch_duration_seconds_count|prometheus_remote_storage_sent_batch_duration_seconds_sum|prometheus_remote_storage_shard_capacity|prometheus_remote_storage_shards|prometheus_remote_storage_shards_desired|prometheus_remote_storage_shards_max|prometheus_remote_storage_shards_min|prometheus_remote_storage_succeeded_samples_total|prometheus_remote_write_wal_samples_appended_total|prometheus_remote_write_wal_storage_active_series|prometheus_sd_discovered_targets|prometheus_target_interval_length_seconds_count|prometheus_target_interval_length_seconds_sum|prometheus_target_scrapes_exceeded_sample_limit_total|prometheus_target_scrapes_sample_duplicate_timestamp_total|prometheus_target_scrapes_sample_out_of_bounds_total|prometheus_target_scrapes_sample_out_of_order_total|prometheus_target_sync_length_seconds_sum|prometheus_wal_watcher_current_segment|otelcol_receiver_accepted_spans_total|otelcol_receiver_refused_spans_total|rpc_server_duration_milliseconds_bucket|scrape_duration_seconds|traces_exporter_send_failed_spans|traces_exporter_send_failed_spans_total|traces_exporter_sent_spans|traces_exporter_sent_spans_total|traces_loadbalancer_backend_outcome|traces_loadbalancer_num_backends|traces_receiver_accepted_spans|traces_receiver_accepted_spans_total|traces_receiver_refused_spans|traces_receiver_refused_spans_total" scrape_interval = "60s" @@ -1210,7 +1203,7 @@ declare "grafana_integration" { } argument "job_label" { - comment = "The job label to add for all Loki metrics (default: integrations/grafana)" + comment = "The job label to add for all Grafana metrics (default: integrations/grafana)" optional = true } @@ -1258,9 +1251,27 @@ declare "grafana_integration" { // drop metrics that match the drop_metrics regex rule { source_labels = ["__name__"] - regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + regex = coalesce(argument.drop_metrics.value, "") action = "drop" } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // the grafana-mixin expects the instance label to be the node name + rule { + source_labels = ["node"] + target_label = "instance" + replacement = "$1" + } + rule { + action = "labeldrop" + regex = "node" + } } } diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml b/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml index 39b186ccd6..5e2480eeb9 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml @@ -316,17 +316,6 @@ data: scheme = "http" scrape_interval = "60s" max_cache_size = 100000 - forward_to = [prometheus.relabel.kube_state_metrics.receiver] - } - - prometheus.relabel "kube_state_metrics" { - max_cache_size = 100000 - - rule { - action = "keep" - source_labels = ["namespace"] - regex = "collectors|logs|metrics|o11y" - } forward_to = argument.metrics_destinations.value } @@ -361,7 +350,7 @@ data: targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" clustering = true - keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes" + keep_metrics = "up|scrape_samples_scraped|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes|node_arp_entries|node_boot_time_seconds|node_context_switches_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk_read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_filefd_allocated|node_filefd_maximum|node_filesystem_avail_bytes|node_filesystem_device_error|node_filesystem_files|node_filesystem_files_free|node_filesystem_readonly|node_filesystem_size_bytes|node_intr_total|node_load1|node_load15|node_load5|node_md_disks|node_md_disks_required|node_memory_Active_anon_bytes|node_memory_Active_bytes|node_memory_Active_file_bytes|node_memory_AnonHugePages_bytes|node_memory_AnonPages_bytes|node_memory_Bounce_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_CommitLimit_bytes|node_memory_Committed_AS_bytes|node_memory_DirectMap1G_bytes|node_memory_DirectMap2M_bytes|node_memory_DirectMap4k_bytes|node_memory_Dirty_bytes|node_memory_HugePages_Free|node_memory_HugePages_Rsvd|node_memory_HugePages_Surp|node_memory_HugePages_Total|node_memory_Hugepagesize_bytes|node_memory_Inactive_anon_bytes|node_memory_Inactive_bytes|node_memory_Inactive_file_bytes|node_memory_Mapped_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_Shmem_bytes|node_memory_ShmemHugePages_bytes|node_memory_Slab_bytes|node_memory_SReclaimable_bytes|node_memory_SUnreclaim_bytes|node_memory_SwapTotal_bytes|node_memory_VmallocChunk_bytes|node_memory_VmallocTotal_bytes|node_memory_VmallocUsed_bytes|node_memory_Writeback_bytes|node_memory_WritebackTmp_bytes|node_netstat_Icmp_InErrors|node_netstat_Icmp_InMsgs|node_netstat_Icmp_OutMsgs|node_netstat_Icmp6_InErrors|node_netstat_Icmp6_InMsgs|node_netstat_Icmp6_OutMsgs|node_netstat_IpExt_InOctets|node_netstat_IpExt_OutOctets|node_netstat_Tcp_InErrs|node_netstat_Tcp_InSegs|node_netstat_Tcp_OutRsts|node_netstat_Tcp_OutSegs|node_netstat_Tcp_RetransSegs|node_netstat_TcpExt_ListenDrops|node_netstat_TcpExt_ListenOverflows|node_netstat_TcpExt_TCPSynRetrans|node_netstat_Udp_InDatagrams|node_netstat_Udp_InErrors|node_netstat_Udp_NoPorts|node_netstat_Udp_OutDatagrams|node_netstat_Udp_RcvbufErrors|node_netstat_Udp_SndbufErrors|node_netstat_Udp6_InDatagrams|node_netstat_Udp6_InErrors|node_netstat_Udp6_NoPorts|node_netstat_Udp6_OutDatagrams|node_netstat_Udp6_RcvbufErrors|node_netstat_Udp6_SndbufErrors|node_netstat_UdpLite_InErrors|node_network_carrier|node_network_info|node_network_mtu_bytes|node_network_receive_compressed_total|node_network_receive_errs_total|node_network_receive_fifo_total|node_network_receive_multicast_total|node_network_receive_packets_total|node_network_speed_bytes|node_network_transmit_compressed_total|node_network_transmit_errs_total|node_network_transmit_fifo_total|node_network_transmit_multicast_total|node_network_transmit_packets_total|node_network_transmit_queue_length|node_network_up|node_nf_conntrack_entries|node_nf_conntrack_entries_limit|node_os_info|node_procs_running|node_sockstat_FRAG_inuse|node_sockstat_FRAG6_inuse|node_sockstat_RAW_inuse|node_sockstat_RAW6_inuse|node_sockstat_sockets_used|node_sockstat_TCP_alloc|node_sockstat_TCP_inuse|node_sockstat_TCP_mem|node_sockstat_TCP_mem_bytes|node_sockstat_TCP_orphan|node_sockstat_TCP_tw|node_sockstat_TCP6_inuse|node_sockstat_UDP_inuse|node_sockstat_UDP_mem|node_sockstat_UDP_mem_bytes|node_sockstat_UDP6_inuse|node_sockstat_UDPLITE_inuse|node_sockstat_UDPLITE6_inuse|node_softnet_dropped_total|node_softnet_processed_total|node_softnet_times_squeezed_total|node_systemd_service_restart_total|node_systemd_unit_state|node_textfile_scrape_error|node_time_zone_offset_seconds|node_timex_estimated_error_seconds|node_timex_maxerror_seconds|node_timex_offset_seconds|node_timex_sync_status|node_uname_info|node_vmstat_oom_kill|node_vmstat_pgfault|node_vmstat_pgmajfault|node_vmstat_pgpgin|node_vmstat_pgpgout|node_vmstat_pswpin|node_vmstat_pswpout|process_max_fds|process_open_fds" scheme = "http" scrape_interval = "60s" max_cache_size = 100000 @@ -668,21 +657,15 @@ data: "tmp_container_runtime", ] } - // Integration: Loki + // Integration: Grafana stage.match { selector = "{job=\"integrations/grafana\",instance=\"grafana\",namespace=~\"o11y\"}" // extract some of the fields from the log line stage.logfmt { mapping = { - "timestamp" = "t", + "ts" = "t", "level" = "", - "logger" = "", - "type" = "", - "caller" = "caller", - "org_id" = "org_id", - "tenant" = "tenant", - "user" = "user", } } @@ -694,23 +677,21 @@ data: } // reset the timestamp to the extracted value stage.timestamp { - source = "timestamp" + source = "ts" format = "RFC3339Nano" } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } - // set the structured metadata values - stage.structured_metadata { - values = { - "caller" = "caller", - "org_id" = "org_id", - "tenant" = "tenant", - "user" = "user", - } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" } + } // Integration: Loki @@ -742,7 +723,7 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // clean up the caller to remove the line @@ -760,6 +741,12 @@ data: "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } } @@ -792,7 +779,7 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } // clean up the caller to remove the line @@ -810,6 +797,12 @@ data: "user" = "user", } } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "mimir-drop-log-level" + } } @@ -1101,14 +1094,14 @@ data: } alloy_integration_discovery "alloy_in_logs" { - port_name = "grafana" + port_name = "http-metrics" namespaces = ["logs"] label_selectors = ["app.kubernetes.io/name=alloy-singleton"] } alloy_integration_scrape "alloy_in_logs" { targets = alloy_integration_discovery.alloy_in_logs.output - job_label = "integrations/grafana" + job_label = "integrations/alloy" clustering = true keep_metrics = "up|scrape_samples_scraped|alloy_build_info|alloy_component_controller_running_components|alloy_component_dependencies_wait_seconds|alloy_component_dependencies_wait_seconds_bucket|alloy_component_evaluation_seconds|alloy_component_evaluation_seconds_bucket|alloy_component_evaluation_seconds_count|alloy_component_evaluation_seconds_sum|alloy_component_evaluation_slow_seconds|alloy_config_hash|alloy_resources_machine_rx_bytes_total|alloy_resources_machine_tx_bytes_total|alloy_resources_process_cpu_seconds_total|alloy_resources_process_resident_memory_bytes|alloy_tcp_connections|alloy_wal_samples_appended_total|alloy_wal_storage_active_series|cluster_node_gossip_health_score|cluster_node_gossip_proto_version|cluster_node_gossip_received_events_total|cluster_node_info|cluster_node_lamport_time|cluster_node_peers|cluster_node_update_observers|cluster_transport_rx_bytes_total|cluster_transport_rx_packet_queue_length|cluster_transport_rx_packets_failed_total|cluster_transport_rx_packets_total|cluster_transport_stream_rx_bytes_total|cluster_transport_stream_rx_packets_failed_total|cluster_transport_stream_rx_packets_total|cluster_transport_stream_tx_bytes_total|cluster_transport_stream_tx_packets_failed_total|cluster_transport_stream_tx_packets_total|cluster_transport_streams|cluster_transport_tx_bytes_total|cluster_transport_tx_packet_queue_length|cluster_transport_tx_packets_failed_total|cluster_transport_tx_packets_total|otelcol_exporter_send_failed_spans_total|otelcol_exporter_sent_spans_total|go_gc_duration_seconds_count|go_goroutines|go_memstats_heap_inuse_bytes|loki_process_dropped_lines_total|loki_write_batch_retries_total|loki_write_dropped_bytes_total|loki_write_dropped_entries_total|loki_write_encoded_bytes_total|loki_write_mutated_bytes_total|loki_write_mutated_entries_total|loki_write_request_duration_seconds_bucket|loki_write_sent_bytes_total|loki_write_sent_entries_total|process_cpu_seconds_total|process_start_time_seconds|otelcol_processor_batch_batch_send_size_bucket|otelcol_processor_batch_metadata_cardinality|otelcol_processor_batch_timeout_trigger_send_total|prometheus_remote_storage_bytes_total|prometheus_remote_storage_enqueue_retries_total|prometheus_remote_storage_highest_timestamp_in_seconds|prometheus_remote_storage_metadata_bytes_total|prometheus_remote_storage_queue_highest_sent_timestamp_seconds|prometheus_remote_storage_samples_dropped_total|prometheus_remote_storage_samples_failed_total|prometheus_remote_storage_samples_pending|prometheus_remote_storage_samples_retried_total|prometheus_remote_storage_samples_total|prometheus_remote_storage_sent_batch_duration_seconds_bucket|prometheus_remote_storage_sent_batch_duration_seconds_count|prometheus_remote_storage_sent_batch_duration_seconds_sum|prometheus_remote_storage_shard_capacity|prometheus_remote_storage_shards|prometheus_remote_storage_shards_desired|prometheus_remote_storage_shards_max|prometheus_remote_storage_shards_min|prometheus_remote_storage_succeeded_samples_total|prometheus_remote_write_wal_samples_appended_total|prometheus_remote_write_wal_storage_active_series|prometheus_sd_discovered_targets|prometheus_target_interval_length_seconds_count|prometheus_target_interval_length_seconds_sum|prometheus_target_scrapes_exceeded_sample_limit_total|prometheus_target_scrapes_sample_duplicate_timestamp_total|prometheus_target_scrapes_sample_out_of_bounds_total|prometheus_target_scrapes_sample_out_of_order_total|prometheus_target_sync_length_seconds_sum|prometheus_wal_watcher_current_segment|otelcol_receiver_accepted_spans_total|otelcol_receiver_refused_spans_total|rpc_server_duration_milliseconds_bucket|scrape_duration_seconds|traces_exporter_send_failed_spans|traces_exporter_send_failed_spans_total|traces_exporter_sent_spans|traces_exporter_sent_spans_total|traces_loadbalancer_backend_outcome|traces_loadbalancer_num_backends|traces_receiver_accepted_spans|traces_receiver_accepted_spans_total|traces_receiver_refused_spans|traces_receiver_refused_spans_total" scrape_interval = "60s" @@ -1285,7 +1278,7 @@ data: } argument "job_label" { - comment = "The job label to add for all Loki metrics (default: integrations/grafana)" + comment = "The job label to add for all Grafana metrics (default: integrations/grafana)" optional = true } @@ -1333,9 +1326,27 @@ data: // drop metrics that match the drop_metrics regex rule { source_labels = ["__name__"] - regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + regex = coalesce(argument.drop_metrics.value, "") action = "drop" } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // the grafana-mixin expects the instance label to be the node name + rule { + source_labels = ["node"] + target_label = "instance" + replacement = "$1" + } + rule { + action = "labeldrop" + regex = "node" + } } } @@ -2099,7 +2110,7 @@ data: // K8s Attributes Processor otelcol.processor.k8sattributes "default" { extract { - metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time"] + metadata = ["k8s.namespace.name","k8s.pod.name","k8s.deployment.name","k8s.statefulset.name","k8s.daemonset.name","k8s.cronjob.name","k8s.job.name","k8s.node.name","k8s.pod.uid","k8s.pod.start_time","k8s.container.name"] } pod_association { source { diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/values.yaml b/charts/k8s-monitoring/docs/examples/meta-monitoring/values.yaml index e4653ffbb8..d01935d111 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/values.yaml +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/values.yaml @@ -107,24 +107,15 @@ clusterMetrics: enabled: false kube-state-metrics: enabled: true - namespaces: - - collectors - - logs - - metrics - - o11y - extraMetricProcessingRules: |- - rule { - action = "keep" - source_labels = ["namespace"] - regex = "collectors|logs|metrics|o11y" - } + namespaces: collectors,logs,metrics,o11y metricsTuning: useDefaultAllowList: false includeMetrics: [(.+)] node-exporter: enabled: true - useIntegrationAllowList: true deploy: true + metricsTuning: + useIntegrationAllowList: true windows-exporter: enabled: false deploy: false @@ -140,6 +131,7 @@ nodeLogs: podLogs: enabled: true + collector: alloy-singleton labelsToKeep: - app - app_kubernetes_io_name @@ -151,7 +143,6 @@ podLogs: - pod - service_name gatherMethod: kubernetesApi - collector: alloy-singleton namespaces: - collectors - logs @@ -165,6 +156,20 @@ applicationObservability: thriftHttp: enabled: true port: 14268 + processors: + k8sattributes: + metadata: + - k8s.namespace.name + - k8s.pod.name + - k8s.deployment.name + - k8s.statefulset.name + - k8s.daemonset.name + - k8s.cronjob.name + - k8s.job.name + - k8s.node.name + - k8s.pod.uid + - k8s.pod.start_time + - k8s.container.name # Collectors alloy-singleton: diff --git a/charts/k8s-monitoring/tests/integration/integration-tempo/.rendered/output.yaml b/charts/k8s-monitoring/tests/integration/integration-tempo/.rendered/output.yaml index 15ecd053ff..664c43d122 100644 --- a/charts/k8s-monitoring/tests/integration/integration-tempo/.rendered/output.yaml +++ b/charts/k8s-monitoring/tests/integration/integration-tempo/.rendered/output.yaml @@ -761,7 +761,7 @@ data: } argument "job_label" { - comment = "The job label to add for all tempo metrics (default: integrations/tempo)" + comment = "The job label to add for all Tempo metrics (default: integrations/tempo)" optional = true } @@ -1148,7 +1148,7 @@ data: "tmp_container_runtime", ] } - // Integration: tempo + // Integration: Tempo stage.match { selector = "{integration=\"tempo\",instance=\"tempo\"}" @@ -1176,6 +1176,12 @@ data: expression = "(ts=[^ ]+\\s+)" replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "tempo-drop-log-level" + } } diff --git a/charts/k8s-monitoring/tests/integration/service-integrations/grafana/.rendered/output.yaml b/charts/k8s-monitoring/tests/integration/service-integrations/grafana/.rendered/output.yaml index cef352b894..aed7c1818c 100644 --- a/charts/k8s-monitoring/tests/integration/service-integrations/grafana/.rendered/output.yaml +++ b/charts/k8s-monitoring/tests/integration/service-integrations/grafana/.rendered/output.yaml @@ -583,7 +583,7 @@ data: } argument "job_label" { - comment = "The job label to add for all Loki metrics (default: integrations/grafana)" + comment = "The job label to add for all Grafana metrics (default: integrations/grafana)" optional = true } @@ -631,9 +631,27 @@ data: // drop metrics that match the drop_metrics regex rule { source_labels = ["__name__"] - regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + regex = coalesce(argument.drop_metrics.value, "") action = "drop" } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // the grafana-mixin expects the instance label to be the node name + rule { + source_labels = ["node"] + target_label = "instance" + replacement = "$1" + } + rule { + action = "labeldrop" + regex = "node" + } } } @@ -1104,17 +1122,15 @@ data: "tmp_container_runtime", ] } - // Integration: Loki + // Integration: Grafana stage.match { selector = "{job=\"integrations/grafana\",instance=\"grafana\"}" // extract some of the fields from the log line stage.logfmt { mapping = { - "timestamp" = "t", + "ts" = "t", "level" = "", - "logger" = "", - "type" = "", } } @@ -1126,14 +1142,21 @@ data: } // reset the timestamp to the extracted value stage.timestamp { - source = "timestamp" + source = "ts" format = "RFC3339Nano" } // remove the timestamp from the log line stage.replace { - expression = "( t=[^ ]+\\s+)" + expression = `(?:^|\s+)(t=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "grafana-drop-log-level" + } + } // Only keep the labels that are defined in the `keepLabels` list. diff --git a/charts/k8s-monitoring/tests/integration/service-integrations/loki/.rendered/output.yaml b/charts/k8s-monitoring/tests/integration/service-integrations/loki/.rendered/output.yaml index 70b8124d46..0d22e466c5 100644 --- a/charts/k8s-monitoring/tests/integration/service-integrations/loki/.rendered/output.yaml +++ b/charts/k8s-monitoring/tests/integration/service-integrations/loki/.rendered/output.yaml @@ -1173,9 +1173,15 @@ data: } // remove the timestamp from the log line stage.replace { - expression = "(ts=[^ ]+\\s+)" + expression = `(?:^|\s+)(ts=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+[^ ]*\s+)` replace = "" } + // drop certain log levels + stage.drop { + source = "level" + expression = "(?i)(debug)" + drop_counter_reason = "loki-drop-log-level" + } }