From 9619cb3a3712737add15f53f0d0d13432f5adcfc Mon Sep 17 00:00:00 2001 From: esara Date: Tue, 3 Jan 2023 21:32:51 -0500 Subject: [PATCH] kube-prometheus move from coreos to prometheus-operator --- base_operator_stack.jsonnet | 4 +- jsonnetfile.json | 10 +- jsonnetfile.lock.json | 128 +- main.jsonnet | 9 +- manifests/alertmanager-alertmanager.yaml | 36 +- manifests/alertmanager-networkPolicy.yaml | 42 + .../alertmanager-podDisruptionBudget.yaml | 19 + manifests/alertmanager-prometheusRule.yaml | 139 + manifests/alertmanager-secret.yaml | 41 +- manifests/alertmanager-service.yaml | 15 +- manifests/alertmanager-serviceAccount.yaml | 7 + manifests/alertmanager-serviceMonitor.yaml | 15 +- manifests/grafana-config.yaml | 11 +- manifests/grafana-dashboardDatasources.yaml | 23 +- manifests/grafana-dashboardDefinitions.yaml | 37062 +++++++--------- manifests/grafana-dashboardSources.yaml | 6 + manifests/grafana-deployment.yaml | 74 +- manifests/grafana-networkPolicy.yaml | 29 + manifests/grafana-prometheusRule.yaml | 33 + manifests/grafana-service.yaml | 9 +- manifests/grafana-serviceAccount.yaml | 6 + manifests/grafana-serviceMonitor.yaml | 7 +- manifests/kube-state-metrics-clusterRole.yaml | 34 +- ...kube-state-metrics-clusterRoleBinding.yaml | 4 +- manifests/kube-state-metrics-deployment.yaml | 67 +- .../kube-state-metrics-networkPolicy.yaml | 31 + .../kube-state-metrics-prometheusRule.yaml | 65 + manifests/kube-state-metrics-service.yaml | 6 +- .../kube-state-metrics-serviceAccount.yaml | 5 +- .../kube-state-metrics-serviceMonitor.yaml | 11 +- manifests/node-exporter-clusterRole.yaml | 6 + .../node-exporter-clusterRoleBinding.yaml | 6 + manifests/node-exporter-daemonset.yaml | 56 +- manifests/node-exporter-networkPolicy.yaml | 29 + manifests/node-exporter-prometheusRule.yaml | 316 + manifests/node-exporter-service.yaml | 6 +- manifests/node-exporter-serviceAccount.yaml | 6 + manifests/node-exporter-serviceMonitor.yaml | 6 +- manifests/prometheus-adapter-apiService.yaml | 5 + manifests/prometheus-adapter-clusterRole.yaml | 6 + ...er-clusterRoleAggregatedMetricsReader.yaml | 5 + ...prometheus-adapter-clusterRoleBinding.yaml | 6 + ...s-adapter-clusterRoleBindingDelegator.yaml | 6 + ...us-adapter-clusterRoleServerResources.yaml | 6 + manifests/prometheus-adapter-configMap.yaml | 44 +- manifests/prometheus-adapter-deployment.yaml | 65 +- .../prometheus-adapter-networkPolicy.yaml | 23 + ...rometheus-adapter-podDisruptionBudget.yaml | 17 + ...metheus-adapter-roleBindingAuthReader.yaml | 5 + manifests/prometheus-adapter-service.yaml | 9 +- .../prometheus-adapter-serviceAccount.yaml | 6 + .../prometheus-adapter-serviceMonitor.yaml | 28 + manifests/prometheus-clusterRole.yaml | 6 + manifests/prometheus-clusterRoleBinding.yaml | 6 + ...llerManagerPrometheusDiscoveryService.yaml | 15 - ...eus-kubeDnsPrometheusDiscoveryService.yaml | 15 - ...beSchedulerPrometheusDiscoveryService.yaml | 15 - manifests/prometheus-networkPolicy.yaml | 40 + .../prometheus-operator-serviceMonitor.yaml | 6 +- manifests/prometheus-podDisruptionBudget.yaml | 19 + manifests/prometheus-prometheus.yaml | 40 +- manifests/prometheus-prometheusRule.yaml | 280 + manifests/prometheus-roleBindingConfig.yaml | 6 + ...metheus-roleBindingSpecificNamespaces.yaml | 18 + manifests/prometheus-roleConfig.yaml | 6 + .../prometheus-roleSpecificNamespaces.yaml | 66 + manifests/prometheus-rules.yaml | 1759 - manifests/prometheus-service.yaml | 15 +- manifests/prometheus-serviceAccount.yaml | 7 + manifests/prometheus-serviceMonitor.yaml | 15 +- .../prometheus-serviceMonitorApiserver.yaml | 74 - .../prometheus-serviceMonitorCoreDNS.yaml | 19 - ...s-serviceMonitorKubeControllerManager.yaml | 55 - ...rometheus-serviceMonitorKubeScheduler.yaml | 18 - .../prometheus-serviceMonitorKubelet.yaml | 77 - .../setup/0namespace-prometheusRule.yaml | 83 + ...managerConfigCustomResourceDefinition.yaml | 3401 ++ ...0alertmanagerCustomResourceDefinition.yaml | 1915 +- ...r-0podmonitorCustomResourceDefinition.yaml | 352 +- ...erator-0probeCustomResourceDefinition.yaml | 566 + ...r-0prometheusCustomResourceDefinition.yaml | 2427 +- ...rometheusruleCustomResourceDefinition.yaml | 35 +- ...ervicemonitorCustomResourceDefinition.yaml | 224 +- ...-0thanosrulerCustomResourceDefinition.yaml | 1438 +- .../prometheus-operator-clusterRole.yaml | 14 +- ...rometheus-operator-clusterRoleBinding.yaml | 3 +- .../setup/prometheus-operator-deployment.yaml | 42 +- .../prometheus-operator-networkPolicy.yaml | 29 + .../prometheus-operator-prometheusRule.yaml | 100 + .../setup/prometheus-operator-service.yaml | 4 +- .../prometheus-operator-serviceAccount.yaml | 4 +- modules/arm_exporter.jsonnet | 2 +- modules/elasticsearch_exporter.jsonnet | 2 +- modules/k3s-overrides.jsonnet | 2 +- modules/metallb.jsonnet | 2 +- modules/nginx.jsonnet | 6 +- modules/smtp_relay.jsonnet | 2 +- modules/speedtest_exporter.jsonnet | 4 +- modules/traefik.jsonnet | 2 +- modules/ups_exporter.jsonnet | 2 +- scripts/build_images.sh | 14 +- utils.libsonnet | 2 +- vars.jsonnet | 2 +- 103 files changed, 27599 insertions(+), 24267 deletions(-) create mode 100644 manifests/alertmanager-networkPolicy.yaml create mode 100644 manifests/alertmanager-podDisruptionBudget.yaml create mode 100644 manifests/alertmanager-prometheusRule.yaml create mode 100644 manifests/grafana-networkPolicy.yaml create mode 100644 manifests/grafana-prometheusRule.yaml create mode 100644 manifests/kube-state-metrics-networkPolicy.yaml create mode 100644 manifests/kube-state-metrics-prometheusRule.yaml create mode 100644 manifests/node-exporter-networkPolicy.yaml create mode 100644 manifests/node-exporter-prometheusRule.yaml create mode 100644 manifests/prometheus-adapter-networkPolicy.yaml create mode 100644 manifests/prometheus-adapter-podDisruptionBudget.yaml create mode 100644 manifests/prometheus-adapter-serviceMonitor.yaml delete mode 100644 manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml delete mode 100644 manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml delete mode 100644 manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml create mode 100644 manifests/prometheus-networkPolicy.yaml create mode 100644 manifests/prometheus-podDisruptionBudget.yaml create mode 100644 manifests/prometheus-prometheusRule.yaml delete mode 100644 manifests/prometheus-rules.yaml delete mode 100644 manifests/prometheus-serviceMonitorApiserver.yaml delete mode 100644 manifests/prometheus-serviceMonitorCoreDNS.yaml delete mode 100644 manifests/prometheus-serviceMonitorKubeControllerManager.yaml delete mode 100644 manifests/prometheus-serviceMonitorKubeScheduler.yaml delete mode 100644 manifests/prometheus-serviceMonitorKubelet.yaml create mode 100644 manifests/setup/0namespace-prometheusRule.yaml create mode 100644 manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml create mode 100644 manifests/setup/prometheus-operator-0probeCustomResourceDefinition.yaml create mode 100644 manifests/setup/prometheus-operator-networkPolicy.yaml create mode 100644 manifests/setup/prometheus-operator-prometheusRule.yaml diff --git a/base_operator_stack.jsonnet b/base_operator_stack.jsonnet index 03328e9..6997966 100644 --- a/base_operator_stack.jsonnet +++ b/base_operator_stack.jsonnet @@ -1,4 +1,4 @@ -local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet'; +local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet'; local utils = import 'utils.libsonnet'; local vars = import 'vars.jsonnet'; @@ -60,7 +60,7 @@ local vars = import 'vars.jsonnet'; }, }, plugins: vars.grafana.plugins, - env: vars.grafana.env + env: vars.grafana.env, }, }, //--------------------------------------- diff --git a/jsonnetfile.json b/jsonnetfile.json index 420c9b3..7d75ad2 100644 --- a/jsonnetfile.json +++ b/jsonnetfile.json @@ -4,10 +4,18 @@ { "source": { "git": { - "remote": "https://github.com/coreos/kube-prometheus.git", + "remote": "https://github.com/prometheus-operator/kube-prometheus.git", "subdir": "jsonnet/kube-prometheus" } }, + "version": "main" + }, + { + "source": { + "git": { + "remote": "https://github.com/ksonnet/ksonnet-lib.git" + } + }, "version": "master" } ], diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 49ad402..06cee35 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -8,48 +8,48 @@ "subdir": "grafana" } }, - "version": "57b4365eacda291b82e0d55ba7eec573a8198dda", - "sum": "92DWADwGjnCfpZaL7Q07C0GZayxBziGla/O03qWea34=" + "version": "d039275e4916aceae1c137120882e01d857787ac", + "sum": "515vMn4x4tP8vegL4HLW0nDO5+njGTgnDZB5OOhtsCI=" }, { "source": { "git": { - "remote": "https://github.com/coreos/etcd.git", - "subdir": "Documentation/etcd-mixin" + "remote": "https://github.com/etcd-io/etcd.git", + "subdir": "contrib/mixin" } }, - "version": "d8c8f903eee10b8391abaef7758c38b2cd393c55", - "sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398=" + "version": "e73f55d4e94666c99558baa2fd4e365aeaca4dc4", + "sum": "IkDHlaE0gvvcPjSNurFT+jQ2aCOAbqHF1WVmXbAgkds=" }, { "source": { "git": { - "remote": "https://github.com/coreos/kube-prometheus.git", - "subdir": "jsonnet/kube-prometheus" + "remote": "https://github.com/grafana/grafana.git", + "subdir": "grafana-mixin" } }, - "version": "17989b42aa10b1c6afa07043cb05bcd5ae492284", - "sum": "2FR289B1LGUf5tTN4PXBj5TjRX7okSFxE8uHkSslzDQ=" + "version": "1120f9e255760a3c104b57871fcb91801e934382", + "sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs=" }, { "source": { "git": { - "remote": "https://github.com/coreos/prometheus-operator.git", - "subdir": "jsonnet/prometheus-operator" + "remote": "https://github.com/grafana/grafonnet-lib.git", + "subdir": "grafonnet" } }, - "version": "e31c69f9b5c6555e0f4a5c1f39d0f03182dd6b41", - "sum": "WggWVWZ+CBEUThQCztSaRELbtqdXf9s3OFzf06HbYNA=" + "version": "30280196507e0fe6fa978a3e0eaca3a62844f817", + "sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc=" }, { "source": { "git": { "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" + "subdir": "grafonnet-7.0" } }, - "version": "8fb95bd89990e493a8534205ee636bfcb8db67bd", - "sum": "tDuuSKE9f4Ew2bjBM33Rs6behLEAzkmKkShSt+jpAak=" + "version": "30280196507e0fe6fa978a3e0eaca3a62844f817", + "sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM=" }, { "source": { @@ -58,8 +58,8 @@ "subdir": "grafana-builder" } }, - "version": "881db2241f0c5007c3e831caf34b0c645202b4ab", - "sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE=" + "version": "d68f9a6e0b1af7c4c4056dc2b43fb8f3bac01f43", + "sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0=" }, { "source": { @@ -69,8 +69,7 @@ } }, "version": "0d2f82676817bbf9e4acf6495b2090205f323b9f", - "sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=", - "name": "ksonnet" + "sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=" }, { "source": { @@ -79,38 +78,70 @@ "subdir": "" } }, - "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b", - "sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY=" + "version": "3c386687c1f8ceb6b79ff887c4a934e9cee1b90a", + "sum": "H8lcnk7gQEUoRi58/xq+JTfd2PcjJUjMQHgxGklUiFY=" }, { "source": { "git": { - "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", - "subdir": "lib/promgrafonnet" + "remote": "https://github.com/kubernetes/kube-state-metrics.git", + "subdir": "jsonnet/kube-state-metrics" } }, - "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b", - "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc=" + "version": "71200632a6c97e9b87166dbb27489798a05effe3", + "sum": "4PJ2ROxODsoYO/1Y70+dgLZVjW5zlfzB+TDpxJBHwaI=" }, { "source": { "git": { "remote": "https://github.com/kubernetes/kube-state-metrics.git", - "subdir": "jsonnet/kube-state-metrics" + "subdir": "jsonnet/kube-state-metrics-mixin" } }, - "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7", - "sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA=" + "version": "71200632a6c97e9b87166dbb27489798a05effe3", + "sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk=" }, { "source": { "git": { - "remote": "https://github.com/kubernetes/kube-state-metrics.git", - "subdir": "jsonnet/kube-state-metrics-mixin" + "remote": "https://github.com/prometheus-operator/kube-prometheus.git", + "subdir": "jsonnet/kube-prometheus" + } + }, + "version": "37d00082289c587f5a02a343ba23cfbe167000e2", + "sum": "5onAaPSrjnmgXIAsypnx0W/sIA7iTsHCeCjPrhGxj5A=" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus-operator/prometheus-operator.git", + "subdir": "jsonnet/mixin" + } + }, + "version": "df4cbd9526d8ff8e404a903b7ed2532847551d19", + "sum": "GQmaVFJwKMiD/P4n3N2LrAZVcwutriWrP8joclDtBYQ=", + "name": "prometheus-operator-mixin" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus-operator/prometheus-operator.git", + "subdir": "jsonnet/prometheus-operator" } }, - "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7", - "sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8=" + "version": "df4cbd9526d8ff8e404a903b7ed2532847551d19", + "sum": "wJ1E8XxYJ0RJrUuDNWLzE7bzo6JrH7P9q1lAu/xi4Ow=" + }, + { + "source": { + "git": { + "remote": "https://github.com/prometheus/alertmanager.git", + "subdir": "doc/alertmanager-mixin" + } + }, + "version": "8afd462a9eaa3979bddf7bd6278bede4bc1f30e2", + "sum": "PsK+V7oETCPKu2gLoPfqY0wwPKH9TzhNj6o2xezjjXc=", + "name": "alertmanager" }, { "source": { @@ -119,8 +150,8 @@ "subdir": "docs/node-mixin" } }, - "version": "08ce3c6dd430deb51798826701a395e460620d60", - "sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc=" + "version": "a3bd2e13052929663dbd7d680fab4a952efb1de6", + "sum": "TwdaTm0Z++diiLyaKAAimmC6hBL7XbrJc0RHhBCpAdU=" }, { "source": { @@ -129,9 +160,30 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "74207c04655e1fd93eea0e9a5d2f31b1cbc4d3d0", - "sum": "lEzhZ8gllSfAO4kmXeTwl4W0anapIeFd5GCaCNuDe18=", + "version": "84c6f0e58444a452a5e2e19d14221409d2b9d790", + "sum": "LRx0tbMnoE1p8KEn+i81j2YsA5Sgt3itE5Y6jBf5eOQ=", "name": "prometheus" + }, + { + "source": { + "git": { + "remote": "https://github.com/pyrra-dev/pyrra.git", + "subdir": "config/crd/bases" + } + }, + "version": "2584cefb8e6859eb9ee103df199e232cd0066aab", + "sum": "d1550yhsX4VxdVN7b0gWT0cido/W90P6OGLzLqPwZcs=" + }, + { + "source": { + "git": { + "remote": "https://github.com/thanos-io/thanos.git", + "subdir": "mixin" + } + }, + "version": "3327c510076a77f876ac26e699d5252a61fc529a", + "sum": "Io++1+lp1oQVoQiVRSCXUiGdTIRPV7aL6Ewgs3bShEs=", + "name": "thanos-mixin" } ], "legacyImports": false diff --git a/main.jsonnet b/main.jsonnet index 681366a..e08470f 100644 --- a/main.jsonnet +++ b/main.jsonnet @@ -1,10 +1,10 @@ local utils = import 'utils.libsonnet'; local vars = import 'vars.jsonnet'; -local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') - + (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') - + (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet') - + (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') +local kp = (import 'kube-prometheus/main.libsonnet') + { values+:: { common+: { namespace: 'monitoring' } } } + + (import 'kube-prometheus/addons/anti-affinity.libsonnet') + + (import 'kube-prometheus/platforms/kops-coredns.libsonnet') + + (import 'kube-prometheus/platforms/kubeadm.libsonnet') // Additional modules are loaded dynamically from vars.jsonnet + utils.join_objects([module.file for module in vars.modules if module.enabled]) // Load K3s customized modules @@ -14,6 +14,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') // Load image versions last to override default from modules + (import 'image_sources_versions.jsonnet'); + // Generate core modules { ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } // First generate operator resources except the serviceMonitors diff --git a/manifests/alertmanager-alertmanager.yaml b/manifests/alertmanager-alertmanager.yaml index cedc323..8086809 100644 --- a/manifests/alertmanager-alertmanager.yaml +++ b/manifests/alertmanager-alertmanager.yaml @@ -2,7 +2,11 @@ apiVersion: monitoring.coreos.com/v1 kind: Alertmanager metadata: labels: - alertmanager: main + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 name: main namespace: monitoring spec: @@ -11,22 +15,36 @@ spec: preferredDuringSchedulingIgnoredDuringExecution: - podAffinityTerm: labelSelector: - matchExpressions: - - key: alertmanager - operator: In - values: - - main + matchLabels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus namespaces: - monitoring topologyKey: kubernetes.io/hostname weight: 100 - image: prom/alertmanager:v0.21.0 + image: quay.io/prometheus/alertmanager:v0.25.0 nodeSelector: kubernetes.io/os: linux - replicas: 1 + podMetadata: + labels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 + replicas: 3 + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 4m + memory: 100Mi securityContext: fsGroup: 2000 runAsNonRoot: true runAsUser: 1000 serviceAccountName: alertmanager-main - version: v0.21.0 + version: 0.25.0 diff --git a/manifests/alertmanager-networkPolicy.yaml b/manifests/alertmanager-networkPolicy.yaml new file mode 100644 index 0000000..d84f477 --- /dev/null +++ b/manifests/alertmanager-networkPolicy.yaml @@ -0,0 +1,42 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 + name: alertmanager-main + namespace: monitoring +spec: + egress: + - {} + ingress: + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: prometheus + ports: + - port: 9093 + protocol: TCP + - port: 8080 + protocol: TCP + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: alertmanager + ports: + - port: 9094 + protocol: TCP + - port: 9094 + protocol: UDP + podSelector: + matchLabels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + policyTypes: + - Egress + - Ingress diff --git a/manifests/alertmanager-podDisruptionBudget.yaml b/manifests/alertmanager-podDisruptionBudget.yaml new file mode 100644 index 0000000..85cae78 --- /dev/null +++ b/manifests/alertmanager-podDisruptionBudget.yaml @@ -0,0 +1,19 @@ +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 + name: alertmanager-main + namespace: monitoring +spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/alertmanager-prometheusRule.yaml b/manifests/alertmanager-prometheusRule.yaml new file mode 100644 index 0000000..534bca2 --- /dev/null +++ b/manifests/alertmanager-prometheusRule.yaml @@ -0,0 +1,139 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 + prometheus: k8s + role: alert-rules + name: alertmanager-main-rules + namespace: monitoring +spec: + groups: + - name: alertmanager.rules + rules: + - alert: AlertmanagerFailedReload + annotations: + description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload + summary: Reloading an Alertmanager configuration has failed. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0 + for: 10m + labels: + severity: critical + - alert: AlertmanagerMembersInconsistent + annotations: + description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent + summary: A member of an Alertmanager cluster has not found all other cluster members. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]) + < on (namespace,service) group_left + count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])) + for: 15m + labels: + severity: critical + - alert: AlertmanagerFailedToSendAlerts + annotations: + description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts + summary: An Alertmanager instance failed to send notifications. + expr: | + ( + rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m]) + / + rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m]) + ) + > 0.01 + for: 5m + labels: + severity: warning + - alert: AlertmanagerClusterFailedToSendAlerts + annotations: + description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts + summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration. + expr: | + min by (namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m]) + / + rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m]) + ) + > 0.01 + for: 5m + labels: + severity: critical + - alert: AlertmanagerClusterFailedToSendAlerts + annotations: + description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts + summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration. + expr: | + min by (namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m]) + / + rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m]) + ) + > 0.01 + for: 5m + labels: + severity: warning + - alert: AlertmanagerConfigInconsistent + annotations: + description: Alertmanager instances within the {{$labels.job}} cluster have different configurations. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent + summary: Alertmanager instances within the same cluster have different configurations. + expr: | + count by (namespace,service) ( + count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) + ) + != 1 + for: 20m + labels: + severity: critical + - alert: AlertmanagerClusterDown + annotations: + description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown + summary: Half or more of the Alertmanager instances within the same cluster are down. + expr: | + ( + count by (namespace,service) ( + avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5 + ) + / + count by (namespace,service) ( + up{job="alertmanager-main",namespace="monitoring"} + ) + ) + >= 0.5 + for: 5m + labels: + severity: critical + - alert: AlertmanagerClusterCrashlooping + annotations: + description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping + summary: Half or more of the Alertmanager instances within the same cluster are crashlooping. + expr: | + ( + count by (namespace,service) ( + changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4 + ) + / + count by (namespace,service) ( + up{job="alertmanager-main",namespace="monitoring"} + ) + ) + >= 0.5 + for: 5m + labels: + severity: critical diff --git a/manifests/alertmanager-secret.yaml b/manifests/alertmanager-secret.yaml index e019922..54dfb43 100644 --- a/manifests/alertmanager-secret.yaml +++ b/manifests/alertmanager-secret.yaml @@ -1,7 +1,12 @@ apiVersion: v1 -data: {} kind: Secret metadata: + labels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 name: alertmanager-main namespace: monitoring stringData: @@ -12,21 +17,28 @@ stringData: - "equal": - "namespace" - "alertname" - "source_match": - "severity": "critical" - "target_match_re": - "severity": "warning|info" + "source_matchers": + - "severity = critical" + "target_matchers": + - "severity =~ warning|info" - "equal": - "namespace" - "alertname" - "source_match": - "severity": "warning" - "target_match_re": - "severity": "info" + "source_matchers": + - "severity = warning" + "target_matchers": + - "severity = info" + - "equal": + - "namespace" + "source_matchers": + - "alertname = InfoInhibitor" + "target_matchers": + - "severity = info" "receivers": - "name": "Default" - "name": "Watchdog" - "name": "Critical" + - "name": "null" "route": "group_by": - "namespace" @@ -35,10 +47,13 @@ stringData: "receiver": "Default" "repeat_interval": "12h" "routes": - - "match": - "alertname": "Watchdog" + - "matchers": + - "alertname = Watchdog" "receiver": "Watchdog" - - "match": - "severity": "critical" + - "matchers": + - "alertname = InfoInhibitor" + "receiver": "null" + - "matchers": + - "severity = critical" "receiver": "Critical" type: Opaque diff --git a/manifests/alertmanager-service.yaml b/manifests/alertmanager-service.yaml index df4c9ff..33c960d 100644 --- a/manifests/alertmanager-service.yaml +++ b/manifests/alertmanager-service.yaml @@ -2,7 +2,11 @@ apiVersion: v1 kind: Service metadata: labels: - alertmanager: main + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 name: alertmanager-main namespace: monitoring spec: @@ -10,7 +14,12 @@ spec: - name: web port: 9093 targetPort: web + - name: reloader-web + port: 8080 + targetPort: reloader-web selector: - alertmanager: main - app: alertmanager + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus sessionAffinity: ClientIP diff --git a/manifests/alertmanager-serviceAccount.yaml b/manifests/alertmanager-serviceAccount.yaml index 5c06d5e..dc2eb85 100644 --- a/manifests/alertmanager-serviceAccount.yaml +++ b/manifests/alertmanager-serviceAccount.yaml @@ -1,5 +1,12 @@ apiVersion: v1 +automountServiceAccountToken: false kind: ServiceAccount metadata: + labels: + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 name: alertmanager-main namespace: monitoring diff --git a/manifests/alertmanager-serviceMonitor.yaml b/manifests/alertmanager-serviceMonitor.yaml index 548af0d..492a9f0 100644 --- a/manifests/alertmanager-serviceMonitor.yaml +++ b/manifests/alertmanager-serviceMonitor.yaml @@ -2,13 +2,22 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: labels: - k8s-app: alertmanager - name: alertmanager + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.25.0 + name: alertmanager-main namespace: monitoring spec: endpoints: - interval: 30s port: web + - interval: 30s + port: reloader-web selector: matchLabels: - alertmanager: main + app.kubernetes.io/component: alert-router + app.kubernetes.io/instance: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/grafana-config.yaml b/manifests/grafana-config.yaml index 750e3c8..10d9c6a 100644 --- a/manifests/grafana-config.yaml +++ b/manifests/grafana-config.yaml @@ -1,8 +1,15 @@ apiVersion: v1 -data: - grafana.ini: W2F1dGguYW5vbnltb3VzXQplbmFibGVkID0gZmFsc2UKW2F1dGguYmFzaWNdCmVuYWJsZWQgPSBmYWxzZQpbc2Vzc2lvbl0KcHJvdmlkZXIgPSBtZW1vcnkKW3NtdHBdCmVuYWJsZWQgPSB0cnVlCmZyb21fYWRkcmVzcyA9IG15ZW1haWxAZ21haWwuY29tCmZyb21fbmFtZSA9IEdyYWZhbmEgQWxlcnQKaG9zdCA9IHNtdHAtc2VydmVyLm1vbml0b3Jpbmcuc3ZjOjI1CnBhc3N3b3JkID0gCnNraXBfdmVyaWZ5ID0gdHJ1ZQp1c2VyID0gCg== kind: Secret metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana-config namespace: monitoring +stringData: + grafana.ini: | + [date_formats] + default_timezone = UTC type: Opaque diff --git a/manifests/grafana-dashboardDatasources.yaml b/manifests/grafana-dashboardDatasources.yaml index 22d4748..f4c4cde 100644 --- a/manifests/grafana-dashboardDatasources.yaml +++ b/manifests/grafana-dashboardDatasources.yaml @@ -1,8 +1,27 @@ apiVersion: v1 -data: - datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0= kind: Secret metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana-datasources namespace: monitoring +stringData: + datasources.yaml: |- + { + "apiVersion": 1, + "datasources": [ + { + "access": "proxy", + "editable": false, + "name": "prometheus", + "orgId": 1, + "type": "prometheus", + "url": "http://prometheus-k8s.monitoring.svc:9090", + "version": 1 + } + ] + } type: Opaque diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index e7a908e..9ec56fe 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -2,7 +2,7 @@ apiVersion: v1 items: - apiVersion: v1 data: - apiserver.json: |- + alertmanager-overview.json: |- { "__inputs": [ @@ -17,122 +17,18 @@ items: }, "editable": false, "gnetId": null, - "graphTooltip": 0, + "graphTooltip": 1, "hideControls": false, "id": null, "links": [ ], - "panels": [ - { - "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", - "datasource": null, - "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", - "gridPos": { - "h": 2, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "mode": "markdown", - "span": 12, - "title": "Notice", - "type": "text" - } - ], - "refresh": "10s", + "refresh": "30s", "rows": [ { "collapse": false, "collapsed": false, "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 3, - "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?", - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "apiserver_request:availability30d{verb=\"all\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Availability (30d) > 99.000%", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, { "aliasColors": { @@ -141,13 +37,13 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "decimals": 3, - "description": "How much error budget is left looking at our 0.990% availability gurantees?", - "fill": 10, + "description": "current set of alerts stored in the Alertmanager", + "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 4, + "id": 2, "legend": { "alignAsTable": false, "avg": false, @@ -155,7 +51,7 @@ items: "max": false, "min": false, "rightSide": false, - "show": true, + "show": false, "sideWidth": null, "total": false, "values": false @@ -175,15 +71,15 @@ items: ], "spaceLength": 10, - "span": 8, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "100 * (apiserver_request:availability30d{verb=\"all\"} - 0.990000)", + "expr": "sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"}) by (namespace,service,instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "errorbudget", + "legendFormat": "{{instance}}", "refId": "A" } ], @@ -192,9 +88,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "ErrorBudget (30d) > 99.000%", + "title": "Alerts", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -210,8 +106,7 @@ items: }, "yaxes": [ { - "decimals": 3, - "format": "percentunit", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -219,8 +114,7 @@ items: "show": true }, { - "decimals": 3, - "format": "percentunit", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -228,105 +122,6 @@ items: "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 3, - "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?", - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 5, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "apiserver_request:availability30d{verb=\"read\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Read Availability (30d)", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" }, { "aliasColors": { @@ -336,12 +131,13 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "How many read requests (LIST,GET) per second do the apiservers get by code?", - "fill": 10, + "description": "rate of successful and invalid alerts received by the Alertmanager", + "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 6, + "id": 3, "legend": { "alignAsTable": false, "avg": false, @@ -349,7 +145,7 @@ items: "max": false, "min": false, "rightSide": false, - "show": true, + "show": false, "sideWidth": null, "total": false, "values": false @@ -366,34 +162,26 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - { - "alias": "/2../i", - "color": "#56A64B" - }, - { - "alias": "/3../i", - "color": "#F2CC0C" - }, - { - "alias": "/4../i", - "color": "#3274D9" - }, - { - "alias": "/5../i", - "color": "#E02F44" - } + ], "spaceLength": 10, - "span": 3, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\"})", + "expr": "sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{ code }}", + "legendFormat": "{{instance}} Received", "refId": "A" + }, + { + "expr": "sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Invalid", + "refId": "B" } ], "thresholds": [ @@ -401,9 +189,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Read SLI - Requests", + "title": "Alerts receive rate", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -419,7 +207,7 @@ items: }, "yaxes": [ { - "format": "reqps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -427,7 +215,7 @@ items: "show": true }, { - "format": "reqps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -435,7 +223,20 @@ items: "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -444,12 +245,13 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?", + "description": "rate of successful and invalid notifications sent by the Alertmanager", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 7, + "id": 4, "legend": { "alignAsTable": false, "avg": false, @@ -457,7 +259,7 @@ items: "max": false, "min": false, "rightSide": false, - "show": true, + "show": false, "sideWidth": null, "total": false, "values": false @@ -472,21 +274,27 @@ items: "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, + "repeat": "integration", "seriesOverrides": [ ], "spaceLength": 10, - "span": 3, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\"})", + "expr": "sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{ resource }}", + "legendFormat": "{{instance}} Total", "refId": "A" + }, + { + "expr": "sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Failed", + "refId": "B" } ], "thresholds": [ @@ -494,9 +302,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Read SLI - Errors", + "title": "$integration: Notifications Send Rate", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -512,19 +320,19 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "percentunit", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -537,12 +345,13 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?", + "description": "latency of notifications sent by the Alertmanager", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 8, + "id": 5, "legend": { "alignAsTable": false, "avg": false, @@ -550,7 +359,7 @@ items: "max": false, "min": false, "rightSide": false, - "show": true, + "show": false, "sideWidth": null, "total": false, "values": false @@ -565,21 +374,34 @@ items: "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, + "repeat": "integration", "seriesOverrides": [ ], "spaceLength": 10, - "span": 3, "stack": false, "steppedLine": false, "targets": [ { - "expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"read\"}", + "expr": "histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{ resource }}", + "legendFormat": "{{instance}} 99th Percentile", "refId": "A" + }, + { + "expr": "histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Median", + "refId": "B" + }, + { + "expr": "sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Average", + "refId": "C" } ], "thresholds": [ @@ -587,9 +409,9 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Read SLI - Duration", + "title": "$integration: Notification Duration", "tooltip": { - "shared": false, + "shared": true, "sort": 0, "value_type": "individual" }, @@ -626,11 +448,204 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", + "showTitle": true, + "title": "Notifications", "titleSize": "h6", "type": "row" - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "alertmanager-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(alertmanager_alerts, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "service", + "multi": false, + "name": "service", + "options": [ + + ], + "query": "label_values(alertmanager_alerts, service)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "all", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "integration", + "options": [ + + ], + "query": "label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Alertmanager / Overview", + "uid": "alertmanager-overview", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-alertmanager-overview + namespace: monitoring +- apiVersion: v1 + data: + apiserver.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", + "datasource": null, + "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "mode": "markdown", + "span": 12, + "title": "Notice", + "type": "text" + } + ], + "refresh": "10s", + "rows": [ { "collapse": false, "collapsed": false, @@ -646,7 +661,7 @@ items: ], "datasource": "$datasource", "decimals": 3, - "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?", + "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?", "format": "percentunit", "gauge": { "maxValue": 100, @@ -658,8 +673,213 @@ items: "gridPos": { }, - "id": 9, - "interval": null, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Availability (30d) > 99.000%", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 3, + "description": "How much error budget is left looking at our 0.990% availability guarantees?", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "errorbudget", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "ErrorBudget (30d) > 99.000%", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "decimals": 3, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 3, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 3, + "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, "links": [ ], @@ -698,7 +918,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "apiserver_request:availability30d{verb=\"write\"}", + "expr": "apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -706,7 +926,7 @@ items: } ], "thresholds": "", - "title": "Write Availability (30d)", + "title": "Read Availability (30d)", "tooltip": { "shared": false }, @@ -729,19 +949,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?", + "description": "How many read requests (LIST,GET) per second do the apiservers get by code?", "fill": 10, + "fillGradient": 0, "gridPos": { }, - "id": 10, + "id": 6, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -782,7 +1004,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\"})", + "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ code }}", @@ -794,7 +1016,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Write SLI - Requests", + "title": "Read SLI - Requests", "tooltip": { "shared": false, "sort": 0, @@ -837,19 +1059,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?", + "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 11, + "id": 7, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -875,7 +1099,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\"})", + "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ resource }}", @@ -887,7 +1111,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Write SLI - Errors", + "title": "Read SLI - Errors", "tooltip": { "shared": false, "sort": 0, @@ -930,19 +1154,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?", + "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 12, + "id": 8, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -968,7 +1194,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"write\"}", + "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ resource }}", @@ -980,7 +1206,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Write SLI - Duration", + "title": "Read SLI - Duration", "tooltip": { "shared": false, "sort": 0, @@ -1028,6 +1254,96 @@ items: "collapse": false, "collapsed": false, "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 3, + "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Write Availability (30d)", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, { "aliasColors": { @@ -1036,19 +1352,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?", + "fill": 10, + "fillGradient": 0, "gridPos": { }, - "id": 13, + "id": 10, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, - "show": false, + "rightSide": true, + "show": true, "sideWidth": null, "total": false, "values": false @@ -1065,18 +1384,33 @@ items: "renderer": "flot", "repeat": null, "seriesOverrides": [ - + { + "alias": "/2../i", + "color": "#56A64B" + }, + { + "alias": "/3../i", + "color": "#F2CC0C" + }, + { + "alias": "/4../i", + "color": "#3274D9" + }, + { + "alias": "/5../i", + "color": "#E02F44" + } ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 3, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)", + "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", + "legendFormat": "{{ code }}", "refId": "A" } ], @@ -1085,7 +1419,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Work Queue Add Rate", + "title": "Write SLI - Requests", "tooltip": { "shared": false, "sort": 0, @@ -1103,19 +1437,19 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "reqps", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "ops", + "format": "reqps", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -1128,19 +1462,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 14, + "id": 11, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, - "show": false, + "rightSide": true, + "show": true, "sideWidth": null, "total": false, "values": false @@ -1160,15 +1497,15 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)", + "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", + "legendFormat": "{{ resource }}", "refId": "A" } ], @@ -1177,7 +1514,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Work Queue Depth", + "title": "Write SLI - Errors", "tooltip": { "shared": false, "sort": 0, @@ -1195,7 +1532,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -1203,7 +1540,7 @@ items: "show": true }, { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -1220,22 +1557,25 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 15, + "id": 12, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -1252,15 +1592,15 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 3, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))", + "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", + "legendFormat": "{{ resource }}", "refId": "A" } ], @@ -1269,7 +1609,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Work Queue Latency", + "title": "Write SLI - Duration", "tooltip": { "shared": false, "sort": 0, @@ -1326,18 +1666,20 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 16, + "id": 13, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, - "show": true, + "rightSide": true, + "show": false, "sideWidth": null, "total": false, "values": false @@ -1357,15 +1699,15 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}", + "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{instance}} {{name}}", "refId": "A" } ], @@ -1374,7 +1716,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "ETCD Cache Entry Total", + "title": "Work Queue Add Rate", "tooltip": { "shared": false, "sort": 0, @@ -1392,7 +1734,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -1400,7 +1742,7 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -1418,18 +1760,20 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 17, + "id": 14, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, - "show": true, + "rightSide": true, + "show": false, "sideWidth": null, "total": false, "values": false @@ -1449,23 +1793,16 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)", + "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} hit", + "legendFormat": "{{instance}} {{name}}", "refId": "A" - }, - { - "expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} miss", - "refId": "B" } ], "thresholds": [ @@ -1473,7 +1810,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "ETCD Cache Hit/Miss Rate", + "title": "Work Queue Depth", "tooltip": { "shared": false, "sort": 0, @@ -1491,7 +1828,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -1499,7 +1836,7 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -1517,21 +1854,23 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 18, + "id": 15, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -1548,23 +1887,16 @@ items: ], "spaceLength": 10, - "span": 4, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} get", + "legendFormat": "{{instance}} {{name}}", "refId": "A" - }, - { - "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} miss", - "refId": "B" } ], "thresholds": [ @@ -1572,7 +1904,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "ETCD Cache Duration 99th Quantile", + "title": "Work Queue Latency", "tooltip": { "shared": false, "sort": 0, @@ -1594,7 +1926,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -1602,7 +1934,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -1629,17 +1961,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 19, + "id": 16, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -1721,17 +2055,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 20, + "id": 17, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -1757,7 +2093,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1813,17 +2149,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 21, + "id": 18, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -1919,7 +2257,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -1932,20 +2270,19 @@ items: { "allValue": null, "current": { - "text": "prod", - "value": "prod" + }, "datasource": "$datasource", "hide": 2, "includeAll": false, - "label": null, + "label": "cluster", "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(apiserver_request_total, cluster)", - "refresh": 1, + "query": "label_values(up{job=\"apiserver\"}, cluster)", + "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", @@ -1970,7 +2307,7 @@ items: "options": [ ], - "query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)", + "query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -2020,6 +2357,11 @@ items: } kind: ConfigMap metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana-dashboard-apiserver namespace: monitoring - apiVersion: v1 @@ -2084,6 +2426,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -2129,7 +2472,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -2186,6 +2529,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -2231,7 +2575,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -2334,6 +2678,9 @@ items: "id": 5, "lines": true, "linewidth": 1, + "links": [ + + ], "minSpan": 24, "nullPointMode": "null as zero", "renderer": "flot", @@ -2529,7 +2876,7 @@ items: ], "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2538,7 +2885,7 @@ items: "step": 10 }, { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2547,7 +2894,7 @@ items: "step": 10 }, { - "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2556,7 +2903,7 @@ items: "step": 10 }, { - "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2565,7 +2912,7 @@ items: "step": 10 }, { - "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2574,7 +2921,7 @@ items: "step": 10 }, { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2583,7 +2930,7 @@ items: "step": 10 }, { - "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2592,7 +2939,7 @@ items: "step": 10 }, { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "table", "instant": true, "intervalFactor": 2, @@ -2626,6 +2973,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -2671,7 +3019,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -2728,6 +3076,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -2773,7 +3122,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -2860,6 +3209,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 24, @@ -2903,7 +3253,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -2960,6 +3310,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 24, @@ -3003,7 +3354,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -3071,6 +3422,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 24, @@ -3114,7 +3466,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -3171,6 +3523,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 24, @@ -3214,7 +3567,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -3291,6 +3644,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 24, @@ -3334,7 +3688,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -3391,6 +3745,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 24, @@ -3434,7 +3789,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{namespace}}", @@ -3491,6 +3846,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 24, @@ -3538,7 +3894,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs[$interval:$resolution])) by (instance))", + "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", @@ -3595,6 +3951,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 24, @@ -3642,7 +3999,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs[$interval:$resolution])) by (instance))", + "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", @@ -3797,7 +4154,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -3806,6 +4163,32 @@ items: "refresh": 1, "regex": "", "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false } ] }, @@ -3845,6 +4228,11 @@ items: } kind: ConfigMap metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana-dashboard-cluster-total namespace: monitoring - apiVersion: v1 @@ -3898,7 +4286,11 @@ items: }, "id": 2, - "interval": null, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, "links": [ ], @@ -3937,7 +4329,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(up{job=\"kube-controller-manager\"})", + "expr": "sum(up{cluster=\"$cluster\", job=\"kube-controller-manager\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -3969,10 +4361,12 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 3, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -4005,10 +4399,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", + "legendFormat": "{{cluster}} {{instance}} {{name}}", "refId": "A" } ], @@ -4074,10 +4468,12 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 4, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -4110,10 +4506,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", + "legendFormat": "{{cluster}} {{instance}} {{name}}", "refId": "A" } ], @@ -4179,10 +4575,12 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 5, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -4215,10 +4613,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{name}}", + "legendFormat": "{{cluster}} {{instance}} {{name}}", "refId": "A" } ], @@ -4284,340 +4682,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, - { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Kube API Request Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 8, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Post Request Latency 99th Quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 8, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Get Request Latency 99th Quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -4643,7 +4720,336 @@ items: "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}", + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Kube API Request Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Post Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{verb}} {{url}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Get Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -4699,17 +5105,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 10, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -4735,7 +5143,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -4791,17 +5199,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 11, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -4827,7 +5237,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}", + "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -4897,7 +5307,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -4911,6 +5321,32 @@ items: "allValue": null, "current": { + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + }, "datasource": "$datasource", "hide": 0, @@ -4921,7 +5357,7 @@ items: "options": [ ], - "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)", + "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -4971,11 +5407,16 @@ items: } kind: ConfigMap metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana-dashboard-controller-manager namespace: monitoring - apiVersion: v1 data: - coredns-dashboard.json: |- + grafana-overview.json: |- { "annotations": { "list": [ @@ -4986,238 +5427,248 @@ items: "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + + ], + "type": "dashboard" + }, "type": "dashboard" } ] }, - "description": "A dashboard for the CoreDNS DNS server.", "editable": true, - "gnetId": 5926, + "gnetId": null, "graphTooltip": 0, - "id": 14, - "iteration": 1549319226130, + "id": 3085, + "iteration": 1631554945276, "links": [ ], "panels": [ { - "aliasColors": { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "mappings": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { + ], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + ] }, "gridPos": { - "h": 7, - "w": 8, + "h": 5, + "w": 6, "x": 0, "y": 0 }, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": { - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (proto)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{proto}}", - "refId": "A", - "step": 60 }, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ { - "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "total", - "refId": "B", - "step": 60 + "expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" } - ], - "thresholds": [ - ], "timeFrom": null, - "timeRegions": [ - - ], "timeShift": null, - "title": "Requests (total)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "title": "Firing Alerts", + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ ] }, - "yaxes": [ - { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": { + }, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true + "expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "", + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "timeFrom": null, + "timeShift": null, + "title": "Dashboards", + "type": "stat" }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "align": null, + "displayMode": "auto" + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + ] }, "gridPos": { - "h": 7, - "w": 8, - "x": 8, + "h": 5, + "w": 12, + "x": 12, "y": 0 }, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "id": 10, + "options": { + "showHeader": true }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - }, - { - "alias": "other", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "8.1.3", "targets": [ { - "expr": "sum(rate(coredns_dns_request_type_count_total{instance=~\"$instance\"}[5m])) by (type)", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 60 + "expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" } - ], - "thresholds": [ - ], "timeFrom": null, - "timeRegions": [ - - ], "timeShift": null, - "title": "Requests (by qtype)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ + "title": "Build Info", + "transformations": [ { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true + "id": "labelsToFields", + "options": { + + } }, { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "branch": true, + "container": true, + "goversion": true, + "namespace": true, + "pod": true, + "revision": true + }, + "indexByName": { + "Time": 7, + "Value": 11, + "branch": 4, + "container": 8, + "edition": 2, + "goversion": 6, + "instance": 1, + "job": 0, + "namespace": 9, + "pod": 10, + "revision": 5, + "version": 3 + }, + "renameByName": { + + } + } } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "table" }, { "aliasColors": { @@ -5226,130 +5677,27 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (zone)", - "intervalFactor": 2, - "legendFormat": "{{zone}}", - "refId": "A", - "step": 60 + ] }, - { - "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))", - "intervalFactor": 2, - "legendFormat": "total", - "refId": "B", - "step": 60 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Requests (by zone)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "overrides": [ ] }, - "yaxes": [ - { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, "fill": 1, - "grid": { - - }, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 7 + "y": 5 }, - "id": 10, + "hiddenSeries": false, + "id": 2, "legend": { "avg": false, "current": false, @@ -5360,38 +5708,28 @@ items: "values": false }, "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, - "pointradius": 5, + "pluginVersion": "8.1.3", + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "total", - "yaxis": 2 - } + ], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(coredns_dns_request_do_count_total{instance=~\"$instance\"}[5m]))", - "intervalFactor": 2, - "legendFormat": "DO", - "refId": "A", - "step": 40 - }, - { - "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))", - "intervalFactor": 2, - "legendFormat": "total", - "refId": "B", - "step": 40 + "expr": "sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ", + "interval": "", + "legendFormat": "{{status_code}}", + "refId": "A" } ], "thresholds": [ @@ -5402,11 +5740,11 @@ items: ], "timeShift": null, - "title": "Requests (DO bit)", + "title": "RPS", "tooltip": { "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -5420,18 +5758,22 @@ items: }, "yaxes": [ { - "format": "pps", + "$$hashKey": "object:157", + "format": "reqps", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "pps", + "$$hashKey": "object:158", + "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ], "yaxis": { @@ -5446,20 +5788,27 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + ] + }, + "overrides": [ + + ] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, - "w": 6, + "h": 8, + "w": 12, "x": 12, - "y": 7 + "y": 5 }, - "id": 9, + "hiddenSeries": false, + "id": 4, "legend": { "avg": false, "current": false, @@ -5470,53 +5819,43 @@ items: "values": false }, "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, - "pointradius": 5, + "pluginVersion": "8.1.3", + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "tcp:90", - "yaxis": 2 - }, - { - "alias": "tcp:99 ", - "yaxis": 2 - }, - { - "alias": "tcp:50", - "yaxis": 2 - } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", - "intervalFactor": 2, - "legendFormat": "{{proto}}:99 ", - "refId": "A", - "step": 60 + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "interval": "", + "legendFormat": "99th Percentile", + "refId": "A" }, { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", - "intervalFactor": 2, - "legendFormat": "{{proto}}:90", - "refId": "B", - "step": 60 + "exemplar": true, + "expr": "histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "interval": "", + "legendFormat": "50th Percentile", + "refId": "B" }, { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", - "intervalFactor": 2, - "legendFormat": "{{proto}}:50", - "refId": "C", - "step": 60 + "exemplar": true, + "expr": "sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Average", + "refId": "C" } ], "thresholds": [ @@ -5527,11 +5866,11 @@ items: ], "timeShift": null, - "title": "Requests (size, udp)", + "title": "Request Latency", "tooltip": { "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -5545,17 +5884,21 @@ items: }, "yaxes": [ { - "format": "bytes", + "$$hashKey": "object:210", + "format": "ms", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { + "$$hashKey": "object:211", "format": "short", + "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ], @@ -5563,916 +5906,509 @@ items: "align": false, "alignLevel": null } - }, - { - "aliasColors": { + } + ], + "schemaVersion": 30, + "style": "dark", + "tags": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "dev-cortex", + "value": "dev-cortex" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "datasource", + "options": [ + ], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 7 - }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "tcp:90", - "yaxis": 1 - }, - { - "alias": "tcp:99 ", - "yaxis": 1 - }, - { - "alias": "tcp:50", - "yaxis": 1 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", - "intervalFactor": 2, - "legendFormat": "{{proto}}:99 ", - "refId": "A", - "step": 60 - }, - { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", - "intervalFactor": 2, - "legendFormat": "{{proto}}:90", - "refId": "B", - "step": 60 + { + "allValue": ".*", + "current": { + "selected": false, + "text": [ + "default/grafana" + ], + "value": [ + "default/grafana" + ] }, - { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", - "intervalFactor": 2, - "legendFormat": "{{proto}}:50", - "refId": "C", - "step": 60 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ + "datasource": "$datasource", + "definition": "label_values(grafana_build_info, job)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "job", + "options": [ - ], - "timeShift": null, - "title": "Requests (size,tcp)", - "tooltip": { - "shared": true, + ], + "query": { + "query": "label_values(grafana_build_info, job)", + "refId": "Billing Admin-job-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, "sort": 0, - "value_type": "cumulative" + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "definition": "label_values(grafana_build_info, instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "instance", + "options": [ - ] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "max": null, - "min": 0, - "show": true + ], + "query": { + "query": "label_values(grafana_build_info, instance)", + "refId": "Billing Admin-instance-Variable-Query" }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false } - }, - { - "aliasColors": { + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Grafana Overview", + "uid": "6be0s85Mk", + "version": 2 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-grafana-overview + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-cluster.json: |- + { + "annotations": { + "list": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 14 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(coredns_dns_response_rcode_count_total{instance=~\"$instance\"}[5m])) by (rcode)", - "intervalFactor": 2, - "legendFormat": "{{rcode}}", - "refId": "A", - "step": 40 - } - ], - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "timeShift": null, - "title": "Responses (by rcode)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 14 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Requests Commitment", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le, job))", - "intervalFactor": 2, - "legendFormat": "99%", - "refId": "A", - "step": 40 - }, - { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le))", - "intervalFactor": 2, - "legendFormat": "90%", - "refId": "B", - "step": 40 + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le))", - "intervalFactor": 2, - "legendFormat": "50%", - "refId": "C", - "step": 40 - } - ], - "thresholds": [ + "aliasColors": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "timeShift": null, - "title": "Responses (duration)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ] - }, - "yaxes": [ - { - "format": "ms", - "logBase": 1, - "max": null, - "min": 0, - "show": true + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Limits Commitment", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "udp:50%", - "yaxis": 1 - }, - { - "alias": "tcp:50%", - "yaxis": 2 - }, - { - "alias": "tcp:90%", - "yaxis": 2 + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, - { - "alias": "tcp:99%", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", - "intervalFactor": 2, - "legendFormat": "{{proto}}:99%", - "refId": "A", - "step": 40 - }, - { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", - "intervalFactor": 2, - "legendFormat": "{{proto}}:90%", - "refId": "B", - "step": 40 - }, - { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", - "intervalFactor": 2, - "legendFormat": "{{proto}}:50%", - "metric": "", - "refId": "C", - "step": 40 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Responses (size, udp)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 21 - }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "udp:50%", - "yaxis": 1 - }, - { - "alias": "tcp:50%", - "yaxis": 1 - }, - { - "alias": "tcp:90%", - "yaxis": 1 - }, - { - "alias": "tcp:99%", - "yaxis": 1 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", - "intervalFactor": 2, - "legendFormat": "{{proto}}:99%", - "refId": "A", - "step": 40 - }, - { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", - "intervalFactor": 2, - "legendFormat": "{{proto}}:90%", - "refId": "B", - "step": 40 - }, - { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ", - "intervalFactor": 2, - "legendFormat": "{{proto}}:50%", - "metric": "", - "refId": "C", - "step": 40 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Responses (size, tcp)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 28 - }, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(coredns_cache_size{instance=~\"$instance\"}) by (type)", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 40 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Cache (size)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 28 - }, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "misses", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\"}[5m])) by (type)", - "intervalFactor": 2, - "legendFormat": "hits:{{type}}", - "refId": "A", - "step": 40 - }, - { - "expr": "sum(rate(coredns_cache_misses_total{instance=~\"$instance\"}[5m])) by (type)", - "intervalFactor": 2, - "legendFormat": "misses", - "refId": "B", - "step": 40 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Cache (hitrate)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "schemaVersion": 16, - "style": "dark", - "tags": [ - "dns", - "coredns" - ], - "templating": { - "list": [ - { - "allValue": ".*", - "current": { - "text": "All", - "value": "$__all" - }, - "datasource": "prometheus", - "definition": "", - "hide": 0, - "includeAll": true, - "label": "Instance", - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "up{job=\"coredns\"}", - "refresh": 1, - "regex": ".*instance=\"(.*?)\".*", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "now": true, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "CoreDNS", - "version": 1 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-coredns-dashboard - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-cluster.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "100px", - "panels": [ { "aliasColors": { @@ -6483,13 +6419,15 @@ items: "datasource": "$datasource", "fill": 1, "format": "percentunit", - "id": 1, + "id": 5, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -6513,7 +6451,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__interval]))", + "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -6523,10 +6461,10 @@ items: "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation", + "title": "Memory Requests Commitment", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "singlestat", @@ -6568,12 +6506,15 @@ items: "datasource": "$datasource", "fill": 1, "format": "percentunit", - "id": 2, + "id": 6, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -6597,7 +6538,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -6607,10 +6548,10 @@ items: "thresholds": "70,80", "timeFrom": null, "timeShift": null, - "title": "CPU Requests Commitment", + "title": "Memory Limits Commitment", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "singlestat", @@ -6641,7 +6582,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -6650,20 +6603,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 3, + "fill": 10, + "id": 7, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -6676,28 +6631,31 @@ items: ], "spaceLength": 10, - "span": 2, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", "format": "time_series", - "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Limits Commitment", + "title": "CPU Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -6725,7 +6683,19 @@ items: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -6735,374 +6705,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "format": "percentunit", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Requests Commitment", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Limits Commitment", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{namespace}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 8, + "id": 8, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -7140,8 +6751,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", "pattern": "Value #A", "thresholds": [ @@ -7158,8 +6770,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down to workloads", - "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", "pattern": "Value #B", "thresholds": [ @@ -7176,6 +6789,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -7194,6 +6808,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -7212,6 +6827,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -7230,6 +6846,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", @@ -7248,6 +6865,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", @@ -7266,8 +6884,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ @@ -7293,7 +6912,7 @@ items: ], "targets": [ { - "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7302,7 +6921,7 @@ items: "step": 10 }, { - "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7311,7 +6930,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7320,7 +6939,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7329,7 +6948,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7338,7 +6957,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7347,7 +6966,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7364,7 +6983,7 @@ items: "title": "CPU Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -7419,11 +7038,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 9, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -7447,7 +7069,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -7463,7 +7085,7 @@ items: "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -7517,11 +7139,14 @@ items: "datasource": "$datasource", "fill": 1, "id": 10, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -7559,8 +7184,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", "pattern": "Value #A", "thresholds": [ @@ -7577,8 +7203,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 0, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down to workloads", - "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", "pattern": "Value #B", "thresholds": [ @@ -7595,6 +7222,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -7613,6 +7241,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -7631,6 +7260,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -7649,6 +7279,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", @@ -7667,6 +7298,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", @@ -7685,8 +7317,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ @@ -7712,7 +7345,7 @@ items: ], "targets": [ { - "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7721,7 +7354,7 @@ items: "step": 10 }, { - "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7730,7 +7363,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7739,7 +7372,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7748,7 +7381,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7757,7 +7390,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7766,7 +7399,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7783,7 +7416,7 @@ items: "title": "Requests by Namespace", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -7840,10 +7473,12 @@ items: "id": 11, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -7881,6 +7516,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", @@ -7899,6 +7535,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", @@ -7917,6 +7554,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -7935,6 +7573,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -7953,6 +7592,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -7971,6 +7611,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", @@ -7989,8 +7630,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down to pods", - "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ @@ -8016,7 +7658,7 @@ items: ], "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8025,7 +7667,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8034,7 +7676,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8043,7 +7685,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8052,7 +7694,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8061,7 +7703,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8078,7 +7720,7 @@ items: "title": "Current Network Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -8116,7 +7758,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Current Network Usage", "titleSize": "h6" }, { @@ -8133,11 +7775,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 12, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8156,12 +7801,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8177,7 +7822,7 @@ items: "title": "Receive Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -8208,19 +7853,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -8231,11 +7864,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 13, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8254,12 +7890,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8275,7 +7911,7 @@ items: "title": "Transmit Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -8312,7 +7948,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Bandwidth", "titleSize": "h6" }, { @@ -8329,11 +7965,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 14, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8352,12 +7991,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8373,7 +8012,7 @@ items: "title": "Average Container Bandwidth by Namespace: Received", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -8404,19 +8043,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -8427,11 +8054,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 15, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8450,12 +8080,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8471,7 +8101,7 @@ items: "title": "Average Container Bandwidth by Namespace: Transmitted", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -8508,7 +8138,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Average Container Bandwidth by Namespace", "titleSize": "h6" }, { @@ -8525,11 +8155,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 16, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8548,12 +8181,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8569,7 +8202,7 @@ items: "title": "Rate of Received Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -8584,7 +8217,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -8600,19 +8233,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -8623,11 +8244,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 17, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8646,12 +8270,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8667,7 +8291,7 @@ items: "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -8682,7 +8306,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -8704,7 +8328,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Rate of Packets", "titleSize": "h6" }, { @@ -8721,11 +8345,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 18, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8744,12 +8371,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8765,7 +8392,7 @@ items: "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -8780,7 +8407,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -8796,19 +8423,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -8819,11 +8434,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 19, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8842,12 +8460,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8863,7 +8481,7 @@ items: "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -8878,7 +8496,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -8900,121 +8518,12 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Rate of Packets Dropped", "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(node_cpu_seconds_total, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Cluster", - "uid": "efa86fd1d0c121a26444b636a3f509a8", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-cluster - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-namespace.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ + }, { "collapse": false, - "height": "100px", + "height": "250px", "panels": [ { "aliasColors": { @@ -9024,20 +8533,23 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 1, + "decimals": -1, + "fill": 10, + "id": 20, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -9050,28 +8562,31 @@ items: ], "spaceLength": 10, - "span": 3, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})", + "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", "format": "time_series", - "instant": true, "intervalFactor": 2, - "refId": "A" + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 } ], - "thresholds": "70,80", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "CPU Utilisation (from requests)", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "type": "singlestat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -9100,270 +8615,6 @@ items: } ] }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilisation (from limits)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilization (from requests)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "format": "percentunit", - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "70,80", - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation (from limits)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "singlestat", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Headlines", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ { "aliasColors": { @@ -9373,12 +8624,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 5, + "id": 21, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -9394,53 +8648,18 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "quota - requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "quota - limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "quota - limits", + "legendFormat": "{{namespace}}", "legendLink": null, "step": 10 } @@ -9450,10 +8669,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -9468,7 +8687,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -9490,7 +8709,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Storage IO", "titleSize": "h6" }, { @@ -9506,12 +8725,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 6, + "id": 22, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -9529,6 +8751,10 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, "span": 12, "stack": false, @@ -9541,14 +8767,15 @@ items: "type": "hidden" }, { - "alias": "CPU Usage", + "alias": "IOPS(Reads)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", @@ -9559,14 +8786,15 @@ items: "unit": "short" }, { - "alias": "CPU Requests", + "alias": "IOPS(Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", @@ -9577,14 +8805,15 @@ items: "unit": "short" }, { - "alias": "CPU Requests %", + "alias": "IOPS(Reads + Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -9592,10 +8821,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "short" }, { - "alias": "CPU Limits", + "alias": "Throughput(Read)", "colorMode": null, "colors": [ @@ -9603,6 +8832,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -9610,10 +8840,10 @@ items: ], "type": "number", - "unit": "short" + "unit": "Bps" }, { - "alias": "CPU Limits %", + "alias": "Throughput(Write)", "colorMode": null, "colors": [ @@ -9621,6 +8851,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -9628,20 +8859,40 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "Bps" }, { - "alias": "Pod", + "alias": "Throughput(Read + Write)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": true, + "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", "thresholds": [ ], @@ -9666,7 +8917,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -9675,7 +8926,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -9684,7 +8935,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -9693,7 +8944,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -9702,13 +8953,22 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 } ], "thresholds": [ @@ -9716,10 +8976,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Current Storage IO", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -9757,12 +9017,126 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Storage IO - Distribution", "titleSize": "h6" - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-k8s-resources-cluster + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ { "collapse": false, - "height": "250px", + "height": "100px", "panels": [ { "aliasColors": { @@ -9772,19 +9146,23 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "format": "percentunit", + "id": 1, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -9794,42 +9172,403 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "quota - requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "quota - limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, - "span": 12, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", "format": "time_series", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation (from requests)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation (from limits)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation (from requests)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation (from limits)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 }, { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - requests", @@ -9837,7 +9576,7 @@ items: "step": 10 }, { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "quota - limits", @@ -9850,10 +9589,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "CPU Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -9868,7 +9607,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9890,7 +9629,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "CPU Usage", "titleSize": "h6" }, { @@ -9906,12 +9645,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 8, + "id": 6, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -9941,7 +9683,7 @@ items: "type": "hidden" }, { - "alias": "Memory Usage", + "alias": "CPU Usage", "colorMode": null, "colors": [ @@ -9949,6 +9691,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", @@ -9956,10 +9699,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests", + "alias": "CPU Requests", "colorMode": null, "colors": [ @@ -9967,6 +9710,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", @@ -9974,10 +9718,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests %", + "alias": "CPU Requests %", "colorMode": null, "colors": [ @@ -9985,6 +9729,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -9995,7 +9740,7 @@ items: "unit": "percentunit" }, { - "alias": "Memory Limits", + "alias": "CPU Limits", "colorMode": null, "colors": [ @@ -10003,6 +9748,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -10010,10 +9756,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Limits %", + "alias": "CPU Limits %", "colorMode": null, "colors": [ @@ -10021,6 +9767,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -10030,60 +9777,6 @@ items: "type": "number", "unit": "percentunit" }, - { - "alias": "Memory Usage (RSS)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Swap)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, { "alias": "Pod", "colorMode": null, @@ -10093,8 +9786,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ @@ -10120,7 +9814,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10129,7 +9823,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10138,7 +9832,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10147,7 +9841,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10156,40 +9850,13 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - }, - { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 } ], "thresholds": [ @@ -10197,10 +9864,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "CPU Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -10238,7 +9905,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "CPU Quota", "titleSize": "h6" }, { @@ -10253,20 +9920,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 9, + "fill": 10, + "id": 7, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -10276,21 +9945,159 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, { - "alias": "Current Receive Bandwidth", + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", "colorMode": null, "colors": [ @@ -10298,6 +10105,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", @@ -10305,10 +10113,10 @@ items: ], "type": "number", - "unit": "Bps" + "unit": "bytes" }, { - "alias": "Current Transmit Bandwidth", + "alias": "Memory Requests", "colorMode": null, "colors": [ @@ -10316,6 +10124,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", @@ -10323,10 +10132,10 @@ items: ], "type": "number", - "unit": "Bps" + "unit": "bytes" }, { - "alias": "Rate of Received Packets", + "alias": "Memory Requests %", "colorMode": null, "colors": [ @@ -10334,6 +10143,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -10341,10 +10151,10 @@ items: ], "type": "number", - "unit": "pps" + "unit": "percentunit" }, { - "alias": "Rate of Transmitted Packets", + "alias": "Memory Limits", "colorMode": null, "colors": [ @@ -10352,6 +10162,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -10359,10 +10170,10 @@ items: ], "type": "number", - "unit": "pps" + "unit": "bytes" }, { - "alias": "Rate of Received Packets Dropped", + "alias": "Memory Limits %", "colorMode": null, "colors": [ @@ -10370,6 +10181,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -10377,10 +10189,10 @@ items: ], "type": "number", - "unit": "pps" + "unit": "percentunit" }, { - "alias": "Rate of Transmitted Packets Dropped", + "alias": "Memory Usage (RSS)", "colorMode": null, "colors": [ @@ -10388,6 +10200,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", @@ -10395,7 +10208,45 @@ items: ], "type": "number", - "unit": "pps" + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" }, { "alias": "Pod", @@ -10406,8 +10257,9 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ @@ -10433,7 +10285,7 @@ items: ], "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10442,7 +10294,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10451,7 +10303,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10460,7 +10312,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10469,7 +10321,7 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10478,13 +10330,31 @@ items: "step": 10 }, { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 + }, + { + "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ @@ -10492,10 +10362,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Current Network Usage", + "title": "Memory Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -10533,7 +10403,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Memory Quota", "titleSize": "h6" }, { @@ -10548,19 +10418,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 10, + "fill": 1, + "id": 9, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -10574,15 +10447,217 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", - "format": "time_series", + "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", "step": 10 } ], @@ -10591,13 +10666,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "Current Network Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -10609,7 +10685,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -10631,7 +10707,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Current Network Usage", "titleSize": "h6" }, { @@ -10647,12 +10723,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 11, + "id": 10, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -10671,12 +10750,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -10689,10 +10768,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "Receive Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10723,19 +10802,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -10745,12 +10812,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 12, + "id": 11, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -10769,12 +10839,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -10787,10 +10857,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10827,7 +10897,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Bandwidth", "titleSize": "h6" }, { @@ -10843,12 +10913,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 13, + "id": 12, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -10867,12 +10940,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -10885,10 +10958,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "Rate of Received Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -10903,7 +10976,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -10919,19 +10992,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -10941,12 +11002,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 14, + "id": 13, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -10965,12 +11029,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -10983,10 +11047,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -11001,7 +11065,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -11023,7 +11087,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Rate of Packets", "titleSize": "h6" }, { @@ -11039,12 +11103,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 15, + "id": 14, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -11063,12 +11130,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -11081,10 +11148,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -11099,7 +11166,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -11115,155 +11182,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Namespace (Pods)", - "uid": "85a562078cdf77779eaa1add43ccec1e", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-namespace - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-node.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ { "aliasColors": { @@ -11273,12 +11192,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 1, + "id": 15, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -11297,12 +11219,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -11315,10 +11237,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -11333,7 +11255,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -11355,7 +11277,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Usage", + "title": "Rate of Packets Dropped", "titleSize": "h6" }, { @@ -11370,19 +11292,23 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 2, + "decimals": -1, + "fill": 10, + "id": 16, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -11395,184 +11321,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "CPU Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "CPU Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", - "format": "table", - "instant": true, + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -11581,14 +11339,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -11616,19 +11373,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Quota", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -11638,12 +11383,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 17, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -11662,12 +11410,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -11680,10 +11428,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage (w/o cache)", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -11698,7 +11446,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -11720,7 +11468,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Storage IO", "titleSize": "h6" }, { @@ -11736,12 +11484,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "id": 18, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -11759,6 +11510,10 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, "span": 12, "stack": false, @@ -11771,14 +11526,15 @@ items: "type": "hidden" }, { - "alias": "Memory Usage", + "alias": "IOPS(Reads)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", @@ -11786,17 +11542,18 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests", + "alias": "IOPS(Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", @@ -11804,17 +11561,18 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "short" }, { - "alias": "Memory Requests %", + "alias": "IOPS(Reads + Writes)", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, + "decimals": -1, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -11822,10 +11580,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "short" }, { - "alias": "Memory Limits", + "alias": "Throughput(Read)", "colorMode": null, "colors": [ @@ -11833,6 +11591,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -11840,10 +11599,10 @@ items: ], "type": "number", - "unit": "bytes" + "unit": "Bps" }, { - "alias": "Memory Limits %", + "alias": "Throughput(Write)", "colorMode": null, "colors": [ @@ -11851,6 +11610,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -11858,10 +11618,10 @@ items: ], "type": "number", - "unit": "percentunit" + "unit": "Bps" }, { - "alias": "Memory Usage (RSS)", + "alias": "Throughput(Read + Write)", "colorMode": null, "colors": [ @@ -11869,6 +11629,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", @@ -11876,43 +11637,7 @@ items: ], "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Cache)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Usage (Swap)", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" + "unit": "Bps" }, { "alias": "Pod", @@ -11922,9 +11647,10 @@ items: ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ @@ -11950,7 +11676,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -11959,7 +11685,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -11968,7 +11694,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=~\"$node\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -11977,7 +11703,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -11986,7 +11712,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=~\"$node\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, @@ -11995,31 +11721,13 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "F", "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 } ], "thresholds": [ @@ -12027,10 +11735,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Current Storage IO", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -12068,7 +11776,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Storage IO - Distribution", "titleSize": "h6" } ], @@ -12085,7 +11793,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -12110,8 +11818,8 @@ items: "options": [ ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", @@ -12132,13 +11840,13 @@ items: "hide": 0, "includeAll": false, "label": null, - "multi": true, - "name": "node", + "multi": false, + "name": "namespace", "options": [ ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)", - "refresh": 1, + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", @@ -12181,17 +11889,22 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Node (Pods)", - "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-k8s-resources-node + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-k8s-resources-namespace namespace: monitoring - apiVersion: v1 data: - k8s-resources-pod.json: |- + k8s-resources-node.json: |- { "annotations": { "list": [ @@ -12221,11 +11934,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 1, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -12242,18 +11958,11 @@ items: "renderer": "flot", "seriesOverrides": [ { - "alias": "requests", + "alias": "max capacity", "color": "#F2495C", + "dashes": true, "fill": 0, - "hideTooltip": true, - "legend": true, - "linewidth": 2, - "stack": false - }, - { - "alias": "limits", - "color": "#FF9830", - "fill": 0, + "hiddenSeries": true, "hideTooltip": true, "legend": true, "linewidth": 2, @@ -12266,26 +11975,18 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", + "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "requests", + "legendFormat": "max capacity", "legendLink": null, "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -12298,7 +11999,7 @@ items: "title": "CPU Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -12345,123 +12046,21 @@ items: { "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 2, - "legend": { - "avg": false, - "current": true, - "max": true, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.25, - "yaxis": "left" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Throttling", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Throttling", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 3, + "id": 2, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -12499,6 +12098,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", @@ -12517,6 +12117,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", @@ -12535,6 +12136,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -12553,6 +12155,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -12571,6 +12174,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -12581,7 +12185,7 @@ items: "unit": "percentunit" }, { - "alias": "Container", + "alias": "Pod", "colorMode": null, "colors": [ @@ -12589,9 +12193,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "container", + "pattern": "pod", "thresholds": [ ], @@ -12616,7 +12221,7 @@ items: ], "targets": [ { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12625,7 +12230,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12634,7 +12239,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12643,7 +12248,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12652,7 +12257,7 @@ items: "step": 10 }, { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -12669,7 +12274,7 @@ items: "title": "CPU Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -12723,12 +12328,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 4, + "id": 3, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -12745,22 +12353,13 @@ items: "renderer": "flot", "seriesOverrides": [ { - "alias": "requests", + "alias": "max capacity", "color": "#F2495C", "dashes": true, "fill": 0, + "hiddenSeries": true, "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, + "legend": true, "linewidth": 2, "stack": false } @@ -12771,26 +12370,18 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{container}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", + "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "requests", + "legendFormat": "max capacity", "legendLink": null, "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -12800,10 +12391,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -12856,12 +12447,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 5, + "id": 4, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -12899,6 +12493,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", @@ -12917,6 +12512,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", @@ -12935,6 +12531,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -12953,6 +12550,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -12971,6 +12569,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -12989,6 +12588,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", @@ -13007,6 +12607,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #G", @@ -13025,6 +12626,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #H", @@ -13035,7 +12637,7 @@ items: "unit": "bytes" }, { - "alias": "Container", + "alias": "Pod", "colorMode": null, "colors": [ @@ -13043,9 +12645,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "container", + "pattern": "pod", "thresholds": [ ], @@ -13070,7 +12673,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -13079,7 +12682,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -13088,7 +12691,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -13097,7 +12700,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -13106,7 +12709,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -13115,7 +12718,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -13124,7 +12727,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -13133,7 +12736,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -13150,7 +12753,7 @@ items: "title": "Memory Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -13190,7 +12793,148 @@ items: "showTitle": true, "title": "Memory Quota", "titleSize": "h6" - }, + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": true, + "name": "node", + "options": [ + + ], + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Node (Pods)", + "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-k8s-resources-node + namespace: monitoring +- apiVersion: v1 + data: + k8s-resources-pod.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ { "collapse": false, "height": "250px", @@ -13204,13 +12948,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 6, + "id": 1, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -13226,7 +12972,24 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - + { + "alias": "requests", + "color": "#F2495C", + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "limits", + "color": "#FF9830", + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } ], "spaceLength": 10, "span": 12, @@ -13234,10 +12997,26 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", "legendLink": null, "step": 10 } @@ -13247,10 +13026,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "CPU Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -13265,7 +13044,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -13287,7 +13066,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "CPU Usage", "titleSize": "h6" }, { @@ -13303,13 +13082,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 7, + "id": 2, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "max": true, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -13333,23 +13114,30 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)", + "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } ], "thresholds": [ - + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.25, + "yaxis": "left" + } ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "CPU Throttling", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -13364,10 +13152,10 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": 0, "show": true }, @@ -13386,7 +13174,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "CPU Throttling", "titleSize": "h6" }, { @@ -13401,20 +13189,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 8, + "fill": 1, + "id": 3, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -13428,114 +13218,189 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Received Packets", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ - ] - }, - "yaxes": [ + ], + "type": "number", + "unit": "short" + }, { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 9, - "interval": "1m", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + ], + "type": "string", + "unit": "short" + } ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -13544,13 +13409,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets", + "title": "CPU Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -13562,7 +13428,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -13584,7 +13450,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "CPU Quota", "titleSize": "h6" }, { @@ -13600,13 +13466,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 10, + "id": 4, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -13622,7 +13490,26 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - + { + "alias": "requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } ], "spaceLength": 10, "span": 12, @@ -13630,10 +13517,26 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", "legendLink": null, "step": 10 } @@ -13643,10 +13546,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets Dropped", + "title": "Memory Usage (WSS)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -13661,7 +13564,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -13683,7 +13586,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Memory Usage", "titleSize": "h6" }, { @@ -13698,20 +13601,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 11, + "fill": 1, + "id": 5, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -13725,394 +13630,93 @@ items: ], "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage (WSS)", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ - ] - }, - "yaxes": [ + ], + "type": "number", + "unit": "bytes" + }, { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "cluster", - "options": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "pod", - "options": [ - - ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Pod", - "uid": "6581e46e4e5c7ba40a07646395ef7b23", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-k8s-resources-pod - namespace: monitoring -- apiVersion: v1 - data: - k8s-resources-workload.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU Usage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "CPU Usage", - "colorMode": null, - "colors": [ + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #A", + "pattern": "Value #D", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Requests", + "alias": "Memory Limits %", "colorMode": null, "colors": [ @@ -14120,17 +13724,18 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #B", + "pattern": "Value #E", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "percentunit" }, { - "alias": "CPU Requests %", + "alias": "Memory Usage (RSS)", "colorMode": null, "colors": [ @@ -14138,17 +13743,18 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #F", "thresholds": [ ], "type": "number", - "unit": "percentunit" + "unit": "bytes" }, { - "alias": "CPU Limits", + "alias": "Memory Usage (Cache)", "colorMode": null, "colors": [ @@ -14156,17 +13762,18 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #G", "thresholds": [ ], "type": "number", - "unit": "short" + "unit": "bytes" }, { - "alias": "CPU Limits %", + "alias": "Memory Usage (Swap)", "colorMode": null, "colors": [ @@ -14174,27 +13781,29 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #H", "thresholds": [ ], "type": "number", - "unit": "percentunit" + "unit": "bytes" }, { - "alias": "Pod", + "alias": "Container", "colorMode": null, "colors": [ ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "link": true, + "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", + "linkUrl": "", + "pattern": "container", "thresholds": [ ], @@ -14219,7 +13828,7 @@ items: ], "targets": [ { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -14228,7 +13837,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -14237,7 +13846,7 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -14246,7 +13855,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -14255,13 +13864,40 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ @@ -14269,10 +13905,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU Quota", + "title": "Memory Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -14310,7 +13946,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "CPU Quota", + "title": "Memory Quota", "titleSize": "h6" }, { @@ -14326,12 +13962,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 3, + "id": 6, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -14350,12 +13989,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -14368,10 +14007,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Receive Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -14386,7 +14025,96 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -14408,7 +14136,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Usage", + "title": "Bandwidth", "titleSize": "h6" }, { @@ -14423,19 +14151,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 4, + "fill": 10, + "id": 8, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -14448,184 +14179,105 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, + "targets": [ { - "alias": "Memory Usage", - "colorMode": null, - "colors": [ + "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "bytes" - }, + ] + }, + "yaxes": [ { - "alias": "Memory Requests", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "alias": "Memory Requests %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Memory Limits", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Memory Limits %", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "percentunit" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "E", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -14634,14 +14286,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Quota", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -14653,7 +14304,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14675,7 +14326,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Memory Quota", + "title": "Rate of Packets", "titleSize": "h6" }, { @@ -14690,20 +14341,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "id": 5, + "fill": 10, + "id": 10, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], @@ -14716,211 +14369,16 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Current Receive Bandwidth", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Current Transmit Bandwidth", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Rate of Received Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Received Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], "targets": [ { - "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "F", + "legendFormat": "{{pod}}", + "legendLink": null, "step": 10 } ], @@ -14929,14 +14387,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Current Network Usage", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -14948,7 +14405,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -14964,19 +14421,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -14986,12 +14431,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 6, + "id": 11, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -15010,12 +14458,12 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -15028,10 +14476,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -15046,7 +14494,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -15068,7 +14516,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Rate of Packets Dropped", "titleSize": "h6" }, { @@ -15083,13 +14531,17 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "decimals": -1, "fill": 10, - "id": 7, + "id": 12, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -15108,15 +14560,23 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "Reads", + "legendLink": null, + "step": 10 + }, + { + "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes", "legendLink": null, "step": 10 } @@ -15126,10 +14586,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "IOPS", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -15144,7 +14604,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -15160,19 +14620,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -15182,12 +14630,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 8, + "id": 13, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -15206,15 +14657,23 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "Reads", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes", "legendLink": null, "step": 10 } @@ -15224,10 +14683,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Container Bandwidth by Pod: Received", + "title": "ThroughPut", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -15264,7 +14723,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Storage IO - Distribution(Pod - Read & Writes)", "titleSize": "h6" }, { @@ -15279,13 +14738,17 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "decimals": -1, "fill": 10, - "id": 9, + "id": 14, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -15304,15 +14767,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } @@ -15322,10 +14785,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Container Bandwidth by Pod: Transmitted", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -15340,7 +14803,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -15356,19 +14819,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -15378,12 +14829,15 @@ items: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 10, + "id": 15, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -15402,15 +14856,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}", "legendLink": null, "step": 10 } @@ -15420,10 +14874,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Received Packets", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -15460,7 +14914,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Storage IO - Distribution(Containers)", "titleSize": "h6" }, { @@ -15475,19 +14929,22 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 11, + "fill": 1, + "id": 16, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], @@ -15499,213 +14956,223 @@ items: "seriesOverrides": [ ], + "sort": { + "col": 4, + "desc": true + }, "spaceLength": 10, "span": 12, - "stack": true, + "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ - ] - }, - "yaxes": [ + ], + "type": "number", + "unit": "short" + }, { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "IOPS(Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, { - "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Received Packets Dropped", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ - ] - }, - "yaxes": [ + ], + "type": "number", + "unit": "Bps" + }, { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { + "alias": "Container", + "colorMode": null, + "colors": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + ], + "type": "string", + "unit": "short" + } ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", - "format": "time_series", + "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{pod}}", - "legendLink": null, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", "step": 10 } ], @@ -15714,13 +15181,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", + "title": "Current Storage IO", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -15732,7 +15200,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -15754,7 +15222,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Storage IO - Distribution", "titleSize": "h6" } ], @@ -15771,7 +15239,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -15796,8 +15264,8 @@ items: "options": [ ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", @@ -15823,35 +15291,8 @@ items: "options": [ ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "", - "value": "" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "workload", - "options": [ - - ], - "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", - "refresh": 1, + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", @@ -15873,12 +15314,12 @@ items: "includeAll": false, "label": null, "multi": false, - "name": "type", + "name": "pod", "options": [ ], - "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", - "refresh": 1, + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", @@ -15921,17 +15362,22 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Workload", - "uid": "a164a7f0339f99e89cea5cb47e9be617", + "title": "Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-k8s-resources-workload + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-k8s-resources-pod namespace: monitoring - apiVersion: v1 data: - k8s-resources-workloads-namespace.json: |- + k8s-resources-workload.json: |- { "annotations": { "list": [ @@ -15961,11 +15407,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 1, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -15981,26 +15430,7 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "quota - requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "quota - limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, "span": 12, @@ -16008,26 +15438,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "quota - limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -16040,7 +15454,7 @@ items: "title": "CPU Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -16094,11 +15508,14 @@ items: "datasource": "$datasource", "fill": 1, "id": 2, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -16127,24 +15544,6 @@ items: "pattern": "Time", "type": "hidden" }, - { - "alias": "Running Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, { "alias": "CPU Usage", "colorMode": null, @@ -16154,9 +15553,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #B", + "pattern": "Value #A", "thresholds": [ ], @@ -16172,9 +15572,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #B", "thresholds": [ ], @@ -16190,9 +15591,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #C", "thresholds": [ ], @@ -16208,9 +15610,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #D", "thresholds": [ ], @@ -16226,9 +15629,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #F", + "pattern": "Value #E", "thresholds": [ ], @@ -16236,7 +15640,7 @@ items: "unit": "percentunit" }, { - "alias": "Workload", + "alias": "Pod", "colorMode": null, "colors": [ @@ -16244,27 +15648,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workload Type", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -16289,7 +15676,7 @@ items: ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -16298,7 +15685,7 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -16307,7 +15694,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -16316,7 +15703,7 @@ items: "step": 10 }, { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -16325,22 +15712,13 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - }, - { - "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 } ], "thresholds": [ @@ -16351,7 +15729,7 @@ items: "title": "CPU Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -16406,11 +15784,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 3, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -16426,26 +15807,7 @@ items: "points": false, "renderer": "flot", "seriesOverrides": [ - { - "alias": "quota - requests", - "color": "#F2495C", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "quota - limits", - "color": "#FF9830", - "dashes": true, - "fill": 0, - "hideTooltip": true, - "legend": false, - "linewidth": 2, - "stack": false - } + ], "spaceLength": 10, "span": 12, @@ -16453,26 +15815,10 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}} - {{workload_type}}", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "quota - requests", - "legendLink": null, - "step": 10 - }, - { - "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "quota - limits", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -16485,7 +15831,7 @@ items: "title": "Memory Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -16539,11 +15885,14 @@ items: "datasource": "$datasource", "fill": 1, "id": 4, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -16572,24 +15921,6 @@ items: "pattern": "Time", "type": "hidden" }, - { - "alias": "Running Pods", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, { "alias": "Memory Usage", "colorMode": null, @@ -16599,9 +15930,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #B", + "pattern": "Value #A", "thresholds": [ ], @@ -16617,9 +15949,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #C", + "pattern": "Value #B", "thresholds": [ ], @@ -16635,9 +15968,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #D", + "pattern": "Value #C", "thresholds": [ ], @@ -16653,9 +15987,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #E", + "pattern": "Value #D", "thresholds": [ ], @@ -16671,9 +16006,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", - "pattern": "Value #F", + "pattern": "Value #E", "thresholds": [ ], @@ -16681,7 +16017,7 @@ items: "unit": "percentunit" }, { - "alias": "Workload", + "alias": "Pod", "colorMode": null, "colors": [ @@ -16689,27 +16025,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, + "linkTargetBlank": false, "linkTooltip": "Drill down", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workload Type", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -16734,7 +16053,7 @@ items: ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -16743,7 +16062,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -16752,7 +16071,7 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -16761,7 +16080,7 @@ items: "step": 10 }, { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -16770,22 +16089,13 @@ items: "step": 10 }, { - "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", "format": "table", "instant": true, "intervalFactor": 2, "legendFormat": "", "refId": "E", "step": 10 - }, - { - "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 } ], "thresholds": [ @@ -16796,7 +16106,7 @@ items: "title": "Memory Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -16853,10 +16163,12 @@ items: "id": 5, "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -16894,6 +16206,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #A", @@ -16912,6 +16225,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #B", @@ -16930,6 +16244,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #C", @@ -16948,6 +16263,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #D", @@ -16966,6 +16282,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #E", @@ -16984,6 +16301,7 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", "linkUrl": "", "pattern": "Value #F", @@ -16994,7 +16312,7 @@ items: "unit": "pps" }, { - "alias": "Workload", + "alias": "Pod", "colorMode": null, "colors": [ @@ -17002,27 +16320,10 @@ items: "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, - "linkTooltip": "Drill down to pods", - "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Workload Type", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, + "linkTargetBlank": false, "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "workload_type", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", "thresholds": [ ], @@ -17047,7 +16348,7 @@ items: ], "targets": [ { - "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -17056,7 +16357,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -17065,7 +16366,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -17074,7 +16375,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -17083,7 +16384,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -17092,7 +16393,7 @@ items: "step": 10 }, { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "table", "instant": true, "intervalFactor": 2, @@ -17109,7 +16410,7 @@ items: "title": "Current Network Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "transform": "table", @@ -17147,7 +16448,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Current Network Usage", "titleSize": "h6" }, { @@ -17164,11 +16465,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 6, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -17187,15 +16491,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -17208,7 +16512,7 @@ items: "title": "Receive Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17239,19 +16543,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -17262,11 +16554,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 7, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -17285,15 +16580,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -17306,7 +16601,7 @@ items: "title": "Transmit Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17343,7 +16638,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Bandwidth", "titleSize": "h6" }, { @@ -17360,11 +16655,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 8, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -17383,15 +16681,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -17401,10 +16699,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Container Bandwidth by Workload: Received", + "title": "Average Container Bandwidth by Pod: Received", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17435,19 +16733,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -17458,11 +16744,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 9, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -17481,15 +16770,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -17499,10 +16788,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Container Bandwidth by Workload: Transmitted", + "title": "Average Container Bandwidth by Pod: Transmitted", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17539,7 +16828,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Average Container Bandwidth by Pod", "titleSize": "h6" }, { @@ -17556,11 +16845,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 10, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -17579,15 +16871,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -17600,7 +16892,7 @@ items: "title": "Rate of Received Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17615,7 +16907,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -17631,19 +16923,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -17654,11 +16934,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 11, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -17677,15 +16960,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -17698,7 +16981,7 @@ items: "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17713,7 +16996,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -17735,7 +17018,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Rate of Packets", "titleSize": "h6" }, { @@ -17752,11 +17035,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 12, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -17775,15 +17061,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -17796,7 +17082,7 @@ items: "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17811,7 +17097,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -17827,19 +17113,7 @@ items: "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -17850,11 +17124,14 @@ items: "datasource": "$datasource", "fill": 10, "id": 13, + "interval": "1m", "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -17873,15 +17150,15 @@ items: ], "spaceLength": 10, - "span": 12, + "span": 6, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{workload}}", + "legendFormat": "{{pod}}", "legendLink": null, "step": 10 } @@ -17894,7 +17171,7 @@ items: "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -17909,7 +17186,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -17931,7 +17208,7 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Network", + "title": "Rate of Packets Dropped", "titleSize": "h6" } ], @@ -17948,7 +17225,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -17960,28 +17237,50 @@ items: }, { "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", "current": { - "text": "deployment", - "value": "deployment" + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" }, "datasource": "$datasource", - "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", "hide": 0, "includeAll": false, "label": null, "multi": false, - "name": "type", + "name": "namespace", "options": [ ], - "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", - "refresh": 1, + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, "regex": "", - "skipUrlSync": false, - "sort": 0, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -17997,16 +17296,16 @@ items: "value": "" }, "datasource": "$datasource", - "hide": 2, + "hide": 0, "includeAll": false, "label": null, "multi": false, - "name": "cluster", + "name": "type", "options": [ ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 1, + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)", + "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", @@ -18028,12 +17327,12 @@ items: "includeAll": false, "label": null, "multi": false, - "name": "namespace", + "name": "workload", "options": [ ], - "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", - "refresh": 1, + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)", + "refresh": 2, "regex": "", "sort": 1, "tagValuesQuery": "", @@ -18076,34 +17375,32 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Compute Resources / Namespace (Workloads)", - "uid": "a87fb0d919ec0ea5f6543124e16c42a5", + "title": "Kubernetes / Compute Resources / Workload", + "uid": "a164a7f0339f99e89cea5cb47e9be617", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-k8s-resources-workloads-namespace + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-k8s-resources-workload namespace: monitoring - apiVersion: v1 data: - kubelet.json: |- + k8s-resources-workloads-namespace.json: |- { - "__inputs": [ - - ], - "__requires": [ - - ], "annotations": { "list": [ ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, - "id": null, "links": [ ], @@ -18111,575 +17408,414 @@ items: "rows": [ { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "aliasColors": { }, - "id": 2, - "interval": null, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ { - "name": "value to text", - "value": 1 + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false } ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Up", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Running Pods", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, { - "op": "=", - "text": "N/A", - "value": "null" + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 } ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" + "thresholds": [ + ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" }, - "gridPos": { + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] }, - "id": 4, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "yaxes": [ { - "name": "value to text", - "value": 1 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Running Container", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "valueName": "min" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "aliasColors": { }, - "id": 5, - "interval": null, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ { - "name": "value to text", - "value": 1 + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Actual Volume Count", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ - }, - "id": 6, - "interval": null, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ - ], - "mappingType": 1, - "mappingTypes": [ + ], + "type": "number", + "unit": "percentunit" + }, { - "name": "value to text", - "value": 1 + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "short" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Desired Volume Count", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "alias": "", + "colorMode": null, + "colors": [ - }, - "id": 7, - "interval": null, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + ], + "type": "string", + "unit": "short" + } ], - "mappingType": 1, - "mappingTypes": [ + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, { - "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", - "format": "time_series", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": "", - "title": "Config Error Count", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "legendFormat": "", + "refId": "D", + "step": 10 + }, { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 8, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, { - "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", - "format": "time_series", + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" + "legendFormat": "", + "refId": "F", + "step": 10 } ], "thresholds": [ @@ -18687,13 +17823,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Operation Rate", + "title": "CPU Quota", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -18705,23 +17842,35 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -18730,48 +17879,82 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 9, + "fill": 10, + "id": 3, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ - + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -18779,10 +17962,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Operation Error Rate", + "title": "Memory Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -18797,7 +17980,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -18805,12 +17988,12 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -18818,14 +18001,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -18836,33 +18018,29 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, - "id": 10, + "id": 4, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], @@ -18870,27 +18048,250 @@ items: "span": 12, "stack": false, "steppedLine": false, - "targets": [ + "styles": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Operation duration 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -18902,20 +18303,20 @@ items: }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] } @@ -18923,14 +18324,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -18941,54 +18341,265 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, - "id": 11, + "id": 5, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 12, "stack": false, "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], "targets": [ { - "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", + "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{instance}} pod", - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 }, { - "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", + "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "{{instance}} worker", - "refId": "B" + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 } ], "thresholds": [ @@ -18996,13 +18607,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pod Start Rate", + "title": "Current Network Usage", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -19014,7 +18626,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -19022,15 +18634,27 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Network Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -19039,55 +18663,45 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 12, + "fill": 10, + "id": 6, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} pod", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} worker", - "refId": "B" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -19095,10 +18709,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pod Start Duration", + "title": "Receive Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19113,7 +18727,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -19121,28 +18735,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -19151,50 +18752,45 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 13, + "fill": 10, + "id": 7, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -19202,10 +18798,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Storage Operation Rate", + "title": "Transmit Bandwidth", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19220,7 +18816,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -19228,15 +18824,27 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -19245,50 +18853,45 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 14, + "fill": 10, + "id": 8, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -19296,10 +18899,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Storage Operation Error Rate", + "title": "Average Container Bandwidth by Workload: Received", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19314,7 +18917,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -19322,28 +18925,15 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -19352,50 +18942,45 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 15, + "fill": 10, + "id": 9, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", + "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -19403,10 +18988,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Storage Operation Duration 99th quantile", + "title": "Average Container Bandwidth by Workload: Transmitted", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19421,7 +19006,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -19429,12 +19014,12 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -19442,14 +19027,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Average Container Bandwidth by Workload", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -19459,48 +19043,45 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 16, + "fill": 10, + "id": 10, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{operation_type}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -19508,10 +19089,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Cgroup manager operation rate", + "title": "Rate of Received Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19526,7 +19107,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -19534,12 +19115,12 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] }, @@ -19551,48 +19132,45 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 17, + "fill": 10, + "id": 11, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} {{operation_type}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -19600,10 +19178,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Cgroup manager 99th quantile", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19618,7 +19196,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -19626,12 +19204,12 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -19639,14 +19217,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -19656,49 +19233,45 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "Pod lifecycle event generator", - "fill": 1, - "gridPos": { - - }, - "id": 18, + "fill": 10, + "id": 12, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -19706,10 +19279,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist rate", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19724,7 +19297,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -19732,12 +19305,12 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] }, @@ -19749,48 +19322,45 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 19, + "fill": 10, + "id": 13, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{workload}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -19798,10 +19368,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "PLEG relist interval", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": false, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -19816,7 +19386,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -19824,12 +19394,12 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -19837,1268 +19407,749 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ - }, - "id": 20, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [ + ], + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "PLEG relist duration", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "$datasource", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-k8s-resources-workloads-namespace + namespace: monitoring +- apiVersion: v1 + data: + kubelet.json: |- + { + "__inputs": [ - }, - "id": 21, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "__requires": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, - { - "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" - } - ], - "thresholds": [ + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "RPC Rate", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "thresholds": { + "mode": "absolute", + "steps": [ ] }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + "unit": "none" } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 2, + "links": [ + ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Running Kubelets", + "transparent": false, + "type": "stat" }, { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 22, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { "links": [ ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{verb}} {{url}}", - "refId": "A" - } - ], - "thresholds": [ + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Request duration 99th quantile", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "thresholds": { + "mode": "absolute", + "steps": [ ] }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + "unit": "none" } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 3, + "links": [ + ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Running Pods", + "transparent": false, + "type": "stat" }, { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + ], + "mappings": [ + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] }, - "id": 23, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 4, + "links": [ + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Running Containers", + "transparent": false, + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [ + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Memory", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "thresholds": { + "mode": "absolute", + "steps": [ ] }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 5, + "links": [ + + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ { - "aliasColors": { + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Actual Volume Count", + "transparent": false, + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 24, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 25, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [ + ], + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "thresholds": { + "mode": "absolute", + "steps": [ ] }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "unit": "none" } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_info, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Kubelet", - "uid": "3138fa155d5915769fbded898ac09fd9", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-kubelet - namespace: monitoring -- apiVersion: v1 - data: - kubernetes-cluster-dashboard.json: |- - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ", - "editable": true, - "gnetId": 162, - "graphTooltip": 1, - "links": [ - - ], - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true }, "gridPos": { "h": 7, - "w": 8, - "x": 0, + "w": 4, + "x": 16, "y": 0 }, - "id": 4, - "interval": null, - "isNew": true, + "id": 6, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "7", "targets": [ { - "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster memory usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], - "valueName": "current" + "title": "Desired Volume Count", + "transparent": false, + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(0, 0, 0, 0)", - "rgb(210, 1, 1)", - "#890f02" - ], - "datasource": "prometheus", + "datasource": "$datasource", "fieldConfig": { "defaults": { - "custom": { + "links": [ - } - }, - "overrides": [ + ], + "mappings": [ - ] - }, - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] + }, + "unit": "none" + } }, "gridPos": { - "h": 2, - "w": 8, - "x": 8, + "h": 7, + "w": 4, + "x": 20, "y": 0 }, - "id": 23, - "interval": null, + "id": 7, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "textMode": "auto" }, - "tableColumn": "{job=\"kubelet\"}", + "pluginVersion": "7", "targets": [ { - "expr": "avg(up{job=\"kubelet\"}) BY (job)", + "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", "refId": "A" } ], - "thresholds": "1.1", - "title": "Up Nodes", - "type": "singlestat", - "valueFontSize": "120%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" + "title": "Config Error Count", + "transparent": false, + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": 0, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ + "aliasColors": { - ] - }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 0 + "w": 12, + "x": 0, + "y": 7 }, - "id": 6, - "interval": null, - "isNew": true, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)", "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster CPU usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" } ], - "valueName": "current" - }, - { - "columns": [ + "thresholds": [ ], - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { + "timeFrom": null, + "timeShift": null, + "title": "Operation Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - } + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, - "overrides": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { - ] }, - "fontSize": "90%", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 2 + "h": 7, + "w": 12, + "x": 12, + "y": 7 }, - "id": 25, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, "links": [ ], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 2, - "desc": false - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "link": false, - "pattern": "Time", - "type": "date" - }, - { - "alias": "Uptime", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - - ], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/endpoint|job|namespace|pod|service/", - "thresholds": [ - - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "preserveFormat": false, - "sanitize": false, - "thresholds": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "(time() - node_boot_time_seconds)", - "format": "table", - "instant": true, - "intervalFactor": 1, + "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_type}}", "refId": "A" } ], - "title": "Node Uptime", - "transform": "table", - "transparent": true, - "type": "table-old" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "id": 15, - "panels": [ + "thresholds": [ ], - "title": "Nodes", - "type": "row" - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0.85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "B", - "5m", - "now" - ] - }, - "reducer": { - "params": [ - - ], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "60s", - "handler": 1, - "name": "Memory Usage alert", - "noDataState": "no_data", - "notifications": [ + "timeFrom": null, + "timeShift": null, + "title": "Operation Error Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, + "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 8 + "y": 14 }, - "hiddenSeries": false, "id": 10, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "max": false, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -21106,15 +20157,11 @@ items: ], "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -21123,35 +20170,19 @@ items: "steppedLine": false, "targets": [ { - "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{ instance }}", + "legendFormat": "{{instance}} {{operation_type}}", "refId": "A" - }, - { - "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "refId": "B" } ], "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.85 - } - ], - "timeFrom": null, - "timeRegions": [ ], + "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Operation duration 99th quantile", "tooltip": { "shared": true, "sort": 0, @@ -21169,7 +20200,7 @@ items: }, "yaxes": [ { - "format": "decbytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -21177,92 +20208,43 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + ] }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 90 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "15m", - "now" - ] - }, - "reducer": { - "params": [ - - ], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Usage alert", - "noDataState": "no_data", - "notifications": [ - - ] - }, "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, + "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, + "h": 7, "w": 12, - "x": 12, - "y": 8 + "x": 0, + "y": 21 }, - "hiddenSeries": false, "id": 11, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "max": false, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -21270,15 +20252,11 @@ items: ], "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -21287,28 +20265,26 @@ items: "steppedLine": false, "targets": [ { - "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", - "intervalFactor": 3, - "legendFormat": "{{instance}}", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", "refId": "A" - } - ], - "thresholds": [ + }, { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 90 + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", + "refId": "B" } ], - "timeFrom": null, - "timeRegions": [ + "thresholds": [ ], + "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Pod Start Rate", "tooltip": { "shared": true, "sort": 0, @@ -21326,8 +20302,7 @@ items: }, "yaxes": [ { - "decimals": null, - "format": "percent", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -21335,343 +20310,159 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + ] }, { - "columns": [ - - ], - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ + "aliasColors": { - ] }, - "fontSize": "100%", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 9, + "h": 7, "w": 12, - "x": 0, - "y": 17 + "x": 12, + "y": 21 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true }, - "id": 31, + "lines": true, + "linewidth": 1, "links": [ ], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "link": false, - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "condition|container|daemonset|endpoint|namespace|node", - "thresholds": [ - - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "type": "number", - "unit": "short" - } ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "ALERTS{alertstate=\"firing\"}", - "format": "table", - "instant": true, - "intervalFactor": 1, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} pod", "refId": "A" }, { - "expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}", - "format": "table", - "hide": true, - "intervalFactor": 1, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} worker", "refId": "B" } ], - "title": "Active Alerts", - "transform": "table", - "type": "table-old" - }, - { - "dashboardFilter": "", - "dashboardTags": [ + "thresholds": [ ], - "datasource": null, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] }, - "folderId": null, - "gridPos": { - "h": 9, - "w": 5, - "x": 12, - "y": 17 - }, - "id": 27, - "limit": 10, - "links": [ - - ], - "nameFilter": "", - "onlyAlertsOnDashboard": false, - "show": "current", - "sortOrder": 1, - "stateFilter": [ - - ], - "title": "Alarms", - "type": "alertlist" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 7, - "x": 17, - "y": 17 - }, - "id": 7, - "interval": null, - "isNew": true, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "yaxes": [ { - "name": "value to text", - "value": 1 + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "metric": "", - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster Filesystem usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "valueName": "current" + ] }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1 - ], - "type": "lt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "C", - "5m", - "now" - ] - }, - "reducer": { - "params": [ - - ], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Node Down", - "noDataState": "alerting", - "notifications": [ - - ] - }, "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, + "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { - "h": 4, - "w": 7, - "x": 17, - "y": 22 + "h": 7, + "w": 12, + "x": 0, + "y": 28 }, - "hiddenSeries": false, - "id": 29, + "id": 13, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "show": false, + "rightSide": true, + "show": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", - "options": { - "dataLinks": [ - - ] - }, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -21680,46 +20471,19 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(up{job=\"kubelet\"}) BY (job)", + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", "format": "time_series", - "hide": true, - "instant": false, - "intervalFactor": 1, - "legendFormat": "Up Nodes", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "refId": "A" - }, - { - "expr": "count(up{job=\"kubelet\"})", - "format": "time_series", - "hide": true, - "instant": false, - "intervalFactor": 1, - "legendFormat": "Total Nodes", - "refId": "B" - }, - { - "expr": "avg(up{job=\"kubelet\"}) BY (job)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "refId": "C" } ], "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "lt", - "value": 1 - } - ], - "timeFrom": null, - "timeRegions": [ ], + "timeFrom": null, "timeShift": null, - "title": "Up Nodes", + "title": "Storage Operation Rate", "tooltip": { "shared": true, "sort": 0, @@ -21737,7 +20501,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -21745,92 +20509,45 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + ] }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [ - - ], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Temperature alert", - "noDataState": "no_data", - "notifications": [ - - ] - }, "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, + "datasource": "$datasource", "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 26 + "h": 7, + "w": 12, + "x": 12, + "y": 28 }, - "hiddenSeries": false, - "id": 13, + "id": 14, "legend": { + "alignAsTable": true, "avg": false, - "current": false, + "current": true, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -21838,15 +20555,11 @@ items: ], "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -21855,28 +20568,19 @@ items: "steppedLine": false, "targets": [ { - "expr": "rpi_cpu_temperature_celsius", + "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", "format": "time_series", - "intervalFactor": 5, - "legendFormat": "{{instance}}", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", "refId": "A" } ], "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 85 - } - ], - "timeFrom": null, - "timeRegions": [ ], + "timeFrom": null, "timeShift": null, - "title": "CPU Temperature", + "title": "Storage Operation Error Rate", "tooltip": { "shared": true, "sort": 0, @@ -21894,7 +20598,7 @@ items: }, "yaxes": [ { - "format": "celsius", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -21902,34 +20606,14 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 17, - "panels": [ - - ], - "title": "Pods", - "type": "row" + ] }, { "aliasColors": { @@ -21938,37 +20622,19 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "decimals": 0, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 0, + "datasource": "$datasource", + "fill": 1, "fillGradient": 0, - "grid": { - - }, "gridPos": { "h": 7, "w": 24, "x": 0, - "y": 33 + "y": 35 }, - "hiddenSeries": false, - "id": 3, - "isNew": true, + "id": 15, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, "hideEmpty": true, "hideZero": true, @@ -21976,27 +20642,21 @@ items: "min": false, "rightSide": true, "show": true, - "sideWidth": 270, - "sort": "current", - "sortDesc": true, + "sideWidth": null, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", - "options": { - "dataLinks": [ - - ] - }, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -22005,31 +20665,23 @@ items: "steppedLine": false, "targets": [ { - "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))", + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))", "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ pod}}", - "metric": "container_cpu", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}", + "refId": "A" } ], "thresholds": [ ], "timeFrom": null, - "timeRegions": [ - - ], "timeShift": null, - "title": "Pod CPU usage", + "title": "Storage Operation Duration 99th quantile", "tooltip": { - "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -22043,7 +20695,7 @@ items: }, "yaxes": [ { - "format": "percentunit", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -22051,18 +20703,14 @@ items: "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + ] }, { "aliasColors": { @@ -22071,63 +20719,39 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 0, + "datasource": "$datasource", + "fill": 1, "fillGradient": 0, - "grid": { - - }, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 0, - "y": 40 + "y": 42 }, - "hiddenSeries": false, - "id": 2, - "isNew": true, + "id": 16, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": 250, - "sort": "avg", - "sortDesc": true, + "sideWidth": null, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "nullPointMode": "connected", - "options": { - "dataLinks": [ - - ] - }, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -22136,39 +20760,23 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))", - "format": "time_series", - "hide": true, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - }, - { - "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))", + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{operation_type}}", + "refId": "A" } ], "thresholds": [ ], "timeFrom": null, - "timeRegions": [ - - ], "timeShift": null, - "title": "Pod memory usage", + "title": "Cgroup manager operation rate", "tooltip": { - "msResolution": false, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -22182,7 +20790,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -22190,18 +20798,14 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + ] }, { "aliasColors": { @@ -22210,63 +20814,39 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, + "datasource": "$datasource", "fill": 1, "fillGradient": 0, - "grid": { - - }, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 47 + "x": 12, + "y": 42 }, - "hiddenSeries": false, - "id": 19, + "id": 17, "legend": { "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, + "avg": false, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, - "sideWidth": 550, - "sort": "avg", - "sortDesc": true, + "sideWidth": null, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -22275,39 +20855,23 @@ items: "steppedLine": false, "targets": [ { - "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", - "refId": "A", - "step": 240 - }, - { - "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", - "format": "time_series", - "hide": true, - "interval": "", "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 + "legendFormat": "{{instance}} {{operation_type}}", + "refId": "A" } ], "thresholds": [ ], "timeFrom": null, - "timeRegions": [ - - ], "timeShift": null, - "title": "Sent Network Traffic per Container", + "title": "Cgroup manager 99th quantile", "tooltip": { - "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -22321,26 +20885,22 @@ items: }, "yaxes": [ { - "format": "Bps", - "label": "", + "format": "s", + "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { - "format": "short", - "label": "", - "logBase": 10, - "max": 8, - "min": 0, - "show": false + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + ] }, { "aliasColors": { @@ -22349,63 +20909,40 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, + "datasource": "$datasource", + "description": "Pod lifecycle event generator", "fill": 1, "fillGradient": 0, - "grid": { - - }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 47 + "x": 0, + "y": 49 }, - "hiddenSeries": false, - "id": 21, + "id": 18, "legend": { "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, + "avg": false, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, - "sideWidth": 150, - "sort": "avg", - "sortDesc": true, + "sideWidth": null, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -22414,38 +20951,23 @@ items: "steppedLine": false, "targets": [ { - "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])", + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", - "hide": true, "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "refId": "B", - "step": 10 + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ ], "timeFrom": null, - "timeRegions": [ - - ], "timeShift": null, - "title": "Received Network Traffic per Container", + "title": "PLEG relist rate", "tooltip": { - "msResolution": true, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -22459,7 +20981,7 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "ops", "label": null, "logBase": 1, "max": null, @@ -22467,18 +20989,14 @@ items: "show": true }, { - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, "min": null, "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + ] }, { "aliasColors": { @@ -22487,63 +21005,39 @@ items: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 0, + "datasource": "$datasource", + "fill": 1, "fillGradient": 0, - "grid": { - - }, "gridPos": { "h": 7, - "w": 24, - "x": 0, - "y": 54 + "w": 12, + "x": 12, + "y": 49 }, - "hiddenSeries": false, - "id": 8, - "isNew": true, + "id": 19, "legend": { "alignAsTable": true, - "avg": true, + "avg": false, "current": true, "max": false, "min": false, "rightSide": true, "show": true, - "sideWidth": 220, - "sort": "current", - "sortDesc": true, + "sideWidth": null, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "nullPointMode": "connected", - "options": { - "dataLinks": [ - - ] - }, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -22552,40 +21046,23 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Receive Traffic", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Transmit Traffic", - "metric": "network", - "refId": "B", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ ], "timeFrom": null, - "timeRegions": [ - - ], "timeShift": null, - "title": "Pod Network i/o", + "title": "PLEG relist interval", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -22599,8 +21076,7 @@ items: }, "yaxes": [ { - "$$hashKey": "object:1163", - "format": "bytes", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -22608,700 +21084,356 @@ items: "show": true }, { - "$$hashKey": "object:1164", - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, "min": null, "show": true } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "10s", - "schemaVersion": 25, - "style": "dark", - "tags": [ - "custom" - ], - "templating": { - "list": [ - - ] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kubernetes cluster monitoring (via Prometheus)", - "version": 1 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-kubernetes-cluster-dashboard - namespace: monitoring -- apiVersion: v1 - data: - namespace-by-pod.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "panels": [ + ] + }, { - "collapse": false, - "collapsed": false, + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 1, + "h": 7, "w": 24, "x": 0, - "y": 0 + "y": 56 }, - "id": 2, - "panels": [ + "id": 20, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Current Bandwidth", - "titleSize": "h6", - "type": "row" + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 0, - "format": "time_series", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 24, "x": 0, - "y": 1 + "y": 63 }, - "height": 9, - "id": 3, - "interval": null, + "id": 21, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" }, { - "name": "range to text", - "value": 2 + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" } ], - "maxDataPoints": 100, - "minSpan": 12, - "nullPointMode": "connected", - "nullText": null, - "options": { - "fieldOptions": { - "calcs": [ - "last" - ], - "defaults": { - "max": 10000000000, - "min": 0, - "title": "$namespace", - "unit": "Bps" - }, - "mappings": [ + "thresholds": [ - ], - "override": { + ], + "timeFrom": null, + "timeShift": null, + "title": "RPC Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - }, - "thresholds": [ - { - "color": "dark-green", - "index": 0, - "value": null - }, - { - "color": "dark-yellow", - "index": 1, - "value": 5000000000 - }, - { - "color": "dark-red", - "index": 2, - "value": 7000000000 - } - ], - "values": false - } + ] }, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "yaxes": [ { - "from": "null", - "text": "N/A", - "to": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "span": 12, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + ] + }, + { + "aliasColors": { + }, - "tableColumn": "", + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))", "format": "time_series", - "instant": null, - "intervalFactor": 1, - "legendFormat": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{verb}} {{url}}", "refId": "A" } ], - "thresholds": "", + "thresholds": [ + + ], "timeFrom": null, "timeShift": null, - "title": "Current Rate of Bytes Received", - "type": "gauge", - "valueFontSize": "80%", - "valueMaps": [ + "title": "Request duration 99th quantile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "N/A", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } - ], - "valueName": "current" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 0, - "format": "time_series", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 1 + "h": 7, + "w": 8, + "x": 0, + "y": 77 }, - "height": 9, - "id": 4, - "interval": null, + "id": 23, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 12, - "nullPointMode": "connected", - "nullText": null, - "options": { - "fieldOptions": { - "calcs": [ - "last" - ], - "defaults": { - "max": 10000000000, - "min": 0, - "title": "$namespace", - "unit": "Bps" - }, - "mappings": [ - - ], - "override": { - - }, - "thresholds": [ - { - "color": "dark-green", - "index": 0, - "value": null - }, - { - "color": "dark-yellow", - "index": 1, - "value": 5000000000 - }, - { - "color": "dark-red", - "index": 2, - "value": 7000000000 - } - ], - "values": false - } - }, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 12, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))", - "format": "time_series", - "instant": null, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Transmitted", - "type": "gauge", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "columns": [ - { - "text": "Time", - "value": "Time" - }, - { - "text": "Value #A", - "value": "Value #A" - }, - { - "text": "Value #B", - "value": "Value #B" - }, - { - "text": "Value #C", - "value": "Value #C" - }, - { - "text": "Value #D", - "value": "Value #D" - }, - { - "text": "Value #E", - "value": "Value #E" - }, - { - "text": "Value #F", - "value": "Value #F" - }, - { - "text": "pod", - "value": "pod" - } - ], - "datasource": "$datasource", - "fill": 1, - "fontSize": "100%", - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 5, - "lines": true, - "linewidth": 1, - "minSpan": 24, - "nullPointMode": "null as zero", - "renderer": "flot", - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 24, - "styles": [ - { - "alias": "Time", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Time", - "thresholds": [ - - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Bandwidth Received", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Bandwidth Transmitted", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Rate of Received Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Received Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Pod", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", - "pattern": "pod", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Status", - "type": "table" - }, - { - "collapse": false, - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 6, - "panels": [ - - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Bandwidth", - "titleSize": "h6", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 20 - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -23311,17 +21443,15 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -23329,10 +21459,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Receive Bandwidth", + "title": "Memory", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -23347,19 +21477,19 @@ items: }, "yaxes": [ { - "format": "Bps", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { - "format": "Bps", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -23372,20 +21502,19 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 2, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 20 + "h": 7, + "w": 8, + "x": 8, + "y": 77 }, - "id": 8, + "id": 24, "legend": { "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -23395,13 +21524,11 @@ items: "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [ ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -23411,17 +21538,15 @@ items: ], "spaceLength": 10, - "span": 12, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -23429,5660 +21554,140 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Transmit Bandwidth", + "title": "CPU usage", "tooltip": { "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 9, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 30 - }, - "id": 10, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Received Packets", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 30 - }, - "id": 11, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Packets", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 12, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 13, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Received Packets Dropped", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 14, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Errors", - "titleSize": "h6", - "type": "row" - } - ], - "refresh": "10s", - "rows": [ - - ], - "schemaVersion": 18, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "kube-system", - "value": "kube-system" - }, - "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total, namespace)", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(container_network_receive_packets_total, namespace)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "30s", - "value": "30s" - }, - { - "selected": true, - "text": "5m", - "value": "5m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - } - ], - "query": "30s,5m,1h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interval", - "options": [ - { - "selected": true, - "text": "4h", - "value": "4h" - } - ], - "query": "4h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Networking / Namespace (Pods)", - "uid": "8b7a8b326d7a6f1f04244066368c67af", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-namespace-by-pod - namespace: monitoring -- apiVersion: v1 - data: - namespace-by-workload.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "panels": [ - { - "collapse": false, - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "panels": [ - - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Current Bandwidth", - "titleSize": "h6", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 1 - }, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "minSpan": 24, - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 24, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ workload }}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Received", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "series", - "name": null, - "show": false, - "values": [ - "current" - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 1 - }, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "minSpan": 24, - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 24, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ workload }}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Rate of Bytes Transmitted", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "series", - "name": null, - "show": false, - "values": [ - "current" - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "columns": [ - { - "text": "Time", - "value": "Time" - }, - { - "text": "Value #A", - "value": "Value #A" - }, - { - "text": "Value #B", - "value": "Value #B" - }, - { - "text": "Value #C", - "value": "Value #C" - }, - { - "text": "Value #D", - "value": "Value #D" - }, - { - "text": "Value #E", - "value": "Value #E" - }, - { - "text": "Value #F", - "value": "Value #F" - }, - { - "text": "Value #G", - "value": "Value #G" - }, - { - "text": "Value #H", - "value": "Value #H" - }, - { - "text": "workload", - "value": "workload" - } - ], - "datasource": "$datasource", - "fill": 1, - "fontSize": "90%", - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 5, - "lines": true, - "linewidth": 1, - "minSpan": 24, - "nullPointMode": "null as zero", - "renderer": "flot", - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": false - }, - "spaceLength": 10, - "span": 24, - "styles": [ - { - "alias": "Time", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Time", - "thresholds": [ - - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Current Bandwidth Received", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Current Bandwidth Transmitted", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Average Bandwidth Received", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #C", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Average Bandwidth Transmitted", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #D", - "thresholds": [ - - ], - "type": "number", - "unit": "Bps" - }, - { - "alias": "Rate of Received Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #E", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #F", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Received Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #G", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Rate of Transmitted Packets Dropped", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #H", - "thresholds": [ - - ], - "type": "number", - "unit": "pps" - }, - { - "alias": "Workload", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": true, - "linkTooltip": "Drill down", - "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", - "pattern": "workload", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, - { - "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 - }, - { - "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 - }, - { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Status", - "type": "table" - }, - { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 6, - "panels": [ - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 20 - }, - "id": 7, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "minSpan": 24, - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 24, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ workload }}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Average Rate of Bytes Received", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "series", - "name": null, - "show": false, - "values": [ - "current" - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 20 - }, - "id": 8, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "minSpan": 24, - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 24, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ workload }}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Average Rate of Bytes Transmitted", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "series", - "name": null, - "show": false, - "values": [ - "current" - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Average Bandwidth", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 9, - "panels": [ - - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Bandwidth HIstory", - "titleSize": "h6", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 38 - }, - "id": 10, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Receive Bandwidth", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 38 - }, - "id": 11, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Transmit Bandwidth", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 39 - }, - "id": 12, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 13, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Received Packets", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 14, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Packets", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": true, - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 40 - }, - "id": 15, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 16, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Received Packets Dropped", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 2, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 41 - }, - "id": 17, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "minSpan": 12, - "nullPointMode": "connected", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{workload}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of Transmitted Packets Dropped", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "pps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Errors", - "titleSize": "h6", - "type": "row" - } - ], - "refresh": "10s", - "rows": [ - - ], - "schemaVersion": 18, - "style": "dark", - "tags": [ - "kubernetes-mixin" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "kube-system", - "value": "kube-system" - }, - "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total, namespace)", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(container_network_receive_packets_total, namespace)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "deployment", - "value": "deployment" - }, - "datasource": "$datasource", - "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "type", - "options": [ - - ], - "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "30s", - "value": "30s" - }, - { - "selected": true, - "text": "5m", - "value": "5m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - } - ], - "query": "30s,5m,1h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "text": "5m", - "value": "5m" - }, - "datasource": "$datasource", - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interval", - "options": [ - { - "selected": true, - "text": "4h", - "value": "4h" - } - ], - "query": "4h", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "interval", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Kubernetes / Networking / Namespace (Workload)", - "uid": "bbb2a765a623ae38130206c7d94a160f", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-namespace-by-workload - namespace: monitoring -- apiVersion: v1 - data: - node-cluster-rsrc-use.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(\n instance:node_cpu_utilisation:rate1m{job=\"node-exporter\"}\n*\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Saturation (load1 per CPU)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Saturation (Major Page Faults)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "rps", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/ Receive/", - "stack": "A" - }, - { - "alias": "/ Transmit/", - "stack": "B", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Receive", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - }, - { - "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Transmit", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Net Utilisation (Bytes Receive/Transmit)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/ Receive/", - "stack": "A" - }, - { - "alias": "/ Transmit/", - "stack": "B", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Receive", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - }, - { - "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Transmit", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Net Saturation (Drops Receive/Transmit)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "rps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Network", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{device}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk IO Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"}))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} {{device}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk IO Saturation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk IO", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 10, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum without (device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"} - node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n) \n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"})))\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "legendLink": "/dashboard/file/node-rsrc-use.json", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk Space", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "USE Method / Cluster", - "uid": "3e97d1d02672cdd0861f4c97c64f89b2", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-node-cluster-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - node-rsrc-use.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_cpu_utilisation:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Utilisation", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Saturation", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Saturation (Load1 per CPU)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "CPU", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Memory", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Major page faults", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Saturation (Major Page Faults)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Memory", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Receive/", - "stack": "A" - }, - { - "alias": "/Transmit/", - "stack": "B", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Receive", - "legendLink": null, - "step": 10 - }, - { - "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Transmit", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Net Utilisation (Bytes Receive/Transmit)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Receive/", - "stack": "A" - }, - { - "alias": "/Transmit/", - "stack": "B", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Receive drops", - "legendLink": null, - "step": 10 - }, - { - "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Transmit drops", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Net Saturation (Drops Receive/Transmit)", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "rps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Net", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk IO Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk IO Saturation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk IO", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "1 -\n(\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n/\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "legendLink": null, - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Utilisation", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Disk Space", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "instance", - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(up{job=\"node-exporter\"}, instance)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "USE Method / Node", - "uid": "fac67cfbe174d3ef53eb473d73d9212f", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-node-rsrc-use - namespace: monitoring -- apiVersion: v1 - data: - nodes.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 2, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", - "format": "time_series", - "interval": "1m", - "intervalFactor": 5, - "legendFormat": "{{cpu}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - }, - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": 1, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "gridPos": { - - }, - "id": 3, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "1m load average", - "refId": "A" - }, - { - "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5m load average", - "refId": "B" - }, - { - "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "15m load average", - "refId": "C" - }, - { - "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "logical cores", - "refId": "D" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Load Average", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 9, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory used", - "refId": "A" - }, - { - "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory buffers", - "refId": "B" - }, - { - "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory cached", - "refId": "C" - }, - { - "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory free", - "refId": "D" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 5, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Memory Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "gridPos": { - - }, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - { - "alias": "/ read| written/", - "yaxis": 1 - }, - { - "alias": "/ io time/", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}} read", - "refId": "A" - }, - { - "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}} written", - "refId": "B" - }, - { - "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}} io time", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk I/O", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - { - "alias": "used", - "color": "#E0B400" - }, - { - "alias": "available", - "color": "#73BF69" - } - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "used", - "refId": "A" - }, - { - "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "available", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "gridPos": { - - }, - "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Received", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 0, - "gridPos": { - - }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Transmitted", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "UTC", - "title": "Nodes", - "uid": "fa49a4706d07a042595b664c87fb33ea", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-nodes - namespace: monitoring -- apiVersion: v1 - data: - persistentvolumesusage.json: |- - { - "__inputs": [ - - ], - "__requires": [ - - ], - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 9, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used Space", - "refId": "A" - }, - { - "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Free Space", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Volume Space Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 3, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Volume Space Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 9, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used inodes", - "refId": "A" - }, - { - "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": " Free inodes", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Volume inodes Usage", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { - }, - "id": 5, - "interval": null, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 77 + }, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "80, 90", - "title": "Volume inodes Usage", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } + ], + "refresh": "10s", + "rows": [ + ], "schemaVersion": 14, "style": "dark", @@ -29097,7 +21702,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -29121,33 +21726,7 @@ items: "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)", + "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, @@ -29166,14 +21745,14 @@ items: }, "datasource": "$datasource", "hide": 0, - "includeAll": false, - "label": "PersistentVolumeClaim", + "includeAll": true, + "label": "instance", "multi": false, - "name": "volume", + "name": "instance", "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -29188,7 +21767,7 @@ items: ] }, "time": { - "from": "now-7d", + "from": "now-1h", "to": "now" }, "timepicker": { @@ -29217,17 +21796,22 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Persistent Volumes", - "uid": "919b92a8e8041bd567af9edab12c840c", + "title": "Kubernetes / Kubelet", + "uid": "3138fa155d5915769fbded898ac09fd9", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-persistentvolumesusage + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-kubelet namespace: monitoring - apiVersion: v1 data: - pod-total.json: |- + namespace-by-pod.json: |- { "__inputs": [ @@ -29332,7 +21916,7 @@ items: "defaults": { "max": 10000000000, "min": 0, - "title": "$namespace: $pod", + "title": "$namespace", "unit": "Bps" }, "mappings": [ @@ -29382,7 +21966,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", "format": "time_series", "instant": null, "intervalFactor": 1, @@ -29459,7 +22043,7 @@ items: "defaults": { "max": 10000000000, "min": 0, - "title": "$namespace: $pod", + "title": "$namespace", "unit": "Bps" }, "mappings": [ @@ -29509,7 +22093,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", "format": "time_series", "instant": null, "intervalFactor": 1, @@ -29532,6 +22116,274 @@ items: ], "valueName": "current" }, + { + "columns": [ + { + "text": "Time", + "value": "Time" + }, + { + "text": "Value #A", + "value": "Value #A" + }, + { + "text": "Value #B", + "value": "Value #B" + }, + { + "text": "Value #C", + "value": "Value #C" + }, + { + "text": "Value #D", + "value": "Value #D" + }, + { + "text": "Value #E", + "value": "Value #E" + }, + { + "text": "Value #F", + "value": "Value #F" + }, + { + "text": "pod", + "value": "pod" + } + ], + "datasource": "$datasource", + "fill": 1, + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Status", + "type": "table" + }, { "collapse": false, "collapsed": false, @@ -29539,9 +22391,9 @@ items: "h": 1, "w": 24, "x": 0, - "y": 10 + "y": 19 }, - "id": 5, + "id": 6, "panels": [ ], @@ -29562,13 +22414,14 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, - "y": 11 + "y": 20 }, - "id": 6, + "id": 7, "legend": { "alignAsTable": false, "avg": false, @@ -29605,7 +22458,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -29662,13 +22515,14 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, - "y": 11 + "y": 20 }, - "id": 7, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -29705,7 +22559,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -29760,9 +22614,9 @@ items: "h": 1, "w": 24, "x": 0, - "y": 20 + "y": 29 }, - "id": 8, + "id": 9, "panels": [ { "aliasColors": { @@ -29773,13 +22627,14 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 0, - "y": 21 + "y": 30 }, - "id": 9, + "id": 10, "legend": { "alignAsTable": false, "avg": false, @@ -29816,7 +22671,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -29873,13 +22728,14 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 12, - "y": 21 + "y": 30 }, - "id": 10, + "id": 11, "legend": { "alignAsTable": false, "avg": false, @@ -29916,7 +22772,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -29980,9 +22836,9 @@ items: "h": 1, "w": 24, "x": 0, - "y": 21 + "y": 30 }, - "id": 11, + "id": 12, "panels": [ { "aliasColors": { @@ -29993,13 +22849,14 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 0, - "y": 32 + "y": 40 }, - "id": 12, + "id": 13, "legend": { "alignAsTable": false, "avg": false, @@ -30036,7 +22893,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -30093,13 +22950,14 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 12, - "y": 32 + "y": 40 }, - "id": 13, + "id": 14, "legend": { "alignAsTable": false, "avg": false, @@ -30136,7 +22994,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -30211,7 +23069,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -30222,29 +23080,23 @@ items: "type": "datasource" }, { - "allValue": ".+", - "auto": false, - "auto_count": 30, - "auto_min": "10s", + "allValue": null, "current": { - "text": "kube-system", - "value": "kube-system" + }, "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total, namespace)", - "hide": 0, - "includeAll": true, + "hide": 2, + "includeAll": false, "label": null, "multi": false, - "name": "namespace", + "name": "cluster", "options": [ ], - "query": "label_values(container_network_receive_packets_total, namespace)", - "refresh": 1, + "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, "regex": "", - "skipUrlSync": false, - "sort": 1, + "sort": 0, "tagValuesQuery": "", "tags": [ @@ -30259,21 +23111,21 @@ items: "auto_count": 30, "auto_min": "10s", "current": { - "text": "", - "value": "" + "text": "kube-system", + "value": "kube-system" }, "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", "hide": 0, - "includeAll": false, + "includeAll": true, "label": null, "multi": false, - "name": "pod", + "name": "namespace", "options": [ ], - "query": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)", - "refresh": 1, + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, @@ -30397,18 +23249,29 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Networking / Pod", - "uid": "7a18067ce943a40ae25454675c19ff5c", + "title": "Kubernetes / Networking / Namespace (Pods)", + "uid": "8b7a8b326d7a6f1f04244066368c67af", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-pod-total + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-namespace-by-pod namespace: monitoring - apiVersion: v1 data: - prometheus-dashboard.json: |- + namespace-by-workload.json: |- { + "__inputs": [ + + ], + "__requires": [ + + ], "annotations": { "list": [ { @@ -30419,607 +23282,922 @@ items: "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" - }, - { - "datasource": "$datasource", - "enable": true, - "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)", - "hide": false, - "iconColor": "rgb(250, 44, 18)", - "limit": 100, - "name": "downage", - "showIn": 0, - "step": "30s", - "tagKeys": "instance", - "textFormat": "prometheus down", - "titleFormat": "Downage", - "type": "alert" - }, - { - "datasource": "$datasource", - "enable": true, - "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)", - "hide": false, - "iconColor": "#fceaca", - "limit": 100, - "name": "Reload", - "showIn": 0, - "step": "5m", - "tagKeys": "instance", - "tags": [ - - ], - "titleFormat": "Reload", - "type": "tags" } ] }, - "description": "Dashboard for monitoring of Prometheus v2.x.x", "editable": true, - "gnetId": 3681, - "graphTooltip": 1, - "id": 4, - "iteration": 1596721016726, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, "links": [ - { - "icon": "info", - "tags": [ - - ], - "targetBlank": true, - "title": "Dashboard's Github ", - "tooltip": "Github repo of this dashboard", - "type": "link", - "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard" - }, - { - "icon": "doc", - "tags": [ - ], - "targetBlank": true, - "title": "Prometheus Docs", - "tooltip": "", - "type": "link", - "url": "http://prometheus.io/docs/introduction/overview/" - } ], "panels": [ { + "collapse": false, "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 55, + "id": 2, "panels": [ ], "repeat": null, - "title": "Header instance info", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", "type": "row" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#bf1b00" - ], - "datasource": "$datasource", - "decimals": 1, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ + "aliasColors": { - ] - }, - "format": "s", - "gauge": { - "maxValue": 1000000, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, "gridPos": { - "h": 5, - "w": 4, + "h": 9, + "w": 12, "x": 0, "y": 1 }, - "id": 41, - "interval": null, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "name": "range to text", - "value": 2 + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ { - "from": "null", - "text": "N/A", - "to": "null" + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 } ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" }, - "tableColumn": "", - "targets": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ { - "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "columns": [ + { + "text": "Time", + "value": "Time" + }, + { + "text": "Value #A", + "value": "Value #A" + }, + { + "text": "Value #B", + "value": "Value #B" + }, + { + "text": "Value #C", + "value": "Value #C" + }, + { + "text": "Value #D", + "value": "Value #D" + }, + { + "text": "Value #E", + "value": "Value #E" + }, + { + "text": "Value #F", + "value": "Value #F" + }, + { + "text": "Value #G", + "value": "Value #G" + }, + { + "text": "Value #H", + "value": "Value #H" + }, + { + "text": "workload", + "value": "workload" } ], - "thresholds": "", - "title": "Uptime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "datasource": "$datasource", + "fill": 1, + "fontSize": "90%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Current Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#bf1b00" - ], - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ - } + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" }, - "overrides": [ + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ - ] - }, - "format": "short", - "gauge": { - "maxValue": 1000000, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 4, - "y": 1 - }, - "id": 42, - "interval": null, - "links": [ + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ - ], - "mappingType": 1, - "mappingTypes": [ + ], + "type": "number", + "unit": "pps" + }, { - "name": "value to text", - "value": 1 + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, { - "from": "null", - "text": "N/A", - "to": "null" + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" } ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "prometheus_tsdb_head_series{instance=\"localhost:9090\", job=\"prometheus\"}", "targets": [ { - "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", - "format": "time_series", - "instant": false, + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "500000,800000,1000000", - "title": "Total count of time series", - "type": "singlestat", - "valueFontSize": "150%", - "valueMaps": [ + "legendFormat": "", + "refId": "A", + "step": 10 + }, { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - - } + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 }, - "overrides": [ - - ] - }, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 48, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ { - "name": "value to text", - "value": 1 + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "version", - "targets": [ + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, { - "expr": "prometheus_build_info{instance=\"$instance\"}", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "table", "instant": true, "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "Version", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "legendFormat": "", + "refId": "F", + "step": 10 + }, { - "op": "=", - "text": "N/A", - "value": "null" + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], - "valueName": "first" + "timeFrom": null, + "timeShift": null, + "title": "Current Status", + "type": "table" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "decimals": 2, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "format": "ms", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, + "collapse": true, + "collapsed": true, "gridPos": { - "h": 5, - "w": 4, - "x": 15, - "y": 1 + "h": 1, + "w": 24, + "x": 0, + "y": 19 }, - "id": 49, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "id": 6, + "panels": [ { - "name": "value to text", - "value": 1 + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "{instance=\"localhost:9090\", job=\"prometheus\"}", - "targets": [ - { - "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "Actual head block length", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ workload }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], - "valueName": "current" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Bandwidth", + "titleSize": "h6", + "type": "row" }, { - "content": "", - "datasource": null, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, + "collapse": false, + "collapsed": false, "gridPos": { - "h": 5, - "w": 2, - "x": 19, - "y": 1 + "h": 1, + "w": 24, + "x": 0, + "y": 29 }, - "height": "", - "id": 50, - "links": [ + "id": 9, + "panels": [ ], - "mode": "html", - "options": { - "content": "", - "mode": "html" - }, - "pluginVersion": "7.1.0", - "title": "", - "transparent": true, - "type": "text" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth HIstory", + "titleSize": "h6", + "type": "row" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#e6522c", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": "$datasource", - "decimals": 1, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ + "aliasColors": { - ] - }, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, "gridPos": { - "h": 5, - "w": 3, - "x": 21, - "y": 1 + "h": 9, + "w": 12, + "x": 0, + "y": 38 }, - "id": 52, - "interval": null, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, + "minSpan": 12, "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "2", + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", - "intervalFactor": 2, - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 } ], - "thresholds": "10,20", - "title": "", - "transparent": true, - "type": "singlestat", - "valueFontSize": "200%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } + "thresholds": [ + ], - "valueName": "avg" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" }, - "id": 56, - "panels": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "repeat": null, - "title": "Main info", - "type": "row" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { "aliasColors": { @@ -31029,76 +24207,66 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 1, + "fill": 2, "fillGradient": 0, "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 7 + "h": 9, + "w": 12, + "x": 12, + "y": 38 }, - "hiddenSeries": false, - "id": 15, + "id": 11, "legend": { - "avg": true, + "alignAsTable": false, + "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "show": false, + "rightSide": false, + "show": true, + "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, - "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, + "span": 12, "stack": true, "steppedLine": false, "targets": [ { - "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)", + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 1, - "legendFormat": "max duration for {{slice}}", - "metric": "prometheus_local_storage_rushed_mode", + "legendFormat": "{{workload}}", "refId": "A", - "step": 900 + "step": 10 } ], "thresholds": [ ], "timeFrom": null, - "timeRegions": [ - - ], "timeShift": null, - "title": "Query elapsed time", + "title": "Transmit Bandwidth", "tooltip": { - "msResolution": false, "shared": true, "sort": 2, "value_type": "individual" @@ -31115,3799 +24283,5214 @@ items: }, "yaxes": [ { - "format": "s", - "label": "", + "format": "Bps", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": 0, "show": true }, { - "format": "short", + "format": "Bps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 12, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" }, { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "id": 15, + "panels": [ + { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 7 - }, - "hiddenSeries": false, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "created on {{ instance }}", - "metric": "prometheus_local_storage_maintain_series_duration_seconds_count", - "refId": "A", - "step": 1800 + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "removed on {{ instance }}", - "refId": "B" - } - ], - "thresholds": [ + "aliasColors": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 17, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ], - "timeShift": null, - "title": "Head series created/deleted", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{workload}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ - } + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" }, - "overrides": [ + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 7 - }, - "hiddenSeries": false, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + { + "allValue": null, + "current": { - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "exceeded_sample_limit on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "A", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "duplicate_timestamp on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "B", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "out_of_bounds on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "C", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "out_of_order on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "D", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "rule_evaluation_failure on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "G", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "tsdb_compactions_failed on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "K", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "tsdb_reloads_failures on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "L", - "step": 1800 }, - { - "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "head_series_not_found on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "N", - "step": 1800 + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" }, - { - "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "evaluator_iterations_missed on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "O", - "step": 1800 + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" }, - { - "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "evaluator_iterations_skipped on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "P", - "step": 1800 - } - ], - "thresholds": [ + "datasource": "$datasource", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ - ], - "timeShift": null, - "title": "Prometheus errors", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" + ], + "tagsQuery": "", + "type": "query", + "useTags": false }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ] + ], + "tagsQuery": "", + "type": "interval", + "useTags": false }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 14 - }, - "id": 57, - "panels": [ + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Namespace (Workload)", + "uid": "bbb2a765a623ae38130206c7d94a160f", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-namespace-by-workload + namespace: monitoring +- apiVersion: v1 + data: + node-cluster-rsrc-use.json: |- + { + "__inputs": [ - ], - "repeat": null, - "title": "Scrape & rule duration", - "type": "row" - }, - { - "aliasColors": { + ], + "__requires": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + ], + "annotations": { + "list": [ - } - }, - "overrides": [ + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "grid": { + ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 15 - }, - "hiddenSeries": false, - "id": 25, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": false, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ], - "nullPointMode": "connected", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval", - "format": "time_series", - "interval": "2m", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "metric": "", - "refId": "A", - "step": 300 - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "((\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n *\n instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}\n) != 0 )\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Scrape delay (counts with 1m scrape interval)", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 15 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Queue length", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Queue length", - "metric": "prometheus_local_storage_indexing_queue_length", - "refId": "B", - "step": 1800 - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Rule evaulation duration", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1 per CPU)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6", + "type": "row" }, { + "collapse": false, "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 58, "panels": [ + { + "aliasColors": { - ], - "repeat": null, - "title": "Requests & queries", - "type": "row" - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - } - }, - "overrides": [ + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 23 - }, - "hiddenSeries": false, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, { - "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ handler }} on {{ instance }}", - "metric": "", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + "aliasColors": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ], - "timeShift": null, - "title": "Request count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Major Page Faults)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "rds", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "rds", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6", + "type": "row" }, { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 23 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/Receive/", + "stack": "A" + }, + { + "alias": "/Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Receive", + "refId": "A" + }, + { + "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Transmit", + "refId": "B" + } + ], + "thresholds": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Utilisation (Bytes Receive/Transmit)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, { - "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{ handler }} on {{ instance }}", - "refId": "B" - } - ], - "thresholds": [ + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ Receive/", + "stack": "A" + }, + { + "alias": "/ Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Receive", + "refId": "A" + }, + { + "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Transmit", + "refId": "B" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Request duration per handler", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Saturation (Drops Receive/Transmit)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "µs", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6", + "type": "row" }, { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 23 - }, - "hiddenSeries": false, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{ handler }} in {{ instance }}", - "refId": "B" - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{device}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Request size by handler", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Allocated bytes": "#F9BA8F", - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max count collector": "#bf1b00", - "Max count harvester": "#bf1b00", - "Max to persist": "#3F6833", - "RSS": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 23 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Max.*/", - "fill": 0, - "linewidth": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current count ", - "metric": "last", - "refId": "A", - "step": 1800 - }, - { - "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Max count", - "metric": "last", - "refId": "B", - "step": 1800 - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} {{device}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Cont of concurent queries", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 59, - "panels": [ - ], "repeat": null, - "title": "Alerting", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk IO", + "titleSize": "h6", "type": "row" }, { - "aliasColors": { - "Alert queue capacity on o collector": "#bf1b00", - "Alert queue capacity on o harvester": "#bf1b00", - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 31 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*capacity.*/", - "fill": 0, - "linewidth": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Alert queue capacity ", - "metric": "prometheus_local_storage_checkpoint_last_size_bytes", - "refId": "A", - "step": 1800 - }, - { - "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Alert queue size on ", - "metric": "prometheus_local_storage_checkpoint_last_size_bytes", - "refId": "B", - "step": 1800 - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"})))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Alert queue size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk Space", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "node-exporter-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" }, - "overrides": [ + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 31 - }, - "hiddenSeries": false, - "id": 21, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" }, - "lines": true, - "linewidth": 1, - "links": [ + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "query": "label_values(node_time_seconds, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Checkpoint chunks written/s", - "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Node Exporter / USE Method / Cluster", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-node-cluster-rsrc-use + namespace: monitoring +- apiVersion: v1 + data: + node-rsrc-use.json: |- + { + "__inputs": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "__requires": [ - ], - "timeShift": null, - "title": "Count of discovered alertmanagers", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "annotations": { + "list": [ - ] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "30s", + "rows": [ { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 31 - }, - "hiddenSeries": false, - "id": 39, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "notifications_dropped on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "F", - "step": 1800 + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, { - "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "rule_evaluation_failures on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + "aliasColors": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ], - "timeShift": null, - "title": "Alerting errors", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 38 - }, - "id": 60, - "panels": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1 per CPU)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } ], "repeat": null, - "title": "Service discovery", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6", "type": "row" }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 39 - }, - "hiddenSeries": false, - "id": 43, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Consul target sync count", - "refId": "A", - "step": 240 - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "refId": "A" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Consul SD sync count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 39 - }, - "hiddenSeries": false, - "id": 44, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + "aliasColors": { - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Marathon target sync count", - "refId": "A", - "step": 240 - } - ], - "thresholds": [ + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeShift": null, - "title": "Marathon SD sync count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Major page Faults", + "refId": "A" + } + ], + "thresholds": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Major Page Faults)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "rds", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "rds", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6", + "type": "row" }, { - "aliasColors": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - } - }, - "overrides": [ + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 39 - }, - "hiddenSeries": false, - "id": 45, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/Receive/", + "stack": "A" + }, + { + "alias": "/Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Receive", + "refId": "A" + }, + { + "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmit", + "refId": "B" + } + ], + "thresholds": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Utilisation (Bytes Receive/Transmit)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, { - "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\"}[$aggregation_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Count of target synces", - "refId": "A", - "step": 240 - } - ], - "thresholds": [ + "aliasColors": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ], - "timeShift": null, - "title": "Kubernetes SD sync count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ Receive/", + "stack": "A" + }, + { + "alias": "/ Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Receive", + "refId": "A" + }, + { + "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmit", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Saturation (Drops Receive/Transmit)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6", + "type": "row" }, { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 39 - }, - "hiddenSeries": false, - "id": 46, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "exceeded_sample_limit on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "A", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sd_file_read_error on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "E", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sd_consul_rpc_failure on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "H", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sd_marathon_refresh_failure on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "I", - "step": 1800 + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, { - "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sd_openstack_refresh_failure on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "J", - "step": 1800 - } - ], - "thresholds": [ + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "timeShift": null, - "title": "Service discovery errors", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk IO", + "titleSize": "h6", + "type": "row" }, { + "collapse": false, "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 46 - }, - "id": 61, "panels": [ + { + "aliasColors": { - ], - "repeat": null, - "title": "TSDB stats", - "type": "row" - }, - { - "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - } - }, - "overrides": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 47 - }, - "hiddenSeries": false, - "id": 36, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(1 -\n (\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n /\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n ) != 0\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "A" + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Reloaded block from disk", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk Space", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "node-exporter-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ - ] + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + { + "allValue": null, + "current": { + "text": "", + "value": "" }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ - } - }, - "overrides": [ + ], + "query": "label_values(node_time_seconds, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 47 - }, - "hiddenSeries": false, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false + ], + "tagsQuery": "", + "type": "query", + "useTags": false }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Loaded data blocks", - "metric": "prometheus_local_storage_memory_chunkdescs", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + { + "allValue": null, + "current": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [ - ], - "timeShift": null, - "title": "Loaded data blocks", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "query": "label_values(node_exporter_build_info{job=\"node-exporter\", cluster=\"$cluster\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null + ], + "tagsQuery": "", + "type": "query", + "useTags": false } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Node Exporter / USE Method / Node", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-node-rsrc-use + namespace: monitoring +- apiVersion: v1 + data: + nodes-darwin.json: |- + { + "__inputs": [ - } - }, - "overrides": [ + ], + "__requires": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 47 - }, - "hiddenSeries": false, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "annotations": { + "list": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ + ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ { - "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Time series count", - "metric": "prometheus_local_storage_memory_series", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + "aliasColors": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "timeShift": null, - "title": "Time series total count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", + "format": "time_series", + "intervalFactor": 5, + "legendFormat": "{{cpu}}", + "refId": "A" + } + ], + "thresholds": [ - } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + } + ] }, - "overrides": [ + { + "aliasColors": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 47 - }, - "hiddenSeries": false, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "samples/s {{instance}}", - "metric": "prometheus_local_storage_ingested_samples_total", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "1m load average", + "refId": "A" + }, + { + "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5m load average", + "refId": "B" + }, + { + "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "15m load average", + "refId": "C" + }, + { + "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "logical cores", + "refId": "D" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Samples Appended per second", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6", + "type": "row" }, { + "collapse": false, "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 54 - }, - "id": 62, "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Physical Memory", + "refId": "A" + }, + { + "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Memory Used", + "refId": "B" + }, + { + "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "App Memory", + "refId": "C" + }, + { + "expr": "node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Wired Memory", + "refId": "D" + }, + { + "expr": "node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Compressed", + "refId": "E" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + }, + "id": 5, + "span": 3, + "targets": [ + { + "expr": "(\n (\n avg(node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"}) -\n avg(node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"})\n ) /\n avg(node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"})\n)\n*\n100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "" + } + ], + "title": "Memory Usage", + "transparent": false, + "type": "gauge" + } ], "repeat": null, - "title": "Head block stats", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6", "type": "row" }, { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833", - "To persist": "#9AC48A" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 55 - }, - "hiddenSeries": false, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Max.*/", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ + "collapse": false, + "collapsed": false, + "panels": [ { - "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Head chunk count", - "metric": "prometheus_local_storage_memory_chunks", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + "aliasColors": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { - ], - "timeShift": null, - "title": "Head chunks count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ read| written/", + "yaxis": 1 + }, + { + "alias": "/ io time/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} read", + "refId": "A" + }, + { + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} written", + "refId": "B" + }, + { + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} io time", + "refId": "C" + } + ], + "thresholds": [ - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - } + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] }, - "overrides": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 55 - }, - "hiddenSeries": false, - "id": 35, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Mounted on" + }, + "properties": [ + { + "id": "custom.width", + "value": 260 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Size" + }, + "properties": [ + { + "id": "custom.width", + "value": 93 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used" + }, + "properties": [ + { + "id": "custom.width", + "value": 72 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Available" + }, + "properties": [ + { + "id": "custom.width", + "value": 88 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used, %" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.displayMode", + "value": "gradient-gauge" + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + } + ] + }, + "gridPos": { - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "id": 7, + "span": 6, + "targets": [ + { + "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "" + }, + { + "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "" + } + ], + "title": "Disk Space Usage", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value #A": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #B": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "mountpoint": { + "aggregations": [ + + ], + "operation": "groupby" + } + } + } + }, + { + "id": "merge", + "options": { - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "thresholds": [ + } + }, + { + "id": "calculateField", + "options": { + "alias": "Used", + "binary": { + "left": "Value #A (lastNotNull)", + "operator": "-", + "reducer": "sum", + "right": "Value #B (lastNotNull)" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Used, %", + "binary": { + "left": "Used", + "operator": "/", + "reducer": "sum", + "right": "Value #A (lastNotNull)" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "indexByName": { - ], - "timeShift": null, - "title": "Length of head block", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "renameByName": { + "Value #A (lastNotNull)": "Size", + "Value #B (lastNotNull)": "Available", + "mountpoint": "Mounted on" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { - ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + }, + "sort": [ + { + "field": "Mounted on" + } + ] + } + } + ], + "transparent": false, + "type": "table" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6", + "type": "row" }, { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network received (bits/s)", + "fill": 0, + "fillGradient": 0, + "gridPos": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 55 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "created on {{ instance }}", - "refId": "B" + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Received", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "deleted on {{ instance }}", - "refId": "C" - } - ], - "thresholds": [ + "aliasColors": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network transmitted (bits/s)", + "fill": 0, + "fillGradient": 0, + "gridPos": { - ], - "timeShift": null, - "title": "Head Chunks Created/Deleted per second", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 62 - }, - "id": 63, - "panels": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Transmitted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } ], "repeat": null, - "title": "Data maintenance", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6", "type": "row" - }, - { - "aliasColors": { + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "node-exporter-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { + { + "allValue": null, + "current": { - } }, - "overrides": [ + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 63 - }, - "hiddenSeries": false, - "id": 33, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "query": "label_values(node_uname_info{job=\"node-exporter\", sysname=\"Darwin\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ - ], - "nullPointMode": "connected", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Node Exporter / MacOS", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-nodes-darwin + namespace: monitoring +- apiVersion: v1 + data: + nodes.json: |- + { + "__inputs": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "B" - } - ], - "thresholds": [ + ], + "__requires": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "annotations": { + "list": [ - ], - "timeShift": null, - "title": "Compaction duration", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, + ], + "refresh": "30s", + "rows": [ { - "aliasColors": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - } + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", + "format": "time_series", + "intervalFactor": 5, + "legendFormat": "{{cpu}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + } + ] }, - "overrides": [ + { + "aliasColors": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 63 - }, - "hiddenSeries": false, - "id": 34, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { - ], - "nullPointMode": "connected", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ quantile }} on {{ instance }}", - "refId": "A" - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "1m load average", + "refId": "A" + }, + { + "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5m load average", + "refId": "B" + }, + { + "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "15m load average", + "refId": "C" + }, + { + "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "logical cores", + "refId": "D" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "Go Garbage collection duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6", + "type": "row" }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 63 - }, - "hiddenSeries": false, - "id": 37, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "nullPointMode": "connected", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ quantile }} on {{ instance }}", - "refId": "A" - } - ], - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory used", + "refId": "A" + }, + { + "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory buffers", + "refId": "B" + }, + { + "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory cached", + "refId": "C" + }, + { + "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory free", + "refId": "D" + } + ], + "thresholds": [ - ], - "timeShift": null, - "title": "WAL truncate duration seconds", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + + }, + "id": 5, + "span": 3, + "targets": [ + { + "expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}) /\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "" + } + ], + "title": "Memory Usage", + "transparent": false, + "type": "gauge" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6", + "type": "row" }, { - "aliasColors": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { - } - }, - "overrides": [ + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 63 - }, - "hiddenSeries": false, - "id": 38, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ read| written/", + "yaxis": 1 + }, + { + "alias": "/ io time/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} read", + "refId": "A" + }, + { + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} written", + "refId": "B" + }, + { + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} io time", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { - ], - "nullPointMode": "connected", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Mounted on" + }, + "properties": [ + { + "id": "custom.width", + "value": 260 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Size" + }, + "properties": [ + { + "id": "custom.width", + "value": 93 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used" + }, + "properties": [ + { + "id": "custom.width", + "value": 72 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Available" + }, + "properties": [ + { + "id": "custom.width", + "value": 88 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used, %" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.displayMode", + "value": "gradient-gauge" + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + } + ] + }, + "gridPos": { - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ quantile }} {{ instance }}", - "refId": "A" - } - ], - "thresholds": [ + }, + "id": 7, + "span": 6, + "targets": [ + { + "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "" + }, + { + "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "" + } + ], + "title": "Disk Space Usage", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value #A": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #B": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "mountpoint": { + "aggregations": [ + + ], + "operation": "groupby" + } + } + } + }, + { + "id": "merge", + "options": { - ], - "timeFrom": null, - "timeRegions": [ + } + }, + { + "id": "calculateField", + "options": { + "alias": "Used", + "binary": { + "left": "Value #A (lastNotNull)", + "operator": "-", + "reducer": "sum", + "right": "Value #B (lastNotNull)" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Used, %", + "binary": { + "left": "Used", + "operator": "/", + "reducer": "sum", + "right": "Value #A (lastNotNull)" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { - ], - "timeShift": null, - "title": "WAL fsync duration seconds", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + }, + "indexByName": { - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + }, + "renameByName": { + "Value #A (lastNotNull)": "Size", + "Value #B (lastNotNull)": "Available", + "mountpoint": "Mounted on" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { + + }, + "sort": [ + { + "field": "Mounted on" + } + ] + } + } + ], + "transparent": false, + "type": "table" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6", + "type": "row" }, { + "collapse": false, "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 70 - }, - "id": 64, "panels": [ + { + "aliasColors": { - ], - "repeat": null, - "title": "RAM&CPU", - "type": "row" - }, - { - "aliasColors": { - "Allocated bytes": "#7EB26D", - "Allocated bytes - 1m max": "#BF1B00", - "Allocated bytes - 1m min": "#BF1B00", - "Allocated bytes - 5m max": "#BF1B00", - "Allocated bytes - 5m min": "#BF1B00", - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833", - "RSS": "#447EBC" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "decimals": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network received (bits/s)", + "fill": 0, + "fillGradient": 0, + "gridPos": { - } - }, - "overrides": [ + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 71 - }, - "hiddenSeries": false, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/-/", - "fill": 0 - }, - { - "alias": "collector heap size", - "color": "#E0752D", - "fill": 0, - "linewidth": 2 + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Received", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "alias": "collector kubernetes memory limit", - "color": "#BF1B00", + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network transmitted (bits/s)", "fill": 0, - "linewidth": 3 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Total resident memory - {{instance}}", - "metric": "process_resident_memory_bytes", - "refId": "B", - "step": 1800 - }, - { - "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Total llocated bytes - {{instance}}", - "metric": "go_memstats_alloc_bytes", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + "fillGradient": 0, + "gridPos": { - ], - "timeFrom": null, - "timeRegions": [ + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "timeShift": null, - "title": "Memory", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Transmitted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Allocated bytes": "#F9BA8F", - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833", - "RSS": "#890F02" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "node-exporter-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + { + "allValue": null, + "current": { - } }, - "overrides": [ + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [ - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 71 - }, - "hiddenSeries": false, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + ], + "query": "label_values(node_uname_info{job=\"node-exporter\", sysname!=\"Darwin\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Node Exporter / Nodes", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-nodes + namespace: monitoring +- apiVersion: v1 + data: + persistentvolumesusage.json: |- + { + "__inputs": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Allocated Bytes/s", - "metric": "go_memstats_alloc_bytes", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ + ], + "__requires": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "annotations": { + "list": [ - ], - "timeShift": null, - "title": "Allocations per second", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, + ], + "refresh": "10s", + "rows": [ { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "decimals": 2, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { - } - }, - "overrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 71 - }, - "hiddenSeries": false, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "CPU/s", - "metric": "prometheus_local_storage_ingested_samples_total", - "refId": "B", - "step": 1800 - } - ], - "thresholds": [ + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used Space", + "refId": "A" + }, + { + "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free Space", + "refId": "B" + } + ], + "thresholds": [ - ], - "timeFrom": null, - "timeRegions": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "timeShift": null, - "title": "CPU per second", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - "avg" - ] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume Space Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" }, { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 78 - }, - "id": 68, + "collapse": false, + "collapsed": false, "panels": [ { "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" + }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 79 + }, - "hiddenSeries": false, - "id": 47, + "id": 4, + "interval": "1m", "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -34916,187 +29499,212 @@ items: ], "nullPointMode": "null", "percentage": false, - "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used inodes", + "refId": "A" + }, + { + "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": " Free inodes", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume inodes Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", "targets": [ { - "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", "format": "time_series", - "hide": false, - "interval": "", "intervalFactor": 2, - "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "M", - "step": 1800 + "legendFormat": "", + "refId": "A" } ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Net errors", + "thresholds": "80, 90", + "title": "Volume inodes Usage", "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] + "shared": false }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "op": "=", + "text": "N/A", + "value": "null" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "valueName": "current" } ], "repeat": null, - "title": "Contrac errors", + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", "type": "row" } ], - "refresh": "5m", - "schemaVersion": 26, + "schemaVersion": 14, "style": "dark", "tags": [ - "custom" + "kubernetes-mixin" ], "templating": { "list": [ { - "auto": true, - "auto_count": 30, - "auto_min": "2m", "current": { - "selected": false, - "text": "auto", - "value": "$__auto_interval_aggregation_interval" + "text": "default", + "value": "default" }, "hide": 0, - "label": "aggregation intarval", - "name": "aggregation_interval", + "label": "Data Source", + "name": "datasource", "options": [ - { - "selected": true, - "text": "auto", - "value": "$__auto_interval_aggregation_interval" - }, - { - "selected": false, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "10m", - "value": "10m" - }, - { - "selected": false, - "text": "30m", - "value": "30m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - }, - { - "selected": false, - "text": "6h", - "value": "6h" - }, - { - "selected": false, - "text": "12h", - "value": "12h" - }, - { - "selected": false, - "text": "1d", - "value": "1d" - }, - { - "selected": false, - "text": "7d", - "value": "7d" - }, - { - "selected": false, - "text": "14d", - "value": "14d" - }, - { - "selected": false, - "text": "30d", - "value": "30d" - } + ], - "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", - "refresh": 2, - "skipUrlSync": false, - "type": "interval" + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" }, { "allValue": null, "current": { - "selected": false, - "text": "localhost:9090", - "value": "localhost:9090" + }, "datasource": "$datasource", - "definition": "", - "hide": 0, + "hide": 2, "includeAll": false, - "label": "Instance", + "label": "cluster", "multi": false, - "name": "instance", + "name": "cluster", "options": [ ], - "query": "label_values(prometheus_build_info, instance)", + "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)", "refresh": 2, "regex": "", - "skipUrlSync": false, - "sort": 2, + "sort": 1, "tagValuesQuery": "", "tags": [ @@ -35106,67 +29714,61 @@ items: "useTags": false }, { + "allValue": null, "current": { - "text": "60", - "value": "60" - }, - "hide": 0, - "label": "Scrape interval seconds", - "name": "scrape_interval", - "options": [ - { - "text": "60", - "value": "60" - } - ], - "query": "60", - "skipUrlSync": false, - "type": "constant" - }, - { - "current": { - "selected": false, - "text": "Prometheus", - "value": "Prometheus" + }, + "datasource": "$datasource", "hide": 0, "includeAll": false, - "label": "Prometheus datasource", + "label": "Namespace", "multi": false, - "name": "datasource", + "name": "namespace", "options": [ ], - "query": "prometheus", - "refresh": 1, + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)", + "refresh": 2, "regex": "", - "skipUrlSync": false, - "type": "datasource" + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false }, { + "allValue": null, "current": { - "selected": false, - "text": "No data sources found", - "value": "" + }, + "datasource": "$datasource", "hide": 0, "includeAll": false, - "label": "InfluxDB datasource", + "label": "PersistentVolumeClaim", "multi": false, - "name": "influx_datasource", + "name": "volume", "options": [ ], - "query": "influxdb", - "refresh": 1, + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, "regex": "", - "skipUrlSync": false, - "type": "datasource" + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false } ] }, "time": { - "from": "now-3h", + "from": "now-7d", "to": "now" }, "timepicker": { @@ -35194,18 +29796,23 @@ items: "30d" ] }, - "timezone": "browser", - "title": "Prometheus Monitoring", - "uid": "XmsJC9mRz", - "version": 2 + "timezone": "UTC", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", + "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-prometheus-dashboard + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-persistentvolumesusage namespace: monitoring - apiVersion: v1 data: - prometheus-remote-write.json: |- + pod-total.json: |- { "__inputs": [ @@ -35215,7 +29822,15 @@ items: ], "annotations": { "list": [ - + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } ] }, "editable": true, @@ -35226,695 +29841,515 @@ items: "links": [ ], - "refresh": "", - "rows": [ + "panels": [ { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 2, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Highest Timestamp In vs. Highest Timestamp Sent", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "override": { - ] }, - "yaxes": [ + "thresholds": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-green", + "index": 0, + "value": null }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 3, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ ], - "timeFrom": null, - "timeShift": null, - "title": "Rate[5m]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "override": { - ] }, - "yaxes": [ + "thresholds": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-green", + "index": 0, + "value": null }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 } - ] + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" } ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Timestamps", - "titleSize": "h6", - "type": "row" + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" }, { "collapse": false, "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate, in vs. succeeded or dropped [5m]", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Samples", + "title": "Bandwidth", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 5, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "minSpan": 6, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Current Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Max Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + "aliasColors": { - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Min Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { - }, - "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" - } - ], - "thresholds": [ + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ - ], - "timeFrom": null, - "timeShift": null, - "title": "Desired Shards", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Shards", - "titleSize": "h6", - "type": "row" + ] }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 8, "panels": [ { "aliasColors": { @@ -35924,15 +30359,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 2, + "fillGradient": 0, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 21 }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -35942,11 +30383,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -35956,16 +30399,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}", + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -35973,10 +30417,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Shard Capacity", + "title": "Rate of Received Packets", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -35991,19 +30435,19 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -36016,15 +30460,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 2, + "fillGradient": 0, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 21 }, "id": 10, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -36034,11 +30484,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -36048,16 +30500,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}", + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -36065,10 +30518,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Pending Samples", + "title": "Rate of Transmitted Packets", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -36083,19 +30536,19 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] @@ -36105,13 +30558,20 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Shard Details", + "title": "Packets", "titleSize": "h6", "type": "row" }, { - "collapse": false, - "collapsed": false, + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, "panels": [ { "aliasColors": { @@ -36121,107 +30581,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 11, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "TSDB Current Segment", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, + "fill": 2, + "fillGradient": 0, "gridPos": { - + "h": 10, + "w": 12, + "x": 0, + "y": 32 }, "id": 12, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -36231,11 +30605,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -36245,16 +30621,17 @@ items: ], "spaceLength": 10, - "span": 6, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{consumer}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -36262,10 +30639,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Remote Write Current Segment", + "title": "Rate of Received Packets Dropped", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -36280,36 +30657,23 @@ items: }, "yaxes": [ { - "format": "none", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Segments", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -36318,15 +30682,21 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 2, + "fillGradient": 0, "gridPos": { - + "h": 10, + "w": 12, + "x": 12, + "y": 32 }, "id": 13, "legend": { "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, @@ -36336,11 +30706,13 @@ items: "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, @@ -36350,16 +30722,17 @@ items: ], "spaceLength": 10, - "span": 3, - "stack": false, + "span": 12, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", - "refId": "A" + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -36367,10 +30740,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Dropped Samples", + "title": "Rate of Transmitted Packets Dropped", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -36385,23 +30758,304 @@ items: }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true } ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Pod", + "uid": "7a18067ce943a40ae25454675c19ff5c", + "version": 0 + } + kind: ConfigMap + metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-pod-total + namespace: monitoring +- apiVersion: v1 + data: + prometheus-remote-write.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "60s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -36411,10 +31065,11 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 14, + "id": 2, "legend": { "alignAsTable": false, "avg": false, @@ -36442,12 +31097,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", @@ -36459,7 +31114,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Failed Samples", + "title": "Highest Timestamp In vs. Highest Timestamp Sent", "tooltip": { "shared": true, "sort": 0, @@ -36503,10 +31158,11 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 15, + "id": 3, "legend": { "alignAsTable": false, "avg": false, @@ -36534,12 +31190,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n, 0)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", @@ -36551,7 +31207,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Retried Samples", + "title": "Rate[5m]", "tooltip": { "shared": true, "sort": 0, @@ -36585,7 +31241,20 @@ items: "show": true } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Timestamps", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { "aliasColors": { @@ -36595,10 +31264,11 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 16, + "id": 4, "legend": { "alignAsTable": false, "avg": false, @@ -36626,12 +31296,12 @@ items: ], "spaceLength": 10, - "span": 3, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", @@ -36643,7 +31313,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Enqueue Retries", + "title": "Rate, in vs. succeeded or dropped [5m]", "tooltip": { "shared": true, "sort": 0, @@ -36683,186 +31353,13 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Misc. Rates", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(prometheus_build_info, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": { - "selected": true, - "text": "All", - "value": "$__all" - }, - "value": { - "selected": true, - "text": "All", - "value": "$__all" - } - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "cluster", - "options": [ - - ], - "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "url", - "options": [ - - ], - "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Prometheus Remote Write", - "version": 0 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-prometheus-remote-write - namespace: monitoring -- apiVersion: v1 - data: - prometheus.json: |- - { - "annotations": { - "list": [ - - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - - ], - "refresh": "10s", - "rows": [ + "title": "Samples", + "titleSize": "h6", + "type": "row" + }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -36873,13 +31370,20 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -36888,11 +31392,13 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "minSpan": 6, + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], @@ -36900,137 +31406,106 @@ items: "span": 12, "stack": false, "steppedLine": false, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "Count", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #A", - "thresholds": [ - - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Uptime", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "Value #B", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - }, + "targets": [ { - "alias": "Instance", - "colorMode": null, - "colors": [ + "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" + } + ], + "thresholds": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "instance", - "thresholds": [ + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ - ], - "type": "number", - "unit": "short" - }, + ] + }, + "yaxes": [ { - "alias": "Job", - "colorMode": null, - "colors": [ - - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "job", - "thresholds": [ - - ], - "type": "number", - "unit": "short" + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "alias": "Version", - "colorMode": null, - "colors": [ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "link": false, - "linkTooltip": "Drill down", - "linkUrl": "", - "pattern": "version", - "thresholds": [ + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ - ], - "type": "string", - "unit": "short" - } ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "count by (job, instance, version) (prometheus_build_info{job=~\"$job\", instance=~\"$instance\"})", - "format": "table", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})", - "format": "table", - "instant": true, + "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -37038,14 +31513,13 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Prometheus Stats", + "title": "Max Shards", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "transform": "table", - "type": "table", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -37061,7 +31535,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37070,22 +31544,10 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Prometheus Stats", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -37095,13 +31557,20 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 2, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -37110,26 +31579,26 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3", + "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{scrape_job}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -37137,7 +31606,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Target Sync", + "title": "Min Shards", "tooltip": { "shared": true, "sort": 0, @@ -37155,11 +31624,11 @@ items: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37168,7 +31637,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -37180,42 +31649,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 3, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})", + "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "Targets", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -37223,7 +31699,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Targets", + "title": "Desired Shards", "tooltip": { "shared": true, "sort": 0, @@ -37245,7 +31721,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37254,7 +31730,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -37263,12 +31739,13 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Discovery", - "titleSize": "h6" + "title": "Shards", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -37279,13 +31756,20 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 4, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -37294,26 +31778,26 @@ items: "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3", + "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{interval}} configured", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -37321,7 +31805,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Average Scrape Interval Duration", + "title": "Shard Capacity", "tooltip": { "shared": true, "sort": 0, @@ -37339,11 +31823,11 @@ items: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37352,7 +31836,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -37364,66 +31848,155 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 5, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))", + "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "exceeded sample limit: {{job}}", - "legendLink": null, - "step": 10 - }, + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pending Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "duplicate timestamp: {{job}}", - "legendLink": null, - "step": 10 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "out of bounds: {{job}}", - "legendLink": null, - "step": 10 - }, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shard Details", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ { - "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))", + "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "out of order: {{job}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -37431,7 +32004,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Scrape failures", + "title": "TSDB Current Segment", "tooltip": { "shared": true, "sort": 0, @@ -37449,11 +32022,11 @@ items: }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37462,7 +32035,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -37474,42 +32047,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 6, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 12, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])", + "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{instance}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{consumer}}", + "refId": "A" } ], "thresholds": [ @@ -37517,7 +32097,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Appended Samples", + "title": "Remote Write Current Segment", "tooltip": { "shared": true, "sort": 0, @@ -37535,11 +32115,11 @@ items: }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37548,7 +32128,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -37557,12 +32137,13 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Retrieval", - "titleSize": "h6" + "title": "Segments", + "titleSize": "h6", + "type": "row" }, { "collapse": false, - "height": "250px", + "collapsed": false, "panels": [ { "aliasColors": { @@ -37572,42 +32153,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 7, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 13, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}", + "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{instance}} head series", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -37615,7 +32203,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Head Series", + "title": "Dropped Samples", "tooltip": { "shared": true, "sort": 0, @@ -37637,7 +32225,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37646,7 +32234,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -37658,42 +32246,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 8, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 14, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}", + "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{instance}} head chunks", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -37701,7 +32296,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Head Chunks", + "title": "Failed Samples", "tooltip": { "shared": true, "sort": 0, @@ -37723,7 +32318,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37732,22 +32327,10 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Storage", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { "aliasColors": { @@ -37756,42 +32339,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 9, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 15, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])", + "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{job}} {{instance}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -37799,7 +32389,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Query Rate", + "title": "Retried Samples", "tooltip": { "shared": true, "sort": 0, @@ -37821,7 +32411,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37830,7 +32420,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, @@ -37842,42 +32432,49 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 10, - "id": 10, + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 16, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, + "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 0, + "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", + "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, - "stack": true, + "span": 3, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3", + "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{slice}}", - "legendLink": null, - "step": 10 + "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}", + "refId": "A" } ], "thresholds": [ @@ -37885,7 +32482,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Stage Duration", + "title": "Enqueue Retries", "tooltip": { "shared": true, "sort": 0, @@ -37903,11 +32500,11 @@ items: }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -37916,7 +32513,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] } @@ -37925,22 +32522,19 @@ items: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query", - "titleSize": "h6" + "title": "Misc. Rates", + "titleSize": "h6", + "type": "row" } ], "schemaVersion": 14, "style": "dark", "tags": [ - + "prometheus-mixin" ], "templating": { "list": [ { - "current": { - "text": "default", - "value": "default" - }, "hide": 0, "label": null, "name": "datasource", @@ -37955,23 +32549,30 @@ items: { "allValue": null, "current": { - "selected": true, - "text": "All", - "value": "$__all" + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } }, "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": "job", - "multi": true, - "name": "job", + "label": null, + "multi": false, + "name": "cluster", "options": [ ], - "query": "label_values(prometheus_build_info, job)", - "refresh": 1, + "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)", + "refresh": 2, "regex": "", - "sort": 2, + "sort": 0, "tagValuesQuery": "", "tags": [ @@ -37983,23 +32584,56 @@ items: { "allValue": null, "current": { - "selected": true, - "text": "All", - "value": "$__all" + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } }, "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": "instance", - "multi": true, + "label": null, + "multi": false, "name": "instance", "options": [ ], - "query": "label_values(prometheus_build_info, instance)", - "refresh": 1, + "query": "label_values(prometheus_build_info{cluster=~\"$cluster\"}, instance)", + "refresh": 2, "regex": "", - "sort": 2, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "url", + "options": [ + + ], + "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)", + "refresh": 2, + "regex": "", + "sort": 0, "tagValuesQuery": "", "tags": [ @@ -38011,7 +32645,7 @@ items: ] }, "time": { - "from": "now-1h", + "from": "now-6h", "to": "now" }, "timepicker": { @@ -38039,128 +32673,41 @@ items: "30d" ] }, - "timezone": "utc", - "title": "Prometheus", - "uid": "", + "timezone": "browser", + "title": "Prometheus / Remote Write", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-prometheus + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-prometheus-remote-write namespace: monitoring - apiVersion: v1 data: - proxy.json: |- + prometheus.json: |- { - "__inputs": [ - - ], - "__requires": [ - - ], "annotations": { "list": [ ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": false, - "id": null, "links": [ ], - "refresh": "10s", + "refresh": "60s", "rows": [ { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 2, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(up{job=\"kube-proxy\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Up", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "min" - }, { "aliasColors": { @@ -38170,19 +32717,13 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, - "id": 3, + "id": 1, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, @@ -38191,26 +32732,154 @@ items: "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 5, + "span": 12, "stack": false, "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Count", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Uptime", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "s" + }, + { + "alias": "Instance", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "instance", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Job", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "job", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "version", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], "targets": [ { - "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", - "format": "time_series", + "expr": "count by (job, instance, version) (prometheus_build_info{job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, "intervalFactor": 2, - "legendFormat": "rate", - "refId": "A" + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 } ], "thresholds": [ @@ -38218,13 +32887,14 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rules Sync Rate", + "title": "Prometheus Stats", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, - "type": "graph", + "transform": "table", + "type": "table", "xaxis": { "buckets": null, "mode": "time", @@ -38236,7 +32906,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -38244,15 +32914,27 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, - "max": null, - "min": 0, - "show": true + "max": null, + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -38262,47 +32944,41 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, - "id": 4, + "id": 2, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 5, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{scrape_job}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -38310,10 +32986,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Rule Sync Latency 99th Quantile", + "title": "Target Sync", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -38328,7 +33004,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -38336,28 +33012,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -38366,48 +33029,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 5, + "fill": 10, + "id": 3, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", + "expr": "sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "intervalFactor": 2, - "legendFormat": "rate", - "refId": "A" + "legendFormat": "Targets", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -38415,10 +33072,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Programming Rate", + "title": "Targets", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -38433,7 +33090,7 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -38441,15 +33098,27 @@ items: "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Discovery", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -38459,47 +33128,41 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - - }, - "id": 6, + "id": 4, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{interval}} configured", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -38507,10 +33170,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Network Programming Latency 99th Quantile", + "title": "Average Scrape Interval Duration", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -38525,7 +33188,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -38533,28 +33196,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -38563,69 +33213,74 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 7, + "fill": 10, + "id": 5, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "span": 4, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" + "legendFormat": "exceeded body size limit: {{job}}", + "legendLink": null, + "step": 10 }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" + "legendFormat": "exceeded sample limit: {{job}}", + "legendLink": null, + "step": 10 }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" + "legendFormat": "duplicate timestamp: {{job}}", + "legendLink": null, + "step": 10 }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "5xx", - "refId": "D" + "legendFormat": "out of bounds: {{job}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "out of order: {{job}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -38633,10 +33288,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Kube API Request Rate", + "title": "Scrape failures", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -38651,20 +33306,20 @@ items: }, "yaxes": [ { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "ops", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] }, @@ -38676,48 +33331,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 8, + "fill": 10, + "id": 6, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 8, - "stack": false, + "span": 4, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", + "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", - "refId": "A" + "legendFormat": "{{job}} {{instance}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -38725,10 +33374,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Post Request Latency 99th Quantile", + "title": "Appended Samples", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -38743,7 +33392,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -38751,12 +33400,12 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] } @@ -38764,14 +33413,13 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Retrieval", + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "aliasColors": { @@ -38781,48 +33429,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 9, + "fill": 10, + "id": 7, "legend": { - "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 12, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", + "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{verb}} {{url}}", - "refId": "A" + "legendFormat": "{{job}} {{instance}} head series", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -38830,10 +33472,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Get Request Latency 99th Quantile", + "title": "Head Series", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -38848,7 +33490,7 @@ items: }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -38856,28 +33498,15 @@ items: "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "panels": [ + }, { "aliasColors": { @@ -38886,48 +33515,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 10, + "fill": 10, + "id": 8, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{job=\"kube-proxy\",instance=~\"$instance\"}", + "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{job}} {{instance}} head chunks", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -38935,10 +33558,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory", + "title": "Head Chunks", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -38953,23 +33576,35 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { @@ -38978,48 +33613,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 11, + "fill": 10, + "id": 9, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"kube-proxy\",instance=~\"$instance\"}[5m])", + "expr": "rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{job}} {{instance}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -39027,10 +33656,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "CPU usage", + "title": "Query Rate", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -39057,8 +33686,8 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, - "show": true + "min": null, + "show": false } ] }, @@ -39070,48 +33699,42 @@ items: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 12, + "fill": 10, + "id": 10, "legend": { - "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, - "stack": false, + "span": 6, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "go_goroutines{job=\"kube-proxy\",instance=~\"$instance\"}", + "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" + "legendFormat": "{{slice}}", + "legendLink": null, + "step": 10 } ], "thresholds": [ @@ -39119,10 +33742,10 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Goroutines", + "title": "Stage Duration", "tooltip": { - "shared": false, - "sort": 0, + "shared": true, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -39137,11 +33760,11 @@ items: }, "yaxes": [ { - "format": "short", + "format": "ms", "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -39150,7 +33773,7 @@ items: "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ] } @@ -39158,16 +33781,15 @@ items: "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" + "showTitle": true, + "title": "Query", + "titleSize": "h6" } ], "schemaVersion": 14, "style": "dark", "tags": [ - "kubernetes-mixin" + "prometheus-mixin" ], "templating": { "list": [ @@ -39177,7 +33799,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -39188,23 +33810,53 @@ items: "type": "datasource" }, { - "allValue": null, + "allValue": ".+", "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ + + ], + "query": "label_values(prometheus_build_info{job=\"prometheus-k8s\",namespace=\"monitoring\"}, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" }, "datasource": "$datasource", "hide": 0, "includeAll": true, - "label": null, - "multi": false, + "label": "instance", + "multi": true, "name": "instance", "options": [ ], - "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\"}, instance)", - "refresh": 2, + "query": "label_values(prometheus_build_info{job=~\"$job\"}, instance)", + "refresh": 1, "regex": "", - "sort": 1, + "sort": 2, "tagValuesQuery": "", "tags": [ @@ -39244,18 +33896,23 @@ items: "30d" ] }, - "timezone": "UTC", - "title": "Kubernetes / Proxy", - "uid": "632e265de029684c40b21cb76bca4f94", + "timezone": "utc", + "title": "Prometheus / Overview", + "uid": "", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-proxy + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-prometheus namespace: monitoring - apiVersion: v1 data: - scheduler.json: |- + proxy.json: |- { "__inputs": [ @@ -39304,7 +33961,11 @@ items: }, "id": 2, - "interval": null, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, "links": [ ], @@ -39343,7 +34004,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(up{job=\"kube-scheduler\"})", + "expr": "sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -39375,21 +34036,23 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 3, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -39411,32 +34074,11 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} e2e", + "legendFormat": "rate", "refId": "A" - }, - { - "expr": "sum(rate(scheduler_binding_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} binding", - "refId": "B" - }, - { - "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} scheduling algorithm", - "refId": "C" - }, - { - "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} volume", - "refId": "D" } ], "thresholds": [ @@ -39444,7 +34086,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Scheduling Rate", + "title": "Rules Sync Rate", "tooltip": { "shared": false, "sort": 0, @@ -39488,10 +34130,12 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 4, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -39524,32 +34168,212 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} e2e", + "legendFormat": "{{instance}}", "refId": "A" - }, + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rule Sync Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} binding", - "refId": "B" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} scheduling algorithm", - "refId": "C" + "legendFormat": "rate", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Programming Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} volume", - "refId": "D" + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -39557,7 +34381,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Scheduling latency 99th Quantile", + "title": "Network Programming Latency 99th Quantile", "tooltip": { "shared": false, "sort": 0, @@ -39614,17 +34438,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 5, + "id": 7, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -39650,28 +34476,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { - "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", @@ -39705,7 +34531,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true }, { @@ -39713,7 +34539,7 @@ items: "label": null, "logBase": 1, "max": null, - "min": 0, + "min": null, "show": true } ] @@ -39727,17 +34553,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 6, + "id": 8, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -39763,7 +34591,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", @@ -39832,10 +34660,12 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 7, + "id": 9, + "interval": "1m", "legend": { "alignAsTable": true, "avg": false, @@ -39868,7 +34698,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{verb}} {{url}}", @@ -39937,17 +34767,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 8, + "id": 10, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -39973,7 +34805,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "process_resident_memory_bytes{job=\"kube-scheduler\", instance=~\"$instance\"}", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -40029,17 +34861,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 9, + "id": 11, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -40065,7 +34899,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -40095,7 +34929,7 @@ items: }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -40103,7 +34937,7 @@ items: "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -40121,17 +34955,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 10, + "id": 12, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -40157,7 +34993,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "go_goroutines{job=\"kube-scheduler\",instance=~\"$instance\"}", + "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -40227,7 +35063,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -40241,6 +35077,32 @@ items: "allValue": null, "current": { + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kube-proxy\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + }, "datasource": "$datasource", "hide": 0, @@ -40251,7 +35113,7 @@ items: "options": [ ], - "query": "label_values(process_cpu_seconds_total{job=\"kube-scheduler\"}, instance)", + "query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -40295,17 +35157,22 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / Scheduler", - "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", + "title": "Kubernetes / Proxy", + "uid": "632e265de029684c40b21cb76bca4f94", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-scheduler + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-proxy namespace: monitoring - apiVersion: v1 data: - statefulset.json: |- + scheduler.json: |- { "__inputs": [ @@ -40326,7 +35193,7 @@ items: "links": [ ], - "refresh": "", + "refresh": "10s", "rows": [ { "collapse": false, @@ -40354,7 +35221,11 @@ items: }, "id": 2, - "interval": null, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, "links": [ ], @@ -40372,7 +35243,7 @@ items: "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": "cores", + "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", @@ -40383,16 +35254,17 @@ items: "to": "null" } ], - "span": 4, + "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, "lineColor": "rgb(31, 120, 193)", - "show": true + "show": false }, "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", + "expr": "sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -40400,7 +35272,7 @@ items: } ], "thresholds": "", - "title": "CPU", + "title": "Up", "tooltip": { "shared": false }, @@ -40409,177 +35281,241 @@ items: "valueMaps": [ { "op": "=", - "text": "0", + "text": "N/A", "value": "null" } ], - "valueName": "current" + "valueName": "min" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 3, - "interval": null, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}} {{instance}} e2e", + "refId": "A" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "GB", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}} {{instance}} binding", + "refId": "B" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ + "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}} {{instance}} scheduling algorithm", + "refId": "C" + }, { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", + "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "{{cluster}} {{instance}} volume", + "refId": "D" } ], - "thresholds": "", - "title": "Memory", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scheduling Rate", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 4, - "interval": null, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}} {{instance}} e2e", + "refId": "A" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "Bps", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}} {{instance}} binding", + "refId": "B" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ + "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{cluster}} {{instance}} scheduling algorithm", + "refId": "C" + }, { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", + "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "{{cluster}} {{instance}} volume", + "refId": "D" } ], - "thresholds": "", - "title": "Network", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scheduling latency 99th Quantile", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, @@ -40593,343 +35529,322 @@ items: { "collapse": false, "collapsed": false, - "height": "100px", "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 5, - "interval": null, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { - "name": "value to text", - "value": 1 + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "5xx", + "refId": "D" } ], - "thresholds": "", - "title": "Desired Replicas", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Kube API Request Rate", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 6, - "interval": null, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], - "thresholds": "", - "title": "Replicas of current version", + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Post Request Latency 99th Quantile", "tooltip": { - "shared": false + "shared": false, + "sort": 0, + "value_type": "individual" }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "op": "=", - "text": "0", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" - }, + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, "gridPos": { }, "id": 7, - "interval": null, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, "links": [ ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{verb}} {{url}}", "refId": "A" } ], - "thresholds": "", - "title": "Observed Generation", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" + "thresholds": [ + ], - "datasource": "$datasource", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "timeFrom": null, + "timeShift": null, + "title": "Get Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" }, - "gridPos": { + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + ] }, - "id": 8, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ + "yaxes": [ { - "name": "value to text", - "value": 1 + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Metadata Generation", - "tooltip": { - "shared": false - }, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true } - ], - "valueName": "current" + ] } ], "repeat": null, @@ -40953,17 +35868,19 @@ items: "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { }, - "id": 9, + "id": 8, + "interval": "1m", "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "sideWidth": null, "total": false, @@ -40984,43 +35901,204 @@ items: ], "spaceLength": 10, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "replicas specified", + "legendFormat": "{{instance}}", "refId": "A" - }, + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ { - "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "replicas created", - "refId": "B" + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "ready", - "refId": "C" - }, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "replicas of current version", - "refId": "D" + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, { - "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "updated", - "refId": "E" + "legendFormat": "{{instance}}", + "refId": "A" } ], "thresholds": [ @@ -41028,7 +36106,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Replicas", + "title": "Goroutines", "tooltip": { "shared": false, "sort": 0, @@ -41086,7 +36164,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -41110,33 +36188,7 @@ items: "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation, cluster)", - "refresh": 2, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "query": "label_values(up{job=\"kube-scheduler\"}, cluster)", "refresh": 2, "regex": "", "sort": 1, @@ -41155,14 +36207,14 @@ items: }, "datasource": "$datasource", "hide": 0, - "includeAll": false, - "label": "Name", + "includeAll": true, + "label": null, "multi": false, - "name": "statefulset", + "name": "instance", "options": [ ], - "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)", + "query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)", "refresh": 2, "regex": "", "sort": 1, @@ -41206,13 +36258,18 @@ items: ] }, "timezone": "UTC", - "title": "Kubernetes / StatefulSets", - "uid": "a31c1f46e6f727cb37c0d731a7245005", + "title": "Kubernetes / Scheduler", + "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", "version": 0 } kind: ConfigMap metadata: - name: grafana-dashboard-statefulset + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana-dashboard-scheduler namespace: monitoring - apiVersion: v1 data: @@ -41276,6 +36333,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -41321,7 +36379,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", @@ -41378,6 +36436,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -41423,7 +36482,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", @@ -41491,6 +36550,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -41536,7 +36596,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", @@ -41593,6 +36653,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -41638,7 +36699,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", @@ -41725,6 +36786,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -41768,7 +36830,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -41825,6 +36887,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -41868,7 +36931,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -41936,6 +36999,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -41979,7 +37043,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -42036,6 +37100,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -42079,7 +37144,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -42156,6 +37221,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -42199,7 +37265,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -42256,6 +37322,7 @@ items: "dashes": false, "datasource": "$datasource", "fill": 2, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, @@ -42299,7 +37366,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pod}}", @@ -42374,7 +37441,7 @@ items: "value": "default" }, "hide": 0, - "label": null, + "label": "Data Source", "name": "datasource", "options": [ @@ -42384,6 +37451,32 @@ items: "regex": "", "type": "datasource" }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": ".+", "auto": false, @@ -42394,7 +37487,7 @@ items: "value": "kube-system" }, "datasource": "$datasource", - "definition": "label_values(container_network_receive_packets_total, namespace)", + "definition": "label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)", "hide": 0, "includeAll": true, "label": null, @@ -42403,8 +37496,8 @@ items: "options": [ ], - "query": "label_values(container_network_receive_packets_total, namespace)", - "refresh": 1, + "query": "label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, @@ -42426,7 +37519,7 @@ items: "value": "" }, "datasource": "$datasource", - "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)", "hide": 0, "includeAll": false, "label": null, @@ -42435,8 +37528,8 @@ items: "options": [ ], - "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)", - "refresh": 1, + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)", + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, @@ -42458,7 +37551,7 @@ items: "value": "deployment" }, "datasource": "$datasource", - "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", "hide": 0, "includeAll": false, "label": null, @@ -42467,8 +37560,8 @@ items: "options": [ ], - "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", - "refresh": 1, + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, @@ -42598,6 +37691,11 @@ items: } kind: ConfigMap metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana-dashboard-workload-total namespace: monitoring kind: ConfigMapList diff --git a/manifests/grafana-dashboardSources.yaml b/manifests/grafana-dashboardSources.yaml index fffec98..8fc6d8e 100644 --- a/manifests/grafana-dashboardSources.yaml +++ b/manifests/grafana-dashboardSources.yaml @@ -6,6 +6,7 @@ data: "providers": [ { "folder": "Default", + "folderUid": "", "name": "0", "options": { "path": "/grafana-dashboard-definitions/0" @@ -17,5 +18,10 @@ data: } kind: ConfigMap metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana-dashboards namespace: monitoring diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index ed55fa9..eca41b6 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -2,22 +2,35 @@ apiVersion: apps/v1 kind: Deployment metadata: labels: - app: grafana + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana namespace: monitoring spec: replicas: 1 selector: matchLabels: - app: grafana + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus template: metadata: + annotations: + checksum/grafana-config: adbde4cde1aa3ca57c408943af53e6f7 + checksum/grafana-dashboardproviders: d8fb24844314114bed088b83042b1bdb + checksum/grafana-datasources: 0800bab7ea1e2d8ad5c09586d089e033 labels: - app: grafana + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 spec: + automountServiceAccountToken: false containers: - env: [] - image: grafana/grafana:7.0.3 + image: grafana/grafana:9.3.2 name: grafana ports: - containerPort: 3000 @@ -33,6 +46,12 @@ spec: requests: cpu: 100m memory: 100Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true volumeMounts: - mountPath: /var/lib/grafana name: grafana-storage @@ -43,6 +62,12 @@ spec: - mountPath: /etc/grafana/provisioning/dashboards name: grafana-dashboards readOnly: false + - mountPath: /tmp + name: tmp-plugins + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/alertmanager-overview + name: grafana-dashboard-alertmanager-overview + readOnly: false - mountPath: /grafana-dashboard-definitions/0/apiserver name: grafana-dashboard-apiserver readOnly: false @@ -52,8 +77,8 @@ spec: - mountPath: /grafana-dashboard-definitions/0/controller-manager name: grafana-dashboard-controller-manager readOnly: false - - mountPath: /grafana-dashboard-definitions/0/coredns-dashboard - name: grafana-dashboard-coredns-dashboard + - mountPath: /grafana-dashboard-definitions/0/grafana-overview + name: grafana-dashboard-grafana-overview readOnly: false - mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster name: grafana-dashboard-k8s-resources-cluster @@ -76,9 +101,6 @@ spec: - mountPath: /grafana-dashboard-definitions/0/kubelet name: grafana-dashboard-kubelet readOnly: false - - mountPath: /grafana-dashboard-definitions/0/kubernetes-cluster-dashboard - name: grafana-dashboard-kubernetes-cluster-dashboard - readOnly: false - mountPath: /grafana-dashboard-definitions/0/namespace-by-pod name: grafana-dashboard-namespace-by-pod readOnly: false @@ -91,6 +113,9 @@ spec: - mountPath: /grafana-dashboard-definitions/0/node-rsrc-use name: grafana-dashboard-node-rsrc-use readOnly: false + - mountPath: /grafana-dashboard-definitions/0/nodes-darwin + name: grafana-dashboard-nodes-darwin + readOnly: false - mountPath: /grafana-dashboard-definitions/0/nodes name: grafana-dashboard-nodes readOnly: false @@ -100,9 +125,6 @@ spec: - mountPath: /grafana-dashboard-definitions/0/pod-total name: grafana-dashboard-pod-total readOnly: false - - mountPath: /grafana-dashboard-definitions/0/prometheus-dashboard - name: grafana-dashboard-prometheus-dashboard - readOnly: false - mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write name: grafana-dashboard-prometheus-remote-write readOnly: false @@ -115,9 +137,6 @@ spec: - mountPath: /grafana-dashboard-definitions/0/scheduler name: grafana-dashboard-scheduler readOnly: false - - mountPath: /grafana-dashboard-definitions/0/statefulset - name: grafana-dashboard-statefulset - readOnly: false - mountPath: /grafana-dashboard-definitions/0/workload-total name: grafana-dashboard-workload-total readOnly: false @@ -125,8 +144,9 @@ spec: name: grafana-config readOnly: false nodeSelector: - beta.kubernetes.io/os: linux + kubernetes.io/os: linux securityContext: + fsGroup: 65534 runAsNonRoot: true runAsUser: 65534 serviceAccountName: grafana @@ -139,6 +159,12 @@ spec: - configMap: name: grafana-dashboards name: grafana-dashboards + - emptyDir: + medium: Memory + name: tmp-plugins + - configMap: + name: grafana-dashboard-alertmanager-overview + name: grafana-dashboard-alertmanager-overview - configMap: name: grafana-dashboard-apiserver name: grafana-dashboard-apiserver @@ -149,8 +175,8 @@ spec: name: grafana-dashboard-controller-manager name: grafana-dashboard-controller-manager - configMap: - name: grafana-dashboard-coredns-dashboard - name: grafana-dashboard-coredns-dashboard + name: grafana-dashboard-grafana-overview + name: grafana-dashboard-grafana-overview - configMap: name: grafana-dashboard-k8s-resources-cluster name: grafana-dashboard-k8s-resources-cluster @@ -172,9 +198,6 @@ spec: - configMap: name: grafana-dashboard-kubelet name: grafana-dashboard-kubelet - - configMap: - name: grafana-dashboard-kubernetes-cluster-dashboard - name: grafana-dashboard-kubernetes-cluster-dashboard - configMap: name: grafana-dashboard-namespace-by-pod name: grafana-dashboard-namespace-by-pod @@ -187,6 +210,9 @@ spec: - configMap: name: grafana-dashboard-node-rsrc-use name: grafana-dashboard-node-rsrc-use + - configMap: + name: grafana-dashboard-nodes-darwin + name: grafana-dashboard-nodes-darwin - configMap: name: grafana-dashboard-nodes name: grafana-dashboard-nodes @@ -196,9 +222,6 @@ spec: - configMap: name: grafana-dashboard-pod-total name: grafana-dashboard-pod-total - - configMap: - name: grafana-dashboard-prometheus-dashboard - name: grafana-dashboard-prometheus-dashboard - configMap: name: grafana-dashboard-prometheus-remote-write name: grafana-dashboard-prometheus-remote-write @@ -211,9 +234,6 @@ spec: - configMap: name: grafana-dashboard-scheduler name: grafana-dashboard-scheduler - - configMap: - name: grafana-dashboard-statefulset - name: grafana-dashboard-statefulset - configMap: name: grafana-dashboard-workload-total name: grafana-dashboard-workload-total diff --git a/manifests/grafana-networkPolicy.yaml b/manifests/grafana-networkPolicy.yaml new file mode 100644 index 0000000..cab676c --- /dev/null +++ b/manifests/grafana-networkPolicy.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + name: grafana + namespace: monitoring +spec: + egress: + - {} + ingress: + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: prometheus + ports: + - port: 3000 + protocol: TCP + podSelector: + matchLabels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + policyTypes: + - Egress + - Ingress diff --git a/manifests/grafana-prometheusRule.yaml b/manifests/grafana-prometheusRule.yaml new file mode 100644 index 0000000..7ac2cfc --- /dev/null +++ b/manifests/grafana-prometheusRule.yaml @@ -0,0 +1,33 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 + prometheus: k8s + role: alert-rules + name: grafana-rules + namespace: monitoring +spec: + groups: + - name: GrafanaAlerts + rules: + - alert: GrafanaRequestsFailing + annotations: + message: '{{ $labels.namespace }}/{{ $labels.job }}/{{ $labels.handler }} is experiencing {{ $value | humanize }}% errors' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/grafana/grafanarequestsfailing + expr: | + 100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."} + / ignoring (status_code) + sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"}) + > 50 + for: 5m + labels: + severity: warning + - name: grafana_rules + rules: + - expr: | + sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m])) + record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m diff --git a/manifests/grafana-service.yaml b/manifests/grafana-service.yaml index 3acdf1e..ce95f07 100644 --- a/manifests/grafana-service.yaml +++ b/manifests/grafana-service.yaml @@ -2,7 +2,10 @@ apiVersion: v1 kind: Service metadata: labels: - app: grafana + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana namespace: monitoring spec: @@ -11,4 +14,6 @@ spec: port: 3000 targetPort: http selector: - app: grafana + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/grafana-serviceAccount.yaml b/manifests/grafana-serviceAccount.yaml index 3ed3e03..2f87aca 100644 --- a/manifests/grafana-serviceAccount.yaml +++ b/manifests/grafana-serviceAccount.yaml @@ -1,5 +1,11 @@ apiVersion: v1 +automountServiceAccountToken: false kind: ServiceAccount metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana namespace: monitoring diff --git a/manifests/grafana-serviceMonitor.yaml b/manifests/grafana-serviceMonitor.yaml index 7ede266..f13c73c 100644 --- a/manifests/grafana-serviceMonitor.yaml +++ b/manifests/grafana-serviceMonitor.yaml @@ -1,6 +1,11 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: + labels: + app.kubernetes.io/component: grafana + app.kubernetes.io/name: grafana + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 9.3.2 name: grafana namespace: monitoring spec: @@ -9,4 +14,4 @@ spec: port: http selector: matchLabels: - app: grafana + app.kubernetes.io/name: grafana diff --git a/manifests/kube-state-metrics-clusterRole.yaml b/manifests/kube-state-metrics-clusterRole.yaml index 8c72322..1c1f60a 100644 --- a/manifests/kube-state-metrics-clusterRole.yaml +++ b/manifests/kube-state-metrics-clusterRole.yaml @@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.6 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 name: kube-state-metrics rules: - apiGroups: @@ -14,6 +16,7 @@ rules: - nodes - pods - services + - serviceaccounts - resourcequotas - replicationcontrollers - limitranges @@ -24,16 +27,6 @@ rules: verbs: - list - watch -- apiGroups: - - extensions - resources: - - daemonsets - - deployments - - replicasets - - ingresses - verbs: - - list - - watch - apiGroups: - apps resources: @@ -85,6 +78,13 @@ rules: verbs: - list - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch - apiGroups: - storage.k8s.io resources: @@ -105,6 +105,8 @@ rules: - networking.k8s.io resources: - networkpolicies + - ingressclasses + - ingresses verbs: - list - watch @@ -115,3 +117,13 @@ rules: verbs: - list - watch +- apiGroups: + - rbac.authorization.k8s.io + resources: + - clusterrolebindings + - clusterroles + - rolebindings + - roles + verbs: + - list + - watch diff --git a/manifests/kube-state-metrics-clusterRoleBinding.yaml b/manifests/kube-state-metrics-clusterRoleBinding.yaml index 750ff09..88c5faf 100644 --- a/manifests/kube-state-metrics-clusterRoleBinding.yaml +++ b/manifests/kube-state-metrics-clusterRoleBinding.yaml @@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.6 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 name: kube-state-metrics roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/manifests/kube-state-metrics-deployment.yaml b/manifests/kube-state-metrics-deployment.yaml index 787d86e..ec95b5c 100644 --- a/manifests/kube-state-metrics-deployment.yaml +++ b/manifests/kube-state-metrics-deployment.yaml @@ -2,55 +2,104 @@ apiVersion: apps/v1 kind: Deployment metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.6 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 name: kube-state-metrics namespace: monitoring spec: replicas: 1 selector: matchLabels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-prometheus template: metadata: + annotations: + kubectl.kubernetes.io/default-container: kube-state-metrics labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.6 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 spec: + automountServiceAccountToken: true containers: - args: - --host=127.0.0.1 - --port=8081 - --telemetry-host=127.0.0.1 - --telemetry-port=8082 - image: carlosedp/kube-state-metrics:v1.9.6 + image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0 name: kube-state-metrics + resources: + limits: + cpu: 100m + memory: 250Mi + requests: + cpu: 10m + memory: 190Mi securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true runAsUser: 65534 - args: - --logtostderr - --secure-listen-address=:8443 - - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 - --upstream=http://127.0.0.1:8081/ - image: carlosedp/kube-rbac-proxy:v0.5.0 + image: quay.io/brancz/kube-rbac-proxy:v0.14.0 name: kube-rbac-proxy-main ports: - containerPort: 8443 name: https-main + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 20m + memory: 20Mi securityContext: - runAsUser: 65534 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 - args: - --logtostderr - --secure-listen-address=:9443 - - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 - --upstream=http://127.0.0.1:8082/ - image: carlosedp/kube-rbac-proxy:v0.5.0 + image: quay.io/brancz/kube-rbac-proxy:v0.14.0 name: kube-rbac-proxy-self ports: - containerPort: 9443 name: https-self + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi securityContext: - runAsUser: 65534 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 nodeSelector: kubernetes.io/os: linux serviceAccountName: kube-state-metrics diff --git a/manifests/kube-state-metrics-networkPolicy.yaml b/manifests/kube-state-metrics-networkPolicy.yaml new file mode 100644 index 0000000..9815df8 --- /dev/null +++ b/manifests/kube-state-metrics-networkPolicy.yaml @@ -0,0 +1,31 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 + name: kube-state-metrics + namespace: monitoring +spec: + egress: + - {} + ingress: + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: prometheus + ports: + - port: 8443 + protocol: TCP + - port: 9443 + protocol: TCP + podSelector: + matchLabels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-prometheus + policyTypes: + - Egress + - Ingress diff --git a/manifests/kube-state-metrics-prometheusRule.yaml b/manifests/kube-state-metrics-prometheusRule.yaml new file mode 100644 index 0000000..5bfcc43 --- /dev/null +++ b/manifests/kube-state-metrics-prometheusRule.yaml @@ -0,0 +1,65 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 + prometheus: k8s + role: alert-rules + name: kube-state-metrics-rules + namespace: monitoring +spec: + groups: + - name: kube-state-metrics + rules: + - alert: KubeStateMetricsListErrors + annotations: + description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors + summary: kube-state-metrics is experiencing errors in list operations. + expr: | + (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) + / + sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m]))) + > 0.01 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsWatchErrors + annotations: + description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors + summary: kube-state-metrics is experiencing errors in watch operations. + expr: | + (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) + / + sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m]))) + > 0.01 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsShardingMismatch + annotations: + description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch + summary: kube-state-metrics sharding is misconfigured. + expr: | + stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0 + for: 15m + labels: + severity: critical + - alert: KubeStateMetricsShardsMissing + annotations: + description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing + summary: kube-state-metrics shards are missing. + expr: | + 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1 + - + sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) + != 0 + for: 15m + labels: + severity: critical diff --git a/manifests/kube-state-metrics-service.yaml b/manifests/kube-state-metrics-service.yaml index 7e07515..e349fe7 100644 --- a/manifests/kube-state-metrics-service.yaml +++ b/manifests/kube-state-metrics-service.yaml @@ -2,8 +2,10 @@ apiVersion: v1 kind: Service metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.6 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 name: kube-state-metrics namespace: monitoring spec: @@ -16,4 +18,6 @@ spec: port: 9443 targetPort: https-self selector: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/kube-state-metrics-serviceAccount.yaml b/manifests/kube-state-metrics-serviceAccount.yaml index 6f856d9..be116f6 100644 --- a/manifests/kube-state-metrics-serviceAccount.yaml +++ b/manifests/kube-state-metrics-serviceAccount.yaml @@ -1,8 +1,11 @@ apiVersion: v1 +automountServiceAccountToken: false kind: ServiceAccount metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.6 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 name: kube-state-metrics namespace: monitoring diff --git a/manifests/kube-state-metrics-serviceMonitor.yaml b/manifests/kube-state-metrics-serviceMonitor.yaml index ad7a643..1dde4b0 100644 --- a/manifests/kube-state-metrics-serviceMonitor.yaml +++ b/manifests/kube-state-metrics-serviceMonitor.yaml @@ -2,8 +2,10 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 1.9.6 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.7.0 name: kube-state-metrics namespace: monitoring spec: @@ -11,6 +13,11 @@ spec: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token honorLabels: true interval: 30s + metricRelabelings: + - action: drop + regex: kube_endpoint_address_not_ready|kube_endpoint_address_available + sourceLabels: + - __name__ port: https-main relabelings: - action: labeldrop @@ -28,4 +35,6 @@ spec: jobLabel: app.kubernetes.io/name selector: matchLabels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/node-exporter-clusterRole.yaml b/manifests/node-exporter-clusterRole.yaml index ad783ae..e4a8b76 100644 --- a/manifests/node-exporter-clusterRole.yaml +++ b/manifests/node-exporter-clusterRole.yaml @@ -1,7 +1,13 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 name: node-exporter + namespace: monitoring rules: - apiGroups: - authentication.k8s.io diff --git a/manifests/node-exporter-clusterRoleBinding.yaml b/manifests/node-exporter-clusterRoleBinding.yaml index a5a2050..ba3594e 100644 --- a/manifests/node-exporter-clusterRoleBinding.yaml +++ b/manifests/node-exporter-clusterRoleBinding.yaml @@ -1,7 +1,13 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 name: node-exporter + namespace: monitoring roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml index afe2901..34dca7a 100644 --- a/manifests/node-exporter-daemonset.yaml +++ b/manifests/node-exporter-daemonset.yaml @@ -2,30 +2,41 @@ apiVersion: apps/v1 kind: DaemonSet metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: node-exporter - app.kubernetes.io/version: v0.18.1 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 name: node-exporter namespace: monitoring spec: selector: matchLabels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus template: metadata: + annotations: + kubectl.kubernetes.io/default-container: node-exporter labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: node-exporter - app.kubernetes.io/version: v0.18.1 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 spec: + automountServiceAccountToken: true containers: - args: - --web.listen-address=127.0.0.1:9100 - - --path.procfs=/host/proc - --path.sysfs=/host/sys - --path.rootfs=/host/root + - --path.udev.data=/host/root/run/udev/data - --no-collector.wifi - --no-collector.hwmon - - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) - image: prom/node-exporter:v0.18.1 + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/) + - --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$ + - --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$ + image: quay.io/prometheus/node-exporter:v1.5.0 name: node-exporter resources: limits: @@ -34,13 +45,19 @@ spec: requests: cpu: 102m memory: 180Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - SYS_TIME + drop: + - ALL + readOnlyRootFilesystem: true volumeMounts: - - mountPath: /host/proc - name: proc - readOnly: false - mountPath: /host/sys + mountPropagation: HostToContainer name: sys - readOnly: false + readOnly: true - mountPath: /host/root mountPropagation: HostToContainer name: root @@ -48,14 +65,14 @@ spec: - args: - --logtostderr - --secure-listen-address=[$(IP)]:9100 - - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 - --upstream=http://127.0.0.1:9100/ env: - name: IP valueFrom: fieldRef: fieldPath: status.podIP - image: carlosedp/kube-rbac-proxy:v0.5.0 + image: quay.io/brancz/kube-rbac-proxy:v0.14.0 name: kube-rbac-proxy ports: - containerPort: 9100 @@ -68,10 +85,20 @@ spec: requests: cpu: 10m memory: 20Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 hostNetwork: true hostPID: true nodeSelector: kubernetes.io/os: linux + priorityClassName: system-cluster-critical securityContext: runAsNonRoot: true runAsUser: 65534 @@ -79,12 +106,13 @@ spec: tolerations: - operator: Exists volumes: - - hostPath: - path: /proc - name: proc - hostPath: path: /sys name: sys - hostPath: path: / name: root + updateStrategy: + rollingUpdate: + maxUnavailable: 10% + type: RollingUpdate diff --git a/manifests/node-exporter-networkPolicy.yaml b/manifests/node-exporter-networkPolicy.yaml new file mode 100644 index 0000000..c03fdd4 --- /dev/null +++ b/manifests/node-exporter-networkPolicy.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 + name: node-exporter + namespace: monitoring +spec: + egress: + - {} + ingress: + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: prometheus + ports: + - port: 9100 + protocol: TCP + podSelector: + matchLabels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus + policyTypes: + - Egress + - Ingress diff --git a/manifests/node-exporter-prometheusRule.yaml b/manifests/node-exporter-prometheusRule.yaml new file mode 100644 index 0000000..5e5e52c --- /dev/null +++ b/manifests/node-exporter-prometheusRule.yaml @@ -0,0 +1,316 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 + prometheus: k8s + role: alert-rules + name: node-exporter-rules + namespace: monitoring +spec: + groups: + - name: node-exporter + rules: + - alert: NodeFilesystemSpaceFillingUp + annotations: + description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 24 hours. + expr: | + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + - alert: NodeFilesystemSpaceFillingUp + annotations: + description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 4 hours. + expr: | + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + - alert: NodeFilesystemAlmostOutOfSpace + annotations: + description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 5% space left. + expr: | + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 30m + labels: + severity: warning + - alert: NodeFilesystemAlmostOutOfSpace + annotations: + description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 3% space left. + expr: | + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 30m + labels: + severity: critical + - alert: NodeFilesystemFilesFillingUp + annotations: + description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 24 hours. + expr: | + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + - alert: NodeFilesystemFilesFillingUp + annotations: + description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 4 hours. + expr: | + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + - alert: NodeFilesystemAlmostOutOfFiles + annotations: + description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 5% inodes left. + expr: | + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + - alert: NodeFilesystemAlmostOutOfFiles + annotations: + description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 3% inodes left. + expr: | + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + - alert: NodeNetworkReceiveErrs + annotations: + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs + summary: Network interface is reporting many receive errors. + expr: | + rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01 + for: 1h + labels: + severity: warning + - alert: NodeNetworkTransmitErrs + annotations: + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs + summary: Network interface is reporting many transmit errors. + expr: | + rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01 + for: 1h + labels: + severity: warning + - alert: NodeHighNumberConntrackEntriesUsed + annotations: + description: '{{ $value | humanizePercentage }} of conntrack entries are used.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused + summary: Number of conntrack are getting close to the limit. + expr: | + (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 + labels: + severity: warning + - alert: NodeTextFileCollectorScrapeError + annotations: + description: Node Exporter text file collector failed to scrape. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror + summary: Node Exporter text file collector failed to scrape. + expr: | + node_textfile_scrape_error{job="node-exporter"} == 1 + labels: + severity: warning + - alert: NodeClockSkewDetected + annotations: + description: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected + summary: Clock skew detected. + expr: | + ( + node_timex_offset_seconds{job="node-exporter"} > 0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 + ) + or + ( + node_timex_offset_seconds{job="node-exporter"} < -0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 + ) + for: 10m + labels: + severity: warning + - alert: NodeClockNotSynchronising + annotations: + description: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising + summary: Clock not synchronising. + expr: | + min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 + and + node_timex_maxerror_seconds{job="node-exporter"} >= 16 + for: 10m + labels: + severity: warning + - alert: NodeRAIDDegraded + annotations: + description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded + summary: RAID Array is degraded + expr: | + node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 + for: 15m + labels: + severity: critical + - alert: NodeRAIDDiskFailure + annotations: + description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure + summary: Failed device in RAID array + expr: | + node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 + labels: + severity: warning + - alert: NodeFileDescriptorLimit + annotations: + description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: | + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 + ) + for: 15m + labels: + severity: warning + - alert: NodeFileDescriptorLimit + annotations: + description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: | + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 + ) + for: 15m + labels: + severity: critical + - name: node-exporter.rules + rules: + - expr: | + count without (cpu, mode) ( + node_cpu_seconds_total{job="node-exporter",mode="idle"} + ) + record: instance:node_num_cpu:sum + - expr: | + 1 - avg without (cpu) ( + sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) + ) + record: instance:node_cpu_utilisation:rate5m + - expr: | + ( + node_load1{job="node-exporter"} + / + instance:node_num_cpu:sum{job="node-exporter"} + ) + record: instance:node_load1_per_cpu:ratio + - expr: | + 1 - ( + ( + node_memory_MemAvailable_bytes{job="node-exporter"} + or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + + node_memory_Cached_bytes{job="node-exporter"} + + + node_memory_MemFree_bytes{job="node-exporter"} + + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) + / + node_memory_MemTotal_bytes{job="node-exporter"} + ) + record: instance:node_memory_utilisation:ratio + - expr: | + rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) + record: instance:node_vmstat_pgmajfault:rate5m + - expr: | + rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_seconds:rate5m + - expr: | + rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_weighted_seconds:rate5m + - expr: | + sum without (device) ( + rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_bytes_excluding_lo:rate5m + - expr: | + sum without (device) ( + rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_bytes_excluding_lo:rate5m + - expr: | + sum without (device) ( + rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_drop_excluding_lo:rate5m + - expr: | + sum without (device) ( + rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_drop_excluding_lo:rate5m diff --git a/manifests/node-exporter-service.yaml b/manifests/node-exporter-service.yaml index 7dfbef6..7f3b270 100644 --- a/manifests/node-exporter-service.yaml +++ b/manifests/node-exporter-service.yaml @@ -2,8 +2,10 @@ apiVersion: v1 kind: Service metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: node-exporter - app.kubernetes.io/version: v0.18.1 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 name: node-exporter namespace: monitoring spec: @@ -13,4 +15,6 @@ spec: port: 9100 targetPort: https selector: + app.kubernetes.io/component: exporter app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/node-exporter-serviceAccount.yaml b/manifests/node-exporter-serviceAccount.yaml index 8a03ac1..b3d72ad 100644 --- a/manifests/node-exporter-serviceAccount.yaml +++ b/manifests/node-exporter-serviceAccount.yaml @@ -1,5 +1,11 @@ apiVersion: v1 +automountServiceAccountToken: false kind: ServiceAccount metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 name: node-exporter namespace: monitoring diff --git a/manifests/node-exporter-serviceMonitor.yaml b/manifests/node-exporter-serviceMonitor.yaml index 357164d..00081b2 100644 --- a/manifests/node-exporter-serviceMonitor.yaml +++ b/manifests/node-exporter-serviceMonitor.yaml @@ -2,8 +2,10 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: labels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: node-exporter - app.kubernetes.io/version: v0.18.1 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 1.5.0 name: node-exporter namespace: monitoring spec: @@ -24,4 +26,6 @@ spec: jobLabel: app.kubernetes.io/name selector: matchLabels: + app.kubernetes.io/component: exporter app.kubernetes.io/name: node-exporter + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/prometheus-adapter-apiService.yaml b/manifests/prometheus-adapter-apiService.yaml index a215efe..bf17b25 100644 --- a/manifests/prometheus-adapter-apiService.yaml +++ b/manifests/prometheus-adapter-apiService.yaml @@ -1,6 +1,11 @@ apiVersion: apiregistration.k8s.io/v1 kind: APIService metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: v1beta1.metrics.k8s.io spec: group: metrics.k8s.io diff --git a/manifests/prometheus-adapter-clusterRole.yaml b/manifests/prometheus-adapter-clusterRole.yaml index a02d2bb..57e4925 100644 --- a/manifests/prometheus-adapter-clusterRole.yaml +++ b/manifests/prometheus-adapter-clusterRole.yaml @@ -1,7 +1,13 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: prometheus-adapter + namespace: monitoring rules: - apiGroups: - "" diff --git a/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml b/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml index 9f0dbb3..b1f3175 100644 --- a/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml +++ b/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml @@ -2,10 +2,15 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 rbac.authorization.k8s.io/aggregate-to-admin: "true" rbac.authorization.k8s.io/aggregate-to-edit: "true" rbac.authorization.k8s.io/aggregate-to-view: "true" name: system:aggregated-metrics-reader + namespace: monitoring rules: - apiGroups: - metrics.k8s.io diff --git a/manifests/prometheus-adapter-clusterRoleBinding.yaml b/manifests/prometheus-adapter-clusterRoleBinding.yaml index 7e8f3da..749fc66 100644 --- a/manifests/prometheus-adapter-clusterRoleBinding.yaml +++ b/manifests/prometheus-adapter-clusterRoleBinding.yaml @@ -1,7 +1,13 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: prometheus-adapter + namespace: monitoring roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml b/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml index 4295b50..eb8fc9e 100644 --- a/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml +++ b/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml @@ -1,7 +1,13 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: resource-metrics:system:auth-delegator + namespace: monitoring roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/manifests/prometheus-adapter-clusterRoleServerResources.yaml b/manifests/prometheus-adapter-clusterRoleServerResources.yaml index fcb914c..73d78b0 100644 --- a/manifests/prometheus-adapter-clusterRoleServerResources.yaml +++ b/manifests/prometheus-adapter-clusterRoleServerResources.yaml @@ -1,7 +1,13 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: resource-metrics-server-resources + namespace: monitoring rules: - apiGroups: - metrics.k8s.io diff --git a/manifests/prometheus-adapter-configMap.yaml b/manifests/prometheus-adapter-configMap.yaml index b2bde3c..a1690bb 100644 --- a/manifests/prometheus-adapter-configMap.yaml +++ b/manifests/prometheus-adapter-configMap.yaml @@ -4,8 +4,26 @@ data: "resourceRules": "cpu": "containerLabel": "container" - "containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)" - "nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)" + "containerQuery": | + sum by (<<.GroupBy>>) ( + irate ( + container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s] + ) + ) + "nodeQuery": | + sum by (<<.GroupBy>>) ( + 1 - irate( + node_cpu_seconds_total{mode="idle"}[60s] + ) + * on(namespace, pod) group_left(node) ( + node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>} + ) + ) + or sum by (<<.GroupBy>>) ( + 1 - irate( + windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[4m] + ) + ) "resources": "overrides": "namespace": @@ -16,8 +34,21 @@ data: "resource": "pod" "memory": "containerLabel": "container" - "containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)" - "nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)" + "containerQuery": | + sum by (<<.GroupBy>>) ( + container_memory_working_set_bytes{<<.LabelMatchers>>,container!="",pod!=""} + ) + "nodeQuery": | + sum by (<<.GroupBy>>) ( + node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} + - + node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>} + ) + or sum by (<<.GroupBy>>) ( + windows_cs_physical_memory_bytes{job="windows-exporter",<<.LabelMatchers>>} + - + windows_memory_available_bytes{job="windows-exporter",<<.LabelMatchers>>} + ) "resources": "overrides": "instance": @@ -29,5 +60,10 @@ data: "window": "5m" kind: ConfigMap metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: adapter-config namespace: monitoring diff --git a/manifests/prometheus-adapter-deployment.yaml b/manifests/prometheus-adapter-deployment.yaml index b95f07d..a4048ac 100644 --- a/manifests/prometheus-adapter-deployment.yaml +++ b/manifests/prometheus-adapter-deployment.yaml @@ -1,22 +1,46 @@ apiVersion: apps/v1 kind: Deployment metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: prometheus-adapter namespace: monitoring spec: - replicas: 1 + replicas: 2 selector: matchLabels: - name: prometheus-adapter + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus strategy: rollingUpdate: maxSurge: 1 - maxUnavailable: 0 + maxUnavailable: 1 template: metadata: labels: - name: prometheus-adapter + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + namespaces: + - monitoring + topologyKey: kubernetes.io/hostname + weight: 100 + automountServiceAccountToken: true containers: - args: - --cert-dir=/var/run/serving-cert @@ -25,10 +49,41 @@ spec: - --metrics-relist-interval=1m - --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/ - --secure-port=6443 - image: directxman12/k8s-prometheus-adapter:v0.7.0 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA + image: registry.k8s.io/prometheus-adapter/prometheus-adapter:v0.10.0 + livenessProbe: + failureThreshold: 5 + httpGet: + path: /livez + port: https + scheme: HTTPS + initialDelaySeconds: 30 + periodSeconds: 5 name: prometheus-adapter ports: - containerPort: 6443 + name: https + readinessProbe: + failureThreshold: 5 + httpGet: + path: /readyz + port: https + scheme: HTTPS + initialDelaySeconds: 30 + periodSeconds: 5 + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true volumeMounts: - mountPath: /tmp name: tmpfs diff --git a/manifests/prometheus-adapter-networkPolicy.yaml b/manifests/prometheus-adapter-networkPolicy.yaml new file mode 100644 index 0000000..29a11fd --- /dev/null +++ b/manifests/prometheus-adapter-networkPolicy.yaml @@ -0,0 +1,23 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 + name: prometheus-adapter + namespace: monitoring +spec: + egress: + - {} + ingress: + - {} + podSelector: + matchLabels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + policyTypes: + - Egress + - Ingress diff --git a/manifests/prometheus-adapter-podDisruptionBudget.yaml b/manifests/prometheus-adapter-podDisruptionBudget.yaml new file mode 100644 index 0000000..033fcaa --- /dev/null +++ b/manifests/prometheus-adapter-podDisruptionBudget.yaml @@ -0,0 +1,17 @@ +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 + name: prometheus-adapter + namespace: monitoring +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/prometheus-adapter-roleBindingAuthReader.yaml b/manifests/prometheus-adapter-roleBindingAuthReader.yaml index 48c8f32..5d0d903 100644 --- a/manifests/prometheus-adapter-roleBindingAuthReader.yaml +++ b/manifests/prometheus-adapter-roleBindingAuthReader.yaml @@ -1,6 +1,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: resource-metrics-auth-reader namespace: kube-system roleRef: diff --git a/manifests/prometheus-adapter-service.yaml b/manifests/prometheus-adapter-service.yaml index e786e01..c984828 100644 --- a/manifests/prometheus-adapter-service.yaml +++ b/manifests/prometheus-adapter-service.yaml @@ -2,7 +2,10 @@ apiVersion: v1 kind: Service metadata: labels: - name: prometheus-adapter + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: prometheus-adapter namespace: monitoring spec: @@ -11,4 +14,6 @@ spec: port: 443 targetPort: 6443 selector: - name: prometheus-adapter + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/prometheus-adapter-serviceAccount.yaml b/manifests/prometheus-adapter-serviceAccount.yaml index d7e7050..bb1058b 100644 --- a/manifests/prometheus-adapter-serviceAccount.yaml +++ b/manifests/prometheus-adapter-serviceAccount.yaml @@ -1,5 +1,11 @@ apiVersion: v1 +automountServiceAccountToken: false kind: ServiceAccount metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 name: prometheus-adapter namespace: monitoring diff --git a/manifests/prometheus-adapter-serviceMonitor.yaml b/manifests/prometheus-adapter-serviceMonitor.yaml new file mode 100644 index 0000000..d894145 --- /dev/null +++ b/manifests/prometheus-adapter-serviceMonitor.yaml @@ -0,0 +1,28 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.10.0 + name: prometheus-adapter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + metricRelabelings: + - action: drop + regex: (apiserver_client_certificate_.*|apiserver_envelope_.*|apiserver_flowcontrol_.*|apiserver_storage_.*|apiserver_webhooks_.*|workqueue_.*) + sourceLabels: + - __name__ + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + selector: + matchLabels: + app.kubernetes.io/component: metrics-adapter + app.kubernetes.io/name: prometheus-adapter + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/prometheus-clusterRole.yaml b/manifests/prometheus-clusterRole.yaml index d5c4598..e8e866b 100644 --- a/manifests/prometheus-clusterRole.yaml +++ b/manifests/prometheus-clusterRole.yaml @@ -1,6 +1,12 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s rules: - apiGroups: diff --git a/manifests/prometheus-clusterRoleBinding.yaml b/manifests/prometheus-clusterRoleBinding.yaml index 554bb6f..abd9d64 100644 --- a/manifests/prometheus-clusterRoleBinding.yaml +++ b/manifests/prometheus-clusterRoleBinding.yaml @@ -1,6 +1,12 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml deleted file mode 100644 index 9506973..0000000 --- a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - k8s-app: kube-controller-manager - name: kube-controller-manager-prometheus-discovery - namespace: kube-system -spec: - clusterIP: None - ports: - - name: http-metrics - port: 10252 - targetPort: 10252 - selector: - component: kube-controller-manager diff --git a/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml deleted file mode 100644 index 34e746c..0000000 --- a/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - k8s-app: kube-dns - name: kube-dns-prometheus-discovery - namespace: kube-system -spec: - clusterIP: None - ports: - - name: metrics - port: 9153 - targetPort: 9153 - selector: - k8s-app: kube-dns diff --git a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml deleted file mode 100644 index b4843c7..0000000 --- a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - k8s-app: kube-scheduler - name: kube-scheduler-prometheus-discovery - namespace: kube-system -spec: - clusterIP: None - ports: - - name: http-metrics - port: 10251 - targetPort: 10251 - selector: - component: kube-scheduler diff --git a/manifests/prometheus-networkPolicy.yaml b/manifests/prometheus-networkPolicy.yaml new file mode 100644 index 0000000..7fbb293 --- /dev/null +++ b/manifests/prometheus-networkPolicy.yaml @@ -0,0 +1,40 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 + name: prometheus-k8s + namespace: monitoring +spec: + egress: + - {} + ingress: + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: prometheus + ports: + - port: 9090 + protocol: TCP + - port: 8080 + protocol: TCP + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: grafana + ports: + - port: 9090 + protocol: TCP + podSelector: + matchLabels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + policyTypes: + - Egress + - Ingress diff --git a/manifests/prometheus-operator-serviceMonitor.yaml b/manifests/prometheus-operator-serviceMonitor.yaml index 39e48aa..50d6cd8 100644 --- a/manifests/prometheus-operator-serviceMonitor.yaml +++ b/manifests/prometheus-operator-serviceMonitor.yaml @@ -4,7 +4,8 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.40.0 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.61.1 name: prometheus-operator namespace: monitoring spec: @@ -19,4 +20,5 @@ spec: matchLabels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.40.0 + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 0.61.1 diff --git a/manifests/prometheus-podDisruptionBudget.yaml b/manifests/prometheus-podDisruptionBudget.yaml new file mode 100644 index 0000000..7e3e656 --- /dev/null +++ b/manifests/prometheus-podDisruptionBudget.yaml @@ -0,0 +1,19 @@ +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 + name: prometheus-k8s + namespace: monitoring +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/prometheus-prometheus.yaml b/manifests/prometheus-prometheus.yaml index 2280b93..6e1d307 100644 --- a/manifests/prometheus-prometheus.yaml +++ b/manifests/prometheus-prometheus.yaml @@ -2,7 +2,11 @@ apiVersion: monitoring.coreos.com/v1 kind: Prometheus metadata: labels: - prometheus: k8s + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: k8s namespace: monitoring spec: @@ -11,35 +15,45 @@ spec: preferredDuringSchedulingIgnoredDuringExecution: - podAffinityTerm: labelSelector: - matchExpressions: - - key: prometheus - operator: In - values: - - k8s + matchLabels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus namespaces: - monitoring topologyKey: kubernetes.io/hostname weight: 100 alerting: alertmanagers: - - name: alertmanager-main + - apiVersion: v2 + name: alertmanager-main namespace: monitoring port: web + enableFeatures: [] + externalLabels: {} externalUrl: http://prometheus.192.168.1.15.nip.io - image: prom/prometheus:v2.19.1 + image: quay.io/prometheus/prometheus:v2.41.0 nodeSelector: kubernetes.io/os: linux + podMetadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 podMonitorNamespaceSelector: {} podMonitorSelector: {} + probeNamespaceSelector: {} + probeSelector: {} replicas: 1 resources: requests: memory: 400Mi retention: 15d - ruleSelector: - matchLabels: - prometheus: k8s - role: alert-rules + ruleNamespaceSelector: {} + ruleSelector: {} scrapeInterval: 30s scrapeTimeout: 30s securityContext: @@ -49,4 +63,4 @@ spec: serviceAccountName: prometheus-k8s serviceMonitorNamespaceSelector: {} serviceMonitorSelector: {} - version: v2.19.1 + version: 2.41.0 diff --git a/manifests/prometheus-prometheusRule.yaml b/manifests/prometheus-prometheusRule.yaml new file mode 100644 index 0000000..34600e6 --- /dev/null +++ b/manifests/prometheus-prometheusRule.yaml @@ -0,0 +1,280 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 + prometheus: k8s + role: alert-rules + name: prometheus-k8s-prometheus-rules + namespace: monitoring +spec: + groups: + - name: prometheus + rules: + - alert: PrometheusBadConfig + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig + summary: Failed Prometheus configuration reload. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0 + for: 10m + labels: + severity: critical + - alert: PrometheusNotificationQueueRunningFull + annotations: + description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull + summary: Prometheus alert notification queue predicted to run full in less than 30m. + expr: | + # Without min_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30) + > + min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m]) + ) + for: 15m + labels: + severity: warning + - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers + annotations: + description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers + summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. + expr: | + ( + rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) + / + rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) + ) + * 100 + > 1 + for: 15m + labels: + severity: warning + - alert: PrometheusNotConnectedToAlertmanagers + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers + summary: Prometheus is not connected to any Alertmanagers. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1 + for: 10m + labels: + severity: warning + - alert: PrometheusTSDBReloadsFailing + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing + summary: Prometheus has issues reloading blocks from disk. + expr: | + increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusTSDBCompactionsFailing + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing + summary: Prometheus has issues compacting blocks. + expr: | + increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusNotIngestingSamples + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples + summary: Prometheus is not ingesting samples. + expr: | + ( + rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0 + and + ( + sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0 + or + sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0 + ) + ) + for: 10m + labels: + severity: warning + - alert: PrometheusDuplicateTimestamps + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps + summary: Prometheus is dropping samples with duplicate timestamps. + expr: | + rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 10m + labels: + severity: warning + - alert: PrometheusOutOfOrderTimestamps + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps + summary: Prometheus drops samples with out-of-order timestamps. + expr: | + rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 10m + labels: + severity: warning + - alert: PrometheusRemoteStorageFailures + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }} + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures + summary: Prometheus fails to send samples to remote storage. + expr: | + ( + (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m])) + / + ( + (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m])) + + + (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])) + ) + ) + * 100 + > 1 + for: 15m + labels: + severity: critical + - alert: PrometheusRemoteWriteBehind + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind + summary: Prometheus remote write is behind. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m]) + - ignoring(remote_name, url) group_right + max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m]) + ) + > 120 + for: 15m + labels: + severity: critical + - alert: PrometheusRemoteWriteDesiredShards + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards + summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. + expr: | + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m]) + > + max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m]) + ) + for: 15m + labels: + severity: warning + - alert: PrometheusRuleFailures + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures + summary: Prometheus is failing rule evaluations. + expr: | + increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 15m + labels: + severity: critical + - alert: PrometheusMissingRuleEvaluations + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations + summary: Prometheus is missing rule evaluations due to slow rule group evaluation. + expr: | + increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusTargetLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. + expr: | + increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusLabelLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit. + expr: | + increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusScrapeBodySizeLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit + summary: Prometheus has dropped some targets that exceeded body size limit. + expr: | + increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusScrapeSampleLimitHit + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit + summary: Prometheus has failed scrapes that have exceeded the configured sample limit. + expr: | + increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 + for: 15m + labels: + severity: warning + - alert: PrometheusTargetSyncFailure + annotations: + description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure + summary: Prometheus has failed to sync targets. + expr: | + increase(prometheus_target_sync_failed_total{job="prometheus-k8s",namespace="monitoring"}[30m]) > 0 + for: 5m + labels: + severity: critical + - alert: PrometheusHighQueryLoad + annotations: + description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload + summary: Prometheus is reaching its maximum capacity serving concurrent requests. + expr: | + avg_over_time(prometheus_engine_queries{job="prometheus-k8s",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.8 + for: 15m + labels: + severity: warning + - alert: PrometheusErrorSendingAlertsToAnyAlertmanager + annotations: + description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager + summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. + expr: | + min without (alertmanager) ( + rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m]) + / + rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m]) + ) + * 100 + > 3 + for: 15m + labels: + severity: critical diff --git a/manifests/prometheus-roleBindingConfig.yaml b/manifests/prometheus-roleBindingConfig.yaml index ec0129d..27685aa 100644 --- a/manifests/prometheus-roleBindingConfig.yaml +++ b/manifests/prometheus-roleBindingConfig.yaml @@ -1,6 +1,12 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s-config namespace: monitoring roleRef: diff --git a/manifests/prometheus-roleBindingSpecificNamespaces.yaml b/manifests/prometheus-roleBindingSpecificNamespaces.yaml index c7527f6..6658f10 100644 --- a/manifests/prometheus-roleBindingSpecificNamespaces.yaml +++ b/manifests/prometheus-roleBindingSpecificNamespaces.yaml @@ -3,6 +3,12 @@ items: - apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s namespace: default roleRef: @@ -16,6 +22,12 @@ items: - apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s namespace: kube-system roleRef: @@ -29,6 +41,12 @@ items: - apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s namespace: monitoring roleRef: diff --git a/manifests/prometheus-roleConfig.yaml b/manifests/prometheus-roleConfig.yaml index 5f1cd04..311bfbf 100644 --- a/manifests/prometheus-roleConfig.yaml +++ b/manifests/prometheus-roleConfig.yaml @@ -1,6 +1,12 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s-config namespace: monitoring rules: diff --git a/manifests/prometheus-roleSpecificNamespaces.yaml b/manifests/prometheus-roleSpecificNamespaces.yaml index b920b88..cbdf10a 100644 --- a/manifests/prometheus-roleSpecificNamespaces.yaml +++ b/manifests/prometheus-roleSpecificNamespaces.yaml @@ -3,6 +3,12 @@ items: - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s namespace: default rules: @@ -16,9 +22,31 @@ items: - get - list - watch + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - watch + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s namespace: kube-system rules: @@ -32,9 +60,31 @@ items: - get - list - watch + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - watch + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch - apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s namespace: monitoring rules: @@ -48,4 +98,20 @@ items: - get - list - watch + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - watch + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch kind: RoleList diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml deleted file mode 100644 index 722a797..0000000 --- a/manifests/prometheus-rules.yaml +++ /dev/null @@ -1,1759 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - labels: - prometheus: k8s - role: alert-rules - name: prometheus-k8s-rules - namespace: monitoring -spec: - groups: - - name: node-exporter.rules - rules: - - expr: | - count without (cpu) ( - count without (mode) ( - node_cpu_seconds_total{job="node-exporter"} - ) - ) - record: instance:node_num_cpu:sum - - expr: | - 1 - avg without (cpu, mode) ( - rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m]) - ) - record: instance:node_cpu_utilisation:rate1m - - expr: | - ( - node_load1{job="node-exporter"} - / - instance:node_num_cpu:sum{job="node-exporter"} - ) - record: instance:node_load1_per_cpu:ratio - - expr: | - 1 - ( - node_memory_MemAvailable_bytes{job="node-exporter"} - / - node_memory_MemTotal_bytes{job="node-exporter"} - ) - record: instance:node_memory_utilisation:ratio - - expr: | - rate(node_vmstat_pgmajfault{job="node-exporter"}[1m]) - record: instance:node_vmstat_pgmajfault:rate1m - - expr: | - rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m]) - record: instance_device:node_disk_io_time_seconds:rate1m - - expr: | - rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m]) - record: instance_device:node_disk_io_time_weighted_seconds:rate1m - - expr: | - sum without (device) ( - rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m]) - ) - record: instance:node_network_receive_bytes_excluding_lo:rate1m - - expr: | - sum without (device) ( - rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m]) - ) - record: instance:node_network_transmit_bytes_excluding_lo:rate1m - - expr: | - sum without (device) ( - rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m]) - ) - record: instance:node_network_receive_drop_excluding_lo:rate1m - - expr: | - sum without (device) ( - rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m]) - ) - record: instance:node_network_transmit_drop_excluding_lo:rate1m - - name: kube-apiserver.rules - rules: - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d])) - - - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d])) - ) - ) - + - # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) - labels: - verb: read - record: apiserver_request:burnrate1d - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h])) - - - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h])) - ) - ) - + - # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) - labels: - verb: read - record: apiserver_request:burnrate1h - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h])) - - - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h])) - ) - ) - + - # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) - labels: - verb: read - record: apiserver_request:burnrate2h - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m])) - - - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m])) - ) - ) - + - # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) - labels: - verb: read - record: apiserver_request:burnrate30m - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d])) - - - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d])) - ) - ) - + - # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) - labels: - verb: read - record: apiserver_request:burnrate3d - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m])) - - - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m])) - ) - ) - + - # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) - labels: - verb: read - record: apiserver_request:burnrate5m - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h])) - - - ( - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) + - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h])) - ) - ) - + - # errors - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) - labels: - verb: read - record: apiserver_request:burnrate6h - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) - - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d])) - ) - + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) - labels: - verb: write - record: apiserver_request:burnrate1d - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) - - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h])) - ) - + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) - labels: - verb: write - record: apiserver_request:burnrate1h - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) - - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h])) - ) - + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) - labels: - verb: write - record: apiserver_request:burnrate2h - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) - - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m])) - ) - + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) - labels: - verb: write - record: apiserver_request:burnrate30m - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) - - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d])) - ) - + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) - labels: - verb: write - record: apiserver_request:burnrate3d - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m])) - ) - + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - labels: - verb: write - record: apiserver_request:burnrate5m - - expr: | - ( - ( - # too slow - sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) - - - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h])) - ) - + - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) - ) - / - sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) - labels: - verb: write - record: apiserver_request:burnrate6h - - expr: | - sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) - labels: - verb: read - record: code_resource:apiserver_request_total:rate5m - - expr: | - sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - labels: - verb: write - record: code_resource:apiserver_request_total:rate5m - - expr: | - histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0 - labels: - quantile: "0.99" - verb: read - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 - labels: - quantile: "0.99" - verb: write - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - - expr: | - sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod) - / - sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod) - record: cluster:apiserver_request_duration_seconds:mean5m - - expr: | - histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) - labels: - quantile: "0.99" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) - labels: - quantile: "0.9" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)) - labels: - quantile: "0.5" - record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile - - interval: 3m - name: kube-apiserver-availability.rules - rules: - - expr: | - 1 - ( - ( - # write too slow - sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) - - - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) - ) + - ( - # read too slow - sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d])) - - - ( - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) + - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) + - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d])) - ) - ) + - # errors - sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) - ) - / - sum(code:apiserver_request_total:increase30d) - labels: - verb: all - record: apiserver_request:availability30d - - expr: | - 1 - ( - sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d])) - - - ( - # too slow - sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) + - sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) + - sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d])) - ) - + - # errors - sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) - ) - / - sum(code:apiserver_request_total:increase30d{verb="read"}) - labels: - verb: read - record: apiserver_request:availability30d - - expr: | - 1 - ( - ( - # too slow - sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) - - - sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) - ) - + - # errors - sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) - ) - / - sum(code:apiserver_request_total:increase30d{verb="write"}) - labels: - verb: write - record: apiserver_request:availability30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d])) - record: code_verb:apiserver_request_total:increase30d - - expr: | - sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) - labels: - verb: read - record: code:apiserver_request_total:increase30d - - expr: | - sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - labels: - verb: write - record: code:apiserver_request_total:increase30d - - name: k8s.rules - rules: - - expr: | - sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace) - record: namespace:container_cpu_usage_seconds_total:sum_rate - - expr: | - sum by (cluster, namespace, pod, container) ( - rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]) - ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( - 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate - - expr: | - container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, - max by(namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_working_set_bytes - - expr: | - container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, - max by(namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_rss - - expr: | - container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, - max by(namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_cache - - expr: | - container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (namespace, pod) group_left(node) topk by(namespace, pod) (1, - max by(namespace, pod, node) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_swap - - expr: | - sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace) - record: namespace:container_memory_usage_bytes:sum - - expr: | - sum by (namespace) ( - sum by (namespace, pod) ( - max by (namespace, pod, container) ( - kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} - ) * on(namespace, pod) group_left() max by (namespace, pod) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace:kube_pod_container_resource_requests_memory_bytes:sum - - expr: | - sum by (namespace) ( - sum by (namespace, pod) ( - max by (namespace, pod, container) ( - kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} - ) * on(namespace, pod) group_left() max by (namespace, pod) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace:kube_pod_container_resource_requests_cpu_cores:sum - - expr: | - max by (cluster, namespace, workload, pod) ( - label_replace( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, - "replicaset", "$1", "owner_name", "(.*)" - ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) ( - 1, max by (replicaset, namespace, owner_name) ( - kube_replicaset_owner{job="kube-state-metrics"} - ) - ), - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: deployment - record: mixin_pod_workload - - expr: | - max by (cluster, namespace, workload, pod) ( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: daemonset - record: mixin_pod_workload - - expr: | - max by (cluster, namespace, workload, pod) ( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: statefulset - record: mixin_pod_workload - - name: kube-scheduler.rules - rules: - - expr: | - histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.99" - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.99" - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.99" - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.9" - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.9" - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.9" - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.5" - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.5" - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: "0.5" - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile - - name: node.rules - rules: - - expr: | - sum(min(kube_pod_info{node!=""}) by (cluster, node)) - record: ':kube_pod_info_node_count:' - - expr: | - topk by(namespace, pod) (1, - max by (node, namespace, pod) ( - label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") - )) - record: 'node_namespace_pod:kube_pod_info:' - - expr: | - count by (cluster, node) (sum by (node, cpu) ( - node_cpu_seconds_total{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - )) - record: node:node_num_cpu:sum - - expr: | - sum( - node_memory_MemAvailable_bytes{job="node-exporter"} or - ( - node_memory_Buffers_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Slab_bytes{job="node-exporter"} - ) - ) by (cluster) - record: :node_memory_MemAvailable_bytes:sum - - name: kubelet.rules - rules: - - expr: | - histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: "0.99" - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: "0.9" - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - - expr: | - histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: "0.5" - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile - - name: kube-prometheus-node-recording.rules - rules: - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance) - record: instance:node_cpu:rate:sum - - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance) - record: instance:node_filesystem_usage:sum - - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) - record: instance:node_network_receive_bytes:rate:sum - - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) - record: instance:node_network_transmit_bytes:rate:sum - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) - record: instance:node_cpu:ratio - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) - record: cluster:node_cpu:sum_rate5m - - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) - record: cluster:node_cpu:ratio - - name: kube-prometheus-general.rules - rules: - - expr: count without(instance, pod, node) (up == 1) - record: count:up1 - - expr: count without(instance, pod, node) (up == 0) - record: count:up0 - - name: kube-state-metrics - rules: - - alert: KubeStateMetricsListErrors - annotations: - message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors - expr: | - (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) - / - sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m]))) - > 0.01 - for: 15m - labels: - severity: critical - - alert: KubeStateMetricsWatchErrors - annotations: - message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors - expr: | - (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) - / - sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m]))) - > 0.01 - for: 15m - labels: - severity: critical - - name: node-exporter - rules: - - alert: NodeFilesystemSpaceFillingUp - annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup - summary: Filesystem is predicted to run out of space within the next 24 hours. - expr: | - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40 - and - predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 - ) - for: 1h - labels: - severity: warning - - alert: NodeFilesystemSpaceFillingUp - annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup - summary: Filesystem is predicted to run out of space within the next 4 hours. - expr: | - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15 - and - predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 - ) - for: 1h - labels: - severity: critical - - alert: NodeFilesystemAlmostOutOfSpace - annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace - summary: Filesystem has less than 5% space left. - expr: | - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5 - and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 - ) - for: 1h - labels: - severity: warning - - alert: NodeFilesystemAlmostOutOfSpace - annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace - summary: Filesystem has less than 3% space left. - expr: | - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3 - and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 - ) - for: 1h - labels: - severity: critical - - alert: NodeFilesystemFilesFillingUp - annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup - summary: Filesystem is predicted to run out of inodes within the next 24 hours. - expr: | - ( - node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40 - and - predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 - ) - for: 1h - labels: - severity: warning - - alert: NodeFilesystemFilesFillingUp - annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup - summary: Filesystem is predicted to run out of inodes within the next 4 hours. - expr: | - ( - node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20 - and - predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 - ) - for: 1h - labels: - severity: critical - - alert: NodeFilesystemAlmostOutOfFiles - annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles - summary: Filesystem has less than 5% inodes left. - expr: | - ( - node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5 - and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 - ) - for: 1h - labels: - severity: warning - - alert: NodeFilesystemAlmostOutOfFiles - annotations: - description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles - summary: Filesystem has less than 3% inodes left. - expr: | - ( - node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3 - and - node_filesystem_readonly{job="node-exporter",fstype!=""} == 0 - ) - for: 1h - labels: - severity: critical - - alert: NodeNetworkReceiveErrs - annotations: - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs - summary: Network interface is reporting many receive errors. - expr: | - increase(node_network_receive_errs_total[2m]) > 10 - for: 1h - labels: - severity: warning - - alert: NodeNetworkTransmitErrs - annotations: - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs - summary: Network interface is reporting many transmit errors. - expr: | - increase(node_network_transmit_errs_total[2m]) > 10 - for: 1h - labels: - severity: warning - - alert: NodeHighNumberConntrackEntriesUsed - annotations: - description: '{{ $value | humanizePercentage }} of conntrack entries are used.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused - summary: Number of conntrack are getting close to the limit. - expr: | - (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 - labels: - severity: warning - - alert: NodeTextFileCollectorScrapeError - annotations: - description: Node Exporter text file collector failed to scrape. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror - summary: Node Exporter text file collector failed to scrape. - expr: | - node_textfile_scrape_error{job="node-exporter"} == 1 - labels: - severity: warning - - alert: NodeClockSkewDetected - annotations: - message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected - summary: Clock skew detected. - expr: | - ( - node_timex_offset_seconds > 0.05 - and - deriv(node_timex_offset_seconds[5m]) >= 0 - ) - or - ( - node_timex_offset_seconds < -0.05 - and - deriv(node_timex_offset_seconds[5m]) <= 0 - ) - for: 10m - labels: - severity: warning - - alert: NodeClockNotSynchronising - annotations: - message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising - summary: Clock not synchronising. - expr: | - min_over_time(node_timex_sync_status[5m]) == 0 - for: 10m - labels: - severity: warning - - name: kubernetes-apps - rules: - - alert: KubePodCrashLooping - annotations: - message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping - expr: | - rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0 - for: 15m - labels: - severity: warning - - alert: KubePodNotReady - annotations: - message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready - expr: | - sum by (namespace, pod) ( - max by(namespace, pod) ( - kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"} - ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) ( - 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"}) - ) - ) > 0 - for: 15m - labels: - severity: warning - - alert: KubeDeploymentGenerationMismatch - annotations: - message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch - expr: | - kube_deployment_status_observed_generation{job="kube-state-metrics"} - != - kube_deployment_metadata_generation{job="kube-state-metrics"} - for: 15m - labels: - severity: warning - - alert: KubeDeploymentReplicasMismatch - annotations: - message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch - expr: | - ( - kube_deployment_spec_replicas{job="kube-state-metrics"} - != - kube_deployment_status_replicas_available{job="kube-state-metrics"} - ) and ( - changes(kube_deployment_status_replicas_updated{job="kube-state-metrics"}[5m]) - == - 0 - ) - for: 15m - labels: - severity: warning - - alert: KubeStatefulSetReplicasMismatch - annotations: - message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch - expr: | - ( - kube_statefulset_status_replicas_ready{job="kube-state-metrics"} - != - kube_statefulset_status_replicas{job="kube-state-metrics"} - ) and ( - changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m]) - == - 0 - ) - for: 15m - labels: - severity: warning - - alert: KubeStatefulSetGenerationMismatch - annotations: - message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch - expr: | - kube_statefulset_status_observed_generation{job="kube-state-metrics"} - != - kube_statefulset_metadata_generation{job="kube-state-metrics"} - for: 15m - labels: - severity: warning - - alert: KubeStatefulSetUpdateNotRolledOut - annotations: - message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout - expr: | - ( - max without (revision) ( - kube_statefulset_status_current_revision{job="kube-state-metrics"} - unless - kube_statefulset_status_update_revision{job="kube-state-metrics"} - ) - * - ( - kube_statefulset_replicas{job="kube-state-metrics"} - != - kube_statefulset_status_replicas_updated{job="kube-state-metrics"} - ) - ) and ( - changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m]) - == - 0 - ) - for: 15m - labels: - severity: warning - - alert: KubeDaemonSetRolloutStuck - annotations: - message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck - expr: | - kube_daemonset_status_number_ready{job="kube-state-metrics"} - / - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00 - for: 15m - labels: - severity: warning - - alert: KubeContainerWaiting - annotations: - message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting - expr: | - sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0 - for: 1h - labels: - severity: warning - - alert: KubeDaemonSetNotScheduled - annotations: - message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled - expr: | - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} - - - kube_daemonset_status_current_number_scheduled{job="kube-state-metrics"} > 0 - for: 10m - labels: - severity: warning - - alert: KubeDaemonSetMisScheduled - annotations: - message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled - expr: | - kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0 - for: 15m - labels: - severity: warning - - alert: KubeCronJobRunning - annotations: - message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning - expr: | - time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600 - for: 1h - labels: - severity: warning - - alert: KubeJobCompletion - annotations: - message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than one hour to complete. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion - expr: | - kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0 - for: 1h - labels: - severity: warning - - alert: KubeJobFailed - annotations: - message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed - expr: | - kube_job_failed{job="kube-state-metrics"} > 0 - for: 15m - labels: - severity: warning - - alert: KubeHpaReplicasMismatch - annotations: - message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch - expr: | - (kube_hpa_status_desired_replicas{job="kube-state-metrics"} - != - kube_hpa_status_current_replicas{job="kube-state-metrics"}) - and - changes(kube_hpa_status_current_replicas[15m]) == 0 - for: 15m - labels: - severity: warning - - alert: KubeHpaMaxedOut - annotations: - message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout - expr: | - kube_hpa_status_current_replicas{job="kube-state-metrics"} - == - kube_hpa_spec_max_replicas{job="kube-state-metrics"} - for: 15m - labels: - severity: warning - - name: kubernetes-resources - rules: - - alert: KubeCPUOvercommit - annotations: - message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit - expr: | - sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{}) - / - sum(kube_node_status_allocatable_cpu_cores) - > - (count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores) - for: 5m - labels: - severity: warning - - alert: KubeMemoryOvercommit - annotations: - message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit - expr: | - sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{}) - / - sum(kube_node_status_allocatable_memory_bytes) - > - (count(kube_node_status_allocatable_memory_bytes)-1) - / - count(kube_node_status_allocatable_memory_bytes) - for: 5m - labels: - severity: warning - - alert: KubeCPUQuotaOvercommit - annotations: - message: Cluster has overcommitted CPU resource requests for Namespaces. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit - expr: | - sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) - / - sum(kube_node_status_allocatable_cpu_cores) - > 1.5 - for: 5m - labels: - severity: warning - - alert: KubeMemoryQuotaOvercommit - annotations: - message: Cluster has overcommitted memory resource requests for Namespaces. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit - expr: | - sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) - / - sum(kube_node_status_allocatable_memory_bytes{job="node-exporter"}) - > 1.5 - for: 5m - labels: - severity: warning - - alert: KubeQuotaExceeded - annotations: - message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded - expr: | - kube_resourcequota{job="kube-state-metrics", type="used"} - / ignoring(instance, job, type) - (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - > 0.90 - for: 15m - labels: - severity: warning - - alert: CPUThrottlingHigh - annotations: - message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh - expr: | - sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace) - / - sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace) - > ( 25 / 100 ) - for: 15m - labels: - severity: warning - - name: kubernetes-storage - rules: - - alert: KubePersistentVolumeFillingUp - annotations: - message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - expr: | - kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"} - / - kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"} - < 0.03 - for: 1m - labels: - severity: critical - - alert: KubePersistentVolumeFillingUp - annotations: - message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - expr: | - ( - kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"} - / - kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"} - ) < 0.15 - and - predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - for: 1h - labels: - severity: warning - - alert: KubePersistentVolumeErrors - annotations: - message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors - expr: | - kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 - for: 5m - labels: - severity: critical - - name: kubernetes-system - rules: - - alert: KubeVersionMismatch - annotations: - message: There are {{ $value }} different semantic versions of Kubernetes components running. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch - expr: | - count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1 - for: 15m - labels: - severity: warning - - alert: KubeClientErrors - annotations: - message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors - expr: | - (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job) - / - sum(rate(rest_client_requests_total[5m])) by (instance, job)) - > 0.01 - for: 15m - labels: - severity: warning - - name: kube-apiserver-slos - rules: - - alert: KubeAPIErrorBudgetBurn - annotations: - message: The API server is burning too much error budget - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn - expr: | - sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) - and - sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) - for: 2m - labels: - long: 1h - severity: critical - short: 5m - - alert: KubeAPIErrorBudgetBurn - annotations: - message: The API server is burning too much error budget - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn - expr: | - sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) - and - sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) - for: 15m - labels: - long: 6h - severity: critical - short: 30m - - alert: KubeAPIErrorBudgetBurn - annotations: - message: The API server is burning too much error budget - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn - expr: | - sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) - and - sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) - for: 1h - labels: - long: 1d - severity: warning - short: 2h - - alert: KubeAPIErrorBudgetBurn - annotations: - message: The API server is burning too much error budget - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn - expr: | - sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) - and - sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) - for: 3h - labels: - long: 3d - severity: warning - short: 6h - - name: kubernetes-system-apiserver - rules: - - alert: KubeAPILatencyHigh - annotations: - message: The API server has an abnormal latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh - expr: | - cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} - > - 1 - and on (verb,resource) - ( - cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} - > - on (verb) group_left() - ( - avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) - + - 2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) - ) - ) > on (verb) group_left() - 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) - for: 5m - labels: - severity: warning - - alert: KubeAPIErrorsHigh - annotations: - message: API server is returning errors for {{ $value | humanizePercentage }} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh - expr: | - sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb) - / - sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05 - for: 10m - labels: - severity: warning - - alert: KubeClientCertificateExpiration - annotations: - message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration - expr: | - apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 - labels: - severity: warning - - alert: KubeClientCertificateExpiration - annotations: - message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration - expr: | - apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 - labels: - severity: critical - - alert: AggregatedAPIErrors - annotations: - message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors - expr: | - sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2 - labels: - severity: warning - - alert: AggregatedAPIDown - annotations: - message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down. It has not been available at least for the past five minutes. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown - expr: | - sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0 - for: 5m - labels: - severity: warning - - alert: KubeAPIDown - annotations: - message: KubeAPI has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown - expr: | - absent(up{job="apiserver"} == 1) - for: 15m - labels: - severity: critical - - name: kubernetes-system-kubelet - rules: - - alert: KubeNodeNotReady - annotations: - message: '{{ $labels.node }} has been unready for more than 15 minutes.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready - expr: | - kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 - for: 15m - labels: - severity: warning - - alert: KubeNodeUnreachable - annotations: - message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable - expr: | - (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key="ToBeDeletedByClusterAutoscaler"}) == 1 - labels: - severity: warning - - alert: KubeletTooManyPods - annotations: - message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods - expr: | - max(max(kubelet_running_pod_count{job="kubelet", metrics_path="/metrics"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"} != 1) by(node) > 0.95 - for: 15m - labels: - severity: warning - - alert: KubeNodeReadinessFlapping - annotations: - message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping - expr: | - sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2 - for: 15m - labels: - severity: warning - - alert: KubeletPlegDurationHigh - annotations: - message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh - expr: | - node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 - for: 5m - labels: - severity: warning - - alert: KubeletPodStartUpLatencyHigh - annotations: - message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh - expr: | - histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 - for: 15m - labels: - severity: warning - - alert: KubeletDown - annotations: - message: Kubelet has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown - expr: | - absent(up{job="kubelet", metrics_path="/metrics"} == 1) - for: 15m - labels: - severity: critical - - name: kubernetes-system-scheduler - rules: - - alert: KubeSchedulerDown - annotations: - message: KubeScheduler has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown - expr: | - absent(up{job="kube-scheduler"} == 1) - for: 15m - labels: - severity: critical - - name: kubernetes-system-controller-manager - rules: - - alert: KubeControllerManagerDown - annotations: - message: KubeControllerManager has disappeared from Prometheus target discovery. - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown - expr: | - absent(up{job="kube-controller-manager"} == 1) - for: 15m - labels: - severity: critical - - name: prometheus - rules: - - alert: PrometheusBadConfig - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration. - summary: Failed Prometheus configuration reload. - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0 - for: 10m - labels: - severity: critical - - alert: PrometheusNotificationQueueRunningFull - annotations: - description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full. - summary: Prometheus alert notification queue predicted to run full in less than 30m. - expr: | - # Without min_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30) - > - min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m]) - ) - for: 15m - labels: - severity: warning - - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers - annotations: - description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.' - summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. - expr: | - ( - rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) - / - rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) - ) - * 100 - > 1 - for: 15m - labels: - severity: warning - - alert: PrometheusErrorSendingAlertsToAnyAlertmanager - annotations: - description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.' - summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. - expr: | - min without(alertmanager) ( - rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) - / - rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) - ) - * 100 - > 3 - for: 15m - labels: - severity: critical - - alert: PrometheusNotConnectedToAlertmanagers - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers. - summary: Prometheus is not connected to any Alertmanagers. - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1 - for: 10m - labels: - severity: warning - - alert: PrometheusTSDBReloadsFailing - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h. - summary: Prometheus has issues reloading blocks from disk. - expr: | - increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0 - for: 4h - labels: - severity: warning - - alert: PrometheusTSDBCompactionsFailing - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h. - summary: Prometheus has issues compacting blocks. - expr: | - increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0 - for: 4h - labels: - severity: warning - - alert: PrometheusNotIngestingSamples - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples. - summary: Prometheus is not ingesting samples. - expr: | - rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0 - for: 10m - labels: - severity: warning - - alert: PrometheusDuplicateTimestamps - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp. - summary: Prometheus is dropping samples with duplicate timestamps. - expr: | - rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 - for: 10m - labels: - severity: warning - - alert: PrometheusOutOfOrderTimestamps - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order. - summary: Prometheus drops samples with out-of-order timestamps. - expr: | - rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 - for: 10m - labels: - severity: warning - - alert: PrometheusRemoteStorageFailures - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }} - summary: Prometheus fails to send samples to remote storage. - expr: | - ( - rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) - / - ( - rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) - + - rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) - ) - ) - * 100 - > 1 - for: 15m - labels: - severity: critical - - alert: PrometheusRemoteWriteBehind - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}. - summary: Prometheus remote write is behind. - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m]) - - on(job, instance) group_right - max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m]) - ) - > 120 - for: 15m - labels: - severity: critical - - alert: PrometheusRemoteWriteDesiredShards - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}. - summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. - expr: | - # Without max_over_time, failed scrapes could create false negatives, see - # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. - ( - max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m]) - > - max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m]) - ) - for: 15m - labels: - severity: warning - - alert: PrometheusRuleFailures - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m. - summary: Prometheus is failing rule evaluations. - expr: | - increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 - for: 15m - labels: - severity: critical - - alert: PrometheusMissingRuleEvaluations - annotations: - description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m. - summary: Prometheus is missing rule evaluations due to slow rule group evaluation. - expr: | - increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 - for: 15m - labels: - severity: warning - - name: alertmanager.rules - rules: - - alert: AlertmanagerConfigInconsistent - annotations: - message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync. - expr: | - count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1 - for: 5m - labels: - severity: critical - - alert: AlertmanagerFailedReload - annotations: - message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}. - expr: | - alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0 - for: 10m - labels: - severity: warning - - alert: AlertmanagerMembersInconsistent - annotations: - message: Alertmanager has not found all other members of the cluster. - expr: | - alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"} - != on (service) GROUP_LEFT() - count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}) - for: 5m - labels: - severity: critical - - name: general.rules - rules: - - alert: TargetDown - annotations: - message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.' - expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10 - for: 10m - labels: - severity: warning - - alert: Watchdog - annotations: - message: | - This is an alert meant to ensure that the entire alerting pipeline is functional. - This alert is always firing, therefore it should always be firing in Alertmanager - and always fire against a receiver. There are integrations with various notification - mechanisms that send a notification when this alert is not firing. For example the - "DeadMansSnitch" integration in PagerDuty. - expr: vector(1) - labels: - severity: none - - name: node-network - rules: - - alert: NodeNetworkInterfaceFlapping - annotations: - message: Network interface "{{ $labels.device }}" changing it's up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}" - expr: | - changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 - for: 2m - labels: - severity: warning - - name: prometheus-operator - rules: - - alert: PrometheusOperatorReconcileErrors - annotations: - message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace. - expr: | - rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 - for: 10m - labels: - severity: warning - - alert: PrometheusOperatorNodeLookupErrors - annotations: - message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace. - expr: | - rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1 - for: 10m - labels: - severity: warning diff --git a/manifests/prometheus-service.yaml b/manifests/prometheus-service.yaml index 4f61e88..23ea918 100644 --- a/manifests/prometheus-service.yaml +++ b/manifests/prometheus-service.yaml @@ -2,7 +2,11 @@ apiVersion: v1 kind: Service metadata: labels: - prometheus: k8s + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s namespace: monitoring spec: @@ -10,7 +14,12 @@ spec: - name: web port: 9090 targetPort: web + - name: reloader-web + port: 8080 + targetPort: reloader-web selector: - app: prometheus - prometheus: k8s + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus sessionAffinity: ClientIP diff --git a/manifests/prometheus-serviceAccount.yaml b/manifests/prometheus-serviceAccount.yaml index 3e55fad..2a4ada1 100644 --- a/manifests/prometheus-serviceAccount.yaml +++ b/manifests/prometheus-serviceAccount.yaml @@ -1,5 +1,12 @@ apiVersion: v1 +automountServiceAccountToken: true kind: ServiceAccount metadata: + labels: + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 name: prometheus-k8s namespace: monitoring diff --git a/manifests/prometheus-serviceMonitor.yaml b/manifests/prometheus-serviceMonitor.yaml index b7605db..936b449 100644 --- a/manifests/prometheus-serviceMonitor.yaml +++ b/manifests/prometheus-serviceMonitor.yaml @@ -2,13 +2,22 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: labels: - k8s-app: prometheus - name: prometheus + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus + app.kubernetes.io/version: 2.41.0 + name: prometheus-k8s namespace: monitoring spec: endpoints: - interval: 30s port: web + - interval: 30s + port: reloader-web selector: matchLabels: - prometheus: k8s + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: k8s + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus diff --git a/manifests/prometheus-serviceMonitorApiserver.yaml b/manifests/prometheus-serviceMonitorApiserver.yaml deleted file mode 100644 index 500c0d3..0000000 --- a/manifests/prometheus-serviceMonitorApiserver.yaml +++ /dev/null @@ -1,74 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - k8s-app: apiserver - name: kube-apiserver - namespace: monitoring -spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - interval: 30s - metricRelabelings: - - action: drop - regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) - sourceLabels: - - __name__ - - action: drop - regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) - sourceLabels: - - __name__ - - action: drop - regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs) - sourceLabels: - - __name__ - - action: drop - regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) - sourceLabels: - - __name__ - - action: drop - regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) - sourceLabels: - - __name__ - - action: drop - regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary) - sourceLabels: - - __name__ - - action: drop - regex: transformation_(transformation_latencies_microseconds|failures_total) - sourceLabels: - - __name__ - - action: drop - regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries) - sourceLabels: - - __name__ - - action: drop - regex: etcd_(debugging|disk|request|server).* - sourceLabels: - - __name__ - - action: drop - regex: apiserver_admission_controller_admission_latencies_seconds_.* - sourceLabels: - - __name__ - - action: drop - regex: apiserver_admission_step_admission_latencies_seconds_.* - sourceLabels: - - __name__ - - action: drop - regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50) - sourceLabels: - - __name__ - - le - port: https - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - serverName: kubernetes - jobLabel: component - namespaceSelector: - matchNames: - - default - selector: - matchLabels: - component: apiserver - provider: kubernetes diff --git a/manifests/prometheus-serviceMonitorCoreDNS.yaml b/manifests/prometheus-serviceMonitorCoreDNS.yaml deleted file mode 100644 index 633aa18..0000000 --- a/manifests/prometheus-serviceMonitorCoreDNS.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - k8s-app: coredns - name: coredns - namespace: monitoring -spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - interval: 15s - port: metrics - jobLabel: k8s-app - namespaceSelector: - matchNames: - - kube-system - selector: - matchLabels: - k8s-app: kube-dns diff --git a/manifests/prometheus-serviceMonitorKubeControllerManager.yaml b/manifests/prometheus-serviceMonitorKubeControllerManager.yaml deleted file mode 100644 index 7f20fce..0000000 --- a/manifests/prometheus-serviceMonitorKubeControllerManager.yaml +++ /dev/null @@ -1,55 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - k8s-app: kube-controller-manager - name: kube-controller-manager - namespace: monitoring -spec: - endpoints: - - interval: 30s - metricRelabelings: - - action: drop - regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) - sourceLabels: - - __name__ - - action: drop - regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) - sourceLabels: - - __name__ - - action: drop - regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs) - sourceLabels: - - __name__ - - action: drop - regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) - sourceLabels: - - __name__ - - action: drop - regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) - sourceLabels: - - __name__ - - action: drop - regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary) - sourceLabels: - - __name__ - - action: drop - regex: transformation_(transformation_latencies_microseconds|failures_total) - sourceLabels: - - __name__ - - action: drop - regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries) - sourceLabels: - - __name__ - - action: drop - regex: etcd_(debugging|disk|request|server).* - sourceLabels: - - __name__ - port: http-metrics - jobLabel: k8s-app - namespaceSelector: - matchNames: - - kube-system - selector: - matchLabels: - k8s-app: kube-controller-manager diff --git a/manifests/prometheus-serviceMonitorKubeScheduler.yaml b/manifests/prometheus-serviceMonitorKubeScheduler.yaml deleted file mode 100644 index f00db0e..0000000 --- a/manifests/prometheus-serviceMonitorKubeScheduler.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - k8s-app: kube-scheduler - name: kube-scheduler - namespace: monitoring -spec: - endpoints: - - interval: 30s - port: http-metrics - jobLabel: k8s-app - namespaceSelector: - matchNames: - - kube-system - selector: - matchLabels: - k8s-app: kube-scheduler diff --git a/manifests/prometheus-serviceMonitorKubelet.yaml b/manifests/prometheus-serviceMonitorKubelet.yaml deleted file mode 100644 index 6ee73fd..0000000 --- a/manifests/prometheus-serviceMonitorKubelet.yaml +++ /dev/null @@ -1,77 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - k8s-app: kubelet - name: kubelet - namespace: monitoring -spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - honorLabels: true - interval: 30s - metricRelabelings: - - action: drop - regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) - sourceLabels: - - __name__ - - action: drop - regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) - sourceLabels: - - __name__ - - action: drop - regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs) - sourceLabels: - - __name__ - - action: drop - regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) - sourceLabels: - - __name__ - - action: drop - regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) - sourceLabels: - - __name__ - - action: drop - regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary) - sourceLabels: - - __name__ - - action: drop - regex: transformation_(transformation_latencies_microseconds|failures_total) - sourceLabels: - - __name__ - - action: drop - regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries) - sourceLabels: - - __name__ - port: https-metrics - relabelings: - - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - scheme: https - tlsConfig: - insecureSkipVerify: true - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - honorLabels: true - interval: 30s - metricRelabelings: - - action: drop - regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) - sourceLabels: - - __name__ - path: /metrics/cadvisor - port: https-metrics - relabelings: - - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - scheme: https - tlsConfig: - insecureSkipVerify: true - jobLabel: k8s-app - namespaceSelector: - matchNames: - - kube-system - selector: - matchLabels: - k8s-app: kubelet diff --git a/manifests/setup/0namespace-prometheusRule.yaml b/manifests/setup/0namespace-prometheusRule.yaml new file mode 100644 index 0000000..0facf21 --- /dev/null +++ b/manifests/setup/0namespace-prometheusRule.yaml @@ -0,0 +1,83 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-prometheus + app.kubernetes.io/part-of: kube-prometheus + prometheus: k8s + role: alert-rules + name: kube-prometheus-rules + namespace: monitoring +spec: + groups: + - name: general.rules + rules: + - alert: TargetDown + annotations: + description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown + summary: One or more targets are unreachable. + expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10 + for: 10m + labels: + severity: warning + - alert: Watchdog + annotations: + description: | + This is an alert meant to ensure that the entire alerting pipeline is functional. + This alert is always firing, therefore it should always be firing in Alertmanager + and always fire against a receiver. There are integrations with various notification + mechanisms that send a notification when this alert is not firing. For example the + "DeadMansSnitch" integration in PagerDuty. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog + summary: An alert that should always be firing to certify that Alertmanager is working properly. + expr: vector(1) + labels: + severity: none + - alert: InfoInhibitor + annotations: + description: | + This is an alert that is used to inhibit info alerts. + By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with + other alerts. + This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a + severity of 'warning' or 'critical' starts firing on the same namespace. + This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor + summary: Info-level alert inhibition. + expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 + labels: + severity: none + - name: node-network + rules: + - alert: NodeNetworkInterfaceFlapping + annotations: + description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }} + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping + summary: Network interface is often changing its status + expr: | + changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: 2m + labels: + severity: warning + - name: kube-prometheus-node-recording.rules + rules: + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance) + record: instance:node_cpu:rate:sum + - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + record: instance:node_network_receive_bytes:rate:sum + - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + record: instance:node_network_transmit_bytes:rate:sum + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) + record: instance:node_cpu:ratio + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) + record: cluster:node_cpu:sum_rate5m + - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) + record: cluster:node_cpu:ratio + - name: kube-prometheus-general.rules + rules: + - expr: count without(instance, pod, node) (up == 1) + record: count:up1 + - expr: count without(instance, pod, node) (up == 0) + record: count:up0 diff --git a/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml new file mode 100644 index 0000000..ad8336d --- /dev/null +++ b/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml @@ -0,0 +1,3401 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.2 + creationTimestamp: null + name: alertmanagerconfigs.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + categories: + - prometheus-operator + kind: AlertmanagerConfig + listKind: AlertmanagerConfigList + plural: alertmanagerconfigs + shortNames: + - amcfg + singular: alertmanagerconfig + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: AlertmanagerConfig defines a namespaced AlertmanagerConfig to be aggregated across multiple namespaces configuring one Alertmanager cluster. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: AlertmanagerConfigSpec is a specification of the desired behavior of the Alertmanager configuration. By definition, the Alertmanager configuration only applies to alerts for which the `namespace` label is equal to the namespace of the AlertmanagerConfig resource. + properties: + inhibitRules: + description: List of inhibition rules. The rules will only apply to alerts matching the resource's namespace. + items: + description: InhibitRule defines an inhibition rule that allows to mute alerts when other alerts are already firing. See https://prometheus.io/docs/alerting/latest/configuration/#inhibit_rule + properties: + equal: + description: Labels that must have an equal value in the source and target alert for the inhibition to take effect. + items: + type: string + type: array + sourceMatch: + description: Matchers for which one or more alerts have to exist for the inhibition to take effect. The operator enforces that the alert matches the resource's namespace. + items: + description: Matcher defines how to match on alert's labels. + properties: + matchType: + description: Match operation available with AlertManager >= v0.22.0 and takes precedence over Regex (deprecated) if non-empty. + enum: + - '!=' + - = + - =~ + - '!~' + type: string + name: + description: Label to match. + minLength: 1 + type: string + regex: + description: Whether to match on equality (false) or regular-expression (true). Deprecated as of AlertManager >= v0.22.0 where a user should use MatchType instead. + type: boolean + value: + description: Label value to match. + type: string + required: + - name + type: object + type: array + targetMatch: + description: Matchers that have to be fulfilled in the alerts to be muted. The operator enforces that the alert matches the resource's namespace. + items: + description: Matcher defines how to match on alert's labels. + properties: + matchType: + description: Match operation available with AlertManager >= v0.22.0 and takes precedence over Regex (deprecated) if non-empty. + enum: + - '!=' + - = + - =~ + - '!~' + type: string + name: + description: Label to match. + minLength: 1 + type: string + regex: + description: Whether to match on equality (false) or regular-expression (true). Deprecated as of AlertManager >= v0.22.0 where a user should use MatchType instead. + type: boolean + value: + description: Label value to match. + type: string + required: + - name + type: object + type: array + type: object + type: array + muteTimeIntervals: + description: List of MuteTimeInterval specifying when the routes should be muted. + items: + description: MuteTimeInterval specifies the periods in time when notifications will be muted + properties: + name: + description: Name of the time interval + type: string + timeIntervals: + description: TimeIntervals is a list of TimeInterval + items: + description: TimeInterval describes intervals of time + properties: + daysOfMonth: + description: DaysOfMonth is a list of DayOfMonthRange + items: + description: DayOfMonthRange is an inclusive range of days of the month beginning at 1 + properties: + end: + description: End of the inclusive range + maximum: 31 + minimum: -31 + type: integer + start: + description: Start of the inclusive range + maximum: 31 + minimum: -31 + type: integer + type: object + type: array + months: + description: Months is a list of MonthRange + items: + description: MonthRange is an inclusive range of months of the year beginning in January Months can be specified by name (e.g 'January') by numerical month (e.g '1') or as an inclusive range (e.g 'January:March', '1:3', '1:March') + pattern: ^((?i)january|february|march|april|may|june|july|august|september|october|november|december|[1-12])(?:((:((?i)january|february|march|april|may|june|july|august|september|october|november|december|[1-12]))$)|$) + type: string + type: array + times: + description: Times is a list of TimeRange + items: + description: TimeRange defines a start and end time in 24hr format + properties: + endTime: + description: EndTime is the end time in 24hr format. + pattern: ^((([01][0-9])|(2[0-3])):[0-5][0-9])$|(^24:00$) + type: string + startTime: + description: StartTime is the start time in 24hr format. + pattern: ^((([01][0-9])|(2[0-3])):[0-5][0-9])$|(^24:00$) + type: string + type: object + type: array + weekdays: + description: Weekdays is a list of WeekdayRange + items: + description: WeekdayRange is an inclusive range of days of the week beginning on Sunday Days can be specified by name (e.g 'Sunday') or as an inclusive range (e.g 'Monday:Friday') + pattern: ^((?i)sun|mon|tues|wednes|thurs|fri|satur)day(?:((:(sun|mon|tues|wednes|thurs|fri|satur)day)$)|$) + type: string + type: array + years: + description: Years is a list of YearRange + items: + description: YearRange is an inclusive range of years + pattern: ^2\d{3}(?::2\d{3}|$) + type: string + type: array + type: object + type: array + type: object + type: array + receivers: + description: List of receivers. + items: + description: Receiver defines one or more notification integrations. + properties: + emailConfigs: + description: List of Email configurations. + items: + description: EmailConfig configures notifications via Email. + properties: + authIdentity: + description: The identity to use for authentication. + type: string + authPassword: + description: The secret's key that contains the password to use for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + authSecret: + description: The secret's key that contains the CRAM-MD5 secret. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + authUsername: + description: The username to use for authentication. + type: string + from: + description: The sender address. + type: string + headers: + description: Further headers email header key/value pairs. Overrides any headers previously set by the notification implementation. + items: + description: KeyValue defines a (key, value) tuple. + properties: + key: + description: Key of the tuple. + minLength: 1 + type: string + value: + description: Value of the tuple. + type: string + required: + - key + - value + type: object + type: array + hello: + description: The hostname to identify to the SMTP server. + type: string + html: + description: The HTML body of the email notification. + type: string + requireTLS: + description: The SMTP TLS requirement. Note that Go does not support unencrypted connections to remote SMTP endpoints. + type: boolean + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + smarthost: + description: The SMTP host and port through which emails are sent. E.g. example.com:25 + type: string + text: + description: The text body of the email notification. + type: string + tlsConfig: + description: TLS configuration + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + to: + description: The email address to send notifications to. + type: string + type: object + type: array + name: + description: Name of the receiver. Must be unique across all items from the list. + minLength: 1 + type: string + opsgenieConfigs: + description: List of OpsGenie configurations. + items: + description: OpsGenieConfig configures notifications via OpsGenie. See https://prometheus.io/docs/alerting/latest/configuration/#opsgenie_config + properties: + actions: + description: Comma separated list of actions that will be available for the alert. + type: string + apiKey: + description: The secret's key that contains the OpsGenie API key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + apiURL: + description: The URL to send OpsGenie API requests to. + type: string + description: + description: Description of the incident. + type: string + details: + description: A set of arbitrary key/value pairs that provide further detail about the incident. + items: + description: KeyValue defines a (key, value) tuple. + properties: + key: + description: Key of the tuple. + minLength: 1 + type: string + value: + description: Value of the tuple. + type: string + required: + - key + - value + type: object + type: array + entity: + description: Optional field that can be used to specify which domain alert is related to. + type: string + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + message: + description: Alert text limited to 130 characters. + type: string + note: + description: Additional alert note. + type: string + priority: + description: Priority level of alert. Possible values are P1, P2, P3, P4, and P5. + type: string + responders: + description: List of responders responsible for notifications. + items: + description: OpsGenieConfigResponder defines a responder to an incident. One of `id`, `name` or `username` has to be defined. + properties: + id: + description: ID of the responder. + type: string + name: + description: Name of the responder. + type: string + type: + description: Type of responder. + enum: + - team + - teams + - user + - escalation + - schedule + minLength: 1 + type: string + username: + description: Username of the responder. + type: string + required: + - type + type: object + type: array + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + source: + description: Backlink to the sender of the notification. + type: string + tags: + description: Comma separated list of tags attached to the notifications. + type: string + updateAlerts: + description: Whether to update message and description of the alert in OpsGenie if it already exists By default, the alert is never updated in OpsGenie, the new message only appears in activity log. + type: boolean + type: object + type: array + pagerdutyConfigs: + description: List of PagerDuty configurations. + items: + description: PagerDutyConfig configures notifications via PagerDuty. See https://prometheus.io/docs/alerting/latest/configuration/#pagerduty_config + properties: + class: + description: The class/type of the event. + type: string + client: + description: Client identification. + type: string + clientURL: + description: Backlink to the sender of notification. + type: string + component: + description: The part or component of the affected system that is broken. + type: string + description: + description: Description of the incident. + type: string + details: + description: Arbitrary key/value pairs that provide further detail about the incident. + items: + description: KeyValue defines a (key, value) tuple. + properties: + key: + description: Key of the tuple. + minLength: 1 + type: string + value: + description: Value of the tuple. + type: string + required: + - key + - value + type: object + type: array + group: + description: A cluster or grouping of sources. + type: string + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + pagerDutyImageConfigs: + description: A list of image details to attach that provide further detail about an incident. + items: + description: PagerDutyImageConfig attaches images to an incident + properties: + alt: + description: Alt is the optional alternative text for the image. + type: string + href: + description: Optional URL; makes the image a clickable link. + type: string + src: + description: Src of the image being attached to the incident + type: string + type: object + type: array + pagerDutyLinkConfigs: + description: A list of link details to attach that provide further detail about an incident. + items: + description: PagerDutyLinkConfig attaches text links to an incident + properties: + alt: + description: Text that describes the purpose of the link, and can be used as the link's text. + type: string + href: + description: Href is the URL of the link to be attached + type: string + type: object + type: array + routingKey: + description: The secret's key that contains the PagerDuty integration key (when using Events API v2). Either this field or `serviceKey` needs to be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + serviceKey: + description: The secret's key that contains the PagerDuty service key (when using integration type "Prometheus"). Either this field or `routingKey` needs to be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + severity: + description: Severity of the incident. + type: string + url: + description: The URL to send requests to. + type: string + type: object + type: array + pushoverConfigs: + description: List of Pushover configurations. + items: + description: PushoverConfig configures notifications via Pushover. See https://prometheus.io/docs/alerting/latest/configuration/#pushover_config + properties: + expire: + description: How long your notification will continue to be retried for, unless the user acknowledges the notification. + pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$ + type: string + html: + description: Whether notification message is HTML or plain text. + type: boolean + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + message: + description: Notification message. + type: string + priority: + description: Priority, see https://pushover.net/api#priority + type: string + retry: + description: How often the Pushover servers will send the same notification to the user. Must be at least 30 seconds. + pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$ + type: string + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + sound: + description: The name of one of the sounds supported by device clients to override the user's default sound choice + type: string + title: + description: Notification title. + type: string + token: + description: The secret's key that contains the registered application's API token, see https://pushover.net/apps. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + url: + description: A supplementary URL shown alongside the message. + type: string + urlTitle: + description: A title for supplementary URL, otherwise just the URL is shown + type: string + userKey: + description: The secret's key that contains the recipient user's user key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + type: object + type: array + slackConfigs: + description: List of Slack configurations. + items: + description: SlackConfig configures notifications via Slack. See https://prometheus.io/docs/alerting/latest/configuration/#slack_config + properties: + actions: + description: A list of Slack actions that are sent with each notification. + items: + description: SlackAction configures a single Slack action that is sent with each notification. See https://api.slack.com/docs/message-attachments#action_fields and https://api.slack.com/docs/message-buttons for more information. + properties: + confirm: + description: SlackConfirmationField protect users from destructive actions or particularly distinguished decisions by asking them to confirm their button click one more time. See https://api.slack.com/docs/interactive-message-field-guide#confirmation_fields for more information. + properties: + dismissText: + type: string + okText: + type: string + text: + minLength: 1 + type: string + title: + type: string + required: + - text + type: object + name: + type: string + style: + type: string + text: + minLength: 1 + type: string + type: + minLength: 1 + type: string + url: + type: string + value: + type: string + required: + - text + - type + type: object + type: array + apiURL: + description: The secret's key that contains the Slack webhook URL. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + callbackId: + type: string + channel: + description: The channel or user to send notifications to. + type: string + color: + type: string + fallback: + type: string + fields: + description: A list of Slack fields that are sent with each notification. + items: + description: SlackField configures a single Slack field that is sent with each notification. Each field must contain a title, value, and optionally, a boolean value to indicate if the field is short enough to be displayed next to other fields designated as short. See https://api.slack.com/docs/message-attachments#fields for more information. + properties: + short: + type: boolean + title: + minLength: 1 + type: string + value: + minLength: 1 + type: string + required: + - title + - value + type: object + type: array + footer: + type: string + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + iconEmoji: + type: string + iconURL: + type: string + imageURL: + type: string + linkNames: + type: boolean + mrkdwnIn: + items: + type: string + type: array + pretext: + type: string + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + shortFields: + type: boolean + text: + type: string + thumbURL: + type: string + title: + type: string + titleLink: + type: string + username: + type: string + type: object + type: array + snsConfigs: + description: List of SNS configurations + items: + description: SNSConfig configures notifications via AWS SNS. See https://prometheus.io/docs/alerting/latest/configuration/#sns_configs + properties: + apiURL: + description: The SNS API URL i.e. https://sns.us-east-2.amazonaws.com. If not specified, the SNS API URL from the SNS SDK will be used. + type: string + attributes: + additionalProperties: + type: string + description: SNS message attributes. + type: object + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + message: + description: The message content of the SNS notification. + type: string + phoneNumber: + description: Phone number if message is delivered via SMS in E.164 format. If you don't specify this value, you must specify a value for the TopicARN or TargetARN. + type: string + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + sigv4: + description: Configures AWS's Signature Verification 4 signing process to sign requests. + properties: + accessKey: + description: AccessKey is the AWS API key. If blank, the environment variable `AWS_ACCESS_KEY_ID` is used. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + profile: + description: Profile is the named AWS profile used to authenticate. + type: string + region: + description: Region is the AWS region. If blank, the region from the default credentials chain used. + type: string + roleArn: + description: RoleArn is the named AWS profile used to authenticate. + type: string + secretKey: + description: SecretKey is the AWS API secret. If blank, the environment variable `AWS_SECRET_ACCESS_KEY` is used. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + subject: + description: Subject line when the message is delivered to email endpoints. + type: string + targetARN: + description: The mobile platform endpoint ARN if message is delivered via mobile notifications. If you don't specify this value, you must specify a value for the topic_arn or PhoneNumber. + type: string + topicARN: + description: SNS topic ARN, i.e. arn:aws:sns:us-east-2:698519295917:My-Topic If you don't specify this value, you must specify a value for the PhoneNumber or TargetARN. + type: string + type: object + type: array + telegramConfigs: + description: List of Telegram configurations. + items: + description: TelegramConfig configures notifications via Telegram. See https://prometheus.io/docs/alerting/latest/configuration/#telegram_config + properties: + apiURL: + description: The Telegram API URL i.e. https://api.telegram.org. If not specified, default API URL will be used. + type: string + botToken: + description: Telegram bot token The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + chatID: + description: The Telegram chat ID. + format: int64 + type: integer + disableNotifications: + description: Disable telegram notifications + type: boolean + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + message: + description: Message template + type: string + parseMode: + description: Parse mode for telegram message + enum: + - MarkdownV2 + - Markdown + - HTML + type: string + sendResolved: + description: Whether to notify about resolved alerts. + type: boolean + type: object + type: array + victoropsConfigs: + description: List of VictorOps configurations. + items: + description: VictorOpsConfig configures notifications via VictorOps. See https://prometheus.io/docs/alerting/latest/configuration/#victorops_config + properties: + apiKey: + description: The secret's key that contains the API key to use when talking to the VictorOps API. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + apiUrl: + description: The VictorOps API URL. + type: string + customFields: + description: Additional custom fields for notification. + items: + description: KeyValue defines a (key, value) tuple. + properties: + key: + description: Key of the tuple. + minLength: 1 + type: string + value: + description: Value of the tuple. + type: string + required: + - key + - value + type: object + type: array + entityDisplayName: + description: Contains summary of the alerted problem. + type: string + httpConfig: + description: The HTTP client's configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + messageType: + description: Describes the behavior of the alert (CRITICAL, WARNING, INFO). + type: string + monitoringTool: + description: The monitoring tool the state message is from. + type: string + routingKey: + description: A key used to map the alert to a team. + type: string + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + stateMessage: + description: Contains long explanation of the alerted problem. + type: string + type: object + type: array + webhookConfigs: + description: List of webhook configurations. + items: + description: WebhookConfig configures notifications via a generic receiver supporting the webhook payload. See https://prometheus.io/docs/alerting/latest/configuration/#webhook_config + properties: + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + maxAlerts: + description: Maximum number of alerts to be sent per webhook message. When 0, all alerts are included. + format: int32 + minimum: 0 + type: integer + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + url: + description: The URL to send HTTP POST requests to. `urlSecret` takes precedence over `url`. One of `urlSecret` and `url` should be defined. + type: string + urlSecret: + description: The secret's key that contains the webhook URL to send HTTP requests to. `urlSecret` takes precedence over `url`. One of `urlSecret` and `url` should be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + type: object + type: array + wechatConfigs: + description: List of WeChat configurations. + items: + description: WeChatConfig configures notifications via WeChat. See https://prometheus.io/docs/alerting/latest/configuration/#wechat_config + properties: + agentID: + type: string + apiSecret: + description: The secret's key that contains the WeChat API key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + apiURL: + description: The WeChat API URL. + type: string + corpID: + description: The corp id for authentication. + type: string + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + message: + description: API request data as defined by the WeChat API. + type: string + messageType: + type: string + sendResolved: + description: Whether or not to notify about resolved alerts. + type: boolean + toParty: + type: string + toTag: + type: string + toUser: + type: string + type: object + type: array + required: + - name + type: object + type: array + route: + description: The Alertmanager route definition for alerts matching the resource's namespace. If present, it will be added to the generated Alertmanager configuration as a first-level route. + properties: + activeTimeIntervals: + description: ActiveTimeIntervals is a list of MuteTimeInterval names when this route should be active. + items: + type: string + type: array + continue: + description: Boolean indicating whether an alert should continue matching subsequent sibling nodes. It will always be overridden to true for the first-level route by the Prometheus operator. + type: boolean + groupBy: + description: List of labels to group by. Labels must not be repeated (unique list). Special label "..." (aggregate by all possible labels), if provided, must be the only element in the list. + items: + type: string + type: array + groupInterval: + description: 'How long to wait before sending an updated notification. Must match the regular expression`^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$` Example: "5m"' + type: string + groupWait: + description: 'How long to wait before sending the initial notification. Must match the regular expression`^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$` Example: "30s"' + type: string + matchers: + description: 'List of matchers that the alert''s labels should match. For the first level route, the operator removes any existing equality and regexp matcher on the `namespace` label and adds a `namespace: ` matcher.' + items: + description: Matcher defines how to match on alert's labels. + properties: + matchType: + description: Match operation available with AlertManager >= v0.22.0 and takes precedence over Regex (deprecated) if non-empty. + enum: + - '!=' + - = + - =~ + - '!~' + type: string + name: + description: Label to match. + minLength: 1 + type: string + regex: + description: Whether to match on equality (false) or regular-expression (true). Deprecated as of AlertManager >= v0.22.0 where a user should use MatchType instead. + type: boolean + value: + description: Label value to match. + type: string + required: + - name + type: object + type: array + muteTimeIntervals: + description: 'Note: this comment applies to the field definition above but appears below otherwise it gets included in the generated manifest. CRD schema doesn''t support self-referential types for now (see https://github.com/kubernetes/kubernetes/issues/62872). We have to use an alternative type to circumvent the limitation. The downside is that the Kube API can''t validate the data beyond the fact that it is a valid JSON representation. MuteTimeIntervals is a list of MuteTimeInterval names that will mute this route when matched,' + items: + type: string + type: array + receiver: + description: Name of the receiver for this route. If not empty, it should be listed in the `receivers` field. + type: string + repeatInterval: + description: 'How long to wait before repeating the last notification. Must match the regular expression`^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$` Example: "4h"' + type: string + routes: + description: Child routes. + items: + x-kubernetes-preserve-unknown-fields: true + type: array + type: object + type: object + required: + - spec + type: object + served: true + storage: true diff --git a/manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml index f78b2e9..1f9089d 100644 --- a/manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml @@ -2,15 +2,19 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.2.4 + controller-gen.kubebuilder.io/version: v0.9.2 creationTimestamp: null name: alertmanagers.monitoring.coreos.com spec: group: monitoring.coreos.com names: + categories: + - prometheus-operator kind: Alertmanager listKind: AlertmanagerList plural: alertmanagers + shortNames: + - am singular: alertmanager scope: Namespaced versions: @@ -19,13 +23,18 @@ spec: jsonPath: .spec.version name: Version type: string - - description: The desired replicas number of Alertmanagers + - description: The number of desired replicas jsonPath: .spec.replicas name: Replicas type: integer - jsonPath: .metadata.creationTimestamp name: Age type: date + - description: Whether the resource reconciliation is paused or not + jsonPath: .status.paused + name: Paused + priority: 1 + type: boolean name: v1 schema: openAPIV3Schema: @@ -104,6 +113,7 @@ spec: type: object type: array type: object + x-kubernetes-map-type: atomic weight: description: Weight associated with matching the corresponding nodeSelectorTerm, in the range 1-100. format: int32 @@ -164,10 +174,12 @@ spec: type: object type: array type: object + x-kubernetes-map-type: atomic type: array required: - nodeSelectorTerms type: object + x-kubernetes-map-type: atomic type: object podAffinity: description: Describes pod affinity scheduling rules (e.g. co-locate this pod in the same node, zone, etc. as some other pod(s)). @@ -210,8 +222,40 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + namespaceSelector: + description: A label query over the set of namespaces that the term applies to. The term is applied to the union of the namespaces selected by this field and the ones listed in the namespaces field. null selector and null or empty namespaces list means "this pod's namespace". An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic namespaces: - description: namespaces specifies which namespaces the labelSelector applies to (matches against); null or empty list means "this pod's namespace" + description: namespaces specifies a static list of namespace names that the term applies to. The term is applied to the union of the namespaces listed in this field and the ones selected by namespaceSelector. null or empty namespaces list and null namespaceSelector means "this pod's namespace". items: type: string type: array @@ -265,8 +309,40 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + namespaceSelector: + description: A label query over the set of namespaces that the term applies to. The term is applied to the union of the namespaces selected by this field and the ones listed in the namespaces field. null selector and null or empty namespaces list means "this pod's namespace". An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic namespaces: - description: namespaces specifies which namespaces the labelSelector applies to (matches against); null or empty list means "this pod's namespace" + description: namespaces specifies a static list of namespace names that the term applies to. The term is applied to the union of the namespaces listed in this field and the ones selected by namespaceSelector. null or empty namespaces list and null namespaceSelector means "this pod's namespace". items: type: string type: array @@ -319,8 +395,40 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + namespaceSelector: + description: A label query over the set of namespaces that the term applies to. The term is applied to the union of the namespaces selected by this field and the ones listed in the namespaces field. null selector and null or empty namespaces list means "this pod's namespace". An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic namespaces: - description: namespaces specifies which namespaces the labelSelector applies to (matches against); null or empty list means "this pod's namespace" + description: namespaces specifies a static list of namespace names that the term applies to. The term is applied to the union of the namespaces listed in this field and the ones selected by namespaceSelector. null or empty namespaces list and null namespaceSelector means "this pod's namespace". items: type: string type: array @@ -374,8 +482,40 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + namespaceSelector: + description: A label query over the set of namespaces that the term applies to. The term is applied to the union of the namespaces selected by this field and the ones listed in the namespaces field. null selector and null or empty namespaces list means "this pod's namespace". An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic namespaces: - description: namespaces specifies which namespaces the labelSelector applies to (matches against); null or empty list means "this pod's namespace" + description: namespaces specifies a static list of namespace names that the term applies to. The term is applied to the union of the namespaces listed in this field and the ones selected by namespaceSelector. null or empty namespaces list and null namespaceSelector means "this pod's namespace". items: type: string type: array @@ -388,32 +528,429 @@ spec: type: array type: object type: object + alertmanagerConfigMatcherStrategy: + description: The AlertmanagerConfigMatcherStrategy defines how AlertmanagerConfig objects match the alerts. In the future more options may be added. + properties: + type: + default: OnNamespace + description: If set to `OnNamespace`, the operator injects a label matcher matching the namespace of the AlertmanagerConfig object for all its routes and inhibition rules. `None` will not add any additional matchers other than the ones specified in the AlertmanagerConfig. Default is `OnNamespace`. + enum: + - OnNamespace + - None + type: string + type: object + alertmanagerConfigNamespaceSelector: + description: Namespaces to be selected for AlertmanagerConfig discovery. If nil, only check own namespace. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + alertmanagerConfigSelector: + description: AlertmanagerConfigs to be selected for to merge and configure Alertmanager with. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + alertmanagerConfiguration: + description: 'EXPERIMENTAL: alertmanagerConfiguration specifies the configuration of Alertmanager. If defined, it takes precedence over the `configSecret` field. This field may change in future releases.' + properties: + global: + description: Defines the global parameters of the Alertmanager configuration. + properties: + httpConfig: + description: HTTP client configuration. + properties: + authorization: + description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+. + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence. + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the Alertmanager object and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + followRedirects: + description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects. + type: boolean + oauth2: + description: OAuth2 client credentials used to fetch a token for the targets. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + proxyURL: + description: Optional proxy URL. + type: string + tlsConfig: + description: TLS configuration for the client. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + resolveTimeout: + description: ResolveTimeout is the default value used by alertmanager if the alert does not include EndsAt, after this time passes it can declare the alert as resolved if it has not been updated. This has no impact on alerts from Prometheus, as they always include EndsAt. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + type: object + name: + description: The name of the AlertmanagerConfig resource which is used to generate the Alertmanager configuration. It must be defined in the same namespace as the Alertmanager object. The operator will not enforce a `namespace` label for routes and inhibition rules. + minLength: 1 + type: string + templates: + description: Custom notification templates. + items: + description: SecretOrConfigMap allows to specify data as a Secret or ConfigMap. Fields are mutually exclusive. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: array + type: object baseImage: - description: Base image that is used to deploy pods, without tag. + description: 'Base image that is used to deploy pods, without tag. Deprecated: use ''image'' instead' type: string clusterAdvertiseAddress: description: 'ClusterAdvertiseAddress is the explicit address to advertise in cluster. Needs to be provided for non RFC1918 [1] (public) addresses. [1] RFC1918: https://tools.ietf.org/html/rfc1918' type: string + clusterGossipInterval: + description: Interval between gossip attempts. + pattern: ^(0|(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + clusterPeerTimeout: + description: Timeout for cluster peering. + pattern: ^(0|(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + clusterPushpullInterval: + description: Interval between pushpull attempts. + pattern: ^(0|(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string configMaps: - description: ConfigMaps is a list of ConfigMaps in the same namespace as the Alertmanager object, which shall be mounted into the Alertmanager Pods. The ConfigMaps are mounted into /etc/alertmanager/configmaps/. + description: ConfigMaps is a list of ConfigMaps in the same namespace as the Alertmanager object, which shall be mounted into the Alertmanager Pods. Each ConfigMap is added to the StatefulSet definition as a volume named `configmap-`. The ConfigMaps are mounted into `/etc/alertmanager/configmaps/` in the 'alertmanager' container. items: type: string type: array configSecret: - description: ConfigSecret is the name of a Kubernetes Secret in the same namespace as the Alertmanager object, which contains configuration for this Alertmanager instance. Defaults to 'alertmanager-' The secret is mounted into /etc/alertmanager/config. + description: "ConfigSecret is the name of a Kubernetes Secret in the same namespace as the Alertmanager object, which contains the configuration for this Alertmanager instance. If empty, it defaults to `alertmanager-`. \n The Alertmanager configuration should be available under the `alertmanager.yaml` key. Additional keys from the original secret are copied to the generated secret and mounted into the `/etc/alertmanager/config` directory in the `alertmanager` container. \n If either the secret or the `alertmanager.yaml` key is missing, the operator provisions a minimal Alertmanager configuration with one empty receiver (effectively dropping alert notifications)." type: string containers: - description: Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to an Alertmanager pod. + description: 'Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to an Alertmanager pod. Containers described here modify an operator generated container if they share the same name and modifications are done via a strategic merge patch. The current container names are: `alertmanager` and `config-reloader`. Overriding containers is entirely outside the scope of what the maintainers will support and by doing so, you accept that this behaviour may break at any time without notice.' items: description: A single application container that you want to run within a pod. properties: args: - description: 'Arguments to the entrypoint. The docker image''s CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + description: 'Arguments to the entrypoint. The container image''s CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' items: type: string type: array command: - description: 'Entrypoint array. Not executed within a shell. The docker image''s ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + description: 'Entrypoint array. Not executed within a shell. The container image''s ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' items: type: string type: array @@ -426,7 +963,7 @@ spec: description: Name of the environment variable. Must be a C_IDENTIFIER. type: string value: - description: 'Variable references $(VAR_NAME) are expanded using the previous defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".' + description: 'Variable references $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".' type: string valueFrom: description: Source for the environment variable's value. Cannot be used if value is not empty. @@ -446,8 +983,9 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic fieldRef: - description: 'Selects a field of the pod: supports metadata.name, metadata.namespace, metadata.labels, metadata.annotations, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.' + description: 'Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['''']`, `metadata.annotations['''']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.' properties: apiVersion: description: Version of the schema the FieldPath is written in terms of, defaults to "v1". @@ -458,6 +996,7 @@ spec: required: - fieldPath type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: 'Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.' properties: @@ -465,14 +1004,19 @@ spec: description: 'Container name: required for volumes, optional for env vars' type: string divisor: + anyOf: + - type: integer + - type: string description: Specifies the output format of the exposed resources, defaults to "1" - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true resource: description: 'Required: resource to select' type: string required: - resource type: object + x-kubernetes-map-type: atomic secretKeyRef: description: Selects a key of a secret in the pod's namespace properties: @@ -488,6 +1032,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object required: - name @@ -508,6 +1053,7 @@ spec: description: Specify whether the ConfigMap must be defined type: boolean type: object + x-kubernetes-map-type: atomic prefix: description: An optional identifier to prepend to each key in the ConfigMap. Must be a C_IDENTIFIER. type: string @@ -521,10 +1067,11 @@ spec: description: Specify whether the Secret must be defined type: boolean type: object + x-kubernetes-map-type: atomic type: object type: array image: - description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.' + description: 'Container image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.' type: string imagePullPolicy: description: 'Image pull policy. One of Always, Never, IfNotPresent. Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' @@ -536,7 +1083,7 @@ spec: description: 'PostStart is called immediately after a container is created. If the handler fails, the container is terminated and restarted according to its restart policy. Other management of the container blocks until the hook completes. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -582,7 +1129,7 @@ spec: - port type: object tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept for the backward compatibility. There are no validation of this field and lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -598,10 +1145,10 @@ spec: type: object type: object preStop: - description: 'PreStop is called immediately before a container is terminated due to an API request or management event such as liveness/startup probe failure, preemption, resource contention, etc. The handler is not called if the container crashes or exits. The reason for termination is passed to the handler. The Pod''s termination grace period countdown begins before the PreStop hooked is executed. Regardless of the outcome of the handler, the container will eventually terminate within the Pod''s termination grace period. Other management of the container blocks until the hook completes or until the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' + description: 'PreStop is called immediately before a container is terminated due to an API request or management event such as liveness/startup probe failure, preemption, resource contention, etc. The handler is not called if the container crashes or exits. The Pod''s termination grace period countdown begins before the PreStop hook is executed. Regardless of the outcome of the handler, the container will eventually terminate within the Pod''s termination grace period (unless delayed by finalizers). Other management of the container blocks until the hook completes or until the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -647,7 +1194,7 @@ spec: - port type: object tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept for the backward compatibility. There are no validation of this field and lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -667,7 +1214,7 @@ spec: description: 'Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -679,6 +1226,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -729,7 +1289,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -743,6 +1303,10 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 @@ -752,7 +1316,7 @@ spec: description: Name of the container specified as a DNS_LABEL. Each container in a pod must have a unique name (DNS_LABEL). Cannot be updated. type: string ports: - description: List of ports to expose from the container. Exposing a port here gives the system additional information about the network connections a container uses, but is primarily informational. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default "0.0.0.0" address inside a container will be accessible from the network. Cannot be updated. + description: List of ports to expose from the container. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default "0.0.0.0" address inside a container will be accessible from the network. Modifying this array with strategic merge patch may corrupt the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255. Cannot be updated. items: description: ContainerPort represents a network port in a single container. properties: @@ -771,17 +1335,22 @@ spec: description: If specified, this must be an IANA_SVC_NAME and unique within the pod. Each named port in a pod must have a unique name. Name for the port that can be referred to by services. type: string protocol: + default: TCP description: Protocol for port. Must be UDP, TCP, or SCTP. Defaults to "TCP". type: string required: - containerPort type: object type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map readinessProbe: description: 'Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -793,6 +1362,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -843,7 +1425,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -857,33 +1439,45 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 type: integer type: object resources: - description: 'Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + description: 'Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' properties: limits: additionalProperties: - type: string - description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object requests: additionalProperties: - type: string - description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object type: object securityContext: - description: 'Security options the pod should run with. More info: https://kubernetes.io/docs/concepts/policy/security-context/ More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' + description: 'SecurityContext defines the security options the container should be run with. If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' properties: allowPrivilegeEscalation: - description: 'AllowPrivilegeEscalation controls whether a process can gain more privileges than its parent process. This bool directly controls if the no_new_privs flag will be set on the container process. AllowPrivilegeEscalation is true always when the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN' + description: 'AllowPrivilegeEscalation controls whether a process can gain more privileges than its parent process. This bool directly controls if the no_new_privs flag will be set on the container process. AllowPrivilegeEscalation is true always when the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN Note that this field cannot be set when spec.os.name is windows.' type: boolean capabilities: - description: The capabilities to add/drop when running containers. Defaults to the default set of capabilities granted by the container runtime. + description: The capabilities to add/drop when running containers. Defaults to the default set of capabilities granted by the container runtime. Note that this field cannot be set when spec.os.name is windows. properties: add: description: Added capabilities @@ -899,27 +1493,27 @@ spec: type: array type: object privileged: - description: Run container in privileged mode. Processes in privileged containers are essentially equivalent to root on the host. Defaults to false. + description: Run container in privileged mode. Processes in privileged containers are essentially equivalent to root on the host. Defaults to false. Note that this field cannot be set when spec.os.name is windows. type: boolean procMount: - description: procMount denotes the type of proc mount to use for the containers. The default is DefaultProcMount which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. + description: procMount denotes the type of proc mount to use for the containers. The default is DefaultProcMount which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. type: string readOnlyRootFilesystem: - description: Whether this container has a read-only root filesystem. Default is false. + description: Whether this container has a read-only root filesystem. Default is false. Note that this field cannot be set when spec.os.name is windows. type: boolean runAsGroup: - description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer runAsNonRoot: description: Indicates that the container must run as a non-root user. If true, the Kubelet will validate the image at runtime to ensure that it does not run as UID 0 (root) and fail to start the container if it does. If unset or false, no such validation will be performed. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: boolean runAsUser: - description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer seLinuxOptions: - description: The SELinux context to be applied to the container. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The SELinux context to be applied to the container. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. properties: level: description: Level is SELinux level label that applies to the container. @@ -934,8 +1528,20 @@ spec: description: User is a SELinux user label that applies to the container. type: string type: object + seccompProfile: + description: The seccomp options to use by this container. If seccomp options are provided at both the pod & container level, the container options override the pod options. Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: localhostProfile indicates a profile defined in a file on the node should be used. The profile must be preconfigured on the node to work. Must be a descending path, relative to the kubelet's configured seccomp profile location. Must only be set if type is "Localhost". + type: string + type: + description: "type indicates which kind of seccomp profile will be applied. Valid options are: \n Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied." + type: string + required: + - type + type: object windowsOptions: - description: The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is linux. properties: gmsaCredentialSpec: description: GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. @@ -943,16 +1549,19 @@ spec: gmsaCredentialSpecName: description: GMSACredentialSpecName is the name of the GMSA credential spec to use. type: string + hostProcess: + description: HostProcess determines if a container should be run as a 'Host Process' container. This field is alpha-level and will only be honored by components that enable the WindowsHostProcessContainers feature flag. Setting this field without the feature flag will result in errors when validating the Pod. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean runAsUserName: description: The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: string type: object type: object startupProbe: - description: 'StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod''s lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is a beta feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + description: 'StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod''s lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -964,6 +1573,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1014,7 +1636,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1028,6 +1650,10 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 @@ -1102,9 +1728,41 @@ spec: externalUrl: description: The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. This is necessary if Alertmanager is not served from root of a DNS name. type: string + forceEnableClusterMode: + description: ForceEnableClusterMode ensures Alertmanager does not deactivate the cluster mode when running with a single replica. Use case is e.g. spanning an Alertmanager cluster across Kubernetes clusters with a single replica in each. + type: boolean + hostAliases: + description: Pods' hostAliases configuration + items: + description: HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the pod's hosts file. + properties: + hostnames: + description: Hostnames for the above IP address. + items: + type: string + type: array + ip: + description: IP address of the host file entry. + type: string + required: + - hostnames + - ip + type: object + type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map image: description: Image if specified has precedence over baseImage, tag and sha combinations. Specifying the version is still necessary to ensure the Prometheus Operator knows what version of Alertmanager is being configured. type: string + imagePullPolicy: + description: Image pull policy for the 'alertmanager', 'init-config-reloader' and 'config-reloader' containers. See https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy for more details. + enum: + - "" + - Always + - Never + - IfNotPresent + type: string imagePullSecrets: description: An optional list of references to secrets in the same namespace to use for pulling prometheus and alertmanager images from registries see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod items: @@ -1114,6 +1772,7 @@ spec: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic type: array initContainers: description: 'InitContainers allows adding initContainers to the pod definition. Those can be used to e.g. fetch secrets for injection into the Alertmanager configuration from external sources. Any errors during the execution of an initContainer will lead to a restart of the Pod. More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ Using initContainers for any use case other then secret fetching is entirely outside the scope of what the maintainers will support and by doing so, you accept that this behaviour may break at any time without notice.' @@ -1121,12 +1780,12 @@ spec: description: A single application container that you want to run within a pod. properties: args: - description: 'Arguments to the entrypoint. The docker image''s CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + description: 'Arguments to the entrypoint. The container image''s CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' items: type: string type: array command: - description: 'Entrypoint array. Not executed within a shell. The docker image''s ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + description: 'Entrypoint array. Not executed within a shell. The container image''s ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' items: type: string type: array @@ -1139,7 +1798,7 @@ spec: description: Name of the environment variable. Must be a C_IDENTIFIER. type: string value: - description: 'Variable references $(VAR_NAME) are expanded using the previous defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".' + description: 'Variable references $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".' type: string valueFrom: description: Source for the environment variable's value. Cannot be used if value is not empty. @@ -1159,8 +1818,9 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic fieldRef: - description: 'Selects a field of the pod: supports metadata.name, metadata.namespace, metadata.labels, metadata.annotations, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.' + description: 'Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['''']`, `metadata.annotations['''']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.' properties: apiVersion: description: Version of the schema the FieldPath is written in terms of, defaults to "v1". @@ -1171,6 +1831,7 @@ spec: required: - fieldPath type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: 'Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.' properties: @@ -1178,14 +1839,19 @@ spec: description: 'Container name: required for volumes, optional for env vars' type: string divisor: + anyOf: + - type: integer + - type: string description: Specifies the output format of the exposed resources, defaults to "1" - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true resource: description: 'Required: resource to select' type: string required: - resource type: object + x-kubernetes-map-type: atomic secretKeyRef: description: Selects a key of a secret in the pod's namespace properties: @@ -1201,6 +1867,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object required: - name @@ -1221,6 +1888,7 @@ spec: description: Specify whether the ConfigMap must be defined type: boolean type: object + x-kubernetes-map-type: atomic prefix: description: An optional identifier to prepend to each key in the ConfigMap. Must be a C_IDENTIFIER. type: string @@ -1234,10 +1902,11 @@ spec: description: Specify whether the Secret must be defined type: boolean type: object + x-kubernetes-map-type: atomic type: object type: array image: - description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.' + description: 'Container image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.' type: string imagePullPolicy: description: 'Image pull policy. One of Always, Never, IfNotPresent. Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' @@ -1249,7 +1918,7 @@ spec: description: 'PostStart is called immediately after a container is created. If the handler fails, the container is terminated and restarted according to its restart policy. Other management of the container blocks until the hook completes. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1295,7 +1964,7 @@ spec: - port type: object tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept for the backward compatibility. There are no validation of this field and lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1311,10 +1980,10 @@ spec: type: object type: object preStop: - description: 'PreStop is called immediately before a container is terminated due to an API request or management event such as liveness/startup probe failure, preemption, resource contention, etc. The handler is not called if the container crashes or exits. The reason for termination is passed to the handler. The Pod''s termination grace period countdown begins before the PreStop hooked is executed. Regardless of the outcome of the handler, the container will eventually terminate within the Pod''s termination grace period. Other management of the container blocks until the hook completes or until the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' + description: 'PreStop is called immediately before a container is terminated due to an API request or management event such as liveness/startup probe failure, preemption, resource contention, etc. The handler is not called if the container crashes or exits. The Pod''s termination grace period countdown begins before the PreStop hook is executed. Regardless of the outcome of the handler, the container will eventually terminate within the Pod''s termination grace period (unless delayed by finalizers). Other management of the container blocks until the hook completes or until the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1360,7 +2029,7 @@ spec: - port type: object tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept for the backward compatibility. There are no validation of this field and lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1380,7 +2049,7 @@ spec: description: 'Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1392,6 +2061,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1442,7 +2124,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1456,6 +2138,10 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 @@ -1465,7 +2151,7 @@ spec: description: Name of the container specified as a DNS_LABEL. Each container in a pod must have a unique name (DNS_LABEL). Cannot be updated. type: string ports: - description: List of ports to expose from the container. Exposing a port here gives the system additional information about the network connections a container uses, but is primarily informational. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default "0.0.0.0" address inside a container will be accessible from the network. Cannot be updated. + description: List of ports to expose from the container. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default "0.0.0.0" address inside a container will be accessible from the network. Modifying this array with strategic merge patch may corrupt the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255. Cannot be updated. items: description: ContainerPort represents a network port in a single container. properties: @@ -1484,17 +2170,22 @@ spec: description: If specified, this must be an IANA_SVC_NAME and unique within the pod. Each named port in a pod must have a unique name. Name for the port that can be referred to by services. type: string protocol: + default: TCP description: Protocol for port. Must be UDP, TCP, or SCTP. Defaults to "TCP". type: string required: - containerPort type: object type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map readinessProbe: description: 'Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1506,6 +2197,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1556,7 +2260,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1570,33 +2274,45 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 type: integer type: object resources: - description: 'Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + description: 'Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' properties: limits: additionalProperties: - type: string - description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object requests: additionalProperties: - type: string - description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object type: object securityContext: - description: 'Security options the pod should run with. More info: https://kubernetes.io/docs/concepts/policy/security-context/ More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' + description: 'SecurityContext defines the security options the container should be run with. If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' properties: allowPrivilegeEscalation: - description: 'AllowPrivilegeEscalation controls whether a process can gain more privileges than its parent process. This bool directly controls if the no_new_privs flag will be set on the container process. AllowPrivilegeEscalation is true always when the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN' + description: 'AllowPrivilegeEscalation controls whether a process can gain more privileges than its parent process. This bool directly controls if the no_new_privs flag will be set on the container process. AllowPrivilegeEscalation is true always when the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN Note that this field cannot be set when spec.os.name is windows.' type: boolean capabilities: - description: The capabilities to add/drop when running containers. Defaults to the default set of capabilities granted by the container runtime. + description: The capabilities to add/drop when running containers. Defaults to the default set of capabilities granted by the container runtime. Note that this field cannot be set when spec.os.name is windows. properties: add: description: Added capabilities @@ -1612,27 +2328,27 @@ spec: type: array type: object privileged: - description: Run container in privileged mode. Processes in privileged containers are essentially equivalent to root on the host. Defaults to false. + description: Run container in privileged mode. Processes in privileged containers are essentially equivalent to root on the host. Defaults to false. Note that this field cannot be set when spec.os.name is windows. type: boolean procMount: - description: procMount denotes the type of proc mount to use for the containers. The default is DefaultProcMount which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. + description: procMount denotes the type of proc mount to use for the containers. The default is DefaultProcMount which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. type: string readOnlyRootFilesystem: - description: Whether this container has a read-only root filesystem. Default is false. + description: Whether this container has a read-only root filesystem. Default is false. Note that this field cannot be set when spec.os.name is windows. type: boolean runAsGroup: - description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer runAsNonRoot: description: Indicates that the container must run as a non-root user. If true, the Kubelet will validate the image at runtime to ensure that it does not run as UID 0 (root) and fail to start the container if it does. If unset or false, no such validation will be performed. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: boolean runAsUser: - description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer seLinuxOptions: - description: The SELinux context to be applied to the container. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The SELinux context to be applied to the container. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. properties: level: description: Level is SELinux level label that applies to the container. @@ -1647,8 +2363,20 @@ spec: description: User is a SELinux user label that applies to the container. type: string type: object + seccompProfile: + description: The seccomp options to use by this container. If seccomp options are provided at both the pod & container level, the container options override the pod options. Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: localhostProfile indicates a profile defined in a file on the node should be used. The profile must be preconfigured on the node to work. Must be a descending path, relative to the kubelet's configured seccomp profile location. Must only be set if type is "Localhost". + type: string + type: + description: "type indicates which kind of seccomp profile will be applied. Valid options are: \n Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied." + type: string + required: + - type + type: object windowsOptions: - description: The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is linux. properties: gmsaCredentialSpec: description: GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. @@ -1656,16 +2384,19 @@ spec: gmsaCredentialSpecName: description: GMSACredentialSpecName is the name of the GMSA credential spec to use. type: string + hostProcess: + description: HostProcess determines if a container should be run as a 'Host Process' container. This field is alpha-level and will only be honored by components that enable the WindowsHostProcessContainers feature flag. Setting this field without the feature flag will result in errors when validating the Pod. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean runAsUserName: description: The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: string type: object type: object startupProbe: - description: 'StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod''s lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is a beta feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + description: 'StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod''s lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1677,6 +2408,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1727,7 +2471,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1741,6 +2485,10 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 @@ -1817,17 +2565,31 @@ spec: type: boolean logFormat: description: Log format for Alertmanager to be configured with. + enum: + - "" + - logfmt + - json type: string logLevel: description: Log level for Alertmanager to be configured with. + enum: + - "" + - debug + - info + - warn + - error type: string + minReadySeconds: + description: Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field from kubernetes 1.22 until 1.24 which requires enabling the StatefulSetMinReadySeconds feature gate. + format: int32 + type: integer nodeSelector: additionalProperties: type: string description: Define which Nodes the Pods are scheduled on. type: object paused: - description: If set to true all actions on the underlaying managed objects are not goint to be performed, except for delete actions. + description: If set to true all actions on the underlying managed objects are not goint to be performed, except for delete actions. type: boolean podMetadata: description: PodMetadata configures Labels and Annotations which are propagated to the alertmanager pods. @@ -1861,23 +2623,33 @@ spec: properties: limits: additionalProperties: - type: string - description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object requests: additionalProperties: - type: string - description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object type: object retention: + default: 120h description: Time duration Alertmanager shall retain data for. Default is '120h', and must match the regular expression `[0-9]+(ms|s|m|h)` (milliseconds seconds minutes hours). + pattern: ^(0|(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string routePrefix: description: The route prefix Alertmanager registers HTTP handlers for. This is useful, if using ExternalURL and a proxy is rewriting HTTP routes of a request, and the actual ExternalURL is still true, but the server serves requests under a different route prefix. For example for use with `kubectl proxy`. type: string secrets: - description: Secrets is a list of Secrets in the same namespace as the Alertmanager object, which shall be mounted into the Alertmanager Pods. The Secrets are mounted into /etc/alertmanager/secrets/. + description: Secrets is a list of Secrets in the same namespace as the Alertmanager object, which shall be mounted into the Alertmanager Pods. Each Secret is added to the StatefulSet definition as a volume named `secret-`. The Secrets are mounted into `/etc/alertmanager/secrets/` in the 'alertmanager' container. items: type: string type: array @@ -1885,25 +2657,25 @@ spec: description: SecurityContext holds pod-level security attributes and common container settings. This defaults to the default PodSecurityContext. properties: fsGroup: - description: "A special supplemental group that applies to all containers in a pod. Some volume types allow the Kubelet to change the ownership of that volume to be owned by the pod: \n 1. The owning GID will be the FSGroup 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) 3. The permission bits are OR'd with rw-rw---- \n If unset, the Kubelet will not modify the ownership and permissions of any volume." + description: "A special supplemental group that applies to all containers in a pod. Some volume types allow the Kubelet to change the ownership of that volume to be owned by the pod: \n 1. The owning GID will be the FSGroup 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) 3. The permission bits are OR'd with rw-rw---- \n If unset, the Kubelet will not modify the ownership and permissions of any volume. Note that this field cannot be set when spec.os.name is windows." format: int64 type: integer fsGroupChangePolicy: - description: 'fsGroupChangePolicy defines behavior of changing ownership and permission of the volume before being exposed inside Pod. This field will only apply to volume types which support fsGroup based ownership(and permissions). It will have no effect on ephemeral volume types such as: secret, configmaps and emptydir. Valid values are "OnRootMismatch" and "Always". If not specified defaults to "Always".' + description: 'fsGroupChangePolicy defines behavior of changing ownership and permission of the volume before being exposed inside Pod. This field will only apply to volume types which support fsGroup based ownership(and permissions). It will have no effect on ephemeral volume types such as: secret, configmaps and emptydir. Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. Note that this field cannot be set when spec.os.name is windows.' type: string runAsGroup: - description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence for that container. + description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence for that container. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer runAsNonRoot: description: Indicates that the container must run as a non-root user. If true, the Kubelet will validate the image at runtime to ensure that it does not run as UID 0 (root) and fail to start the container if it does. If unset or false, no such validation will be performed. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: boolean runAsUser: - description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence for that container. + description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence for that container. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer seLinuxOptions: - description: The SELinux context to be applied to all containers. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence for that container. + description: The SELinux context to be applied to all containers. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence for that container. Note that this field cannot be set when spec.os.name is windows. properties: level: description: Level is SELinux level label that applies to the container. @@ -1918,14 +2690,26 @@ spec: description: User is a SELinux user label that applies to the container. type: string type: object + seccompProfile: + description: The seccomp options to use by the containers in this pod. Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: localhostProfile indicates a profile defined in a file on the node should be used. The profile must be preconfigured on the node to work. Must be a descending path, relative to the kubelet's configured seccomp profile location. Must only be set if type is "Localhost". + type: string + type: + description: "type indicates which kind of seccomp profile will be applied. Valid options are: \n Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied." + type: string + required: + - type + type: object supplementalGroups: - description: A list of groups applied to the first process run in each container, in addition to the container's primary GID. If unspecified, no groups will be added to any container. + description: A list of groups applied to the first process run in each container, in addition to the container's primary GID. If unspecified, no groups will be added to any container. Note that this field cannot be set when spec.os.name is windows. items: format: int64 type: integer type: array sysctls: - description: Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported sysctls (by the container runtime) might fail to launch. + description: Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported sysctls (by the container runtime) might fail to launch. Note that this field cannot be set when spec.os.name is windows. items: description: Sysctl defines a kernel parameter to be set properties: @@ -1941,7 +2725,7 @@ spec: type: object type: array windowsOptions: - description: The Windows specific settings applied to all containers. If unspecified, the options within a container's SecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The Windows specific settings applied to all containers. If unspecified, the options within a container's SecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is linux. properties: gmsaCredentialSpec: description: GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. @@ -1949,6 +2733,9 @@ spec: gmsaCredentialSpecName: description: GMSACredentialSpecName is the name of the GMSA credential spec to use. type: string + hostProcess: + description: HostProcess determines if a container should be run as a 'Host Process' container. This field is alpha-level and will only be honored by components that enable the WindowsHostProcessContainers feature flag. Setting this field without the feature flag will result in errors when validating the Pod. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean runAsUserName: description: The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: string @@ -1958,7 +2745,7 @@ spec: description: ServiceAccountName is the name of the ServiceAccount to use to run the Prometheus Pods. type: string sha: - description: SHA of Alertmanager container image to be deployed. Defaults to the value of `version`. Similar to a tag, but the SHA explicitly deploys an immutable container image. Version and Tag are ignored if SHA is set. + description: 'SHA of Alertmanager container image to be deployed. Defaults to the value of `version`. Similar to a tag, but the SHA explicitly deploys an immutable container image. Version and Tag are ignored if SHA is set. Deprecated: use ''image'' instead. The image digest can be specified as part of the image URL.' type: string storage: description: Storage is the definition of how storage will be used by the Alertmanager instances. @@ -1967,17 +2754,139 @@ spec: description: 'Deprecated: subPath usage will be disabled by default in a future release, this option will become unnecessary. DisableMountSubPath allows to remove any subPath usage in volume mounts.' type: boolean emptyDir: - description: 'EmptyDirVolumeSource to be used by the Prometheus StatefulSets. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir' + description: 'EmptyDirVolumeSource to be used by the StatefulSet. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir' properties: medium: - description: 'What type of storage medium should back this directory. The default is "" which means to use the node''s default medium. Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' + description: 'medium represents what type of storage medium should back this directory. The default is "" which means to use the node''s default medium. Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' type: string sizeLimit: - description: 'Total amount of local storage required for this EmptyDir volume. The size limit is also applicable for memory medium. The maximum usage on memory medium EmptyDir would be the minimum value between the SizeLimit specified here and the sum of memory limits of all containers in a pod. The default is nil which means that the limit is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' - type: string + anyOf: + - type: integer + - type: string + description: 'sizeLimit is the total amount of local storage required for this EmptyDir volume. The size limit is also applicable for memory medium. The maximum usage on memory medium EmptyDir would be the minimum value between the SizeLimit specified here and the sum of memory limits of all containers in a pod. The default is nil which means that the limit is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + ephemeral: + description: 'EphemeralVolumeSource to be used by the StatefulSet. This is a beta field in k8s 1.21, for lower versions, starting with k8s 1.19, it requires enabling the GenericEphemeralVolume feature gate. More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/#generic-ephemeral-volumes' + properties: + volumeClaimTemplate: + description: "Will be used to create a stand-alone PVC to provision the volume. The pod in which this EphemeralVolumeSource is embedded will be the owner of the PVC, i.e. the PVC will be deleted together with the pod. The name of the PVC will be `-` where `` is the name from the `PodSpec.Volumes` array entry. Pod validation will reject the pod if the concatenated name is not valid for a PVC (for example, too long). \n An existing PVC with that name that is not owned by the pod will *not* be used for the pod to avoid using an unrelated volume by mistake. Starting the pod is then blocked until the unrelated PVC is removed. If such a pre-created PVC is meant to be used by the pod, the PVC has to updated with an owner reference to the pod once the pod exists. Normally this should not be necessary, but it may be useful when manually reconstructing a broken cluster. \n This field is read-only and no changes will be made by Kubernetes to the PVC after it has been created. \n Required, must not be nil." + properties: + metadata: + description: May contain labels and annotations that will be copied into the PVC when creating it. No other fields are allowed and will be rejected during validation. + type: object + spec: + description: The specification for the PersistentVolumeClaim. The entire content is copied unchanged into the PVC that gets created from this template. The same fields as in a PersistentVolumeClaim are also valid here. + properties: + accessModes: + description: 'accessModes contains the desired access modes the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + dataSource: + description: 'dataSource field can be used to specify either: * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) * An existing PVC (PersistentVolumeClaim) If the provisioner or an external controller can support the specified data source, it will create a new volume based on the contents of the specified data source. If the AnyVolumeDataSource feature gate is enabled, this field will always have the same contents as the DataSourceRef field.' + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. If APIGroup is not specified, the specified Kind must be in the core API group. For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: 'dataSourceRef specifies the object from which to populate the volume with data, if a non-empty volume is desired. This may be any local object from a non-empty API group (non core object) or a PersistentVolumeClaim object. When this field is specified, volume binding will only succeed if the type of the specified object matches some installed volume populator or dynamic provisioner. This field will replace the functionality of the DataSource field and as such if both fields are non-empty, they must have the same value. For backwards compatibility, both fields (DataSource and DataSourceRef) will be set to the same value automatically if one of them is empty and the other is non-empty. There are two important differences between DataSource and DataSourceRef: * While DataSource only allows two specific types of objects, DataSourceRef allows any non-core object, as well as PersistentVolumeClaim objects. * While DataSource ignores disallowed values (dropping them), DataSourceRef preserves all values, and generates an error if a disallowed value is specified. (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled.' + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. If APIGroup is not specified, the specified Kind must be in the core API group. For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + resources: + description: 'resources represents the minimum resources the volume should have. If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements that are lower than previous value but must still be higher than capacity recorded in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + type: object + selector: + description: selector is a label query over volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: 'storageClassName is the name of the StorageClass required by the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' + type: string + volumeMode: + description: volumeMode defines what type of volume is required by the claim. Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference to the PersistentVolume backing this claim. + type: string + type: object + required: + - spec + type: object type: object volumeClaimTemplate: - description: A PVC spec to be used by the Prometheus StatefulSets. + description: A PVC spec to be used by the StatefulSet. The easiest way to use a volume that cannot be automatically provisioned (for whatever reason) is to use a label selector alongside manually created PersistentVolumes. properties: apiVersion: description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' @@ -2006,12 +2915,29 @@ spec: description: 'Spec defines the desired characteristics of a volume requested by a pod author. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims' properties: accessModes: - description: 'AccessModes contains the desired access modes the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + description: 'accessModes contains the desired access modes the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' items: type: string type: array dataSource: - description: 'This field can be used to specify either: * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot - Beta) * An existing PVC (PersistentVolumeClaim) * An existing custom resource/object that implements data population (Alpha) In order to use VolumeSnapshot object types, the appropriate feature gate must be enabled (VolumeSnapshotDataSource or AnyVolumeDataSource) If the provisioner or an external controller can support the specified data source, it will create a new volume based on the contents of the specified data source. If the specified data source is not supported, the volume will not be created and the failure will be reported as an event. In the future, we plan to support more data source types and the behavior of the provisioner may change.' + description: 'dataSource field can be used to specify either: * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) * An existing PVC (PersistentVolumeClaim) If the provisioner or an external controller can support the specified data source, it will create a new volume based on the contents of the specified data source. If the AnyVolumeDataSource feature gate is enabled, this field will always have the same contents as the DataSourceRef field.' + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. If APIGroup is not specified, the specified Kind must be in the core API group. For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: 'dataSourceRef specifies the object from which to populate the volume with data, if a non-empty volume is desired. This may be any local object from a non-empty API group (non core object) or a PersistentVolumeClaim object. When this field is specified, volume binding will only succeed if the type of the specified object matches some installed volume populator or dynamic provisioner. This field will replace the functionality of the DataSource field and as such if both fields are non-empty, they must have the same value. For backwards compatibility, both fields (DataSource and DataSourceRef) will be set to the same value automatically if one of them is empty and the other is non-empty. There are two important differences between DataSource and DataSourceRef: * While DataSource only allows two specific types of objects, DataSourceRef allows any non-core object, as well as PersistentVolumeClaim objects. * While DataSource ignores disallowed values (dropping them), DataSourceRef preserves all values, and generates an error if a disallowed value is specified. (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled.' properties: apiGroup: description: APIGroup is the group for the resource being referenced. If APIGroup is not specified, the specified Kind must be in the core API group. For any other third-party types, APIGroup is required. @@ -2026,22 +2952,31 @@ spec: - kind - name type: object + x-kubernetes-map-type: atomic resources: - description: 'Resources represents the minimum resources the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources' + description: 'resources represents the minimum resources the volume should have. If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements that are lower than previous value but must still be higher than capacity recorded in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources' properties: limits: additionalProperties: - type: string - description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object requests: additionalProperties: - type: string - description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object type: object selector: - description: A label query over volumes to consider for binding. + description: selector is a label query over volumes to consider for binding. properties: matchExpressions: description: matchExpressions is a list of label selector requirements. The requirements are ANDed. @@ -2070,47 +3005,61 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic storageClassName: - description: 'Name of the StorageClass required by the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' + description: 'storageClassName is the name of the StorageClass required by the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' type: string volumeMode: description: volumeMode defines what type of volume is required by the claim. Value of Filesystem is implied when not included in claim spec. type: string volumeName: - description: VolumeName is the binding reference to the PersistentVolume backing this claim. + description: volumeName is the binding reference to the PersistentVolume backing this claim. type: string type: object status: description: 'Status represents the current information/status of a persistent volume claim. Read-only. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims' properties: accessModes: - description: 'AccessModes contains the actual access modes the volume backing the PVC has. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + description: 'accessModes contains the actual access modes the volume backing the PVC has. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' items: type: string type: array + allocatedResources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: allocatedResources is the storage resource within AllocatedResources tracks the capacity allocated to a PVC. It may be larger than the actual capacity when a volume expansion operation is requested. For storage quota, the larger value from allocatedResources and PVC.spec.resources is used. If allocatedResources is not set, PVC.spec.resources alone is used for quota calculation. If a volume expansion capacity request is lowered, allocatedResources is only lowered if there are no expansion operations in progress and if the actual volume capacity is equal or lower than the requested capacity. This is an alpha field and requires enabling RecoverVolumeExpansionFailure feature. + type: object capacity: additionalProperties: - type: string - description: Represents the actual resources of the underlying volume. + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: capacity represents the actual resources of the underlying volume. type: object conditions: - description: Current Condition of persistent volume claim. If underlying persistent volume is being resized then the Condition will be set to 'ResizeStarted'. + description: conditions is the current Condition of persistent volume claim. If underlying persistent volume is being resized then the Condition will be set to 'ResizeStarted'. items: description: PersistentVolumeClaimCondition contails details about state of pvc properties: lastProbeTime: - description: Last time we probed the condition. + description: lastProbeTime is the time we probed the condition. format: date-time type: string lastTransitionTime: - description: Last time the condition transitioned from one status to another. + description: lastTransitionTime is the time the condition transitioned from one status to another. format: date-time type: string message: - description: Human-readable message indicating details about last transition. + description: message is the human-readable message indicating details about last transition. type: string reason: - description: Unique, this should be a short, machine understandable string that gives the reason for condition's last transition. If it reports "ResizeStarted" that means the underlying persistent volume is being resized. + description: reason is a unique, this should be a short, machine understandable string that gives the reason for condition's last transition. If it reports "ResizeStarted" that means the underlying persistent volume is being resized. type: string status: type: string @@ -2123,13 +3072,16 @@ spec: type: object type: array phase: - description: Phase represents the current phase of PersistentVolumeClaim. + description: phase represents the current phase of PersistentVolumeClaim. + type: string + resizeStatus: + description: resizeStatus stores status of resize operation. ResizeStatus is not set by default but when expansion is complete resizeStatus is set to empty string by resize controller or kubelet. This is an alpha field and requires enabling RecoverVolumeExpansionFailure feature. type: string type: object type: object type: object tag: - description: Tag of Alertmanager container image to be deployed. Defaults to the value of `version`. Version is ignored if Tag is set. + description: 'Tag of Alertmanager container image to be deployed. Defaults to the value of `version`. Version is ignored if Tag is set. Deprecated: use ''image'' instead. The image tag can be specified as part of the image URL.' type: string tolerations: description: If specified, the pod's tolerations. @@ -2154,6 +3106,74 @@ spec: type: string type: object type: array + topologySpreadConstraints: + description: If specified, the pod's topology spread constraints. + items: + description: TopologySpreadConstraint specifies how to spread matching pods among the given topology. + properties: + labelSelector: + description: LabelSelector is used to find matching pods. Pods that match this label selector are counted to determine the number of pods in their corresponding topology domain. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: MatchLabelKeys is a set of pod label keys to select the pods over which spreading will be calculated. The keys are used to lookup values from the incoming pod labels, those key-value labels are ANDed with labelSelector to select the group of existing pods over which spreading will be calculated for the incoming pod. Keys that don't exist in the incoming pod labels will be ignored. A null or empty list means only match against labelSelector. + items: + type: string + type: array + x-kubernetes-list-type: atomic + maxSkew: + description: 'MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In this case, the global minimum is 1. | zone1 | zone2 | zone3 | | P P | P P | P | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It''s a required field. Default value is 1 and 0 is not allowed.' + format: int32 + type: integer + minDomains: + description: "MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is less than minDomains, Pod Topology Spread treats \"global minimum\" as 0, and then the calculation of Skew is performed. And when the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling. As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule. \n For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | | P P | P P | P P | The number of domains is less than 5(MinDomains), so \"global minimum\" is treated as 0. In this situation, new pod with the same labelSelector cannot be scheduled, because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, it will violate MaxSkew. \n This is a beta field and requires the MinDomainsInPodTopologySpread feature gate to be enabled (enabled by default)." + format: int32 + type: integer + nodeAffinityPolicy: + description: "NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector when calculating pod topology spread skew. Options are: - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. \n If this value is nil, the behavior is equivalent to the Honor policy. This is a alpha-level feature enabled by the NodeInclusionPolicyInPodTopologySpread feature flag." + type: string + nodeTaintsPolicy: + description: "NodeTaintsPolicy indicates how we will treat node taints when calculating pod topology spread skew. Options are: - Honor: nodes without taints, along with tainted nodes for which the incoming pod has a toleration, are included. - Ignore: node taints are ignored. All nodes are included. \n If this value is nil, the behavior is equivalent to the Ignore policy. This is a alpha-level feature enabled by the NodeInclusionPolicyInPodTopologySpread feature flag." + type: string + topologyKey: + description: TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each as a "bucket", and try to put balanced number of pods into each bucket. We define a domain as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes meet the requirements of nodeAffinityPolicy and nodeTaintsPolicy. e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology. And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology. It's a required field. + type: string + whenUnsatisfiable: + description: 'WhenUnsatisfiable indicates how to deal with a pod if it doesn''t satisfy the spread constraint. - DoNotSchedule (default) tells the scheduler not to schedule it. - ScheduleAnyway tells the scheduler to schedule the pod in any location, but giving higher precedence to topologies that would help reduce the skew. A constraint is considered "Unsatisfiable" for an incoming pod if and only if every possible node assignment for that pod would violate "MaxSkew" on some topology. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P | If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler won''t make it *more* imbalanced. It''s a required field.' + type: string + required: + - maxSkew + - topologyKey + - whenUnsatisfiable + type: object + type: array version: description: Version the cluster should be on. type: string @@ -2191,138 +3211,140 @@ spec: description: Volume represents a named volume in a pod that may be accessed by any container in the pod. properties: awsElasticBlockStore: - description: 'AWSElasticBlockStore represents an AWS Disk resource that is attached to a kubelet''s host machine and then exposed to the pod. More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore' + description: 'awsElasticBlockStore represents an AWS Disk resource that is attached to a kubelet''s host machine and then exposed to the pod. More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore' properties: fsType: - description: 'Filesystem type of the volume that you want to mount. Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore TODO: how do we prevent errors in the filesystem from compromising the machine' + description: 'fsType is the filesystem type of the volume that you want to mount. Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore TODO: how do we prevent errors in the filesystem from compromising the machine' type: string partition: - description: 'The partition in the volume that you want to mount. If omitted, the default is to mount by volume name. Examples: For volume /dev/sda1, you specify the partition as "1". Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty).' + description: 'partition is the partition in the volume that you want to mount. If omitted, the default is to mount by volume name. Examples: For volume /dev/sda1, you specify the partition as "1". Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty).' format: int32 type: integer readOnly: - description: 'Specify "true" to force and set the ReadOnly property in VolumeMounts to "true". If omitted, the default is "false". More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore' + description: 'readOnly value true will force the readOnly setting in VolumeMounts. More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore' type: boolean volumeID: - description: 'Unique ID of the persistent disk resource in AWS (Amazon EBS volume). More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore' + description: 'volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore' type: string required: - volumeID type: object azureDisk: - description: AzureDisk represents an Azure Data Disk mount on the host and bind mount to the pod. + description: azureDisk represents an Azure Data Disk mount on the host and bind mount to the pod. properties: cachingMode: - description: 'Host Caching mode: None, Read Only, Read Write.' + description: 'cachingMode is the Host Caching mode: None, Read Only, Read Write.' type: string diskName: - description: The Name of the data disk in the blob storage + description: diskName is the Name of the data disk in the blob storage type: string diskURI: - description: The URI the data disk in the blob storage + description: diskURI is the URI of data disk in the blob storage type: string fsType: - description: Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + description: fsType is Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. type: string kind: - description: 'Expected values Shared: multiple blob disks per storage account Dedicated: single blob disk per storage account Managed: azure managed data disk (only in managed availability set). defaults to shared' + description: 'kind expected values are Shared: multiple blob disks per storage account Dedicated: single blob disk per storage account Managed: azure managed data disk (only in managed availability set). defaults to shared' type: string readOnly: - description: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. + description: readOnly Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. type: boolean required: - diskName - diskURI type: object azureFile: - description: AzureFile represents an Azure File Service mount on the host and bind mount to the pod. + description: azureFile represents an Azure File Service mount on the host and bind mount to the pod. properties: readOnly: - description: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. + description: readOnly defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. type: boolean secretName: - description: the name of secret that contains Azure Storage Account Name and Key + description: secretName is the name of secret that contains Azure Storage Account Name and Key type: string shareName: - description: Share Name + description: shareName is the azure share Name type: string required: - secretName - shareName type: object cephfs: - description: CephFS represents a Ceph FS mount on the host that shares a pod's lifetime + description: cephFS represents a Ceph FS mount on the host that shares a pod's lifetime properties: monitors: - description: 'Required: Monitors is a collection of Ceph monitors More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' + description: 'monitors is Required: Monitors is a collection of Ceph monitors More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' items: type: string type: array path: - description: 'Optional: Used as the mounted root, rather than the full Ceph tree, default is /' + description: 'path is Optional: Used as the mounted root, rather than the full Ceph tree, default is /' type: string readOnly: - description: 'Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' + description: 'readOnly is Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: boolean secretFile: - description: 'Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' + description: 'secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: string secretRef: - description: 'Optional: SecretRef is reference to the authentication secret for User, default is empty. More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' + description: 'secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' properties: name: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic user: - description: 'Optional: User is the rados user name, default is admin More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' + description: 'user is optional: User is the rados user name, default is admin More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it' type: string required: - monitors type: object cinder: - description: 'Cinder represents a cinder volume attached and mounted on kubelets host machine. More info: https://examples.k8s.io/mysql-cinder-pd/README.md' + description: 'cinder represents a cinder volume attached and mounted on kubelets host machine. More info: https://examples.k8s.io/mysql-cinder-pd/README.md' properties: fsType: - description: 'Filesystem type to mount. Must be a filesystem type supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://examples.k8s.io/mysql-cinder-pd/README.md' + description: 'fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: string readOnly: - description: 'Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. More info: https://examples.k8s.io/mysql-cinder-pd/README.md' + description: 'readOnly defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. More info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: boolean secretRef: - description: 'Optional: points to a secret object containing parameters used to connect to OpenStack.' + description: 'secretRef is optional: points to a secret object containing parameters used to connect to OpenStack.' properties: name: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic volumeID: - description: 'volume id used to identify the volume in cinder. More info: https://examples.k8s.io/mysql-cinder-pd/README.md' + description: 'volumeID used to identify the volume in cinder. More info: https://examples.k8s.io/mysql-cinder-pd/README.md' type: string required: - volumeID type: object configMap: - description: ConfigMap represents a configMap that should populate this volume + description: configMap represents a configMap that should populate this volume properties: defaultMode: - description: 'Optional: mode bits to use on created files by default. Must be a value between 0 and 0777. Defaults to 0644. Directories within the path are not affected by this setting. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'defaultMode is optional: mode bits used to set permissions on created files by default. Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. Defaults to 0644. Directories within the path are not affected by this setting. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer items: - description: If unspecified, each key-value pair in the Data field of the referenced ConfigMap will be projected into the volume as a file whose name is the key and content is the value. If specified, the listed keys will be projected into the specified paths, and unlisted keys will not be present. If a key is specified which is not present in the ConfigMap, the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. + description: items if unspecified, each key-value pair in the Data field of the referenced ConfigMap will be projected into the volume as a file whose name is the key and content is the value. If specified, the listed keys will be projected into the specified paths, and unlisted keys will not be present. If a key is specified which is not present in the ConfigMap, the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. items: description: Maps a string key to a path within a volume. properties: key: - description: The key to project. + description: key is the key to project. type: string mode: - description: 'Optional: mode bits to use on this file, must be a value between 0 and 0777. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'mode is Optional: mode bits used to set permissions on this file. Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer path: - description: The relative path of the file to map the key to. May not be an absolute path. May not contain the path element '..'. May not start with the string '..'. + description: path is the relative path of the file to map the key to. May not be an absolute path. May not contain the path element '..'. May not start with the string '..'. type: string required: - key @@ -2333,41 +3355,43 @@ spec: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string optional: - description: Specify whether the ConfigMap or its keys must be defined + description: optional specify whether the ConfigMap or its keys must be defined type: boolean type: object + x-kubernetes-map-type: atomic csi: - description: CSI (Container Storage Interface) represents storage that is handled by an external CSI driver (Alpha feature). + description: csi (Container Storage Interface) represents ephemeral storage that is handled by certain external CSI drivers (Beta feature). properties: driver: - description: Driver is the name of the CSI driver that handles this volume. Consult with your admin for the correct name as registered in the cluster. + description: driver is the name of the CSI driver that handles this volume. Consult with your admin for the correct name as registered in the cluster. type: string fsType: - description: Filesystem type to mount. Ex. "ext4", "xfs", "ntfs". If not provided, the empty value is passed to the associated CSI driver which will determine the default filesystem to apply. + description: fsType to mount. Ex. "ext4", "xfs", "ntfs". If not provided, the empty value is passed to the associated CSI driver which will determine the default filesystem to apply. type: string nodePublishSecretRef: - description: NodePublishSecretRef is a reference to the secret object containing sensitive information to pass to the CSI driver to complete the CSI NodePublishVolume and NodeUnpublishVolume calls. This field is optional, and may be empty if no secret is required. If the secret object contains more than one secret, all secret references are passed. + description: nodePublishSecretRef is a reference to the secret object containing sensitive information to pass to the CSI driver to complete the CSI NodePublishVolume and NodeUnpublishVolume calls. This field is optional, and may be empty if no secret is required. If the secret object contains more than one secret, all secret references are passed. properties: name: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic readOnly: - description: Specifies a read-only configuration for the volume. Defaults to false (read/write). + description: readOnly specifies a read-only configuration for the volume. Defaults to false (read/write). type: boolean volumeAttributes: additionalProperties: type: string - description: VolumeAttributes stores driver-specific properties that are passed to the CSI driver. Consult your driver's documentation for supported values. + description: volumeAttributes stores driver-specific properties that are passed to the CSI driver. Consult your driver's documentation for supported values. type: object required: - driver type: object downwardAPI: - description: DownwardAPI represents downward API about the pod that should populate this volume + description: downwardAPI represents downward API about the pod that should populate this volume properties: defaultMode: - description: 'Optional: mode bits to use on created files by default. Must be a value between 0 and 0777. Defaults to 0644. Directories within the path are not affected by this setting. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'Optional: mode bits to use on created files by default. Must be a Optional: mode bits used to set permissions on created files by default. Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. Defaults to 0644. Directories within the path are not affected by this setting. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer items: @@ -2387,8 +3411,9 @@ spec: required: - fieldPath type: object + x-kubernetes-map-type: atomic mode: - description: 'Optional: mode bits to use on this file, must be a value between 0 and 0777. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'Optional: mode bits used to set permissions on this file, must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer path: @@ -2401,194 +3426,323 @@ spec: description: 'Container name: required for volumes, optional for env vars' type: string divisor: + anyOf: + - type: integer + - type: string description: Specifies the output format of the exposed resources, defaults to "1" - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true resource: description: 'Required: resource to select' type: string required: - resource type: object + x-kubernetes-map-type: atomic required: - path type: object type: array type: object emptyDir: - description: 'EmptyDir represents a temporary directory that shares a pod''s lifetime. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' + description: 'emptyDir represents a temporary directory that shares a pod''s lifetime. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' properties: medium: - description: 'What type of storage medium should back this directory. The default is "" which means to use the node''s default medium. Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' + description: 'medium represents what type of storage medium should back this directory. The default is "" which means to use the node''s default medium. Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' type: string sizeLimit: - description: 'Total amount of local storage required for this EmptyDir volume. The size limit is also applicable for memory medium. The maximum usage on memory medium EmptyDir would be the minimum value between the SizeLimit specified here and the sum of memory limits of all containers in a pod. The default is nil which means that the limit is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' - type: string + anyOf: + - type: integer + - type: string + description: 'sizeLimit is the total amount of local storage required for this EmptyDir volume. The size limit is also applicable for memory medium. The maximum usage on memory medium EmptyDir would be the minimum value between the SizeLimit specified here and the sum of memory limits of all containers in a pod. The default is nil which means that the limit is undefined. More info: http://kubernetes.io/docs/user-guide/volumes#emptydir' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + ephemeral: + description: "ephemeral represents a volume that is handled by a cluster storage driver. The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, and deleted when the pod is removed. \n Use this if: a) the volume is only needed while the pod runs, b) features of normal volumes like restoring from snapshot or capacity tracking are needed, c) the storage driver is specified through a storage class, and d) the storage driver supports dynamic volume provisioning through a PersistentVolumeClaim (see EphemeralVolumeSource for more information on the connection between this volume type and PersistentVolumeClaim). \n Use PersistentVolumeClaim or one of the vendor-specific APIs for volumes that persist for longer than the lifecycle of an individual pod. \n Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to be used that way - see the documentation of the driver for more information. \n A pod can use both types of ephemeral volumes and persistent volumes at the same time." + properties: + volumeClaimTemplate: + description: "Will be used to create a stand-alone PVC to provision the volume. The pod in which this EphemeralVolumeSource is embedded will be the owner of the PVC, i.e. the PVC will be deleted together with the pod. The name of the PVC will be `-` where `` is the name from the `PodSpec.Volumes` array entry. Pod validation will reject the pod if the concatenated name is not valid for a PVC (for example, too long). \n An existing PVC with that name that is not owned by the pod will *not* be used for the pod to avoid using an unrelated volume by mistake. Starting the pod is then blocked until the unrelated PVC is removed. If such a pre-created PVC is meant to be used by the pod, the PVC has to updated with an owner reference to the pod once the pod exists. Normally this should not be necessary, but it may be useful when manually reconstructing a broken cluster. \n This field is read-only and no changes will be made by Kubernetes to the PVC after it has been created. \n Required, must not be nil." + properties: + metadata: + description: May contain labels and annotations that will be copied into the PVC when creating it. No other fields are allowed and will be rejected during validation. + type: object + spec: + description: The specification for the PersistentVolumeClaim. The entire content is copied unchanged into the PVC that gets created from this template. The same fields as in a PersistentVolumeClaim are also valid here. + properties: + accessModes: + description: 'accessModes contains the desired access modes the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + dataSource: + description: 'dataSource field can be used to specify either: * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) * An existing PVC (PersistentVolumeClaim) If the provisioner or an external controller can support the specified data source, it will create a new volume based on the contents of the specified data source. If the AnyVolumeDataSource feature gate is enabled, this field will always have the same contents as the DataSourceRef field.' + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. If APIGroup is not specified, the specified Kind must be in the core API group. For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: 'dataSourceRef specifies the object from which to populate the volume with data, if a non-empty volume is desired. This may be any local object from a non-empty API group (non core object) or a PersistentVolumeClaim object. When this field is specified, volume binding will only succeed if the type of the specified object matches some installed volume populator or dynamic provisioner. This field will replace the functionality of the DataSource field and as such if both fields are non-empty, they must have the same value. For backwards compatibility, both fields (DataSource and DataSourceRef) will be set to the same value automatically if one of them is empty and the other is non-empty. There are two important differences between DataSource and DataSourceRef: * While DataSource only allows two specific types of objects, DataSourceRef allows any non-core object, as well as PersistentVolumeClaim objects. * While DataSource ignores disallowed values (dropping them), DataSourceRef preserves all values, and generates an error if a disallowed value is specified. (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled.' + properties: + apiGroup: + description: APIGroup is the group for the resource being referenced. If APIGroup is not specified, the specified Kind must be in the core API group. For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + resources: + description: 'resources represents the minimum resources the volume should have. If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements that are lower than previous value but must still be higher than capacity recorded in the status field of the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' + type: object + type: object + selector: + description: selector is a label query over volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: 'storageClassName is the name of the StorageClass required by the claim. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' + type: string + volumeMode: + description: volumeMode defines what type of volume is required by the claim. Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference to the PersistentVolume backing this claim. + type: string + type: object + required: + - spec + type: object type: object fc: - description: FC represents a Fibre Channel resource that is attached to a kubelet's host machine and then exposed to the pod. + description: fc represents a Fibre Channel resource that is attached to a kubelet's host machine and then exposed to the pod. properties: fsType: - description: 'Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. TODO: how do we prevent errors in the filesystem from compromising the machine' + description: 'fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. TODO: how do we prevent errors in the filesystem from compromising the machine' type: string lun: - description: 'Optional: FC target lun number' + description: 'lun is Optional: FC target lun number' format: int32 type: integer readOnly: - description: 'Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts.' + description: 'readOnly is Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts.' type: boolean targetWWNs: - description: 'Optional: FC target worldwide names (WWNs)' + description: 'targetWWNs is Optional: FC target worldwide names (WWNs)' items: type: string type: array wwids: - description: 'Optional: FC volume world wide identifiers (wwids) Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously.' + description: 'wwids Optional: FC volume world wide identifiers (wwids) Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously.' items: type: string type: array type: object flexVolume: - description: FlexVolume represents a generic volume resource that is provisioned/attached using an exec based plugin. + description: flexVolume represents a generic volume resource that is provisioned/attached using an exec based plugin. properties: driver: - description: Driver is the name of the driver to use for this volume. + description: driver is the name of the driver to use for this volume. type: string fsType: - description: Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. + description: fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. type: string options: additionalProperties: type: string - description: 'Optional: Extra command options if any.' + description: 'options is Optional: this field holds extra command options if any.' type: object readOnly: - description: 'Optional: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts.' + description: 'readOnly is Optional: defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts.' type: boolean secretRef: - description: 'Optional: SecretRef is reference to the secret object containing sensitive information to pass to the plugin scripts. This may be empty if no secret object is specified. If the secret object contains more than one secret, all secrets are passed to the plugin scripts.' + description: 'secretRef is Optional: secretRef is reference to the secret object containing sensitive information to pass to the plugin scripts. This may be empty if no secret object is specified. If the secret object contains more than one secret, all secrets are passed to the plugin scripts.' properties: name: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic required: - driver type: object flocker: - description: Flocker represents a Flocker volume attached to a kubelet's host machine. This depends on the Flocker control service being running + description: flocker represents a Flocker volume attached to a kubelet's host machine. This depends on the Flocker control service being running properties: datasetName: - description: Name of the dataset stored as metadata -> name on the dataset for Flocker should be considered as deprecated + description: datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker should be considered as deprecated type: string datasetUUID: - description: UUID of the dataset. This is unique identifier of a Flocker dataset + description: datasetUUID is the UUID of the dataset. This is unique identifier of a Flocker dataset type: string type: object gcePersistentDisk: - description: 'GCEPersistentDisk represents a GCE Disk resource that is attached to a kubelet''s host machine and then exposed to the pod. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk' + description: 'gcePersistentDisk represents a GCE Disk resource that is attached to a kubelet''s host machine and then exposed to the pod. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk' properties: fsType: - description: 'Filesystem type of the volume that you want to mount. Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk TODO: how do we prevent errors in the filesystem from compromising the machine' + description: 'fsType is filesystem type of the volume that you want to mount. Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk TODO: how do we prevent errors in the filesystem from compromising the machine' type: string partition: - description: 'The partition in the volume that you want to mount. If omitted, the default is to mount by volume name. Examples: For volume /dev/sda1, you specify the partition as "1". Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk' + description: 'partition is the partition in the volume that you want to mount. If omitted, the default is to mount by volume name. Examples: For volume /dev/sda1, you specify the partition as "1". Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk' format: int32 type: integer pdName: - description: 'Unique name of the PD resource in GCE. Used to identify the disk in GCE. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk' + description: 'pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk' type: string readOnly: - description: 'ReadOnly here will force the ReadOnly setting in VolumeMounts. Defaults to false. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk' + description: 'readOnly here will force the ReadOnly setting in VolumeMounts. Defaults to false. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk' type: boolean required: - pdName type: object gitRepo: - description: 'GitRepo represents a git repository at a particular revision. DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir into the Pod''s container.' + description: 'gitRepo represents a git repository at a particular revision. DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir into the Pod''s container.' properties: directory: - description: Target directory name. Must not contain or start with '..'. If '.' is supplied, the volume directory will be the git repository. Otherwise, if specified, the volume will contain the git repository in the subdirectory with the given name. + description: directory is the target directory name. Must not contain or start with '..'. If '.' is supplied, the volume directory will be the git repository. Otherwise, if specified, the volume will contain the git repository in the subdirectory with the given name. type: string repository: - description: Repository URL + description: repository is the URL type: string revision: - description: Commit hash for the specified revision. + description: revision is the commit hash for the specified revision. type: string required: - repository type: object glusterfs: - description: 'Glusterfs represents a Glusterfs mount on the host that shares a pod''s lifetime. More info: https://examples.k8s.io/volumes/glusterfs/README.md' + description: 'glusterfs represents a Glusterfs mount on the host that shares a pod''s lifetime. More info: https://examples.k8s.io/volumes/glusterfs/README.md' properties: endpoints: - description: 'EndpointsName is the endpoint name that details Glusterfs topology. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' + description: 'endpoints is the endpoint name that details Glusterfs topology. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: string path: - description: 'Path is the Glusterfs volume path. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' + description: 'path is the Glusterfs volume path. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: string readOnly: - description: 'ReadOnly here will force the Glusterfs volume to be mounted with read-only permissions. Defaults to false. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' + description: 'readOnly here will force the Glusterfs volume to be mounted with read-only permissions. Defaults to false. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod' type: boolean required: - endpoints - path type: object hostPath: - description: 'HostPath represents a pre-existing file or directory on the host machine that is directly exposed to the container. This is generally used for system agents or other privileged things that are allowed to see the host machine. Most containers will NOT need this. More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath --- TODO(jonesdl) We need to restrict who can use host directory mounts and who can/can not mount host directories as read/write.' + description: 'hostPath represents a pre-existing file or directory on the host machine that is directly exposed to the container. This is generally used for system agents or other privileged things that are allowed to see the host machine. Most containers will NOT need this. More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath --- TODO(jonesdl) We need to restrict who can use host directory mounts and who can/can not mount host directories as read/write.' properties: path: - description: 'Path of the directory on the host. If the path is a symlink, it will follow the link to the real path. More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath' + description: 'path of the directory on the host. If the path is a symlink, it will follow the link to the real path. More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath' type: string type: - description: 'Type for HostPath Volume Defaults to "" More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath' + description: 'type for HostPath Volume Defaults to "" More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath' type: string required: - path type: object iscsi: - description: 'ISCSI represents an ISCSI Disk resource that is attached to a kubelet''s host machine and then exposed to the pod. More info: https://examples.k8s.io/volumes/iscsi/README.md' + description: 'iscsi represents an ISCSI Disk resource that is attached to a kubelet''s host machine and then exposed to the pod. More info: https://examples.k8s.io/volumes/iscsi/README.md' properties: chapAuthDiscovery: - description: whether support iSCSI Discovery CHAP authentication + description: chapAuthDiscovery defines whether support iSCSI Discovery CHAP authentication type: boolean chapAuthSession: - description: whether support iSCSI Session CHAP authentication + description: chapAuthSession defines whether support iSCSI Session CHAP authentication type: boolean fsType: - description: 'Filesystem type of the volume that you want to mount. Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi TODO: how do we prevent errors in the filesystem from compromising the machine' + description: 'fsType is the filesystem type of the volume that you want to mount. Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi TODO: how do we prevent errors in the filesystem from compromising the machine' type: string initiatorName: - description: Custom iSCSI Initiator Name. If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface : will be created for the connection. + description: initiatorName is the custom iSCSI Initiator Name. If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface : will be created for the connection. type: string iqn: - description: Target iSCSI Qualified Name. + description: iqn is the target iSCSI Qualified Name. type: string iscsiInterface: - description: iSCSI Interface Name that uses an iSCSI transport. Defaults to 'default' (tcp). + description: iscsiInterface is the interface Name that uses an iSCSI transport. Defaults to 'default' (tcp). type: string lun: - description: iSCSI Target Lun number. + description: lun represents iSCSI Target Lun number. format: int32 type: integer portals: - description: iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port is other than default (typically TCP ports 860 and 3260). + description: portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port is other than default (typically TCP ports 860 and 3260). items: type: string type: array readOnly: - description: ReadOnly here will force the ReadOnly setting in VolumeMounts. Defaults to false. + description: readOnly here will force the ReadOnly setting in VolumeMounts. Defaults to false. type: boolean secretRef: - description: CHAP Secret for iSCSI target and initiator authentication + description: secretRef is the CHAP Secret for iSCSI target and initiator authentication properties: name: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic targetPortal: - description: iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port is other than default (typically TCP ports 860 and 3260). + description: targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port is other than default (typically TCP ports 860 and 3260). type: string required: - iqn @@ -2596,92 +3750,92 @@ spec: - targetPortal type: object name: - description: 'Volume''s name. Must be a DNS_LABEL and unique within the pod. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + description: 'name of the volume. Must be a DNS_LABEL and unique within the pod. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' type: string nfs: - description: 'NFS represents an NFS mount on the host that shares a pod''s lifetime More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs' + description: 'nfs represents an NFS mount on the host that shares a pod''s lifetime More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs' properties: path: - description: 'Path that is exported by the NFS server. More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs' + description: 'path that is exported by the NFS server. More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs' type: string readOnly: - description: 'ReadOnly here will force the NFS export to be mounted with read-only permissions. Defaults to false. More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs' + description: 'readOnly here will force the NFS export to be mounted with read-only permissions. Defaults to false. More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs' type: boolean server: - description: 'Server is the hostname or IP address of the NFS server. More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs' + description: 'server is the hostname or IP address of the NFS server. More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs' type: string required: - path - server type: object persistentVolumeClaim: - description: 'PersistentVolumeClaimVolumeSource represents a reference to a PersistentVolumeClaim in the same namespace. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims' + description: 'persistentVolumeClaimVolumeSource represents a reference to a PersistentVolumeClaim in the same namespace. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims' properties: claimName: - description: 'ClaimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims' + description: 'claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims' type: string readOnly: - description: Will force the ReadOnly setting in VolumeMounts. Default false. + description: readOnly Will force the ReadOnly setting in VolumeMounts. Default false. type: boolean required: - claimName type: object photonPersistentDisk: - description: PhotonPersistentDisk represents a PhotonController persistent disk attached and mounted on kubelets host machine + description: photonPersistentDisk represents a PhotonController persistent disk attached and mounted on kubelets host machine properties: fsType: - description: Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + description: fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. type: string pdID: - description: ID that identifies Photon Controller persistent disk + description: pdID is the ID that identifies Photon Controller persistent disk type: string required: - pdID type: object portworxVolume: - description: PortworxVolume represents a portworx volume attached and mounted on kubelets host machine + description: portworxVolume represents a portworx volume attached and mounted on kubelets host machine properties: fsType: - description: FSType represents the filesystem type to mount Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. + description: fSType represents the filesystem type to mount Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. type: string readOnly: - description: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. + description: readOnly defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. type: boolean volumeID: - description: VolumeID uniquely identifies a Portworx volume + description: volumeID uniquely identifies a Portworx volume type: string required: - volumeID type: object projected: - description: Items for all in one resources secrets, configmaps, and downward API + description: projected items for all in one resources secrets, configmaps, and downward API properties: defaultMode: - description: Mode bits to use on created files by default. Must be a value between 0 and 0777. Directories within the path are not affected by this setting. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set. + description: defaultMode are the mode bits used to set permissions on created files by default. Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. Directories within the path are not affected by this setting. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set. format: int32 type: integer sources: - description: list of volume projections + description: sources is the list of volume projections items: description: Projection that may be projected along with other supported volume types properties: configMap: - description: information about the configMap data to project + description: configMap information about the configMap data to project properties: items: - description: If unspecified, each key-value pair in the Data field of the referenced ConfigMap will be projected into the volume as a file whose name is the key and content is the value. If specified, the listed keys will be projected into the specified paths, and unlisted keys will not be present. If a key is specified which is not present in the ConfigMap, the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. + description: items if unspecified, each key-value pair in the Data field of the referenced ConfigMap will be projected into the volume as a file whose name is the key and content is the value. If specified, the listed keys will be projected into the specified paths, and unlisted keys will not be present. If a key is specified which is not present in the ConfigMap, the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. items: description: Maps a string key to a path within a volume. properties: key: - description: The key to project. + description: key is the key to project. type: string mode: - description: 'Optional: mode bits to use on this file, must be a value between 0 and 0777. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'mode is Optional: mode bits used to set permissions on this file. Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer path: - description: The relative path of the file to map the key to. May not be an absolute path. May not contain the path element '..'. May not start with the string '..'. + description: path is the relative path of the file to map the key to. May not be an absolute path. May not contain the path element '..'. May not start with the string '..'. type: string required: - key @@ -2692,11 +3846,12 @@ spec: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string optional: - description: Specify whether the ConfigMap or its keys must be defined + description: optional specify whether the ConfigMap or its keys must be defined type: boolean type: object + x-kubernetes-map-type: atomic downwardAPI: - description: information about the downwardAPI data to project + description: downwardAPI information about the downwardAPI data to project properties: items: description: Items is a list of DownwardAPIVolume file @@ -2715,8 +3870,9 @@ spec: required: - fieldPath type: object + x-kubernetes-map-type: atomic mode: - description: 'Optional: mode bits to use on this file, must be a value between 0 and 0777. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'Optional: mode bits used to set permissions on this file, must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer path: @@ -2729,36 +3885,41 @@ spec: description: 'Container name: required for volumes, optional for env vars' type: string divisor: + anyOf: + - type: integer + - type: string description: Specifies the output format of the exposed resources, defaults to "1" - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true resource: description: 'Required: resource to select' type: string required: - resource type: object + x-kubernetes-map-type: atomic required: - path type: object type: array type: object secret: - description: information about the secret data to project + description: secret information about the secret data to project properties: items: - description: If unspecified, each key-value pair in the Data field of the referenced Secret will be projected into the volume as a file whose name is the key and content is the value. If specified, the listed keys will be projected into the specified paths, and unlisted keys will not be present. If a key is specified which is not present in the Secret, the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. + description: items if unspecified, each key-value pair in the Data field of the referenced Secret will be projected into the volume as a file whose name is the key and content is the value. If specified, the listed keys will be projected into the specified paths, and unlisted keys will not be present. If a key is specified which is not present in the Secret, the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. items: description: Maps a string key to a path within a volume. properties: key: - description: The key to project. + description: key is the key to project. type: string mode: - description: 'Optional: mode bits to use on this file, must be a value between 0 and 0777. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'mode is Optional: mode bits used to set permissions on this file. Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer path: - description: The relative path of the file to map the key to. May not be an absolute path. May not contain the path element '..'. May not start with the string '..'. + description: path is the relative path of the file to map the key to. May not be an absolute path. May not contain the path element '..'. May not start with the string '..'. type: string required: - key @@ -2769,128 +3930,129 @@ spec: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string optional: - description: Specify whether the Secret or its key must be defined + description: optional field specify whether the Secret or its key must be defined type: boolean type: object + x-kubernetes-map-type: atomic serviceAccountToken: - description: information about the serviceAccountToken data to project + description: serviceAccountToken is information about the serviceAccountToken data to project properties: audience: - description: Audience is the intended audience of the token. A recipient of a token must identify itself with an identifier specified in the audience of the token, and otherwise should reject the token. The audience defaults to the identifier of the apiserver. + description: audience is the intended audience of the token. A recipient of a token must identify itself with an identifier specified in the audience of the token, and otherwise should reject the token. The audience defaults to the identifier of the apiserver. type: string expirationSeconds: - description: ExpirationSeconds is the requested duration of validity of the service account token. As the token approaches expiration, the kubelet volume plugin will proactively rotate the service account token. The kubelet will start trying to rotate the token if the token is older than 80 percent of its time to live or if the token is older than 24 hours.Defaults to 1 hour and must be at least 10 minutes. + description: expirationSeconds is the requested duration of validity of the service account token. As the token approaches expiration, the kubelet volume plugin will proactively rotate the service account token. The kubelet will start trying to rotate the token if the token is older than 80 percent of its time to live or if the token is older than 24 hours.Defaults to 1 hour and must be at least 10 minutes. format: int64 type: integer path: - description: Path is the path relative to the mount point of the file to project the token into. + description: path is the path relative to the mount point of the file to project the token into. type: string required: - path type: object type: object type: array - required: - - sources type: object quobyte: - description: Quobyte represents a Quobyte mount on the host that shares a pod's lifetime + description: quobyte represents a Quobyte mount on the host that shares a pod's lifetime properties: group: - description: Group to map volume access to Default is no group + description: group to map volume access to Default is no group type: string readOnly: - description: ReadOnly here will force the Quobyte volume to be mounted with read-only permissions. Defaults to false. + description: readOnly here will force the Quobyte volume to be mounted with read-only permissions. Defaults to false. type: boolean registry: - description: Registry represents a single or multiple Quobyte Registry services specified as a string as host:port pair (multiple entries are separated with commas) which acts as the central registry for volumes + description: registry represents a single or multiple Quobyte Registry services specified as a string as host:port pair (multiple entries are separated with commas) which acts as the central registry for volumes type: string tenant: - description: Tenant owning the given Quobyte volume in the Backend Used with dynamically provisioned Quobyte volumes, value is set by the plugin + description: tenant owning the given Quobyte volume in the Backend Used with dynamically provisioned Quobyte volumes, value is set by the plugin type: string user: - description: User to map volume access to Defaults to serivceaccount user + description: user to map volume access to Defaults to serivceaccount user type: string volume: - description: Volume is a string that references an already created Quobyte volume by name. + description: volume is a string that references an already created Quobyte volume by name. type: string required: - registry - volume type: object rbd: - description: 'RBD represents a Rados Block Device mount on the host that shares a pod''s lifetime. More info: https://examples.k8s.io/volumes/rbd/README.md' + description: 'rbd represents a Rados Block Device mount on the host that shares a pod''s lifetime. More info: https://examples.k8s.io/volumes/rbd/README.md' properties: fsType: - description: 'Filesystem type of the volume that you want to mount. Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd TODO: how do we prevent errors in the filesystem from compromising the machine' + description: 'fsType is the filesystem type of the volume that you want to mount. Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd TODO: how do we prevent errors in the filesystem from compromising the machine' type: string image: - description: 'The rados image name. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' + description: 'image is the rados image name. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string keyring: - description: 'Keyring is the path to key ring for RBDUser. Default is /etc/ceph/keyring. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' + description: 'keyring is the path to key ring for RBDUser. Default is /etc/ceph/keyring. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string monitors: - description: 'A collection of Ceph monitors. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' + description: 'monitors is a collection of Ceph monitors. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' items: type: string type: array pool: - description: 'The rados pool name. Default is rbd. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' + description: 'pool is the rados pool name. Default is rbd. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string readOnly: - description: 'ReadOnly here will force the ReadOnly setting in VolumeMounts. Defaults to false. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' + description: 'readOnly here will force the ReadOnly setting in VolumeMounts. Defaults to false. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: boolean secretRef: - description: 'SecretRef is name of the authentication secret for RBDUser. If provided overrides keyring. Default is nil. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' + description: 'secretRef is name of the authentication secret for RBDUser. If provided overrides keyring. Default is nil. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' properties: name: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic user: - description: 'The rados user name. Default is admin. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' + description: 'user is the rados user name. Default is admin. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it' type: string required: - image - monitors type: object scaleIO: - description: ScaleIO represents a ScaleIO persistent volume attached and mounted on Kubernetes nodes. + description: scaleIO represents a ScaleIO persistent volume attached and mounted on Kubernetes nodes. properties: fsType: - description: Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Default is "xfs". + description: fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Default is "xfs". type: string gateway: - description: The host address of the ScaleIO API Gateway. + description: gateway is the host address of the ScaleIO API Gateway. type: string protectionDomain: - description: The name of the ScaleIO Protection Domain for the configured storage. + description: protectionDomain is the name of the ScaleIO Protection Domain for the configured storage. type: string readOnly: - description: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. + description: readOnly Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. type: boolean secretRef: - description: SecretRef references to the secret for ScaleIO user and other sensitive information. If this is not provided, Login operation will fail. + description: secretRef references to the secret for ScaleIO user and other sensitive information. If this is not provided, Login operation will fail. properties: name: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic sslEnabled: - description: Flag to enable/disable SSL communication with Gateway, default false + description: sslEnabled Flag enable/disable SSL communication with Gateway, default false type: boolean storageMode: - description: Indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. Default is ThinProvisioned. + description: storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. Default is ThinProvisioned. type: string storagePool: - description: The ScaleIO Storage Pool associated with the protection domain. + description: storagePool is the ScaleIO Storage Pool associated with the protection domain. type: string system: - description: The name of the storage system as configured in ScaleIO. + description: system is the name of the storage system as configured in ScaleIO. type: string volumeName: - description: The name of a volume already created in the ScaleIO system that is associated with this volume source. + description: volumeName is the name of a volume already created in the ScaleIO system that is associated with this volume source. type: string required: - gateway @@ -2898,26 +4060,26 @@ spec: - system type: object secret: - description: 'Secret represents a secret that should populate this volume. More info: https://kubernetes.io/docs/concepts/storage/volumes#secret' + description: 'secret represents a secret that should populate this volume. More info: https://kubernetes.io/docs/concepts/storage/volumes#secret' properties: defaultMode: - description: 'Optional: mode bits to use on created files by default. Must be a value between 0 and 0777. Defaults to 0644. Directories within the path are not affected by this setting. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'defaultMode is Optional: mode bits used to set permissions on created files by default. Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. Defaults to 0644. Directories within the path are not affected by this setting. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer items: - description: If unspecified, each key-value pair in the Data field of the referenced Secret will be projected into the volume as a file whose name is the key and content is the value. If specified, the listed keys will be projected into the specified paths, and unlisted keys will not be present. If a key is specified which is not present in the Secret, the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. + description: items If unspecified, each key-value pair in the Data field of the referenced Secret will be projected into the volume as a file whose name is the key and content is the value. If specified, the listed keys will be projected into the specified paths, and unlisted keys will not be present. If a key is specified which is not present in the Secret, the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. items: description: Maps a string key to a path within a volume. properties: key: - description: The key to project. + description: key is the key to project. type: string mode: - description: 'Optional: mode bits to use on this file, must be a value between 0 and 0777. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' + description: 'mode is Optional: mode bits used to set permissions on this file. Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. If not specified, the volume defaultMode will be used. This might be in conflict with other options that affect the file mode, like fsGroup, and the result can be other mode bits set.' format: int32 type: integer path: - description: The relative path of the file to map the key to. May not be an absolute path. May not contain the path element '..'. May not start with the string '..'. + description: path is the relative path of the file to map the key to. May not be an absolute path. May not contain the path element '..'. May not start with the string '..'. type: string required: - key @@ -2925,49 +4087,50 @@ spec: type: object type: array optional: - description: Specify whether the Secret or its keys must be defined + description: optional field specify whether the Secret or its keys must be defined type: boolean secretName: - description: 'Name of the secret in the pod''s namespace to use. More info: https://kubernetes.io/docs/concepts/storage/volumes#secret' + description: 'secretName is the name of the secret in the pod''s namespace to use. More info: https://kubernetes.io/docs/concepts/storage/volumes#secret' type: string type: object storageos: - description: StorageOS represents a StorageOS volume attached and mounted on Kubernetes nodes. + description: storageOS represents a StorageOS volume attached and mounted on Kubernetes nodes. properties: fsType: - description: Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + description: fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. type: string readOnly: - description: Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. + description: readOnly defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. type: boolean secretRef: - description: SecretRef specifies the secret to use for obtaining the StorageOS API credentials. If not specified, default values will be attempted. + description: secretRef specifies the secret to use for obtaining the StorageOS API credentials. If not specified, default values will be attempted. properties: name: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic volumeName: - description: VolumeName is the human-readable name of the StorageOS volume. Volume names are only unique within a namespace. + description: volumeName is the human-readable name of the StorageOS volume. Volume names are only unique within a namespace. type: string volumeNamespace: - description: VolumeNamespace specifies the scope of the volume within StorageOS. If no namespace is specified then the Pod's namespace will be used. This allows the Kubernetes name scoping to be mirrored within StorageOS for tighter integration. Set VolumeName to any name to override the default behaviour. Set to "default" if you are not using namespaces within StorageOS. Namespaces that do not pre-exist within StorageOS will be created. + description: volumeNamespace specifies the scope of the volume within StorageOS. If no namespace is specified then the Pod's namespace will be used. This allows the Kubernetes name scoping to be mirrored within StorageOS for tighter integration. Set VolumeName to any name to override the default behaviour. Set to "default" if you are not using namespaces within StorageOS. Namespaces that do not pre-exist within StorageOS will be created. type: string type: object vsphereVolume: - description: VsphereVolume represents a vSphere volume attached and mounted on kubelets host machine + description: vsphereVolume represents a vSphere volume attached and mounted on kubelets host machine properties: fsType: - description: Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + description: fsType is filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. type: string storagePolicyID: - description: Storage Policy Based Management (SPBM) profile ID associated with the StoragePolicyName. + description: storagePolicyID is the storage Policy Based Management (SPBM) profile ID associated with the StoragePolicyName. type: string storagePolicyName: - description: Storage Policy Based Management (SPBM) profile name. + description: storagePolicyName is the storage Policy Based Management (SPBM) profile name. type: string volumePath: - description: Path that identifies vSphere volume vmdk + description: volumePath is the path that identifies vSphere volume vmdk type: string required: - volumePath @@ -2976,6 +4139,160 @@ spec: - name type: object type: array + web: + description: Defines the web command line flags when starting Alertmanager. + properties: + httpConfig: + description: Defines HTTP parameters for web server. + properties: + headers: + description: List of headers that can be added to HTTP responses. + properties: + contentSecurityPolicy: + description: Set the Content-Security-Policy header to HTTP responses. Unset if blank. + type: string + strictTransportSecurity: + description: Set the Strict-Transport-Security header to HTTP responses. Unset if blank. Please make sure that you use this with care as this header might force browsers to load Prometheus and the other applications hosted on the same domain and subdomains over HTTPS. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security + type: string + xContentTypeOptions: + description: Set the X-Content-Type-Options header to HTTP responses. Unset if blank. Accepted value is nosniff. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options + enum: + - "" + - NoSniff + type: string + xFrameOptions: + description: Set the X-Frame-Options header to HTTP responses. Unset if blank. Accepted values are deny and sameorigin. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options + enum: + - "" + - Deny + - SameOrigin + type: string + xXSSProtection: + description: Set the X-XSS-Protection header to all responses. Unset if blank. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection + type: string + type: object + http2: + description: Enable HTTP/2 support. Note that HTTP/2 is only supported with TLS. When TLSConfig is not configured, HTTP/2 will be disabled. Whenever the value of the field changes, a rolling update will be triggered. + type: boolean + type: object + tlsConfig: + description: Defines the TLS parameters for HTTPS. + properties: + cert: + description: Contains the TLS certificate for the server. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cipherSuites: + description: 'List of supported cipher suites for TLS versions up to TLS 1.2. If empty, Go default cipher suites are used. Available cipher suites are documented in the go documentation: https://golang.org/pkg/crypto/tls/#pkg-constants' + items: + type: string + type: array + client_ca: + description: Contains the CA certificate for client certificate authentication to the server. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientAuthType: + description: 'Server policy for client authentication. Maps to ClientAuth Policies. For more detail on clientAuth options: https://golang.org/pkg/crypto/tls/#ClientAuthType' + type: string + curvePreferences: + description: 'Elliptic curves that will be used in an ECDHE handshake, in preference order. Available curves are documented in the go documentation: https://golang.org/pkg/crypto/tls/#CurveID' + items: + type: string + type: array + keySecret: + description: Secret containing the TLS key for the server. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + maxVersion: + description: Maximum TLS version that is acceptable. Defaults to TLS13. + type: string + minVersion: + description: Minimum TLS version that is acceptable. Defaults to TLS12. + type: string + preferServerCipherSuites: + description: Controls whether the server selects the client's most preferred cipher suite, or the server's most preferred cipher suite. If true then the server's preference, as expressed in the order of elements in cipherSuites, is used. + type: boolean + required: + - cert + - keySecret + type: object + type: object type: object status: description: 'Most recent observed status of the Alertmanager cluster. Read-only. Not included when requesting from the apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status' @@ -2985,7 +4302,7 @@ spec: format: int32 type: integer paused: - description: Represents whether any actions on the underlaying managed objects are being performed. Only delete actions will be performed. + description: Represents whether any actions on the underlying managed objects are being performed. Only delete actions will be performed. type: boolean replicas: description: Total number of non-terminated pods targeted by this Alertmanager cluster (their labels match the selector). @@ -3012,9 +4329,3 @@ spec: served: true storage: true subresources: {} -status: - acceptedNames: - kind: "" - plural: "" - conditions: [] - storedVersions: [] diff --git a/manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml index 1079811..20e3b0b 100644 --- a/manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml @@ -2,15 +2,19 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.2.4 + controller-gen.kubebuilder.io/version: v0.9.2 creationTimestamp: null name: podmonitors.monitoring.coreos.com spec: group: monitoring.coreos.com names: + categories: + - prometheus-operator kind: PodMonitor listKind: PodMonitorList plural: podmonitors + shortNames: + - pmon singular: podmonitor scope: Namespaced versions: @@ -30,9 +34,28 @@ spec: spec: description: Specification of desired Pod selection for target discovery by Prometheus. properties: + attachMetadata: + description: Attaches node metadata to discovered targets. Requires Prometheus v2.35.0 and above. + properties: + node: + description: When set to true, Prometheus must have permissions to get Nodes. + type: boolean + type: object jobLabel: description: The label to use to retrieve the job name from. type: string + labelLimit: + description: Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + labelNameLengthLimit: + description: Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + labelValueLengthLimit: + description: Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer namespaceSelector: description: Selector to select which namespaces the Endpoints objects are discovered from. properties: @@ -40,7 +63,7 @@ spec: description: Boolean describing whether all namespaces are selected in contrast to a list restricting them. type: boolean matchNames: - description: List of namespace names. + description: List of namespace names to select from. items: type: string type: array @@ -50,6 +73,90 @@ spec: items: description: PodMetricsEndpoint defines a scrapeable endpoint of a Kubernetes Pod serving Prometheus metrics. properties: + authorization: + description: Authorization section for this endpoint + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint' + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the pod monitor and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + enableHttp2: + description: Whether to enable HTTP2. + type: boolean + filterRunning: + description: 'Drop pods that are not running. (Failed, Succeeded). Enabled by default. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase' + type: boolean + followRedirects: + description: FollowRedirects configures whether scrape requests follow HTTP 3xx redirects. + type: boolean honorLabels: description: HonorLabels chooses the metric's labels on collisions with target labels. type: boolean @@ -57,7 +164,8 @@ spec: description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data. type: boolean interval: - description: Interval at which metrics should be scraped + description: Interval at which metrics should be scraped If not specified Prometheus' global scrape interval is used. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string metricRelabelings: description: MetricRelabelConfigs to apply to samples before ingestion. @@ -65,7 +173,27 @@ spec: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines ``-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' properties: action: - description: Action to perform based on regex matching. Default is 'replace' + default: replace + description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36. + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase type: string modulus: description: Modulus to take of the hash of the source label values. @@ -83,6 +211,8 @@ spec: sourceLabels: description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions. items: + description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores. + pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ type: string type: array targetLabel: @@ -90,6 +220,80 @@ spec: type: string type: object type: array + oauth2: + description: OAuth2 for the URL. Only valid in Prometheus versions 2.27.0 and newer. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object params: additionalProperties: items: @@ -98,7 +302,7 @@ spec: description: Optional HTTP URL parameters type: object path: - description: HTTP path to scrape for metrics. + description: HTTP path to scrape for metrics. If empty, Prometheus uses the default value (e.g. `/metrics`). type: string port: description: Name of the pod port this endpoint refers to. Mutually exclusive with targetPort. @@ -107,12 +311,32 @@ spec: description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint. type: string relabelings: - description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config' + description: 'RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds relabelings for a few standard Kubernetes fields. The original scrape job''s name is available via the `__tmp_prometheus_job_name` label. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config' items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines ``-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' properties: action: - description: Action to perform based on regex matching. Default is 'replace' + default: replace + description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36. + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase type: string modulus: description: Modulus to take of the hash of the source label values. @@ -130,6 +354,8 @@ spec: sourceLabels: description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions. items: + description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores. + pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ type: string type: array targetLabel: @@ -141,7 +367,8 @@ spec: description: HTTP scheme to use for scraping. type: string scrapeTimeout: - description: Timeout after which the scrape is ended + description: Timeout after which the scrape is ended If not specified, the Prometheus global scrape interval is used. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string targetPort: anyOf: @@ -149,6 +376,104 @@ spec: - type: string description: 'Deprecated: Use ''port'' instead.' x-kubernetes-int-or-string: true + tlsConfig: + description: TLS configuration to use when scraping the endpoint. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object type: object type: array podTargetLabels: @@ -190,6 +515,11 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + targetLimit: + description: TargetLimit defines a limit on the number of scraped targets that will be accepted. + format: int64 + type: integer required: - podMetricsEndpoints - selector @@ -199,9 +529,3 @@ spec: type: object served: true storage: true -status: - acceptedNames: - kind: "" - plural: "" - conditions: [] - storedVersions: [] diff --git a/manifests/setup/prometheus-operator-0probeCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0probeCustomResourceDefinition.yaml new file mode 100644 index 0000000..bb5b3d4 --- /dev/null +++ b/manifests/setup/prometheus-operator-0probeCustomResourceDefinition.yaml @@ -0,0 +1,566 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.2 + creationTimestamp: null + name: probes.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + categories: + - prometheus-operator + kind: Probe + listKind: ProbeList + plural: probes + shortNames: + - prb + singular: probe + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: Probe defines monitoring for a set of static targets or ingresses. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: Specification of desired Ingress selection for target discovery by Prometheus. + properties: + authorization: + description: Authorization section for this endpoint + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint' + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenSecret: + description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the probe and accessible by the Prometheus Operator. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + interval: + description: Interval at which targets are probed using the configured prober. If not specified Prometheus' global scrape interval is used. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + jobName: + description: The job name assigned to scraped metrics by default. + type: string + labelLimit: + description: Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + labelNameLengthLimit: + description: Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + labelValueLengthLimit: + description: Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + metricRelabelings: + description: MetricRelabelConfigs to apply to samples before ingestion. + items: + description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines ``-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + default: replace + description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36. + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase + type: string + modulus: + description: Modulus to take of the hash of the source label values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted value is matched. Default is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions. + items: + description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores. + pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ + type: string + type: array + targetLabel: + description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available. + type: string + type: object + type: array + module: + description: 'The module to use for probing specifying how to probe the target. Example module configuring in the blackbox exporter: https://github.com/prometheus/blackbox_exporter/blob/master/example.yml' + type: string + oauth2: + description: OAuth2 for the URL. Only valid in Prometheus versions 2.27.0 and newer. + properties: + clientId: + description: The secret or configmap containing the OAuth2 client id + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: The secret containing the OAuth2 client secret + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: Parameters to append to the token URL + type: object + scopes: + description: OAuth2 scopes used for the token request + items: + type: string + type: array + tokenUrl: + description: The URL to fetch the token from + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + prober: + description: Specification for the prober to use for probing targets. The prober.URL parameter is required. Targets cannot be probed if left empty. + properties: + path: + default: /probe + description: Path to collect metrics from. Defaults to `/probe`. + type: string + proxyUrl: + description: Optional ProxyURL. + type: string + scheme: + description: HTTP scheme to use for scraping. Defaults to `http`. + type: string + url: + description: Mandatory URL of the prober. + type: string + required: + - url + type: object + sampleLimit: + description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + format: int64 + type: integer + scrapeTimeout: + description: Timeout for scraping metrics from the Prometheus exporter. If not specified, the Prometheus global scrape interval is used. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + targetLimit: + description: TargetLimit defines a limit on the number of scraped targets that will be accepted. + format: int64 + type: integer + targets: + description: Targets defines a set of static or dynamically discovered targets to probe. + properties: + ingress: + description: ingress defines the Ingress objects to probe and the relabeling configuration. If `staticConfig` is also defined, `staticConfig` takes precedence. + properties: + namespaceSelector: + description: From which namespaces to select Ingress objects. + properties: + any: + description: Boolean describing whether all namespaces are selected in contrast to a list restricting them. + type: boolean + matchNames: + description: List of namespace names to select from. + items: + type: string + type: array + type: object + relabelingConfigs: + description: 'RelabelConfigs to apply to the label set of the target before it gets scraped. The original ingress address is available via the `__tmp_prometheus_ingress_address` label. It can be used to customize the probed URL. The original scrape job''s name is available via the `__tmp_prometheus_job_name` label. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config' + items: + description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines ``-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + default: replace + description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36. + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase + type: string + modulus: + description: Modulus to take of the hash of the source label values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted value is matched. Default is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions. + items: + description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores. + pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ + type: string + type: array + targetLabel: + description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available. + type: string + type: object + type: array + selector: + description: Selector to select the Ingress objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: object + staticConfig: + description: 'staticConfig defines the static list of targets to probe and the relabeling configuration. If `ingress` is also defined, `staticConfig` takes precedence. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.' + properties: + labels: + additionalProperties: + type: string + description: Labels assigned to all metrics scraped from the targets. + type: object + relabelingConfigs: + description: 'RelabelConfigs to apply to the label set of the targets before it gets scraped. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config' + items: + description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines ``-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + default: replace + description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36. + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase + type: string + modulus: + description: Modulus to take of the hash of the source label values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted value is matched. Default is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions. + items: + description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores. + pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ + type: string + type: array + targetLabel: + description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available. + type: string + type: object + type: array + static: + description: The list of hosts to probe. + items: + type: string + type: array + type: object + type: object + tlsConfig: + description: TLS configuration to use when scraping the endpoint. + properties: + ca: + description: Certificate authority used when verifying server certificates. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: Client certificate to present when doing client-authentication. + properties: + configMap: + description: ConfigMap containing data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: Secret containing data to use for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keySecret: + description: Secret containing the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + required: + - spec + type: object + served: true + storage: true diff --git a/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml index 91f140c..9b313f1 100644 --- a/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml +++ b/manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml @@ -2,15 +2,19 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.2.4 + controller-gen.kubebuilder.io/version: v0.9.2 creationTimestamp: null name: prometheuses.monitoring.coreos.com spec: group: monitoring.coreos.com names: + categories: + - prometheus-operator kind: Prometheus listKind: PrometheusList plural: prometheuses + shortNames: + - prom singular: prometheus scope: Namespaced versions: @@ -19,13 +23,28 @@ spec: jsonPath: .spec.version name: Version type: string - - description: The desired replicas number of Prometheuses + - description: The number of desired replicas jsonPath: .spec.replicas - name: Replicas + name: Desired type: integer + - description: The number of ready replicas + jsonPath: .status.availableReplicas + name: Ready + type: integer + - jsonPath: .status.conditions[?(@.type == 'Reconciled')].status + name: Reconciled + type: string + - jsonPath: .status.conditions[?(@.type == 'Available')].status + name: Available + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date + - description: Whether the resource reconciliation is paused or not + jsonPath: .status.paused + name: Paused + priority: 1 + type: boolean name: v1 schema: openAPIV3Schema: @@ -57,6 +76,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic additionalAlertRelabelConfigs: description: 'AdditionalAlertRelabelConfigs allows specifying a key of a Secret containing additional Prometheus alert relabel configurations. Alert relabel configurations specified are appended to the configurations generated by the Prometheus Operator. Alert relabel configurations specified must have the form as specified in the official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alert_relabel_configs. As alert relabel configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible alert relabel configs are going to break Prometheus after the upgrade.' properties: @@ -72,6 +92,23 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic + additionalArgs: + description: AdditionalArgs allows setting additional arguments for the Prometheus container. It is intended for e.g. activating hidden flags which are not supported by the dedicated configuration options yet. The arguments are passed as-is to the Prometheus container which may cause issues if they are invalid or not supported by the given Prometheus version. In case of an argument conflict (e.g. an argument which is already set by the operator itself) or when providing an invalid argument the reconciliation will fail and an error will be logged. + items: + description: Argument as part of the AdditionalArgs list. + properties: + name: + description: Name of the argument, e.g. "scrape.discovery-reload-interval". + minLength: 1 + type: string + value: + description: Argument value, e.g. 30s. Can be empty for name-only arguments (e.g. --storage.tsdb.no-lockfile) + type: string + required: + - name + type: object + type: array additionalScrapeConfigs: description: 'AdditionalScrapeConfigs allows specifying a key of a Secret containing additional Prometheus scrape configurations. Scrape configurations specified are appended to the configurations generated by the Prometheus Operator. Job configurations specified must have the form as specified in the official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config. As scrape configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible scrape configs are going to break Prometheus after the upgrade.' properties: @@ -87,6 +124,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic affinity: description: If specified, the pod's scheduling constraints. properties: @@ -144,6 +182,7 @@ spec: type: object type: array type: object + x-kubernetes-map-type: atomic weight: description: Weight associated with matching the corresponding nodeSelectorTerm, in the range 1-100. format: int32 @@ -204,10 +243,12 @@ spec: type: object type: array type: object + x-kubernetes-map-type: atomic type: array required: - nodeSelectorTerms type: object + x-kubernetes-map-type: atomic type: object podAffinity: description: Describes pod affinity scheduling rules (e.g. co-locate this pod in the same node, zone, etc. as some other pod(s)). @@ -250,8 +291,40 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + namespaceSelector: + description: A label query over the set of namespaces that the term applies to. The term is applied to the union of the namespaces selected by this field and the ones listed in the namespaces field. null selector and null or empty namespaces list means "this pod's namespace". An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic namespaces: - description: namespaces specifies which namespaces the labelSelector applies to (matches against); null or empty list means "this pod's namespace" + description: namespaces specifies a static list of namespace names that the term applies to. The term is applied to the union of the namespaces listed in this field and the ones selected by namespaceSelector. null or empty namespaces list and null namespaceSelector means "this pod's namespace". items: type: string type: array @@ -305,8 +378,40 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + namespaceSelector: + description: A label query over the set of namespaces that the term applies to. The term is applied to the union of the namespaces selected by this field and the ones listed in the namespaces field. null selector and null or empty namespaces list means "this pod's namespace". An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic namespaces: - description: namespaces specifies which namespaces the labelSelector applies to (matches against); null or empty list means "this pod's namespace" + description: namespaces specifies a static list of namespace names that the term applies to. The term is applied to the union of the namespaces listed in this field and the ones selected by namespaceSelector. null or empty namespaces list and null namespaceSelector means "this pod's namespace". items: type: string type: array @@ -359,8 +464,40 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + namespaceSelector: + description: A label query over the set of namespaces that the term applies to. The term is applied to the union of the namespaces selected by this field and the ones listed in the namespaces field. null selector and null or empty namespaces list means "this pod's namespace". An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic namespaces: - description: namespaces specifies which namespaces the labelSelector applies to (matches against); null or empty list means "this pod's namespace" + description: namespaces specifies a static list of namespace names that the term applies to. The term is applied to the union of the namespaces listed in this field and the ones selected by namespaceSelector. null or empty namespaces list and null namespaceSelector means "this pod's namespace". items: type: string type: array @@ -414,8 +551,40 @@ spec: description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. type: object type: object + x-kubernetes-map-type: atomic + namespaceSelector: + description: A label query over the set of namespaces that the term applies to. The term is applied to the union of the namespaces selected by this field and the ones listed in the namespaces field. null selector and null or empty namespaces list means "this pod's namespace". An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic namespaces: - description: namespaces specifies which namespaces the labelSelector applies to (matches against); null or empty list means "this pod's namespace" + description: namespaces specifies a static list of namespace names that the term applies to. The term is applied to the union of the namespaces listed in this field and the ones selected by namespaceSelector. null or empty namespaces list and null namespaceSelector means "this pod's namespace". items: type: string type: array @@ -439,9 +608,71 @@ spec: apiVersion: description: Version of the Alertmanager API that Prometheus uses to send alerts. It can be "v1" or "v2". type: string + authorization: + description: Authorization section for this alertmanager endpoint + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object + basicAuth: + description: BasicAuth allow an endpoint to authenticate over basic authentication + properties: + password: + description: The secret in the service monitor namespace that contains the password for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: The secret in the service monitor namespace that contains the username for authentication. + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object bearerTokenFile: description: BearerTokenFile to read from filesystem to use when authenticating to Alertmanager. type: string + enableHttp2: + description: Whether to enable HTTP2. + type: boolean name: description: Name of Endpoints object in Namespace. type: string @@ -460,11 +691,15 @@ spec: scheme: description: Scheme to use when firing alerts. type: string + timeout: + description: Timeout is a per-target Alertmanager timeout when pushing alerts. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string tlsConfig: description: TLS Config to use for alertmanager connection. properties: ca: - description: Stuct containing the CA cert to use for the targets. + description: Certificate authority used when verifying server certificates. properties: configMap: description: ConfigMap containing data to use for the targets. @@ -481,6 +716,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic secret: description: Secret containing data to use for the targets. properties: @@ -496,12 +732,13 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object caFile: description: Path to the CA cert in the Prometheus container to use for the targets. type: string cert: - description: Struct containing the client cert file for the targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the targets. @@ -518,6 +755,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic secret: description: Secret containing data to use for the targets. properties: @@ -533,6 +771,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object certFile: description: Path to the client cert file in the Prometheus container for the targets. @@ -558,6 +797,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic serverName: description: Used to verify the hostname for the targets. type: string @@ -571,9 +811,38 @@ spec: required: - alertmanagers type: object + allowOverlappingBlocks: + description: AllowOverlappingBlocks enables vertical compaction and vertical query merge in Prometheus. This is still experimental in Prometheus so it may change in any upcoming release. + type: boolean apiserverConfig: description: APIServerConfig allows specifying a host and auth methods to access apiserver. If left empty, Prometheus is assumed to run inside of the cluster and will discover API servers automatically and use the pod's CA certificate and bearer token file at /var/run/secrets/kubernetes.io/serviceaccount/. properties: + authorization: + description: Authorization section for accessing apiserver + properties: + credentials: + description: The secret's key that contains the credentials of the request + properties: + key: + description: The key of the secret to select from. Must be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + optional: + description: Specify whether the Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + credentialsFile: + description: File to read a secret from, mutually exclusive with Credentials (from SafeAuthorization) + type: string + type: + description: Set the authentication type. Defaults to Bearer, Basic will cause an error + type: string + type: object basicAuth: description: BasicAuth allow an endpoint to authenticate over basic authentication properties: @@ -592,6 +861,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic username: description: The secret in the service monitor namespace that contains the username for authentication. properties: @@ -607,6 +877,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object bearerToken: description: Bearer token for accessing apiserver. @@ -621,7 +892,7 @@ spec: description: TLS Config to use for accessing apiserver. properties: ca: - description: Stuct containing the CA cert to use for the targets. + description: Certificate authority used when verifying server certificates. properties: configMap: description: ConfigMap containing data to use for the targets. @@ -638,6 +909,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic secret: description: Secret containing data to use for the targets. properties: @@ -653,12 +925,13 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object caFile: description: Path to the CA cert in the Prometheus container to use for the targets. type: string cert: - description: Struct containing the client cert file for the targets. + description: Client certificate to present when doing client-authentication. properties: configMap: description: ConfigMap containing data to use for the targets. @@ -675,6 +948,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic secret: description: Secret containing data to use for the targets. properties: @@ -690,6 +964,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object certFile: description: Path to the client cert file in the Prometheus container for the targets. @@ -715,6 +990,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic serverName: description: Used to verify the hostname for the targets. type: string @@ -729,25 +1005,25 @@ spec: type: boolean type: object baseImage: - description: Base image to use for a Prometheus deployment. + description: 'Base image to use for a Prometheus deployment. Deprecated: use ''image'' instead' type: string configMaps: - description: ConfigMaps is a list of ConfigMaps in the same namespace as the Prometheus object, which shall be mounted into the Prometheus Pods. The ConfigMaps are mounted into /etc/prometheus/configmaps/. + description: ConfigMaps is a list of ConfigMaps in the same namespace as the Prometheus object, which shall be mounted into the Prometheus Pods. Each ConfigMap is added to the StatefulSet definition as a volume named `configmap-`. The ConfigMaps are mounted into /etc/prometheus/configmaps/ in the 'prometheus' container. items: type: string type: array containers: - description: 'Containers allows injecting additional containers or modifying operator generated containers. This can be used to allow adding an authentication proxy to a Prometheus pod or to change the behavior of an operator generated container. Containers described here modify an operator generated container if they share the same name and modifications are done via a strategic merge patch. The current container names are: `prometheus`, `prometheus-config-reloader`, `rules-configmap-reloader`, and `thanos-sidecar`. Overriding containers is entirely outside the scope of what the maintainers will support and by doing so, you accept that this behaviour may break at any time without notice.' + description: 'Containers allows injecting additional containers or modifying operator generated containers. This can be used to allow adding an authentication proxy to a Prometheus pod or to change the behavior of an operator generated container. Containers described here modify an operator generated container if they share the same name and modifications are done via a strategic merge patch. The current container names are: `prometheus`, `config-reloader`, and `thanos-sidecar`. Overriding containers is entirely outside the scope of what the maintainers will support and by doing so, you accept that this behaviour may break at any time without notice.' items: description: A single application container that you want to run within a pod. properties: args: - description: 'Arguments to the entrypoint. The docker image''s CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + description: 'Arguments to the entrypoint. The container image''s CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' items: type: string type: array command: - description: 'Entrypoint array. Not executed within a shell. The docker image''s ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + description: 'Entrypoint array. Not executed within a shell. The container image''s ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' items: type: string type: array @@ -760,7 +1036,7 @@ spec: description: Name of the environment variable. Must be a C_IDENTIFIER. type: string value: - description: 'Variable references $(VAR_NAME) are expanded using the previous defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".' + description: 'Variable references $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".' type: string valueFrom: description: Source for the environment variable's value. Cannot be used if value is not empty. @@ -780,8 +1056,9 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic fieldRef: - description: 'Selects a field of the pod: supports metadata.name, metadata.namespace, metadata.labels, metadata.annotations, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.' + description: 'Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['''']`, `metadata.annotations['''']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.' properties: apiVersion: description: Version of the schema the FieldPath is written in terms of, defaults to "v1". @@ -792,6 +1069,7 @@ spec: required: - fieldPath type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: 'Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.' properties: @@ -799,14 +1077,19 @@ spec: description: 'Container name: required for volumes, optional for env vars' type: string divisor: + anyOf: + - type: integer + - type: string description: Specifies the output format of the exposed resources, defaults to "1" - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true resource: description: 'Required: resource to select' type: string required: - resource type: object + x-kubernetes-map-type: atomic secretKeyRef: description: Selects a key of a secret in the pod's namespace properties: @@ -822,6 +1105,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object required: - name @@ -842,6 +1126,7 @@ spec: description: Specify whether the ConfigMap must be defined type: boolean type: object + x-kubernetes-map-type: atomic prefix: description: An optional identifier to prepend to each key in the ConfigMap. Must be a C_IDENTIFIER. type: string @@ -855,10 +1140,11 @@ spec: description: Specify whether the Secret must be defined type: boolean type: object + x-kubernetes-map-type: atomic type: object type: array image: - description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.' + description: 'Container image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.' type: string imagePullPolicy: description: 'Image pull policy. One of Always, Never, IfNotPresent. Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' @@ -870,7 +1156,7 @@ spec: description: 'PostStart is called immediately after a container is created. If the handler fails, the container is terminated and restarted according to its restart policy. Other management of the container blocks until the hook completes. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -916,7 +1202,7 @@ spec: - port type: object tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept for the backward compatibility. There are no validation of this field and lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -932,10 +1218,10 @@ spec: type: object type: object preStop: - description: 'PreStop is called immediately before a container is terminated due to an API request or management event such as liveness/startup probe failure, preemption, resource contention, etc. The handler is not called if the container crashes or exits. The reason for termination is passed to the handler. The Pod''s termination grace period countdown begins before the PreStop hooked is executed. Regardless of the outcome of the handler, the container will eventually terminate within the Pod''s termination grace period. Other management of the container blocks until the hook completes or until the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' + description: 'PreStop is called immediately before a container is terminated due to an API request or management event such as liveness/startup probe failure, preemption, resource contention, etc. The handler is not called if the container crashes or exits. The Pod''s termination grace period countdown begins before the PreStop hook is executed. Regardless of the outcome of the handler, the container will eventually terminate within the Pod''s termination grace period (unless delayed by finalizers). Other management of the container blocks until the hook completes or until the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -981,7 +1267,7 @@ spec: - port type: object tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept for the backward compatibility. There are no validation of this field and lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1001,7 +1287,7 @@ spec: description: 'Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1013,6 +1299,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1063,7 +1362,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1077,6 +1376,10 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 @@ -1086,7 +1389,7 @@ spec: description: Name of the container specified as a DNS_LABEL. Each container in a pod must have a unique name (DNS_LABEL). Cannot be updated. type: string ports: - description: List of ports to expose from the container. Exposing a port here gives the system additional information about the network connections a container uses, but is primarily informational. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default "0.0.0.0" address inside a container will be accessible from the network. Cannot be updated. + description: List of ports to expose from the container. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default "0.0.0.0" address inside a container will be accessible from the network. Modifying this array with strategic merge patch may corrupt the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255. Cannot be updated. items: description: ContainerPort represents a network port in a single container. properties: @@ -1105,17 +1408,22 @@ spec: description: If specified, this must be an IANA_SVC_NAME and unique within the pod. Each named port in a pod must have a unique name. Name for the port that can be referred to by services. type: string protocol: + default: TCP description: Protocol for port. Must be UDP, TCP, or SCTP. Defaults to "TCP". type: string required: - containerPort type: object type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map readinessProbe: description: 'Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1127,6 +1435,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1177,7 +1498,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1191,33 +1512,45 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 type: integer type: object resources: - description: 'Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + description: 'Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' properties: limits: additionalProperties: - type: string - description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object requests: additionalProperties: - type: string - description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object type: object securityContext: - description: 'Security options the pod should run with. More info: https://kubernetes.io/docs/concepts/policy/security-context/ More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' + description: 'SecurityContext defines the security options the container should be run with. If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' properties: allowPrivilegeEscalation: - description: 'AllowPrivilegeEscalation controls whether a process can gain more privileges than its parent process. This bool directly controls if the no_new_privs flag will be set on the container process. AllowPrivilegeEscalation is true always when the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN' + description: 'AllowPrivilegeEscalation controls whether a process can gain more privileges than its parent process. This bool directly controls if the no_new_privs flag will be set on the container process. AllowPrivilegeEscalation is true always when the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN Note that this field cannot be set when spec.os.name is windows.' type: boolean capabilities: - description: The capabilities to add/drop when running containers. Defaults to the default set of capabilities granted by the container runtime. + description: The capabilities to add/drop when running containers. Defaults to the default set of capabilities granted by the container runtime. Note that this field cannot be set when spec.os.name is windows. properties: add: description: Added capabilities @@ -1233,27 +1566,27 @@ spec: type: array type: object privileged: - description: Run container in privileged mode. Processes in privileged containers are essentially equivalent to root on the host. Defaults to false. + description: Run container in privileged mode. Processes in privileged containers are essentially equivalent to root on the host. Defaults to false. Note that this field cannot be set when spec.os.name is windows. type: boolean procMount: - description: procMount denotes the type of proc mount to use for the containers. The default is DefaultProcMount which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. + description: procMount denotes the type of proc mount to use for the containers. The default is DefaultProcMount which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. type: string readOnlyRootFilesystem: - description: Whether this container has a read-only root filesystem. Default is false. + description: Whether this container has a read-only root filesystem. Default is false. Note that this field cannot be set when spec.os.name is windows. type: boolean runAsGroup: - description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer runAsNonRoot: description: Indicates that the container must run as a non-root user. If true, the Kubelet will validate the image at runtime to ensure that it does not run as UID 0 (root) and fail to start the container if it does. If unset or false, no such validation will be performed. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: boolean runAsUser: - description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer seLinuxOptions: - description: The SELinux context to be applied to the container. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The SELinux context to be applied to the container. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. properties: level: description: Level is SELinux level label that applies to the container. @@ -1268,8 +1601,20 @@ spec: description: User is a SELinux user label that applies to the container. type: string type: object + seccompProfile: + description: The seccomp options to use by this container. If seccomp options are provided at both the pod & container level, the container options override the pod options. Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: localhostProfile indicates a profile defined in a file on the node should be used. The profile must be preconfigured on the node to work. Must be a descending path, relative to the kubelet's configured seccomp profile location. Must only be set if type is "Localhost". + type: string + type: + description: "type indicates which kind of seccomp profile will be applied. Valid options are: \n Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied." + type: string + required: + - type + type: object windowsOptions: - description: The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is linux. properties: gmsaCredentialSpec: description: GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. @@ -1277,16 +1622,19 @@ spec: gmsaCredentialSpecName: description: GMSACredentialSpecName is the name of the GMSA credential spec to use. type: string + hostProcess: + description: HostProcess determines if a container should be run as a 'Host Process' container. This field is alpha-level and will only be honored by components that enable the WindowsHostProcessContainers feature flag. Setting this field without the feature flag will result in errors when validating the Pod. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean runAsUserName: description: The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: string type: object type: object startupProbe: - description: 'StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod''s lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is a beta feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + description: 'StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod''s lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1298,6 +1646,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1348,7 +1709,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1362,6 +1723,10 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 @@ -1439,29 +1804,130 @@ spec: enableAdminAPI: description: 'Enable access to prometheus web admin API. Defaults to the value of `false`. WARNING: Enabling the admin APIs enables mutating endpoints, to delete data, shutdown Prometheus, and more. Enabling this should be done with care and the user is advised to add additional authentication authorization via a proxy to ensure only clients authorized to perform these actions can do so. For more information see https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis' type: boolean + enableFeatures: + description: Enable access to Prometheus disabled features. By default, no features are enabled. Enabling disabled features is entirely outside the scope of what the maintainers will support and by doing so, you accept that this behaviour may break at any time without notice. For more information see https://prometheus.io/docs/prometheus/latest/disabled_features/ + items: + type: string + type: array + enableRemoteWriteReceiver: + description: 'Enable Prometheus to be used as a receiver for the Prometheus remote write protocol. Defaults to the value of `false`. WARNING: This is not considered an efficient way of ingesting samples. Use it with caution for specific low-volume use cases. It is not suitable for replacing the ingestion via scraping and turning Prometheus into a push-based metrics collection system. For more information see https://prometheus.io/docs/prometheus/latest/querying/api/#remote-write-receiver Only valid in Prometheus versions 2.33.0 and newer.' + type: boolean + enforcedBodySizeLimit: + description: 'EnforcedBodySizeLimit defines the maximum size of uncompressed response body that will be accepted by Prometheus. Targets responding with a body larger than this many bytes will cause the scrape to fail. Example: 100MB. If defined, the limit will apply to all service/pod monitors and probes. This is an experimental feature, this behaviour could change or be removed in the future. Only valid in Prometheus versions 2.28.0 and newer.' + pattern: (^0|([0-9]*[.])?[0-9]+((K|M|G|T|E|P)i?)?B)$ + type: string + enforcedLabelLimit: + description: Per-scrape limit on number of labels that will be accepted for a sample. If more than this number of labels are present post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + enforcedLabelNameLengthLimit: + description: Per-scrape limit on length of labels name that will be accepted for a sample. If a label name is longer than this number post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer + enforcedLabelValueLengthLimit: + description: Per-scrape limit on length of labels value that will be accepted for a sample. If a label value is longer than this number post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus versions 2.27.0 and newer. + format: int64 + type: integer enforcedNamespaceLabel: - description: EnforcedNamespaceLabel enforces adding a namespace label of origin for each alert and metric that is user created. The label value will always be the namespace of the object that is being created. + description: "EnforcedNamespaceLabel If set, a label will be added to \n 1. all user-metrics (created by `ServiceMonitor`, `PodMonitor` and `Probe` objects) and 2. in all `PrometheusRule` objects (except the ones excluded in `prometheusRulesExcludedFromEnforce`) to * alerting & recording rules and * the metrics used in their expressions (`expr`). \n Label name is this field's value. Label value is the namespace of the created object (mentioned above)." type: string enforcedSampleLimit: description: EnforcedSampleLimit defines global limit on number of scraped samples that will be accepted. This overrides any SampleLimit set per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to enforce the SampleLimit to keep overall number of samples/series under the desired limit. Note that if SampleLimit is lower that value will be taken instead. format: int64 type: integer + enforcedTargetLimit: + description: EnforcedTargetLimit defines a global limit on the number of scraped targets. This overrides any TargetLimit set per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to enforce the TargetLimit to keep the overall number of targets under the desired limit. Note that if TargetLimit is lower, that value will be taken instead, except if either value is zero, in which case the non-zero value will be used. If both values are zero, no limit is enforced. + format: int64 + type: integer evaluationInterval: - description: Interval between consecutive evaluations. - type: string - externalLabels: - additionalProperties: - type: string - description: The labels to add to any time series or alerts when communicating with external systems (federation, remote storage, Alertmanager). - type: object - externalUrl: - description: The external URL the Prometheus instances will be available under. This is necessary to generate correct URLs. This is necessary if Prometheus is not served from root of a DNS name. + default: 30s + description: 'Interval between consecutive evaluations. Default: `30s`' + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string - ignoreNamespaceSelectors: - description: IgnoreNamespaceSelectors if set to true will ignore NamespaceSelector settings from the podmonitor and servicemonitor configs, and they will only discover endpoints within their current namespace. Defaults to false. - type: boolean - image: - description: Image if specified has precedence over baseImage, tag and sha combinations. Specifying the version is still necessary to ensure the Prometheus Operator knows what version of Prometheus is being configured. + excludedFromEnforcement: + description: List of references to PodMonitor, ServiceMonitor, Probe and PrometheusRule objects to be excluded from enforcing a namespace label of origin. Applies only if enforcedNamespaceLabel set to true. + items: + description: ObjectReference references a PodMonitor, ServiceMonitor, Probe or PrometheusRule object. + properties: + group: + default: monitoring.coreos.com + description: Group of the referent. When not specified, it defaults to `monitoring.coreos.com` + enum: + - monitoring.coreos.com + type: string + name: + description: Name of the referent. When not set, all resources are matched. + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + minLength: 1 + type: string + resource: + description: Resource of the referent. + enum: + - prometheusrules + - servicemonitors + - podmonitors + - probes + type: string + required: + - namespace + - resource + type: object + type: array + exemplars: + description: Exemplars related settings that are runtime reloadable. It requires to enable the exemplar storage feature to be effective. + properties: + maxSize: + description: Maximum number of exemplars stored in memory for all series. If not set, Prometheus uses its default value. A value of zero or less than zero disables the storage. + format: int64 + type: integer + type: object + externalLabels: + additionalProperties: + type: string + description: The labels to add to any time series or alerts when communicating with external systems (federation, remote storage, Alertmanager). + type: object + externalUrl: + description: The external URL the Prometheus instances will be available under. This is necessary to generate correct URLs. This is necessary if Prometheus is not served from root of a DNS name. + type: string + hostAliases: + description: Pods' hostAliases configuration + items: + description: HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the pod's hosts file. + properties: + hostnames: + description: Hostnames for the above IP address. + items: + type: string + type: array + ip: + description: IP address of the host file entry. + type: string + required: + - hostnames + - ip + type: object + type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map + hostNetwork: + description: Use the host's network namespace if true. Make sure to understand the security implications if you want to enable it. When hostNetwork is enabled, this will set dnsPolicy to ClusterFirstWithHostNet automatically. + type: boolean + ignoreNamespaceSelectors: + description: IgnoreNamespaceSelectors if set to true will ignore NamespaceSelector settings from all PodMonitor, ServiceMonitor and Probe objects. They will only discover endpoints within the namespace of the PodMonitor, ServiceMonitor and Probe objects. Defaults to false. + type: boolean + image: + description: Image if specified has precedence over baseImage, tag and sha combinations. Specifying the version is still necessary to ensure the Prometheus Operator knows what version of Prometheus is being configured. + type: string + imagePullPolicy: + description: Image pull policy for the 'prometheus', 'init-config-reloader' and 'config-reloader' containers. See https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy for more details. + enum: + - "" + - Always + - Never + - IfNotPresent type: string imagePullSecrets: description: An optional list of references to secrets in the same namespace to use for pulling prometheus and alertmanager images from registries see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod @@ -1472,19 +1938,20 @@ spec: description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?' type: string type: object + x-kubernetes-map-type: atomic type: array initContainers: - description: 'InitContainers allows adding initContainers to the pod definition. Those can be used to e.g. fetch secrets for injection into the Prometheus configuration from external sources. Any errors during the execution of an initContainer will lead to a restart of the Pod. More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ Using initContainers for any use case other then secret fetching is entirely outside the scope of what the maintainers will support and by doing so, you accept that this behaviour may break at any time without notice.' + description: 'InitContainers allows adding initContainers to the pod definition. Those can be used to e.g. fetch secrets for injection into the Prometheus configuration from external sources. Any errors during the execution of an initContainer will lead to a restart of the Pod. More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ InitContainers described here modify an operator generated init containers if they share the same name and modifications are done via a strategic merge patch. The current init container name is: `init-config-reloader`. Overriding init containers is entirely outside the scope of what the maintainers will support and by doing so, you accept that this behaviour may break at any time without notice.' items: description: A single application container that you want to run within a pod. properties: args: - description: 'Arguments to the entrypoint. The docker image''s CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + description: 'Arguments to the entrypoint. The container image''s CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' items: type: string type: array command: - description: 'Entrypoint array. Not executed within a shell. The docker image''s ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + description: 'Entrypoint array. Not executed within a shell. The container image''s ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container''s environment. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' items: type: string type: array @@ -1497,7 +1964,7 @@ spec: description: Name of the environment variable. Must be a C_IDENTIFIER. type: string value: - description: 'Variable references $(VAR_NAME) are expanded using the previous defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".' + description: 'Variable references $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".' type: string valueFrom: description: Source for the environment variable's value. Cannot be used if value is not empty. @@ -1517,8 +1984,9 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic fieldRef: - description: 'Selects a field of the pod: supports metadata.name, metadata.namespace, metadata.labels, metadata.annotations, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.' + description: 'Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['''']`, `metadata.annotations['''']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.' properties: apiVersion: description: Version of the schema the FieldPath is written in terms of, defaults to "v1". @@ -1529,6 +1997,7 @@ spec: required: - fieldPath type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: 'Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.' properties: @@ -1536,14 +2005,19 @@ spec: description: 'Container name: required for volumes, optional for env vars' type: string divisor: + anyOf: + - type: integer + - type: string description: Specifies the output format of the exposed resources, defaults to "1" - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true resource: description: 'Required: resource to select' type: string required: - resource type: object + x-kubernetes-map-type: atomic secretKeyRef: description: Selects a key of a secret in the pod's namespace properties: @@ -1559,6 +2033,7 @@ spec: required: - key type: object + x-kubernetes-map-type: atomic type: object required: - name @@ -1579,6 +2054,7 @@ spec: description: Specify whether the ConfigMap must be defined type: boolean type: object + x-kubernetes-map-type: atomic prefix: description: An optional identifier to prepend to each key in the ConfigMap. Must be a C_IDENTIFIER. type: string @@ -1592,10 +2068,11 @@ spec: description: Specify whether the Secret must be defined type: boolean type: object + x-kubernetes-map-type: atomic type: object type: array image: - description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.' + description: 'Container image name. More info: https://kubernetes.io/docs/concepts/containers/images This field is optional to allow higher level config management to default or override container images in workload controllers like Deployments and StatefulSets.' type: string imagePullPolicy: description: 'Image pull policy. One of Always, Never, IfNotPresent. Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' @@ -1607,7 +2084,7 @@ spec: description: 'PostStart is called immediately after a container is created. If the handler fails, the container is terminated and restarted according to its restart policy. Other management of the container blocks until the hook completes. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1653,7 +2130,7 @@ spec: - port type: object tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept for the backward compatibility. There are no validation of this field and lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1669,10 +2146,10 @@ spec: type: object type: object preStop: - description: 'PreStop is called immediately before a container is terminated due to an API request or management event such as liveness/startup probe failure, preemption, resource contention, etc. The handler is not called if the container crashes or exits. The reason for termination is passed to the handler. The Pod''s termination grace period countdown begins before the PreStop hooked is executed. Regardless of the outcome of the handler, the container will eventually terminate within the Pod''s termination grace period. Other management of the container blocks until the hook completes or until the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' + description: 'PreStop is called immediately before a container is terminated due to an API request or management event such as liveness/startup probe failure, preemption, resource contention, etc. The handler is not called if the container crashes or exits. The Pod''s termination grace period countdown begins before the PreStop hook is executed. Regardless of the outcome of the handler, the container will eventually terminate within the Pod''s termination grace period (unless delayed by finalizers). Other management of the container blocks until the hook completes or until the termination grace period is reached. More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1718,7 +2195,7 @@ spec: - port type: object tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept for the backward compatibility. There are no validation of this field and lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1738,7 +2215,7 @@ spec: description: 'Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1750,6 +2227,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1800,7 +2290,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1814,6 +2304,10 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 @@ -1823,7 +2317,7 @@ spec: description: Name of the container specified as a DNS_LABEL. Each container in a pod must have a unique name (DNS_LABEL). Cannot be updated. type: string ports: - description: List of ports to expose from the container. Exposing a port here gives the system additional information about the network connections a container uses, but is primarily informational. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default "0.0.0.0" address inside a container will be accessible from the network. Cannot be updated. + description: List of ports to expose from the container. Not specifying a port here DOES NOT prevent that port from being exposed. Any port which is listening on the default "0.0.0.0" address inside a container will be accessible from the network. Modifying this array with strategic merge patch may corrupt the data. For more information See https://github.com/kubernetes/kubernetes/issues/108255. Cannot be updated. items: description: ContainerPort represents a network port in a single container. properties: @@ -1842,17 +2336,22 @@ spec: description: If specified, this must be an IANA_SVC_NAME and unique within the pod. Each named port in a pod must have a unique name. Name for the port that can be referred to by services. type: string protocol: + default: TCP description: Protocol for port. Must be UDP, TCP, or SCTP. Defaults to "TCP". type: string required: - containerPort type: object type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map readinessProbe: description: 'Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -1864,6 +2363,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -1914,7 +2426,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -1928,33 +2440,45 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 type: integer type: object resources: - description: 'Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + description: 'Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' properties: limits: additionalProperties: - type: string - description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object requests: additionalProperties: - type: string - description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object type: object securityContext: - description: 'Security options the pod should run with. More info: https://kubernetes.io/docs/concepts/policy/security-context/ More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' + description: 'SecurityContext defines the security options the container should be run with. If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' properties: allowPrivilegeEscalation: - description: 'AllowPrivilegeEscalation controls whether a process can gain more privileges than its parent process. This bool directly controls if the no_new_privs flag will be set on the container process. AllowPrivilegeEscalation is true always when the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN' + description: 'AllowPrivilegeEscalation controls whether a process can gain more privileges than its parent process. This bool directly controls if the no_new_privs flag will be set on the container process. AllowPrivilegeEscalation is true always when the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN Note that this field cannot be set when spec.os.name is windows.' type: boolean capabilities: - description: The capabilities to add/drop when running containers. Defaults to the default set of capabilities granted by the container runtime. + description: The capabilities to add/drop when running containers. Defaults to the default set of capabilities granted by the container runtime. Note that this field cannot be set when spec.os.name is windows. properties: add: description: Added capabilities @@ -1970,27 +2494,27 @@ spec: type: array type: object privileged: - description: Run container in privileged mode. Processes in privileged containers are essentially equivalent to root on the host. Defaults to false. + description: Run container in privileged mode. Processes in privileged containers are essentially equivalent to root on the host. Defaults to false. Note that this field cannot be set when spec.os.name is windows. type: boolean procMount: - description: procMount denotes the type of proc mount to use for the containers. The default is DefaultProcMount which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. + description: procMount denotes the type of proc mount to use for the containers. The default is DefaultProcMount which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. type: string readOnlyRootFilesystem: - description: Whether this container has a read-only root filesystem. Default is false. + description: Whether this container has a read-only root filesystem. Default is false. Note that this field cannot be set when spec.os.name is windows. type: boolean runAsGroup: - description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer runAsNonRoot: description: Indicates that the container must run as a non-root user. If true, the Kubelet will validate the image at runtime to ensure that it does not run as UID 0 (root) and fail to start the container if it does. If unset or false, no such validation will be performed. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: boolean runAsUser: - description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. format: int64 type: integer seLinuxOptions: - description: The SELinux context to be applied to the container. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The SELinux context to be applied to the container. If unspecified, the container runtime will allocate a random SELinux context for each container. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows. properties: level: description: Level is SELinux level label that applies to the container. @@ -2005,8 +2529,20 @@ spec: description: User is a SELinux user label that applies to the container. type: string type: object + seccompProfile: + description: The seccomp options to use by this container. If seccomp options are provided at both the pod & container level, the container options override the pod options. Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: localhostProfile indicates a profile defined in a file on the node should be used. The profile must be preconfigured on the node to work. Must be a descending path, relative to the kubelet's configured seccomp profile location. Must only be set if type is "Localhost". + type: string + type: + description: "type indicates which kind of seccomp profile will be applied. Valid options are: \n Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied." + type: string + required: + - type + type: object windowsOptions: - description: The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + description: The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is linux. properties: gmsaCredentialSpec: description: GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. @@ -2014,16 +2550,19 @@ spec: gmsaCredentialSpecName: description: GMSACredentialSpecName is the name of the GMSA credential spec to use. type: string + hostProcess: + description: HostProcess determines if a container should be run as a 'Host Process' container. This field is alpha-level and will only be honored by components that enable the WindowsHostProcessContainers feature flag. Setting this field without the feature flag will result in errors when validating the Pod. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean runAsUserName: description: The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. type: string type: object type: object startupProbe: - description: 'StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod''s lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is a beta feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + description: 'StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod''s lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: exec: - description: One and only one of the following should be specified. Exec specifies the action to take. + description: Exec specifies the action to take. properties: command: description: Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy. @@ -2035,6 +2574,19 @@ spec: description: Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1. format: int32 type: integer + grpc: + description: GRPC specifies an action involving a GRPC port. This is a beta field and requires enabling GRPCContainerProbe feature gate. + properties: + port: + description: Port number of the gRPC service. Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + description: "Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). \n If this is not specified, the default behavior is defined by gRPC." + type: string + required: + - port + type: object httpGet: description: HTTPGet specifies the http request to perform. properties: @@ -2085,7 +2637,7 @@ spec: format: int32 type: integer tcpSocket: - description: 'TCPSocket specifies an action involving a TCP port. TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook' + description: TCPSocket specifies an action involving a TCP port. properties: host: description: 'Optional: Host name to connect to, defaults to the pod IP.' @@ -2099,6 +2651,10 @@ spec: required: - port type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer timeoutSeconds: description: 'Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 @@ -2175,20 +2731,34 @@ spec: type: boolean logFormat: description: Log format for Prometheus to be configured with. + enum: + - "" + - logfmt + - json type: string logLevel: description: Log level for Prometheus to be configured with. + enum: + - "" + - debug + - info + - warn + - error type: string + minReadySeconds: + description: Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field from kubernetes 1.22 until 1.24 which requires enabling the StatefulSetMinReadySeconds feature gate. + format: int32 + type: integer nodeSelector: additionalProperties: type: string description: Define which Nodes the Pods are scheduled on. type: object overrideHonorLabels: - description: OverrideHonorLabels if set to true overrides all user configured honor_labels. If HonorLabels is set in ServiceMonitor or PodMonitor to true, this overrides honor_labels to false. + description: When true, Prometheus resolves label conflicts by renaming the labels in the scraped data to "exported_