diff --git a/base_operator_stack.jsonnet b/base_operator_stack.jsonnet
index 03328e9..6997966 100644
--- a/base_operator_stack.jsonnet
+++ b/base_operator_stack.jsonnet
@@ -1,4 +1,4 @@
-local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
+local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local utils = import 'utils.libsonnet';
local vars = import 'vars.jsonnet';
@@ -60,7 +60,7 @@ local vars = import 'vars.jsonnet';
},
},
plugins: vars.grafana.plugins,
- env: vars.grafana.env
+ env: vars.grafana.env,
},
},
//---------------------------------------
diff --git a/jsonnetfile.json b/jsonnetfile.json
index 420c9b3..7d75ad2 100644
--- a/jsonnetfile.json
+++ b/jsonnetfile.json
@@ -4,10 +4,18 @@
{
"source": {
"git": {
- "remote": "https://github.com/coreos/kube-prometheus.git",
+ "remote": "https://github.com/prometheus-operator/kube-prometheus.git",
"subdir": "jsonnet/kube-prometheus"
}
},
+ "version": "main"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/ksonnet/ksonnet-lib.git"
+ }
+ },
"version": "master"
}
],
diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json
index 49ad402..06cee35 100644
--- a/jsonnetfile.lock.json
+++ b/jsonnetfile.lock.json
@@ -8,48 +8,48 @@
"subdir": "grafana"
}
},
- "version": "57b4365eacda291b82e0d55ba7eec573a8198dda",
- "sum": "92DWADwGjnCfpZaL7Q07C0GZayxBziGla/O03qWea34="
+ "version": "d039275e4916aceae1c137120882e01d857787ac",
+ "sum": "515vMn4x4tP8vegL4HLW0nDO5+njGTgnDZB5OOhtsCI="
},
{
"source": {
"git": {
- "remote": "https://github.com/coreos/etcd.git",
- "subdir": "Documentation/etcd-mixin"
+ "remote": "https://github.com/etcd-io/etcd.git",
+ "subdir": "contrib/mixin"
}
},
- "version": "d8c8f903eee10b8391abaef7758c38b2cd393c55",
- "sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398="
+ "version": "e73f55d4e94666c99558baa2fd4e365aeaca4dc4",
+ "sum": "IkDHlaE0gvvcPjSNurFT+jQ2aCOAbqHF1WVmXbAgkds="
},
{
"source": {
"git": {
- "remote": "https://github.com/coreos/kube-prometheus.git",
- "subdir": "jsonnet/kube-prometheus"
+ "remote": "https://github.com/grafana/grafana.git",
+ "subdir": "grafana-mixin"
}
},
- "version": "17989b42aa10b1c6afa07043cb05bcd5ae492284",
- "sum": "2FR289B1LGUf5tTN4PXBj5TjRX7okSFxE8uHkSslzDQ="
+ "version": "1120f9e255760a3c104b57871fcb91801e934382",
+ "sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
},
{
"source": {
"git": {
- "remote": "https://github.com/coreos/prometheus-operator.git",
- "subdir": "jsonnet/prometheus-operator"
+ "remote": "https://github.com/grafana/grafonnet-lib.git",
+ "subdir": "grafonnet"
}
},
- "version": "e31c69f9b5c6555e0f4a5c1f39d0f03182dd6b41",
- "sum": "WggWVWZ+CBEUThQCztSaRELbtqdXf9s3OFzf06HbYNA="
+ "version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
+ "sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
- "subdir": "grafonnet"
+ "subdir": "grafonnet-7.0"
}
},
- "version": "8fb95bd89990e493a8534205ee636bfcb8db67bd",
- "sum": "tDuuSKE9f4Ew2bjBM33Rs6behLEAzkmKkShSt+jpAak="
+ "version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
+ "sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
},
{
"source": {
@@ -58,8 +58,8 @@
"subdir": "grafana-builder"
}
},
- "version": "881db2241f0c5007c3e831caf34b0c645202b4ab",
- "sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
+ "version": "d68f9a6e0b1af7c4c4056dc2b43fb8f3bac01f43",
+ "sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
},
{
"source": {
@@ -69,8 +69,7 @@
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
- "sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=",
- "name": "ksonnet"
+ "sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg="
},
{
"source": {
@@ -79,38 +78,70 @@
"subdir": ""
}
},
- "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
- "sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY="
+ "version": "3c386687c1f8ceb6b79ff887c4a934e9cee1b90a",
+ "sum": "H8lcnk7gQEUoRi58/xq+JTfd2PcjJUjMQHgxGklUiFY="
},
{
"source": {
"git": {
- "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
- "subdir": "lib/promgrafonnet"
+ "remote": "https://github.com/kubernetes/kube-state-metrics.git",
+ "subdir": "jsonnet/kube-state-metrics"
}
},
- "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
- "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
+ "version": "71200632a6c97e9b87166dbb27489798a05effe3",
+ "sum": "4PJ2ROxODsoYO/1Y70+dgLZVjW5zlfzB+TDpxJBHwaI="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics.git",
- "subdir": "jsonnet/kube-state-metrics"
+ "subdir": "jsonnet/kube-state-metrics-mixin"
}
},
- "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
- "sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
+ "version": "71200632a6c97e9b87166dbb27489798a05effe3",
+ "sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
},
{
"source": {
"git": {
- "remote": "https://github.com/kubernetes/kube-state-metrics.git",
- "subdir": "jsonnet/kube-state-metrics-mixin"
+ "remote": "https://github.com/prometheus-operator/kube-prometheus.git",
+ "subdir": "jsonnet/kube-prometheus"
+ }
+ },
+ "version": "37d00082289c587f5a02a343ba23cfbe167000e2",
+ "sum": "5onAaPSrjnmgXIAsypnx0W/sIA7iTsHCeCjPrhGxj5A="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus-operator/prometheus-operator.git",
+ "subdir": "jsonnet/mixin"
+ }
+ },
+ "version": "df4cbd9526d8ff8e404a903b7ed2532847551d19",
+ "sum": "GQmaVFJwKMiD/P4n3N2LrAZVcwutriWrP8joclDtBYQ=",
+ "name": "prometheus-operator-mixin"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus-operator/prometheus-operator.git",
+ "subdir": "jsonnet/prometheus-operator"
}
},
- "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
- "sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8="
+ "version": "df4cbd9526d8ff8e404a903b7ed2532847551d19",
+ "sum": "wJ1E8XxYJ0RJrUuDNWLzE7bzo6JrH7P9q1lAu/xi4Ow="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus/alertmanager.git",
+ "subdir": "doc/alertmanager-mixin"
+ }
+ },
+ "version": "8afd462a9eaa3979bddf7bd6278bede4bc1f30e2",
+ "sum": "PsK+V7oETCPKu2gLoPfqY0wwPKH9TzhNj6o2xezjjXc=",
+ "name": "alertmanager"
},
{
"source": {
@@ -119,8 +150,8 @@
"subdir": "docs/node-mixin"
}
},
- "version": "08ce3c6dd430deb51798826701a395e460620d60",
- "sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc="
+ "version": "a3bd2e13052929663dbd7d680fab4a952efb1de6",
+ "sum": "TwdaTm0Z++diiLyaKAAimmC6hBL7XbrJc0RHhBCpAdU="
},
{
"source": {
@@ -129,9 +160,30 @@
"subdir": "documentation/prometheus-mixin"
}
},
- "version": "74207c04655e1fd93eea0e9a5d2f31b1cbc4d3d0",
- "sum": "lEzhZ8gllSfAO4kmXeTwl4W0anapIeFd5GCaCNuDe18=",
+ "version": "84c6f0e58444a452a5e2e19d14221409d2b9d790",
+ "sum": "LRx0tbMnoE1p8KEn+i81j2YsA5Sgt3itE5Y6jBf5eOQ=",
"name": "prometheus"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/pyrra-dev/pyrra.git",
+ "subdir": "config/crd/bases"
+ }
+ },
+ "version": "2584cefb8e6859eb9ee103df199e232cd0066aab",
+ "sum": "d1550yhsX4VxdVN7b0gWT0cido/W90P6OGLzLqPwZcs="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/thanos-io/thanos.git",
+ "subdir": "mixin"
+ }
+ },
+ "version": "3327c510076a77f876ac26e699d5252a61fc529a",
+ "sum": "Io++1+lp1oQVoQiVRSCXUiGdTIRPV7aL6Ewgs3bShEs=",
+ "name": "thanos-mixin"
}
],
"legacyImports": false
diff --git a/main.jsonnet b/main.jsonnet
index 681366a..e08470f 100644
--- a/main.jsonnet
+++ b/main.jsonnet
@@ -1,10 +1,10 @@
local utils = import 'utils.libsonnet';
local vars = import 'vars.jsonnet';
-local kp = (import 'kube-prometheus/kube-prometheus.libsonnet')
- + (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet')
- + (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet')
- + (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet')
+local kp = (import 'kube-prometheus/main.libsonnet') + { values+:: { common+: { namespace: 'monitoring' } } }
+ + (import 'kube-prometheus/addons/anti-affinity.libsonnet')
+ + (import 'kube-prometheus/platforms/kops-coredns.libsonnet')
+ + (import 'kube-prometheus/platforms/kubeadm.libsonnet')
// Additional modules are loaded dynamically from vars.jsonnet
+ utils.join_objects([module.file for module in vars.modules if module.enabled])
// Load K3s customized modules
@@ -14,6 +14,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet')
// Load image versions last to override default from modules
+ (import 'image_sources_versions.jsonnet');
+
// Generate core modules
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) }
// First generate operator resources except the serviceMonitors
diff --git a/manifests/alertmanager-alertmanager.yaml b/manifests/alertmanager-alertmanager.yaml
index cedc323..8086809 100644
--- a/manifests/alertmanager-alertmanager.yaml
+++ b/manifests/alertmanager-alertmanager.yaml
@@ -2,7 +2,11 @@ apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
labels:
- alertmanager: main
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
name: main
namespace: monitoring
spec:
@@ -11,22 +15,36 @@ spec:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
- matchExpressions:
- - key: alertmanager
- operator: In
- values:
- - main
+ matchLabels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
namespaces:
- monitoring
topologyKey: kubernetes.io/hostname
weight: 100
- image: prom/alertmanager:v0.21.0
+ image: quay.io/prometheus/alertmanager:v0.25.0
nodeSelector:
kubernetes.io/os: linux
- replicas: 1
+ podMetadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ replicas: 3
+ resources:
+ limits:
+ cpu: 100m
+ memory: 100Mi
+ requests:
+ cpu: 4m
+ memory: 100Mi
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: alertmanager-main
- version: v0.21.0
+ version: 0.25.0
diff --git a/manifests/alertmanager-networkPolicy.yaml b/manifests/alertmanager-networkPolicy.yaml
new file mode 100644
index 0000000..d84f477
--- /dev/null
+++ b/manifests/alertmanager-networkPolicy.yaml
@@ -0,0 +1,42 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ name: alertmanager-main
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 9093
+ protocol: TCP
+ - port: 8080
+ protocol: TCP
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: alertmanager
+ ports:
+ - port: 9094
+ protocol: TCP
+ - port: 9094
+ protocol: UDP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/alertmanager-podDisruptionBudget.yaml b/manifests/alertmanager-podDisruptionBudget.yaml
new file mode 100644
index 0000000..85cae78
--- /dev/null
+++ b/manifests/alertmanager-podDisruptionBudget.yaml
@@ -0,0 +1,19 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ name: alertmanager-main
+ namespace: monitoring
+spec:
+ maxUnavailable: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/alertmanager-prometheusRule.yaml b/manifests/alertmanager-prometheusRule.yaml
new file mode 100644
index 0000000..534bca2
--- /dev/null
+++ b/manifests/alertmanager-prometheusRule.yaml
@@ -0,0 +1,139 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ prometheus: k8s
+ role: alert-rules
+ name: alertmanager-main-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: alertmanager.rules
+ rules:
+ - alert: AlertmanagerFailedReload
+ annotations:
+ description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload
+ summary: Reloading an Alertmanager configuration has failed.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0
+ for: 10m
+ labels:
+ severity: critical
+ - alert: AlertmanagerMembersInconsistent
+ annotations:
+ description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent
+ summary: A member of an Alertmanager cluster has not found all other cluster members.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])
+ < on (namespace,service) group_left
+ count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]))
+ for: 15m
+ labels:
+ severity: critical
+ - alert: AlertmanagerFailedToSendAlerts
+ annotations:
+ description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts
+ summary: An Alertmanager instance failed to send notifications.
+ expr: |
+ (
+ rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
+ /
+ rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
+ )
+ > 0.01
+ for: 5m
+ labels:
+ severity: warning
+ - alert: AlertmanagerClusterFailedToSendAlerts
+ annotations:
+ description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
+ summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
+ expr: |
+ min by (namespace,service, integration) (
+ rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
+ /
+ rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
+ )
+ > 0.01
+ for: 5m
+ labels:
+ severity: critical
+ - alert: AlertmanagerClusterFailedToSendAlerts
+ annotations:
+ description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
+ summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
+ expr: |
+ min by (namespace,service, integration) (
+ rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
+ /
+ rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
+ )
+ > 0.01
+ for: 5m
+ labels:
+ severity: warning
+ - alert: AlertmanagerConfigInconsistent
+ annotations:
+ description: Alertmanager instances within the {{$labels.job}} cluster have different configurations.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent
+ summary: Alertmanager instances within the same cluster have different configurations.
+ expr: |
+ count by (namespace,service) (
+ count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})
+ )
+ != 1
+ for: 20m
+ labels:
+ severity: critical
+ - alert: AlertmanagerClusterDown
+ annotations:
+ description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown
+ summary: Half or more of the Alertmanager instances within the same cluster are down.
+ expr: |
+ (
+ count by (namespace,service) (
+ avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5
+ )
+ /
+ count by (namespace,service) (
+ up{job="alertmanager-main",namespace="monitoring"}
+ )
+ )
+ >= 0.5
+ for: 5m
+ labels:
+ severity: critical
+ - alert: AlertmanagerClusterCrashlooping
+ annotations:
+ description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping
+ summary: Half or more of the Alertmanager instances within the same cluster are crashlooping.
+ expr: |
+ (
+ count by (namespace,service) (
+ changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4
+ )
+ /
+ count by (namespace,service) (
+ up{job="alertmanager-main",namespace="monitoring"}
+ )
+ )
+ >= 0.5
+ for: 5m
+ labels:
+ severity: critical
diff --git a/manifests/alertmanager-secret.yaml b/manifests/alertmanager-secret.yaml
index e019922..54dfb43 100644
--- a/manifests/alertmanager-secret.yaml
+++ b/manifests/alertmanager-secret.yaml
@@ -1,7 +1,12 @@
apiVersion: v1
-data: {}
kind: Secret
metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
name: alertmanager-main
namespace: monitoring
stringData:
@@ -12,21 +17,28 @@ stringData:
- "equal":
- "namespace"
- "alertname"
- "source_match":
- "severity": "critical"
- "target_match_re":
- "severity": "warning|info"
+ "source_matchers":
+ - "severity = critical"
+ "target_matchers":
+ - "severity =~ warning|info"
- "equal":
- "namespace"
- "alertname"
- "source_match":
- "severity": "warning"
- "target_match_re":
- "severity": "info"
+ "source_matchers":
+ - "severity = warning"
+ "target_matchers":
+ - "severity = info"
+ - "equal":
+ - "namespace"
+ "source_matchers":
+ - "alertname = InfoInhibitor"
+ "target_matchers":
+ - "severity = info"
"receivers":
- "name": "Default"
- "name": "Watchdog"
- "name": "Critical"
+ - "name": "null"
"route":
"group_by":
- "namespace"
@@ -35,10 +47,13 @@ stringData:
"receiver": "Default"
"repeat_interval": "12h"
"routes":
- - "match":
- "alertname": "Watchdog"
+ - "matchers":
+ - "alertname = Watchdog"
"receiver": "Watchdog"
- - "match":
- "severity": "critical"
+ - "matchers":
+ - "alertname = InfoInhibitor"
+ "receiver": "null"
+ - "matchers":
+ - "severity = critical"
"receiver": "Critical"
type: Opaque
diff --git a/manifests/alertmanager-service.yaml b/manifests/alertmanager-service.yaml
index df4c9ff..33c960d 100644
--- a/manifests/alertmanager-service.yaml
+++ b/manifests/alertmanager-service.yaml
@@ -2,7 +2,11 @@ apiVersion: v1
kind: Service
metadata:
labels:
- alertmanager: main
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
name: alertmanager-main
namespace: monitoring
spec:
@@ -10,7 +14,12 @@ spec:
- name: web
port: 9093
targetPort: web
+ - name: reloader-web
+ port: 8080
+ targetPort: reloader-web
selector:
- alertmanager: main
- app: alertmanager
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
sessionAffinity: ClientIP
diff --git a/manifests/alertmanager-serviceAccount.yaml b/manifests/alertmanager-serviceAccount.yaml
index 5c06d5e..dc2eb85 100644
--- a/manifests/alertmanager-serviceAccount.yaml
+++ b/manifests/alertmanager-serviceAccount.yaml
@@ -1,5 +1,12 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
name: alertmanager-main
namespace: monitoring
diff --git a/manifests/alertmanager-serviceMonitor.yaml b/manifests/alertmanager-serviceMonitor.yaml
index 548af0d..492a9f0 100644
--- a/manifests/alertmanager-serviceMonitor.yaml
+++ b/manifests/alertmanager-serviceMonitor.yaml
@@ -2,13 +2,22 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
- k8s-app: alertmanager
- name: alertmanager
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ name: alertmanager-main
namespace: monitoring
spec:
endpoints:
- interval: 30s
port: web
+ - interval: 30s
+ port: reloader-web
selector:
matchLabels:
- alertmanager: main
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/grafana-config.yaml b/manifests/grafana-config.yaml
index 750e3c8..10d9c6a 100644
--- a/manifests/grafana-config.yaml
+++ b/manifests/grafana-config.yaml
@@ -1,8 +1,15 @@
apiVersion: v1
-data:
- grafana.ini: W2F1dGguYW5vbnltb3VzXQplbmFibGVkID0gZmFsc2UKW2F1dGguYmFzaWNdCmVuYWJsZWQgPSBmYWxzZQpbc2Vzc2lvbl0KcHJvdmlkZXIgPSBtZW1vcnkKW3NtdHBdCmVuYWJsZWQgPSB0cnVlCmZyb21fYWRkcmVzcyA9IG15ZW1haWxAZ21haWwuY29tCmZyb21fbmFtZSA9IEdyYWZhbmEgQWxlcnQKaG9zdCA9IHNtdHAtc2VydmVyLm1vbml0b3Jpbmcuc3ZjOjI1CnBhc3N3b3JkID0gCnNraXBfdmVyaWZ5ID0gdHJ1ZQp1c2VyID0gCg==
kind: Secret
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-config
namespace: monitoring
+stringData:
+ grafana.ini: |
+ [date_formats]
+ default_timezone = UTC
type: Opaque
diff --git a/manifests/grafana-dashboardDatasources.yaml b/manifests/grafana-dashboardDatasources.yaml
index 22d4748..f4c4cde 100644
--- a/manifests/grafana-dashboardDatasources.yaml
+++ b/manifests/grafana-dashboardDatasources.yaml
@@ -1,8 +1,27 @@
apiVersion: v1
-data:
- datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
kind: Secret
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-datasources
namespace: monitoring
+stringData:
+ datasources.yaml: |-
+ {
+ "apiVersion": 1,
+ "datasources": [
+ {
+ "access": "proxy",
+ "editable": false,
+ "name": "prometheus",
+ "orgId": 1,
+ "type": "prometheus",
+ "url": "http://prometheus-k8s.monitoring.svc:9090",
+ "version": 1
+ }
+ ]
+ }
type: Opaque
diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml
index e7a908e..9ec56fe 100644
--- a/manifests/grafana-dashboardDefinitions.yaml
+++ b/manifests/grafana-dashboardDefinitions.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
items:
- apiVersion: v1
data:
- apiserver.json: |-
+ alertmanager-overview.json: |-
{
"__inputs": [
@@ -17,122 +17,18 @@ items:
},
"editable": false,
"gnetId": null,
- "graphTooltip": 0,
+ "graphTooltip": 1,
"hideControls": false,
"id": null,
"links": [
],
- "panels": [
- {
- "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
- "datasource": null,
- "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
- "gridPos": {
- "h": 2,
- "w": 24,
- "x": 0,
- "y": 0
- },
- "id": 2,
- "mode": "markdown",
- "span": 12,
- "title": "Notice",
- "type": "text"
- }
- ],
- "refresh": "10s",
+ "refresh": "30s",
"rows": [
{
"collapse": false,
"collapsed": false,
"panels": [
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "decimals": 3,
- "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?",
- "format": "percentunit",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 3,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 4,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "apiserver_request:availability30d{verb=\"all\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Availability (30d) > 99.000%",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
- },
{
"aliasColors": {
@@ -141,13 +37,13 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "decimals": 3,
- "description": "How much error budget is left looking at our 0.990% availability gurantees?",
- "fill": 10,
+ "description": "current set of alerts stored in the Alertmanager",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 4,
+ "id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -155,7 +51,7 @@ items:
"max": false,
"min": false,
"rightSide": false,
- "show": true,
+ "show": false,
"sideWidth": null,
"total": false,
"values": false
@@ -175,15 +71,15 @@ items:
],
"spaceLength": 10,
- "span": 8,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "100 * (apiserver_request:availability30d{verb=\"all\"} - 0.990000)",
+ "expr": "sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"}) by (namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "errorbudget",
+ "legendFormat": "{{instance}}",
"refId": "A"
}
],
@@ -192,9 +88,9 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "ErrorBudget (30d) > 99.000%",
+ "title": "Alerts",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -210,8 +106,7 @@ items:
},
"yaxes": [
{
- "decimals": 3,
- "format": "percentunit",
+ "format": "none",
"label": null,
"logBase": 1,
"max": null,
@@ -219,8 +114,7 @@ items:
"show": true
},
{
- "decimals": 3,
- "format": "percentunit",
+ "format": "none",
"label": null,
"logBase": 1,
"max": null,
@@ -228,105 +122,6 @@ items:
"show": true
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "decimals": 3,
- "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?",
- "format": "percentunit",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 5,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "apiserver_request:availability30d{verb=\"read\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Read Availability (30d)",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
},
{
"aliasColors": {
@@ -336,12 +131,13 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "description": "How many read requests (LIST,GET) per second do the apiservers get by code?",
- "fill": 10,
+ "description": "rate of successful and invalid alerts received by the Alertmanager",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 6,
+ "id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -349,7 +145,7 @@ items:
"max": false,
"min": false,
"rightSide": false,
- "show": true,
+ "show": false,
"sideWidth": null,
"total": false,
"values": false
@@ -366,34 +162,26 @@ items:
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
- {
- "alias": "/2../i",
- "color": "#56A64B"
- },
- {
- "alias": "/3../i",
- "color": "#F2CC0C"
- },
- {
- "alias": "/4../i",
- "color": "#3274D9"
- },
- {
- "alias": "/5../i",
- "color": "#E02F44"
- }
+
],
"spaceLength": 10,
- "span": 3,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\"})",
+ "expr": "sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{ code }}",
+ "legendFormat": "{{instance}} Received",
"refId": "A"
+ },
+ {
+ "expr": "sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Invalid",
+ "refId": "B"
}
],
"thresholds": [
@@ -401,9 +189,9 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Read SLI - Requests",
+ "title": "Alerts receive rate",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -419,7 +207,7 @@ items:
},
"yaxes": [
{
- "format": "reqps",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -427,7 +215,7 @@ items:
"show": true
},
{
- "format": "reqps",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -435,7 +223,20 @@ items:
"show": true
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Alerts",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
{
"aliasColors": {
@@ -444,12 +245,13 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?",
+ "description": "rate of successful and invalid notifications sent by the Alertmanager",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 7,
+ "id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -457,7 +259,7 @@ items:
"max": false,
"min": false,
"rightSide": false,
- "show": true,
+ "show": false,
"sideWidth": null,
"total": false,
"values": false
@@ -472,21 +274,27 @@ items:
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
+ "repeat": "integration",
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 3,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\"})",
+ "expr": "sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{ resource }}",
+ "legendFormat": "{{instance}} Total",
"refId": "A"
+ },
+ {
+ "expr": "sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Failed",
+ "refId": "B"
}
],
"thresholds": [
@@ -494,9 +302,9 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Read SLI - Errors",
+ "title": "$integration: Notifications Send Rate",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -512,19 +320,19 @@ items:
},
"yaxes": [
{
- "format": "percentunit",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "percentunit",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
}
]
@@ -537,12 +345,13 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?",
+ "description": "latency of notifications sent by the Alertmanager",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 8,
+ "id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -550,7 +359,7 @@ items:
"max": false,
"min": false,
"rightSide": false,
- "show": true,
+ "show": false,
"sideWidth": null,
"total": false,
"values": false
@@ -565,21 +374,34 @@ items:
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
+ "repeat": "integration",
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 3,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"read\"}",
+ "expr": "histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{ resource }}",
+ "legendFormat": "{{instance}} 99th Percentile",
"refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Median",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Average",
+ "refId": "C"
}
],
"thresholds": [
@@ -587,9 +409,9 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Read SLI - Duration",
+ "title": "$integration: Notification Duration",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -626,11 +448,204 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
+ "showTitle": true,
+ "title": "Notifications",
"titleSize": "h6",
"type": "row"
- },
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "alertmanager-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "Prometheus",
+ "value": "Prometheus"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "namespace",
+ "multi": false,
+ "name": "namespace",
+ "options": [
+
+ ],
+ "query": "label_values(alertmanager_alerts, namespace)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "service",
+ "multi": false,
+ "name": "service",
+ "options": [
+
+ ],
+ "query": "label_values(alertmanager_alerts, service)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "all",
+ "value": "$__all"
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "integration",
+ "options": [
+
+ ],
+ "query": "label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Alertmanager / Overview",
+ "uid": "alertmanager-overview",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-alertmanager-overview
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ apiserver.json: |-
+ {
+ "__inputs": [
+
+ ],
+ "__requires": [
+
+ ],
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [
+
+ ],
+ "panels": [
+ {
+ "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
+ "datasource": null,
+ "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "mode": "markdown",
+ "span": 12,
+ "title": "Notice",
+ "type": "text"
+ }
+ ],
+ "refresh": "10s",
+ "rows": [
{
"collapse": false,
"collapsed": false,
@@ -646,7 +661,7 @@ items:
],
"datasource": "$datasource",
"decimals": 3,
- "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?",
+ "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?",
"format": "percentunit",
"gauge": {
"maxValue": 100,
@@ -658,8 +673,213 @@ items:
"gridPos": {
},
- "id": 9,
- "interval": null,
+ "id": 3,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Availability (30d) > 99.000%",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 3,
+ "description": "How much error budget is left looking at our 0.990% availability guarantees?",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 4,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 8,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "errorbudget",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ErrorBudget (30d) > 99.000%",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "decimals": 3,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "decimals": 3,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "decimals": 3,
+ "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 5,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -698,7 +918,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "apiserver_request:availability30d{verb=\"write\"}",
+ "expr": "apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -706,7 +926,7 @@ items:
}
],
"thresholds": "",
- "title": "Write Availability (30d)",
+ "title": "Read Availability (30d)",
"tooltip": {
"shared": false
},
@@ -729,19 +949,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?",
+ "description": "How many read requests (LIST,GET) per second do the apiservers get by code?",
"fill": 10,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 10,
+ "id": 6,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -782,7 +1004,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\"})",
+ "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ code }}",
@@ -794,7 +1016,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Write SLI - Requests",
+ "title": "Read SLI - Requests",
"tooltip": {
"shared": false,
"sort": 0,
@@ -837,19 +1059,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?",
+ "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 11,
+ "id": 7,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -875,7 +1099,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\"})",
+ "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@@ -887,7 +1111,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Write SLI - Errors",
+ "title": "Read SLI - Errors",
"tooltip": {
"shared": false,
"sort": 0,
@@ -930,19 +1154,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?",
+ "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 12,
+ "id": 8,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -968,7 +1194,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"write\"}",
+ "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@@ -980,7 +1206,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Write SLI - Duration",
+ "title": "Read SLI - Duration",
"tooltip": {
"shared": false,
"sort": 0,
@@ -1028,6 +1254,96 @@ items:
"collapse": false,
"collapsed": false,
"panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "decimals": 3,
+ "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 9,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Write Availability (30d)",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
{
"aliasColors": {
@@ -1036,19 +1352,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
+ "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?",
+ "fill": 10,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 13,
+ "id": 10,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
- "show": false,
+ "rightSide": true,
+ "show": true,
"sideWidth": null,
"total": false,
"values": false
@@ -1065,18 +1384,33 @@ items:
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
-
+ {
+ "alias": "/2../i",
+ "color": "#56A64B"
+ },
+ {
+ "alias": "/3../i",
+ "color": "#F2CC0C"
+ },
+ {
+ "alias": "/4../i",
+ "color": "#3274D9"
+ },
+ {
+ "alias": "/5../i",
+ "color": "#E02F44"
+ }
],
"spaceLength": 10,
- "span": 4,
- "stack": false,
+ "span": 3,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
+ "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{ code }}",
"refId": "A"
}
],
@@ -1085,7 +1419,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Work Queue Add Rate",
+ "title": "Write SLI - Requests",
"tooltip": {
"shared": false,
"sort": 0,
@@ -1103,19 +1437,19 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "reqps",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "ops",
+ "format": "reqps",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
}
]
@@ -1128,19 +1462,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 14,
+ "id": 11,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
- "show": false,
+ "rightSide": true,
+ "show": true,
"sideWidth": null,
"total": false,
"values": false
@@ -1160,15 +1497,15 @@ items:
],
"spaceLength": 10,
- "span": 4,
+ "span": 3,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
+ "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{ resource }}",
"refId": "A"
}
],
@@ -1177,7 +1514,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Work Queue Depth",
+ "title": "Write SLI - Errors",
"tooltip": {
"shared": false,
"sort": 0,
@@ -1195,7 +1532,7 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
@@ -1203,7 +1540,7 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
@@ -1220,22 +1557,25 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 15,
+ "id": 12,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
"linewidth": 1,
@@ -1252,15 +1592,15 @@ items:
],
"spaceLength": 10,
- "span": 4,
+ "span": 3,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))",
+ "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{ resource }}",
"refId": "A"
}
],
@@ -1269,7 +1609,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Work Queue Latency",
+ "title": "Write SLI - Duration",
"tooltip": {
"shared": false,
"sort": 0,
@@ -1326,18 +1666,20 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 16,
+ "id": 13,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
- "show": true,
+ "rightSide": true,
+ "show": false,
"sideWidth": null,
"total": false,
"values": false
@@ -1357,15 +1699,15 @@ items:
],
"spaceLength": 10,
- "span": 4,
+ "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}",
+ "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
+ "legendFormat": "{{instance}} {{name}}",
"refId": "A"
}
],
@@ -1374,7 +1716,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "ETCD Cache Entry Total",
+ "title": "Work Queue Add Rate",
"tooltip": {
"shared": false,
"sort": 0,
@@ -1392,7 +1734,7 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -1400,7 +1742,7 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -1418,18 +1760,20 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 17,
+ "id": 14,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
- "show": true,
+ "rightSide": true,
+ "show": false,
"sideWidth": null,
"total": false,
"values": false
@@ -1449,23 +1793,16 @@ items:
],
"spaceLength": 10,
- "span": 4,
+ "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)",
+ "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} hit",
+ "legendFormat": "{{instance}} {{name}}",
"refId": "A"
- },
- {
- "expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} miss",
- "refId": "B"
}
],
"thresholds": [
@@ -1473,7 +1810,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "ETCD Cache Hit/Miss Rate",
+ "title": "Work Queue Depth",
"tooltip": {
"shared": false,
"sort": 0,
@@ -1491,7 +1828,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -1499,7 +1836,7 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -1517,21 +1854,23 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 18,
+ "id": 15,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
- "current": false,
+ "current": true,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
- "values": false
+ "values": true
},
"lines": true,
"linewidth": 1,
@@ -1548,23 +1887,16 @@ items:
],
"spaceLength": 10,
- "span": 4,
+ "span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} get",
+ "legendFormat": "{{instance}} {{name}}",
"refId": "A"
- },
- {
- "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} miss",
- "refId": "B"
}
],
"thresholds": [
@@ -1572,7 +1904,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "ETCD Cache Duration 99th Quantile",
+ "title": "Work Queue Latency",
"tooltip": {
"shared": false,
"sort": 0,
@@ -1594,7 +1926,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -1602,7 +1934,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
}
]
@@ -1629,17 +1961,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 19,
+ "id": 16,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1721,17 +2055,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 20,
+ "id": 17,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1757,7 +2093,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -1813,17 +2149,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 21,
+ "id": 18,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1919,7 +2257,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -1932,20 +2270,19 @@ items:
{
"allValue": null,
"current": {
- "text": "prod",
- "value": "prod"
+
},
"datasource": "$datasource",
"hide": 2,
"includeAll": false,
- "label": null,
+ "label": "cluster",
"multi": false,
"name": "cluster",
"options": [
],
- "query": "label_values(apiserver_request_total, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"apiserver\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -1970,7 +2307,7 @@ items:
"options": [
],
- "query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
+ "query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -2020,6 +2357,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-apiserver
namespace: monitoring
- apiVersion: v1
@@ -2084,6 +2426,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -2129,7 +2472,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2186,6 +2529,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -2231,7 +2575,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2334,6 +2678,9 @@ items:
"id": 5,
"lines": true,
"linewidth": 1,
+ "links": [
+
+ ],
"minSpan": 24,
"nullPointMode": "null as zero",
"renderer": "flot",
@@ -2529,7 +2876,7 @@ items:
],
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2538,7 +2885,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2547,7 +2894,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2556,7 +2903,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2565,7 +2912,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2574,7 +2921,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2583,7 +2930,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2592,7 +2939,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2626,6 +2973,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -2671,7 +3019,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2728,6 +3076,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -2773,7 +3122,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2860,6 +3209,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -2903,7 +3253,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2960,6 +3310,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3003,7 +3354,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3071,6 +3422,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3114,7 +3466,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3171,6 +3523,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3214,7 +3567,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3291,6 +3644,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3334,7 +3688,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3391,6 +3745,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3434,7 +3789,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3491,6 +3846,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3538,7 +3894,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs[$interval:$resolution])) by (instance))",
+ "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
@@ -3595,6 +3951,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3642,7 +3999,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs[$interval:$resolution])) by (instance))",
+ "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
@@ -3797,7 +4154,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -3806,6 +4163,32 @@ items:
"refresh": 1,
"regex": "",
"type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
]
},
@@ -3845,6 +4228,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-cluster-total
namespace: monitoring
- apiVersion: v1
@@ -3898,7 +4286,11 @@ items:
},
"id": 2,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -3937,7 +4329,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(up{job=\"kube-controller-manager\"})",
+ "expr": "sum(up{cluster=\"$cluster\", job=\"kube-controller-manager\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -3969,10 +4361,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 3,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -4005,10 +4399,10 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{cluster}} {{instance}} {{name}}",
"refId": "A"
}
],
@@ -4074,10 +4468,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 4,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -4110,10 +4506,10 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{cluster}} {{instance}} {{name}}",
"refId": "A"
}
],
@@ -4179,10 +4575,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 5,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -4215,10 +4613,10 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{cluster}} {{instance}} {{name}}",
"refId": "A"
}
],
@@ -4284,340 +4682,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 6,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "2xx",
- "refId": "A"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "3xx",
- "refId": "B"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "4xx",
- "refId": "C"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "5xx",
- "refId": "D"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Kube API Request Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 7,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 8,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{verb}} {{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Post Request Latency 99th Quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 8,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{verb}} {{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Get Request Latency 99th Quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 9,
- "legend": {
- "alignAsTable": false,
- "avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -4643,7 +4720,336 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}",
+ "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "2xx",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "3xx",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "4xx",
+ "refId": "C"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "5xx",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Kube API Request Rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 7,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 8,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{verb}} {{url}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Post Request Latency 99th Quantile",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 8,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{verb}} {{url}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Get Request Latency 99th Quantile",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 9,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -4699,17 +5105,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 10,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -4735,7 +5143,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -4791,17 +5199,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 11,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -4827,7 +5237,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}",
+ "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -4897,7 +5307,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -4911,6 +5321,32 @@ items:
"allValue": null,
"current": {
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+
},
"datasource": "$datasource",
"hide": 0,
@@ -4921,7 +5357,7 @@ items:
"options": [
],
- "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)",
+ "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -4971,11 +5407,16 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-controller-manager
namespace: monitoring
- apiVersion: v1
data:
- coredns-dashboard.json: |-
+ grafana-overview.json: |-
{
"annotations": {
"list": [
@@ -4986,238 +5427,248 @@ items:
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [
+
+ ],
+ "type": "dashboard"
+ },
"type": "dashboard"
}
]
},
- "description": "A dashboard for the CoreDNS DNS server.",
"editable": true,
- "gnetId": 5926,
+ "gnetId": null,
"graphTooltip": 0,
- "id": 14,
- "iteration": 1549319226130,
+ "id": 3085,
+ "iteration": 1631554945276,
"links": [
],
"panels": [
{
- "aliasColors": {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ ],
+ "noValue": "0",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ ]
},
"gridPos": {
- "h": 7,
- "w": 8,
+ "h": 5,
+ "w": 6,
"x": 0,
"y": 0
},
- "id": 1,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
+ "id": 6,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "total",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (proto)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}",
- "refId": "A",
- "step": 60
},
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.1.3",
+ "targets": [
{
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "total",
- "refId": "B",
- "step": 60
+ "expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
}
- ],
- "thresholds": [
-
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Requests (total)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "title": "Firing Alerts",
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
]
},
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "gridPos": {
+ "h": 5,
+ "w": 6,
+ "x": 6,
+ "y": 0
+ },
+ "id": 8,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {
+
},
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.1.3",
+ "targets": [
{
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Dashboards",
+ "type": "stat"
},
{
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": null,
+ "displayMode": "auto"
+ },
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ ]
},
"gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
+ "h": 5,
+ "w": 12,
+ "x": 12,
"y": 0
},
- "id": 12,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
+ "id": 10,
+ "options": {
+ "showHeader": true
},
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "total",
- "yaxis": 2
- },
- {
- "alias": "other",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
+ "pluginVersion": "8.1.3",
"targets": [
{
- "expr": "sum(rate(coredns_dns_request_type_count_total{instance=~\"$instance\"}[5m])) by (type)",
- "intervalFactor": 2,
- "legendFormat": "{{type}}",
- "refId": "A",
- "step": 60
+ "expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
}
- ],
- "thresholds": [
-
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Requests (by qtype)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
+ "title": "Build Info",
+ "transformations": [
{
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "id": "labelsToFields",
+ "options": {
+
+ }
},
{
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "branch": true,
+ "container": true,
+ "goversion": true,
+ "namespace": true,
+ "pod": true,
+ "revision": true
+ },
+ "indexByName": {
+ "Time": 7,
+ "Value": 11,
+ "branch": 4,
+ "container": 8,
+ "edition": 2,
+ "goversion": 6,
+ "instance": 1,
+ "job": 0,
+ "namespace": 9,
+ "pod": 10,
+ "revision": 5,
+ "version": 3
+ },
+ "renameByName": {
+
+ }
+ }
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "type": "table"
},
{
"aliasColors": {
@@ -5226,130 +5677,27 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 0
- },
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "total",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (zone)",
- "intervalFactor": 2,
- "legendFormat": "{{zone}}",
- "refId": "A",
- "step": 60
+ ]
},
- {
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))",
- "intervalFactor": 2,
- "legendFormat": "total",
- "refId": "B",
- "step": 60
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Requests (by zone)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "overrides": [
]
},
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
"fill": 1,
- "grid": {
-
- },
+ "fillGradient": 0,
"gridPos": {
- "h": 7,
+ "h": 8,
"w": 12,
"x": 0,
- "y": 7
+ "y": 5
},
- "id": 10,
+ "hiddenSeries": false,
+ "id": 2,
"legend": {
"avg": false,
"current": false,
@@ -5360,38 +5708,28 @@ items:
"values": false
},
"lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
"percentage": false,
- "pointradius": 5,
+ "pluginVersion": "8.1.3",
+ "pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
- {
- "alias": "total",
- "yaxis": 2
- }
+
],
"spaceLength": 10,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(coredns_dns_request_do_count_total{instance=~\"$instance\"}[5m]))",
- "intervalFactor": 2,
- "legendFormat": "DO",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))",
- "intervalFactor": 2,
- "legendFormat": "total",
- "refId": "B",
- "step": 40
+ "expr": "sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ",
+ "interval": "",
+ "legendFormat": "{{status_code}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -5402,11 +5740,11 @@ items:
],
"timeShift": null,
- "title": "Requests (DO bit)",
+ "title": "RPS",
"tooltip": {
"shared": true,
"sort": 0,
- "value_type": "cumulative"
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -5420,18 +5758,22 @@ items:
},
"yaxes": [
{
- "format": "pps",
+ "$$hashKey": "object:157",
+ "format": "reqps",
+ "label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "pps",
+ "$$hashKey": "object:158",
+ "format": "short",
+ "label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
],
"yaxis": {
@@ -5446,20 +5788,27 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
+ ]
+ },
+ "overrides": [
+
+ ]
},
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 7,
- "w": 6,
+ "h": 8,
+ "w": 12,
"x": 12,
- "y": 7
+ "y": 5
},
- "id": 9,
+ "hiddenSeries": false,
+ "id": 4,
"legend": {
"avg": false,
"current": false,
@@ -5470,53 +5819,43 @@ items:
"values": false
},
"lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
"percentage": false,
- "pointradius": 5,
+ "pluginVersion": "8.1.3",
+ "pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
- {
- "alias": "tcp:90",
- "yaxis": 2
- },
- {
- "alias": "tcp:99 ",
- "yaxis": 2
- },
- {
- "alias": "tcp:50",
- "yaxis": 2
- }
+
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:99 ",
- "refId": "A",
- "step": 60
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1",
+ "interval": "",
+ "legendFormat": "99th Percentile",
+ "refId": "A"
},
{
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:90",
- "refId": "B",
- "step": 60
+ "exemplar": true,
+ "expr": "histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1",
+ "interval": "",
+ "legendFormat": "50th Percentile",
+ "refId": "B"
},
{
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:50",
- "refId": "C",
- "step": 60
+ "exemplar": true,
+ "expr": "sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))",
+ "interval": "",
+ "legendFormat": "Average",
+ "refId": "C"
}
],
"thresholds": [
@@ -5527,11 +5866,11 @@ items:
],
"timeShift": null,
- "title": "Requests (size, udp)",
+ "title": "Request Latency",
"tooltip": {
"shared": true,
"sort": 0,
- "value_type": "cumulative"
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -5545,17 +5884,21 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "$$hashKey": "object:210",
+ "format": "ms",
+ "label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
+ "$$hashKey": "object:211",
"format": "short",
+ "label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
}
],
@@ -5563,916 +5906,509 @@ items:
"align": false,
"alignLevel": null
}
- },
- {
- "aliasColors": {
+ }
+ ],
+ "schemaVersion": 30,
+ "style": "dark",
+ "tags": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": true,
+ "text": "dev-cortex",
+ "value": "dev-cortex"
+ },
+ "description": null,
+ "error": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "datasource",
+ "options": [
+ ],
+ "query": "prometheus",
+ "queryValue": "",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
},
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 7
- },
- "id": 14,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "tcp:90",
- "yaxis": 1
- },
- {
- "alias": "tcp:99 ",
- "yaxis": 1
- },
- {
- "alias": "tcp:50",
- "yaxis": 1
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:99 ",
- "refId": "A",
- "step": 60
- },
- {
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:90",
- "refId": "B",
- "step": 60
+ {
+ "allValue": ".*",
+ "current": {
+ "selected": false,
+ "text": [
+ "default/grafana"
+ ],
+ "value": [
+ "default/grafana"
+ ]
},
- {
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:50",
- "refId": "C",
- "step": 60
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
+ "datasource": "$datasource",
+ "definition": "label_values(grafana_build_info, job)",
+ "description": null,
+ "error": null,
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "job",
+ "options": [
- ],
- "timeShift": null,
- "title": "Requests (size,tcp)",
- "tooltip": {
- "shared": true,
+ ],
+ "query": {
+ "query": "label_values(grafana_build_info, job)",
+ "refId": "Billing Admin-job-Variable-Query"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
"sort": 0,
- "value_type": "cumulative"
+ "tagValuesQuery": "",
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
},
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ {
+ "allValue": ".*",
+ "current": {
+ "selected": false,
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": "$datasource",
+ "definition": "label_values(grafana_build_info, instance)",
+ "description": null,
+ "error": null,
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "instance",
+ "options": [
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ ],
+ "query": {
+ "query": "label_values(grafana_build_info, instance)",
+ "refId": "Billing Admin-instance-Variable-Query"
},
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
- },
- {
- "aliasColors": {
+ ]
+ },
+ "time": {
+ "from": "now-6h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "Grafana Overview",
+ "uid": "6be0s85Mk",
+ "version": 2
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-grafana-overview
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ k8s-resources-cluster.json: |-
+ {
+ "annotations": {
+ "list": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "links": [
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 14
- },
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
+ ],
+ "refresh": "10s",
+ "rows": [
+ {
+ "collapse": false,
+ "height": "100px",
+ "panels": [
+ {
+ "aliasColors": {
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(coredns_dns_response_rcode_count_total{instance=~\"$instance\"}[5m])) by (rcode)",
- "intervalFactor": 2,
- "legendFormat": "{{rcode}}",
- "refId": "A",
- "step": 40
- }
- ],
- "thresholds": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "format": "percentunit",
+ "id": 1,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
- ],
- "timeShift": null,
- "title": "Responses (by rcode)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "spaceLength": 10,
+ "span": 2,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Utilisation",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "singlestat",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
},
{
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
+ "aliasColors": {
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "format": "percentunit",
+ "id": 2,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 14
- },
- "id": 3,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "spaceLength": 10,
+ "span": 2,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Requests Commitment",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "singlestat",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le, job))",
- "intervalFactor": 2,
- "legendFormat": "99%",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le))",
- "intervalFactor": 2,
- "legendFormat": "90%",
- "refId": "B",
- "step": 40
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
},
{
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le))",
- "intervalFactor": 2,
- "legendFormat": "50%",
- "refId": "C",
- "step": 40
- }
- ],
- "thresholds": [
+ "aliasColors": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "format": "percentunit",
+ "id": 3,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "timeShift": null,
- "title": "Responses (duration)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
- ]
- },
- "yaxes": [
- {
- "format": "ms",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ ],
+ "spaceLength": 10,
+ "span": 2,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Limits Commitment",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "singlestat",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
},
{
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
+ "aliasColors": {
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "format": "percentunit",
+ "id": 4,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 21
- },
- "id": 8,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "udp:50%",
- "yaxis": 1
- },
- {
- "alias": "tcp:50%",
- "yaxis": 2
- },
- {
- "alias": "tcp:90%",
- "yaxis": 2
+ ],
+ "spaceLength": 10,
+ "span": 2,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Utilisation",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "singlestat",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
},
- {
- "alias": "tcp:99%",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:99%",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:90%",
- "refId": "B",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:50%",
- "metric": "",
- "refId": "C",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Responses (size, udp)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 21
- },
- "id": 13,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "udp:50%",
- "yaxis": 1
- },
- {
- "alias": "tcp:50%",
- "yaxis": 1
- },
- {
- "alias": "tcp:90%",
- "yaxis": 1
- },
- {
- "alias": "tcp:99%",
- "yaxis": 1
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:99%",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:90%",
- "refId": "B",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:50%",
- "metric": "",
- "refId": "C",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Responses (size, tcp)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 28
- },
- "id": 15,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(coredns_cache_size{instance=~\"$instance\"}) by (type)",
- "intervalFactor": 2,
- "legendFormat": "{{type}}",
- "refId": "A",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Cache (size)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 28
- },
- "id": 16,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "misses",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\"}[5m])) by (type)",
- "intervalFactor": 2,
- "legendFormat": "hits:{{type}}",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "sum(rate(coredns_cache_misses_total{instance=~\"$instance\"}[5m])) by (type)",
- "intervalFactor": 2,
- "legendFormat": "misses",
- "refId": "B",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Cache (hitrate)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- }
- ],
- "schemaVersion": 16,
- "style": "dark",
- "tags": [
- "dns",
- "coredns"
- ],
- "templating": {
- "list": [
- {
- "allValue": ".*",
- "current": {
- "text": "All",
- "value": "$__all"
- },
- "datasource": "prometheus",
- "definition": "",
- "hide": 0,
- "includeAll": true,
- "label": "Instance",
- "multi": false,
- "name": "instance",
- "options": [
-
- ],
- "query": "up{job=\"coredns\"}",
- "refresh": 1,
- "regex": ".*instance=\"(.*?)\".*",
- "skipUrlSync": false,
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-3h",
- "to": "now"
- },
- "timepicker": {
- "now": true,
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "utc",
- "title": "CoreDNS",
- "version": 1
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-coredns-dashboard
- namespace: monitoring
-- apiVersion: v1
- data:
- k8s-resources-cluster.json: |-
- {
- "annotations": {
- "list": [
-
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
- {
- "collapse": false,
- "height": "100px",
- "panels": [
{
"aliasColors": {
@@ -6483,13 +6419,15 @@ items:
"datasource": "$datasource",
"fill": 1,
"format": "percentunit",
- "id": 1,
+ "id": 5,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -6513,7 +6451,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__interval]))",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -6523,10 +6461,10 @@ items:
"thresholds": "70,80",
"timeFrom": null,
"timeShift": null,
- "title": "CPU Utilisation",
+ "title": "Memory Requests Commitment",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -6568,12 +6506,15 @@ items:
"datasource": "$datasource",
"fill": 1,
"format": "percentunit",
- "id": 2,
+ "id": 6,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -6597,7 +6538,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -6607,10 +6548,10 @@ items:
"thresholds": "70,80",
"timeFrom": null,
"timeShift": null,
- "title": "CPU Requests Commitment",
+ "title": "Memory Limits Commitment",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -6641,7 +6582,19 @@ items:
"show": false
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Headlines",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
"aliasColors": {
@@ -6650,20 +6603,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "format": "percentunit",
- "id": 3,
+ "fill": 10,
+ "id": 7,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
@@ -6676,28 +6631,31 @@ items:
],
"spaceLength": 10,
- "span": 2,
- "stack": false,
+ "span": 12,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "time_series",
- "instant": true,
"intervalFactor": 2,
- "refId": "A"
+ "legendFormat": "{{namespace}}",
+ "legendLink": null,
+ "step": 10
}
],
- "thresholds": "70,80",
+ "thresholds": [
+
+ ],
"timeFrom": null,
"timeShift": null,
- "title": "CPU Limits Commitment",
+ "title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "type": "singlestat",
+ "type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -6725,7 +6683,19 @@ items:
"show": false
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
"aliasColors": {
@@ -6735,374 +6705,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "format": "percentunit",
- "id": 4,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 2,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "70,80",
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "singlestat",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "format": "percentunit",
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 2,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "70,80",
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Requests Commitment",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "singlestat",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "format": "percentunit",
- "id": 6,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 2,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "70,80",
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Limits Commitment",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "singlestat",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Headlines",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 7,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{namespace}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "CPU",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 8,
+ "id": 8,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7140,8 +6751,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #A",
"thresholds": [
@@ -7158,8 +6770,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to workloads",
- "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
+ "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #B",
"thresholds": [
@@ -7176,6 +6789,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -7194,6 +6808,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -7212,6 +6827,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -7230,6 +6846,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -7248,6 +6865,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@@ -7266,8 +6884,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@@ -7293,7 +6912,7 @@ items:
],
"targets": [
{
- "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7302,7 +6921,7 @@ items:
"step": 10
},
{
- "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
+ "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7311,7 +6930,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7320,7 +6939,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7329,7 +6948,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7338,7 +6957,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7347,7 +6966,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7364,7 +6983,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -7419,11 +7038,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 9,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7447,7 +7069,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -7463,7 +7085,7 @@ items:
"title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -7517,11 +7139,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 10,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7559,8 +7184,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #A",
"thresholds": [
@@ -7577,8 +7203,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to workloads",
- "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
+ "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #B",
"thresholds": [
@@ -7595,6 +7222,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -7613,6 +7241,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -7631,6 +7260,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -7649,6 +7279,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -7667,6 +7298,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@@ -7685,8 +7317,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@@ -7712,7 +7345,7 @@ items:
],
"targets": [
{
- "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7721,7 +7354,7 @@ items:
"step": 10
},
{
- "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
+ "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7730,7 +7363,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7739,7 +7372,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7748,7 +7381,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7757,7 +7390,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7766,7 +7399,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7783,7 +7416,7 @@ items:
"title": "Requests by Namespace",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -7840,10 +7473,12 @@ items:
"id": 11,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7881,6 +7516,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -7899,6 +7535,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -7917,6 +7554,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -7935,6 +7573,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -7953,6 +7592,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -7971,6 +7611,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -7989,8 +7630,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@@ -8016,7 +7658,7 @@ items:
],
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8025,7 +7667,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8034,7 +7676,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8043,7 +7685,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8052,7 +7694,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8061,7 +7703,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8078,7 +7720,7 @@ items:
"title": "Current Network Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -8116,7 +7758,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Current Network Usage",
"titleSize": "h6"
},
{
@@ -8133,11 +7775,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 12,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8156,12 +7801,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8177,7 +7822,7 @@ items:
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8208,19 +7853,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -8231,11 +7864,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 13,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8254,12 +7890,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8275,7 +7911,7 @@ items:
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8312,7 +7948,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -8329,11 +7965,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 14,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8352,12 +7991,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8373,7 +8012,7 @@ items:
"title": "Average Container Bandwidth by Namespace: Received",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8404,19 +8043,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -8427,11 +8054,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 15,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8450,12 +8080,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8471,7 +8101,7 @@ items:
"title": "Average Container Bandwidth by Namespace: Transmitted",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8508,7 +8138,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Average Container Bandwidth by Namespace",
"titleSize": "h6"
},
{
@@ -8525,11 +8155,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 16,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8548,12 +8181,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8569,7 +8202,7 @@ items:
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8584,7 +8217,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -8600,19 +8233,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -8623,11 +8244,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 17,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8646,12 +8270,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8667,7 +8291,7 @@ items:
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8682,7 +8306,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -8704,7 +8328,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -8721,11 +8345,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 18,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8744,12 +8371,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8765,7 +8392,7 @@ items:
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8780,7 +8407,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -8796,19 +8423,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -8819,11 +8434,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 19,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8842,12 +8460,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8863,7 +8481,7 @@ items:
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8878,7 +8496,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -8900,121 +8518,12 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets Dropped",
"titleSize": "h6"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
- "kubernetes-mixin"
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
- "text": "",
- "value": ""
- },
- "datasource": "$datasource",
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "cluster",
- "options": [
-
- ],
- "query": "label_values(node_cpu_seconds_total, cluster)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "Kubernetes / Compute Resources / Cluster",
- "uid": "efa86fd1d0c121a26444b636a3f509a8",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-k8s-resources-cluster
- namespace: monitoring
-- apiVersion: v1
- data:
- k8s-resources-namespace.json: |-
- {
- "annotations": {
- "list": [
-
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
+ },
{
"collapse": false,
- "height": "100px",
+ "height": "250px",
"panels": [
{
"aliasColors": {
@@ -9024,20 +8533,23 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "format": "percentunit",
- "id": 1,
+ "decimals": -1,
+ "fill": 10,
+ "id": 20,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
@@ -9050,28 +8562,31 @@ items:
],
"spaceLength": 10,
- "span": 3,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})",
+ "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))",
"format": "time_series",
- "instant": true,
"intervalFactor": 2,
- "refId": "A"
+ "legendFormat": "{{namespace}}",
+ "legendLink": null,
+ "step": 10
}
],
- "thresholds": "70,80",
+ "thresholds": [
+
+ ],
"timeFrom": null,
"timeShift": null,
- "title": "CPU Utilisation (from requests)",
+ "title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "type": "singlestat",
+ "type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -9100,270 +8615,6 @@ items:
}
]
},
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "format": "percentunit",
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 3,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "70,80",
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Utilisation (from limits)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "singlestat",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "format": "percentunit",
- "id": 3,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 3,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"})",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "70,80",
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Utilization (from requests)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "singlestat",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "format": "percentunit",
- "id": 4,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 3,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"})",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "70,80",
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Utilisation (from limits)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "singlestat",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Headlines",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
{
"aliasColors": {
@@ -9373,12 +8624,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 5,
+ "id": 21,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9394,53 +8648,18 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
- {
- "alias": "quota - requests",
- "color": "#F2495C",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- },
- {
- "alias": "quota - limits",
- "color": "#FF9830",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- }
+
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "quota - requests",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})",
+ "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "quota - limits",
+ "legendFormat": "{{namespace}}",
"legendLink": null,
"step": 10
}
@@ -9450,10 +8669,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "CPU Usage",
+ "title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -9468,7 +8687,7 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -9490,7 +8709,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "CPU Usage",
+ "title": "Storage IO",
"titleSize": "h6"
},
{
@@ -9506,12 +8725,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "id": 6,
+ "id": 22,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9529,6 +8751,10 @@ items:
"seriesOverrides": [
],
+ "sort": {
+ "col": 4,
+ "desc": true
+ },
"spaceLength": 10,
"span": 12,
"stack": false,
@@ -9541,14 +8767,15 @@ items:
"type": "hidden"
},
{
- "alias": "CPU Usage",
+ "alias": "IOPS(Reads)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
+ "decimals": -1,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -9559,14 +8786,15 @@ items:
"unit": "short"
},
{
- "alias": "CPU Requests",
+ "alias": "IOPS(Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
+ "decimals": -1,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -9577,14 +8805,15 @@ items:
"unit": "short"
},
{
- "alias": "CPU Requests %",
+ "alias": "IOPS(Reads + Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
+ "decimals": -1,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -9592,10 +8821,10 @@ items:
],
"type": "number",
- "unit": "percentunit"
+ "unit": "short"
},
{
- "alias": "CPU Limits",
+ "alias": "Throughput(Read)",
"colorMode": null,
"colors": [
@@ -9603,6 +8832,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -9610,10 +8840,10 @@ items:
],
"type": "number",
- "unit": "short"
+ "unit": "Bps"
},
{
- "alias": "CPU Limits %",
+ "alias": "Throughput(Write)",
"colorMode": null,
"colors": [
@@ -9621,6 +8851,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -9628,20 +8859,40 @@ items:
],
"type": "number",
- "unit": "percentunit"
+ "unit": "Bps"
},
{
- "alias": "Pod",
+ "alias": "Throughput(Read + Write)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
- "link": true,
+ "link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
- "pattern": "pod",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Namespace",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down to pods",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
+ "pattern": "namespace",
"thresholds": [
],
@@ -9666,7 +8917,7 @@ items:
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9675,7 +8926,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9684,7 +8935,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9693,7 +8944,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9702,13 +8953,22 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
+ },
+ {
+ "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
}
],
"thresholds": [
@@ -9716,10 +8976,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "CPU Quota",
+ "title": "Current Storage IO",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -9757,12 +9017,126 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "CPU Quota",
+ "title": "Storage IO - Distribution",
"titleSize": "h6"
- },
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "kubernetes-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "UTC",
+ "title": "Kubernetes / Compute Resources / Cluster",
+ "uid": "efa86fd1d0c121a26444b636a3f509a8",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-k8s-resources-cluster
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ k8s-resources-namespace.json: |-
+ {
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "links": [
+
+ ],
+ "refresh": "10s",
+ "rows": [
{
"collapse": false,
- "height": "250px",
+ "height": "100px",
"panels": [
{
"aliasColors": {
@@ -9772,19 +9146,23 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 7,
+ "fill": 1,
+ "format": "percentunit",
+ "id": 1,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
@@ -9794,42 +9172,403 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
- {
- "alias": "quota - requests",
- "color": "#F2495C",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- },
- {
- "alias": "quota - limits",
- "color": "#FF9830",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- }
+
],
"spaceLength": 10,
- "span": 12,
- "stack": true,
+ "span": 3,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Utilisation (from requests)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "singlestat",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "format": "percentunit",
+ "id": 2,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Utilisation (from limits)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "singlestat",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "format": "percentunit",
+ "id": 3,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Utilisation (from requests)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "singlestat",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "format": "percentunit",
+ "id": 4,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "70,80",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Utilisation (from limits)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "singlestat",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Headlines",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "id": 5,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "quota - requests",
+ "color": "#F2495C",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ },
+ {
+ "alias": "quota - limits",
+ "color": "#FF9830",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
"step": 10
},
{
- "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})",
+ "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - requests",
@@ -9837,7 +9576,7 @@ items:
"step": 10
},
{
- "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})",
+ "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "quota - limits",
@@ -9850,10 +9589,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory Usage (w/o cache)",
+ "title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -9868,7 +9607,7 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -9890,7 +9629,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Memory Usage",
+ "title": "CPU Usage",
"titleSize": "h6"
},
{
@@ -9906,12 +9645,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "id": 8,
+ "id": 6,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9941,7 +9683,7 @@ items:
"type": "hidden"
},
{
- "alias": "Memory Usage",
+ "alias": "CPU Usage",
"colorMode": null,
"colors": [
@@ -9949,6 +9691,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -9956,10 +9699,10 @@ items:
],
"type": "number",
- "unit": "bytes"
+ "unit": "short"
},
{
- "alias": "Memory Requests",
+ "alias": "CPU Requests",
"colorMode": null,
"colors": [
@@ -9967,6 +9710,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -9974,10 +9718,10 @@ items:
],
"type": "number",
- "unit": "bytes"
+ "unit": "short"
},
{
- "alias": "Memory Requests %",
+ "alias": "CPU Requests %",
"colorMode": null,
"colors": [
@@ -9985,6 +9729,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -9995,7 +9740,7 @@ items:
"unit": "percentunit"
},
{
- "alias": "Memory Limits",
+ "alias": "CPU Limits",
"colorMode": null,
"colors": [
@@ -10003,6 +9748,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -10010,10 +9756,10 @@ items:
],
"type": "number",
- "unit": "bytes"
+ "unit": "short"
},
{
- "alias": "Memory Limits %",
+ "alias": "CPU Limits %",
"colorMode": null,
"colors": [
@@ -10021,6 +9767,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -10030,60 +9777,6 @@ items:
"type": "number",
"unit": "percentunit"
},
- {
- "alias": "Memory Usage (RSS)",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #F",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "bytes"
- },
- {
- "alias": "Memory Usage (Cache)",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #G",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "bytes"
- },
- {
- "alias": "Memory Usage (Swap)",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #H",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "bytes"
- },
{
"alias": "Pod",
"colorMode": null,
@@ -10093,8 +9786,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -10120,7 +9814,7 @@ items:
],
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10129,7 +9823,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10138,7 +9832,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10147,7 +9841,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10156,40 +9850,13 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
- },
- {
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "F",
- "step": 10
- },
- {
- "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "G",
- "step": 10
- },
- {
- "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "H",
- "step": 10
}
],
"thresholds": [
@@ -10197,10 +9864,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory Quota",
+ "title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -10238,7 +9905,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Memory Quota",
+ "title": "CPU Quota",
"titleSize": "h6"
},
{
@@ -10253,20 +9920,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 9,
+ "fill": 10,
+ "id": 7,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
@@ -10276,21 +9945,159 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "styles": [
- {
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
- "type": "hidden"
- },
{
- "alias": "Current Receive Bandwidth",
+ "alias": "quota - requests",
+ "color": "#F2495C",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ },
+ {
+ "alias": "quota - limits",
+ "color": "#FF9830",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "quota - requests",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "quota - limits",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Usage (w/o cache)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Memory Usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "id": 8,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Memory Usage",
"colorMode": null,
"colors": [
@@ -10298,6 +10105,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -10305,10 +10113,10 @@ items:
],
"type": "number",
- "unit": "Bps"
+ "unit": "bytes"
},
{
- "alias": "Current Transmit Bandwidth",
+ "alias": "Memory Requests",
"colorMode": null,
"colors": [
@@ -10316,6 +10124,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -10323,10 +10132,10 @@ items:
],
"type": "number",
- "unit": "Bps"
+ "unit": "bytes"
},
{
- "alias": "Rate of Received Packets",
+ "alias": "Memory Requests %",
"colorMode": null,
"colors": [
@@ -10334,6 +10143,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -10341,10 +10151,10 @@ items:
],
"type": "number",
- "unit": "pps"
+ "unit": "percentunit"
},
{
- "alias": "Rate of Transmitted Packets",
+ "alias": "Memory Limits",
"colorMode": null,
"colors": [
@@ -10352,6 +10162,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -10359,10 +10170,10 @@ items:
],
"type": "number",
- "unit": "pps"
+ "unit": "bytes"
},
{
- "alias": "Rate of Received Packets Dropped",
+ "alias": "Memory Limits %",
"colorMode": null,
"colors": [
@@ -10370,6 +10181,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -10377,10 +10189,10 @@ items:
],
"type": "number",
- "unit": "pps"
+ "unit": "percentunit"
},
{
- "alias": "Rate of Transmitted Packets Dropped",
+ "alias": "Memory Usage (RSS)",
"colorMode": null,
"colors": [
@@ -10388,6 +10200,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -10395,7 +10208,45 @@ items:
],
"type": "number",
- "unit": "pps"
+ "unit": "bytes"
+ },
+ {
+ "alias": "Memory Usage (Cache)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #G",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "bytes"
+ },
+ {
+ "alias": "Memory Usage (Swap)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #H",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "bytes"
},
{
"alias": "Pod",
@@ -10406,8 +10257,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
- "linkTooltip": "Drill down to pods",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -10433,7 +10285,7 @@ items:
],
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10442,7 +10294,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10451,7 +10303,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10460,7 +10312,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10469,7 +10321,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10478,13 +10330,31 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F",
"step": 10
+ },
+ {
+ "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "G",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "H",
+ "step": 10
}
],
"thresholds": [
@@ -10492,10 +10362,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Current Network Usage",
+ "title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -10533,7 +10403,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Memory Quota",
"titleSize": "h6"
},
{
@@ -10548,19 +10418,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 10,
+ "fill": 1,
+ "id": 9,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
@@ -10574,15 +10447,217 @@ items:
],
"spaceLength": 10,
"span": 12,
- "stack": true,
+ "stack": false,
"steppedLine": false,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Current Receive Bandwidth",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Current Transmit Bandwidth",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Rate of Received Packets",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Transmitted Packets",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Received Packets Dropped",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Transmitted Packets Dropped",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Pod",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down to pods",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "pattern": "pod",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+
+ ],
+ "type": "string",
+ "unit": "short"
+ }
+ ],
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
- "format": "time_series",
+ "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
"step": 10
}
],
@@ -10591,13 +10666,14 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Receive Bandwidth",
+ "title": "Current Network Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "type": "graph",
+ "transform": "table",
+ "type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -10609,7 +10685,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -10631,7 +10707,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Current Network Usage",
"titleSize": "h6"
},
{
@@ -10647,12 +10723,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 11,
+ "id": 10,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10671,12 +10750,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10689,10 +10768,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Transmit Bandwidth",
+ "title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -10723,19 +10802,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -10745,12 +10812,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 12,
+ "id": 11,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10769,12 +10839,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10787,10 +10857,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rate of Received Packets",
+ "title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -10827,7 +10897,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -10843,12 +10913,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 13,
+ "id": 12,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10867,12 +10940,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10885,10 +10958,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rate of Transmitted Packets",
+ "title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -10903,7 +10976,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -10919,19 +10992,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -10941,12 +11002,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 14,
+ "id": 13,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10965,12 +11029,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10983,10 +11047,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rate of Received Packets Dropped",
+ "title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -11001,7 +11065,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -11023,7 +11087,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -11039,12 +11103,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 15,
+ "id": 14,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11063,12 +11130,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -11081,10 +11148,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rate of Transmitted Packets Dropped",
+ "title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -11099,7 +11166,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -11115,155 +11182,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
- "kubernetes-mixin"
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
- "text": "",
- "value": ""
- },
- "datasource": "$datasource",
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "cluster",
- "options": [
-
- ],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "text": "",
- "value": ""
},
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "namespace",
- "options": [
-
- ],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
- "refresh": 1,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "Kubernetes / Compute Resources / Namespace (Pods)",
- "uid": "85a562078cdf77779eaa1add43ccec1e",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-k8s-resources-namespace
- namespace: monitoring
-- apiVersion: v1
- data:
- k8s-resources-node.json: |-
- {
- "annotations": {
- "list": [
-
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
- {
- "collapse": false,
- "height": "250px",
- "panels": [
{
"aliasColors": {
@@ -11273,12 +11192,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 1,
+ "id": 15,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11297,12 +11219,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -11315,10 +11237,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "CPU Usage",
+ "title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -11333,7 +11255,7 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -11355,7 +11277,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "CPU Usage",
+ "title": "Rate of Packets Dropped",
"titleSize": "h6"
},
{
@@ -11370,19 +11292,23 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 2,
+ "decimals": -1,
+ "fill": 10,
+ "id": 16,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
@@ -11395,184 +11321,16 @@ items:
],
"spaceLength": 10,
- "span": 12,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
- "styles": [
- {
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
- "type": "hidden"
- },
- {
- "alias": "CPU Usage",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #A",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "CPU Requests",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #B",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "CPU Requests %",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #C",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "percentunit"
- },
- {
- "alias": "CPU Limits",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #D",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "CPU Limits %",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #E",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "percentunit"
- },
- {
- "alias": "Pod",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "pod",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [
-
- ],
- "type": "string",
- "unit": "short"
- }
- ],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
- },
- {
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "C",
- "step": 10
- },
- {
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "D",
- "step": 10
- },
- {
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
- "format": "table",
- "instant": true,
+ "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
+ "format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "E",
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
"step": 10
}
],
@@ -11581,14 +11339,13 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "CPU Quota",
+ "title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "transform": "table",
- "type": "table",
+ "type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -11616,19 +11373,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "CPU Quota",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -11638,12 +11383,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 3,
+ "id": 17,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11662,12 +11410,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
+ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -11680,10 +11428,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory Usage (w/o cache)",
+ "title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -11698,7 +11446,7 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -11720,7 +11468,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Memory Usage",
+ "title": "Storage IO",
"titleSize": "h6"
},
{
@@ -11736,12 +11484,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "id": 4,
+ "id": 18,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11759,6 +11510,10 @@ items:
"seriesOverrides": [
],
+ "sort": {
+ "col": 4,
+ "desc": true
+ },
"spaceLength": 10,
"span": 12,
"stack": false,
@@ -11771,14 +11526,15 @@ items:
"type": "hidden"
},
{
- "alias": "Memory Usage",
+ "alias": "IOPS(Reads)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
+ "decimals": -1,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -11786,17 +11542,18 @@ items:
],
"type": "number",
- "unit": "bytes"
+ "unit": "short"
},
{
- "alias": "Memory Requests",
+ "alias": "IOPS(Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
+ "decimals": -1,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -11804,17 +11561,18 @@ items:
],
"type": "number",
- "unit": "bytes"
+ "unit": "short"
},
{
- "alias": "Memory Requests %",
+ "alias": "IOPS(Reads + Writes)",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
+ "decimals": -1,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -11822,10 +11580,10 @@ items:
],
"type": "number",
- "unit": "percentunit"
+ "unit": "short"
},
{
- "alias": "Memory Limits",
+ "alias": "Throughput(Read)",
"colorMode": null,
"colors": [
@@ -11833,6 +11591,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -11840,10 +11599,10 @@ items:
],
"type": "number",
- "unit": "bytes"
+ "unit": "Bps"
},
{
- "alias": "Memory Limits %",
+ "alias": "Throughput(Write)",
"colorMode": null,
"colors": [
@@ -11851,6 +11610,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -11858,10 +11618,10 @@ items:
],
"type": "number",
- "unit": "percentunit"
+ "unit": "Bps"
},
{
- "alias": "Memory Usage (RSS)",
+ "alias": "Throughput(Read + Write)",
"colorMode": null,
"colors": [
@@ -11869,6 +11629,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -11876,43 +11637,7 @@ items:
],
"type": "number",
- "unit": "bytes"
- },
- {
- "alias": "Memory Usage (Cache)",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #G",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "bytes"
- },
- {
- "alias": "Memory Usage (Swap)",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #H",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "bytes"
+ "unit": "Bps"
},
{
"alias": "Pod",
@@ -11922,9 +11647,10 @@ items:
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down to pods",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -11950,7 +11676,7 @@ items:
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
+ "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11959,7 +11685,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11968,7 +11694,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=~\"$node\"}) by (pod)",
+ "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11977,7 +11703,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11986,7 +11712,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=~\"$node\"}) by (pod)",
+ "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11995,31 +11721,13 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
+ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F",
"step": 10
- },
- {
- "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "G",
- "step": 10
- },
- {
- "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "H",
- "step": 10
}
],
"thresholds": [
@@ -12027,10 +11735,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory Quota",
+ "title": "Current Storage IO",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -12068,7 +11776,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Memory Quota",
+ "title": "Storage IO - Distribution",
"titleSize": "h6"
}
],
@@ -12085,7 +11793,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -12110,8 +11818,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -12132,13 +11840,13 @@ items:
"hide": 0,
"includeAll": false,
"label": null,
- "multi": true,
- "name": "node",
+ "multi": false,
+ "name": "namespace",
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)",
- "refresh": 1,
+ "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -12181,17 +11889,22 @@ items:
]
},
"timezone": "UTC",
- "title": "Kubernetes / Compute Resources / Node (Pods)",
- "uid": "200ac8fdbfbb74b39aff88118e4d1c2c",
+ "title": "Kubernetes / Compute Resources / Namespace (Pods)",
+ "uid": "85a562078cdf77779eaa1add43ccec1e",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-k8s-resources-node
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-k8s-resources-namespace
namespace: monitoring
- apiVersion: v1
data:
- k8s-resources-pod.json: |-
+ k8s-resources-node.json: |-
{
"annotations": {
"list": [
@@ -12221,11 +11934,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 1,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12242,18 +11958,11 @@ items:
"renderer": "flot",
"seriesOverrides": [
{
- "alias": "requests",
+ "alias": "max capacity",
"color": "#F2495C",
+ "dashes": true,
"fill": 0,
- "hideTooltip": true,
- "legend": true,
- "linewidth": 2,
- "stack": false
- },
- {
- "alias": "limits",
- "color": "#FF9830",
- "fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
"legend": true,
"linewidth": 2,
@@ -12266,26 +11975,18 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by (container)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{container}}",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
+ "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "requests",
+ "legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "limits",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -12298,7 +11999,7 @@ items:
"title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -12345,123 +12046,21 @@ items:
{
"aliasColors": {
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 2,
- "legend": {
- "avg": false,
- "current": true,
- "max": true,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{container}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 0.25,
- "yaxis": "left"
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Throttling",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "CPU Throttling",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "id": 3,
+ "id": 2,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12499,6 +12098,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -12517,6 +12117,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -12535,6 +12136,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -12553,6 +12155,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -12571,6 +12174,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -12581,7 +12185,7 @@ items:
"unit": "percentunit"
},
{
- "alias": "Container",
+ "alias": "Pod",
"colorMode": null,
"colors": [
@@ -12589,9 +12193,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "container",
+ "pattern": "pod",
"thresholds": [
],
@@ -12616,7 +12221,7 @@ items:
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12625,7 +12230,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12634,7 +12239,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12643,7 +12248,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12652,7 +12257,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12669,7 +12274,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -12723,12 +12328,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 4,
+ "id": 3,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12745,22 +12353,13 @@ items:
"renderer": "flot",
"seriesOverrides": [
{
- "alias": "requests",
+ "alias": "max capacity",
"color": "#F2495C",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- },
- {
- "alias": "limits",
- "color": "#FF9830",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
}
@@ -12771,26 +12370,18 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{container}}",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
+ "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "requests",
+ "legendFormat": "max capacity",
"legendLink": null,
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "limits",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -12800,10 +12391,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory Usage",
+ "title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -12856,12 +12447,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "id": 5,
+ "id": 4,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12899,6 +12493,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -12917,6 +12512,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -12935,6 +12531,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -12953,6 +12550,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -12971,6 +12569,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -12989,6 +12588,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -13007,6 +12607,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@@ -13025,6 +12626,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #H",
@@ -13035,7 +12637,7 @@ items:
"unit": "bytes"
},
{
- "alias": "Container",
+ "alias": "Pod",
"colorMode": null,
"colors": [
@@ -13043,9 +12645,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "container",
+ "pattern": "pod",
"thresholds": [
],
@@ -13070,7 +12673,7 @@ items:
],
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13079,7 +12682,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13088,7 +12691,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13097,7 +12700,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13106,7 +12709,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13115,7 +12718,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13124,7 +12727,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13133,7 +12736,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13150,7 +12753,7 @@ items:
"title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -13190,7 +12793,148 @@ items:
"showTitle": true,
"title": "Memory Quota",
"titleSize": "h6"
- },
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "kubernetes-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": true,
+ "name": "node",
+ "options": [
+
+ ],
+ "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "UTC",
+ "title": "Kubernetes / Compute Resources / Node (Pods)",
+ "uid": "200ac8fdbfbb74b39aff88118e4d1c2c",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-k8s-resources-node
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ k8s-resources-pod.json: |-
+ {
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "links": [
+
+ ],
+ "refresh": "10s",
+ "rows": [
{
"collapse": false,
"height": "250px",
@@ -13204,13 +12948,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 6,
+ "id": 1,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13226,7 +12972,24 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
-
+ {
+ "alias": "requests",
+ "color": "#F2495C",
+ "fill": 0,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ },
+ {
+ "alias": "limits",
+ "color": "#FF9830",
+ "fill": 0,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ }
],
"spaceLength": 10,
"span": 12,
@@ -13234,10 +12997,26 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
+ "legendFormat": "{{container}}",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "requests",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "limits",
"legendLink": null,
"step": 10
}
@@ -13247,10 +13026,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Receive Bandwidth",
+ "title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13265,7 +13044,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -13287,7 +13066,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "CPU Usage",
"titleSize": "h6"
},
{
@@ -13303,13 +13082,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 7,
+ "id": 2,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
- "current": false,
- "max": false,
+ "current": true,
+ "max": true,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13333,23 +13114,30 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
+ "legendFormat": "{{container}}",
"legendLink": null,
"step": 10
}
],
"thresholds": [
-
+ {
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.25,
+ "yaxis": "left"
+ }
],
"timeFrom": null,
"timeShift": null,
- "title": "Transmit Bandwidth",
+ "title": "CPU Throttling",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13364,10 +13152,10 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "percentunit",
"label": null,
"logBase": 1,
- "max": null,
+ "max": 1,
"min": 0,
"show": true
},
@@ -13386,7 +13174,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "CPU Throttling",
"titleSize": "h6"
},
{
@@ -13401,20 +13189,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 8,
+ "fill": 1,
+ "id": 3,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
@@ -13428,114 +13218,189 @@ items:
],
"spaceLength": 10,
"span": 12,
- "stack": true,
+ "stack": false,
"steppedLine": false,
- "targets": [
+ "styles": [
{
- "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "CPU Usage",
+ "colorMode": null,
+ "colors": [
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Received Packets",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
- ]
- },
- "yaxes": [
+ ],
+ "type": "number",
+ "unit": "short"
+ },
{
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "alias": "CPU Requests",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
+ "alias": "CPU Requests %",
+ "colorMode": null,
+ "colors": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 9,
- "interval": "1m",
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "type": "number",
+ "unit": "percentunit"
+ },
+ {
+ "alias": "CPU Limits",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "CPU Limits %",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "percentunit"
+ },
+ {
+ "alias": "Container",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "container",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+ ],
+ "type": "string",
+ "unit": "short"
+ }
],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
- "format": "time_series",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
"step": 10
}
],
@@ -13544,13 +13409,14 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rate of Transmitted Packets",
+ "title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "type": "graph",
+ "transform": "table",
+ "type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -13562,7 +13428,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -13584,7 +13450,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "CPU Quota",
"titleSize": "h6"
},
{
@@ -13600,13 +13466,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 10,
+ "id": 4,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13622,7 +13490,26 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
-
+ {
+ "alias": "requests",
+ "color": "#F2495C",
+ "dashes": true,
+ "fill": 0,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ },
+ {
+ "alias": "limits",
+ "color": "#FF9830",
+ "dashes": true,
+ "fill": 0,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ }
],
"spaceLength": 10,
"span": 12,
@@ -13630,10 +13517,26 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
+ "legendFormat": "{{container}}",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "requests",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "limits",
"legendLink": null,
"step": 10
}
@@ -13643,10 +13546,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rate of Received Packets Dropped",
+ "title": "Memory Usage (WSS)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13661,7 +13564,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "bytes",
"label": null,
"logBase": 1,
"max": null,
@@ -13683,7 +13586,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Memory Usage",
"titleSize": "h6"
},
{
@@ -13698,20 +13601,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 11,
+ "fill": 1,
+ "id": 5,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
@@ -13725,394 +13630,93 @@ items:
],
"spaceLength": 10,
"span": 12,
- "stack": true,
+ "stack": false,
"steppedLine": false,
- "targets": [
+ "styles": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Memory Usage (WSS)",
+ "colorMode": null,
+ "colors": [
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Transmitted Packets Dropped",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
- ]
- },
- "yaxes": [
+ ],
+ "type": "number",
+ "unit": "bytes"
+ },
{
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "alias": "Memory Requests",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "bytes"
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
- "kubernetes-mixin"
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
+ "alias": "Memory Requests %",
+ "colorMode": null,
+ "colors": [
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
- "text": "",
- "value": ""
- },
- "datasource": "$datasource",
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "cluster",
- "options": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
- ],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "text": "",
- "value": ""
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "namespace",
- "options": [
-
- ],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
- "refresh": 1,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "text": "",
- "value": ""
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "pod",
- "options": [
-
- ],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "Kubernetes / Compute Resources / Pod",
- "uid": "6581e46e4e5c7ba40a07646395ef7b23",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-k8s-resources-pod
- namespace: monitoring
-- apiVersion: v1
- data:
- k8s-resources-workload.json: |-
- {
- "annotations": {
- "list": [
-
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 1,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "CPU Usage",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "styles": [
- {
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
- "type": "hidden"
- },
- {
- "alias": "CPU Usage",
- "colorMode": null,
- "colors": [
+ ],
+ "type": "number",
+ "unit": "percentunit"
+ },
+ {
+ "alias": "Memory Limits",
+ "colorMode": null,
+ "colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #A",
+ "pattern": "Value #D",
"thresholds": [
],
"type": "number",
- "unit": "short"
+ "unit": "bytes"
},
{
- "alias": "CPU Requests",
+ "alias": "Memory Limits %",
"colorMode": null,
"colors": [
@@ -14120,17 +13724,18 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #B",
+ "pattern": "Value #E",
"thresholds": [
],
"type": "number",
- "unit": "short"
+ "unit": "percentunit"
},
{
- "alias": "CPU Requests %",
+ "alias": "Memory Usage (RSS)",
"colorMode": null,
"colors": [
@@ -14138,17 +13743,18 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #C",
+ "pattern": "Value #F",
"thresholds": [
],
"type": "number",
- "unit": "percentunit"
+ "unit": "bytes"
},
{
- "alias": "CPU Limits",
+ "alias": "Memory Usage (Cache)",
"colorMode": null,
"colors": [
@@ -14156,17 +13762,18 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #D",
+ "pattern": "Value #G",
"thresholds": [
],
"type": "number",
- "unit": "short"
+ "unit": "bytes"
},
{
- "alias": "CPU Limits %",
+ "alias": "Memory Usage (Swap)",
"colorMode": null,
"colors": [
@@ -14174,27 +13781,29 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #E",
+ "pattern": "Value #H",
"thresholds": [
],
"type": "number",
- "unit": "percentunit"
+ "unit": "bytes"
},
{
- "alias": "Pod",
+ "alias": "Container",
"colorMode": null,
"colors": [
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
- "link": true,
+ "link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
- "pattern": "pod",
+ "linkUrl": "",
+ "pattern": "container",
"thresholds": [
],
@@ -14219,7 +13828,7 @@ items:
],
"targets": [
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14228,7 +13837,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14237,7 +13846,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14246,7 +13855,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14255,13 +13864,40 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
+ },
+ {
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "G",
+ "step": 10
+ },
+ {
+ "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "H",
+ "step": 10
}
],
"thresholds": [
@@ -14269,10 +13905,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "CPU Quota",
+ "title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -14310,7 +13946,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "CPU Quota",
+ "title": "Memory Quota",
"titleSize": "h6"
},
{
@@ -14326,12 +13962,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 3,
+ "id": 6,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -14350,12 +13989,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -14368,10 +14007,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory Usage",
+ "title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -14386,7 +14025,96 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "id": 7,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Transmit Bandwidth",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -14408,7 +14136,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Memory Usage",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -14423,19 +14151,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 4,
+ "fill": 10,
+ "id": 8,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
@@ -14448,184 +14179,105 @@ items:
],
"spaceLength": 10,
- "span": 12,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
- "styles": [
- {
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
- "type": "hidden"
- },
+ "targets": [
{
- "alias": "Memory Usage",
- "colorMode": null,
- "colors": [
+ "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #A",
- "thresholds": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rate of Received Packets",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ],
- "type": "number",
- "unit": "bytes"
- },
+ ]
+ },
+ "yaxes": [
{
- "alias": "Memory Requests",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #B",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "bytes"
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
},
{
- "alias": "Memory Requests %",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #C",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "percentunit"
- },
- {
- "alias": "Memory Limits",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #D",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "bytes"
- },
- {
- "alias": "Memory Limits %",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #E",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "percentunit"
- },
- {
- "alias": "Pod",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": true,
- "linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
- "pattern": "pod",
- "thresholds": [
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "",
- "colorMode": null,
- "colors": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "id": 9,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
- ],
- "type": "string",
- "unit": "short"
- }
],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
"targets": [
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
- },
- {
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "C",
- "step": 10
- },
- {
- "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "D",
- "step": 10
- },
- {
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
- "format": "table",
- "instant": true,
+ "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
+ "format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "E",
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
"step": 10
}
],
@@ -14634,14 +14286,13 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory Quota",
+ "title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "transform": "table",
- "type": "table",
+ "type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -14653,7 +14304,7 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -14675,7 +14326,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Memory Quota",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -14690,20 +14341,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 5,
+ "fill": 10,
+ "id": 10,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
@@ -14716,211 +14369,16 @@ items:
],
"spaceLength": 10,
- "span": 12,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
- "styles": [
- {
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
- "type": "hidden"
- },
- {
- "alias": "Current Receive Bandwidth",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #A",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "Bps"
- },
- {
- "alias": "Current Transmit Bandwidth",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #B",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "Bps"
- },
- {
- "alias": "Rate of Received Packets",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #C",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Transmitted Packets",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #D",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Received Packets Dropped",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #E",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Transmitted Packets Dropped",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #F",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Pod",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": true,
- "linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
- "pattern": "pod",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [
-
- ],
- "type": "string",
- "unit": "short"
- }
- ],
"targets": [
{
- "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
- },
- {
- "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "C",
- "step": 10
- },
- {
- "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "D",
- "step": 10
- },
- {
- "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "E",
- "step": 10
- },
- {
- "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "table",
- "instant": true,
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
+ "format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "F",
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
"step": 10
}
],
@@ -14929,14 +14387,13 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Current Network Usage",
+ "title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "transform": "table",
- "type": "table",
+ "type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -14948,7 +14405,7 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -14964,19 +14421,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -14986,12 +14431,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 6,
+ "id": 11,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15010,12 +14458,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15028,10 +14476,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Receive Bandwidth",
+ "title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15046,7 +14494,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -15068,7 +14516,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets Dropped",
"titleSize": "h6"
},
{
@@ -15083,13 +14531,17 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "decimals": -1,
"fill": 10,
- "id": 7,
+ "id": 12,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15108,15 +14560,23 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
+ "legendFormat": "Reads",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writes",
"legendLink": null,
"step": 10
}
@@ -15126,10 +14586,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Transmit Bandwidth",
+ "title": "IOPS",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15144,7 +14604,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -15160,19 +14620,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -15182,12 +14630,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 8,
+ "id": 13,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15206,15 +14657,23 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
+ "legendFormat": "Reads",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writes",
"legendLink": null,
"step": 10
}
@@ -15224,10 +14683,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Average Container Bandwidth by Pod: Received",
+ "title": "ThroughPut",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15264,7 +14723,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Storage IO - Distribution(Pod - Read & Writes)",
"titleSize": "h6"
},
{
@@ -15279,13 +14738,17 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "decimals": -1,
"fill": 10,
- "id": 9,
+ "id": 14,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15304,15 +14767,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
+ "legendFormat": "{{container}}",
"legendLink": null,
"step": 10
}
@@ -15322,10 +14785,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Average Container Bandwidth by Pod: Transmitted",
+ "title": "IOPS(Reads+Writes)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15340,7 +14803,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -15356,19 +14819,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -15378,12 +14829,15 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 10,
+ "id": 15,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15402,15 +14856,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
+ "legendFormat": "{{container}}",
"legendLink": null,
"step": 10
}
@@ -15420,10 +14874,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rate of Received Packets",
+ "title": "ThroughPut(Read+Write)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15460,7 +14914,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Storage IO - Distribution(Containers)",
"titleSize": "h6"
},
{
@@ -15475,19 +14929,22 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 11,
+ "fill": 1,
+ "id": 16,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
@@ -15499,213 +14956,223 @@ items:
"seriesOverrides": [
],
+ "sort": {
+ "col": 4,
+ "desc": true
+ },
"spaceLength": 10,
"span": 12,
- "stack": true,
+ "stack": false,
"steppedLine": false,
- "targets": [
+ "styles": [
{
- "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "IOPS(Reads)",
+ "colorMode": null,
+ "colors": [
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Transmitted Packets",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
- ]
- },
- "yaxes": [
+ ],
+ "type": "number",
+ "unit": "short"
+ },
{
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "alias": "IOPS(Writes)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
+ "alias": "IOPS(Reads + Writes)",
+ "colorMode": null,
+ "colors": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 12,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Throughput(Read)",
+ "colorMode": null,
+ "colors": [
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
{
- "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
+ "alias": "Throughput(Write)",
+ "colorMode": null,
+ "colors": [
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Received Packets Dropped",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
- ]
- },
- "yaxes": [
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
{
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "alias": "Throughput(Read + Write)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
+ "alias": "Container",
+ "colorMode": null,
+ "colors": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 13,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "container",
+ "thresholds": [
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+ ],
+ "type": "string",
+ "unit": "short"
+ }
],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
- "format": "time_series",
+ "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "{{pod}}",
- "legendLink": null,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
"step": 10
}
],
@@ -15714,13 +15181,14 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rate of Transmitted Packets Dropped",
+ "title": "Current Storage IO",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "type": "graph",
+ "transform": "table",
+ "type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -15732,7 +15200,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -15754,7 +15222,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Storage IO - Distribution",
"titleSize": "h6"
}
],
@@ -15771,7 +15239,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -15796,8 +15264,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -15823,35 +15291,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
- "refresh": 1,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "text": "",
- "value": ""
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "workload",
- "options": [
-
- ],
- "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
- "refresh": 1,
+ "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -15873,12 +15314,12 @@ items:
"includeAll": false,
"label": null,
"multi": false,
- "name": "type",
+ "name": "pod",
"options": [
],
- "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
- "refresh": 1,
+ "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -15921,17 +15362,22 @@ items:
]
},
"timezone": "UTC",
- "title": "Kubernetes / Compute Resources / Workload",
- "uid": "a164a7f0339f99e89cea5cb47e9be617",
+ "title": "Kubernetes / Compute Resources / Pod",
+ "uid": "6581e46e4e5c7ba40a07646395ef7b23",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-k8s-resources-workload
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-k8s-resources-pod
namespace: monitoring
- apiVersion: v1
data:
- k8s-resources-workloads-namespace.json: |-
+ k8s-resources-workload.json: |-
{
"annotations": {
"list": [
@@ -15961,11 +15407,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 1,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15981,26 +15430,7 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
- {
- "alias": "quota - requests",
- "color": "#F2495C",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- },
- {
- "alias": "quota - limits",
- "color": "#FF9830",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- }
+
],
"spaceLength": 10,
"span": 12,
@@ -16008,26 +15438,10 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{workload}} - {{workload_type}}",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "quota - requests",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "quota - limits",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -16040,7 +15454,7 @@ items:
"title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -16094,11 +15508,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 2,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -16127,24 +15544,6 @@ items:
"pattern": "Time",
"type": "hidden"
},
- {
- "alias": "Running Pods",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 0,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #A",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
{
"alias": "CPU Usage",
"colorMode": null,
@@ -16154,9 +15553,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #B",
+ "pattern": "Value #A",
"thresholds": [
],
@@ -16172,9 +15572,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #C",
+ "pattern": "Value #B",
"thresholds": [
],
@@ -16190,9 +15591,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #D",
+ "pattern": "Value #C",
"thresholds": [
],
@@ -16208,9 +15610,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #E",
+ "pattern": "Value #D",
"thresholds": [
],
@@ -16226,9 +15629,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #F",
+ "pattern": "Value #E",
"thresholds": [
],
@@ -16236,7 +15640,7 @@ items:
"unit": "percentunit"
},
{
- "alias": "Workload",
+ "alias": "Pod",
"colorMode": null,
"colors": [
@@ -16244,27 +15648,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
- "pattern": "workload",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "Workload Type",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "workload_type",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "pattern": "pod",
"thresholds": [
],
@@ -16289,7 +15676,7 @@ items:
],
"targets": [
{
- "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16298,7 +15685,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16307,7 +15694,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16316,7 +15703,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16325,22 +15712,13 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
- },
- {
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "F",
- "step": 10
}
],
"thresholds": [
@@ -16351,7 +15729,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -16406,11 +15784,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 3,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -16426,26 +15807,7 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
- {
- "alias": "quota - requests",
- "color": "#F2495C",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- },
- {
- "alias": "quota - limits",
- "color": "#FF9830",
- "dashes": true,
- "fill": 0,
- "hideTooltip": true,
- "legend": false,
- "linewidth": 2,
- "stack": false
- }
+
],
"spaceLength": 10,
"span": 12,
@@ -16453,26 +15815,10 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}} - {{workload_type}}",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "quota - requests",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "quota - limits",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -16485,7 +15831,7 @@ items:
"title": "Memory Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -16539,11 +15885,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 4,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -16572,24 +15921,6 @@ items:
"pattern": "Time",
"type": "hidden"
},
- {
- "alias": "Running Pods",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 0,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #A",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
{
"alias": "Memory Usage",
"colorMode": null,
@@ -16599,9 +15930,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #B",
+ "pattern": "Value #A",
"thresholds": [
],
@@ -16617,9 +15949,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #C",
+ "pattern": "Value #B",
"thresholds": [
],
@@ -16635,9 +15968,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #D",
+ "pattern": "Value #C",
"thresholds": [
],
@@ -16653,9 +15987,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #E",
+ "pattern": "Value #D",
"thresholds": [
],
@@ -16671,9 +16006,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
- "pattern": "Value #F",
+ "pattern": "Value #E",
"thresholds": [
],
@@ -16681,7 +16017,7 @@ items:
"unit": "percentunit"
},
{
- "alias": "Workload",
+ "alias": "Pod",
"colorMode": null,
"colors": [
@@ -16689,27 +16025,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
- "pattern": "workload",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "Workload Type",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "workload_type",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "pattern": "pod",
"thresholds": [
],
@@ -16734,7 +16053,7 @@ items:
],
"targets": [
{
- "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
+ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16743,7 +16062,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16752,7 +16071,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16761,7 +16080,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16770,22 +16089,13 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
- },
- {
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "F",
- "step": 10
}
],
"thresholds": [
@@ -16796,7 +16106,7 @@ items:
"title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -16853,10 +16163,12 @@ items:
"id": 5,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -16894,6 +16206,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -16912,6 +16225,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -16930,6 +16244,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -16948,6 +16263,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -16966,6 +16282,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -16984,6 +16301,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -16994,7 +16312,7 @@ items:
"unit": "pps"
},
{
- "alias": "Workload",
+ "alias": "Pod",
"colorMode": null,
"colors": [
@@ -17002,27 +16320,10 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
- "linkTooltip": "Drill down to pods",
- "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type",
- "pattern": "workload",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "Workload Type",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "workload_type",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "pattern": "pod",
"thresholds": [
],
@@ -17047,7 +16348,7 @@ items:
],
"targets": [
{
- "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17056,7 +16357,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17065,7 +16366,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17074,7 +16375,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17083,7 +16384,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17092,7 +16393,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17109,7 +16410,7 @@ items:
"title": "Current Network Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -17147,7 +16448,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Current Network Usage",
"titleSize": "h6"
},
{
@@ -17164,11 +16465,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 6,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17187,15 +16491,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}}",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -17208,7 +16512,7 @@ items:
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17239,19 +16543,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -17262,11 +16554,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 7,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17285,15 +16580,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}}",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -17306,7 +16601,7 @@ items:
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17343,7 +16638,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -17360,11 +16655,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 8,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17383,15 +16681,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}}",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -17401,10 +16699,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Average Container Bandwidth by Workload: Received",
+ "title": "Average Container Bandwidth by Pod: Received",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17435,19 +16733,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -17458,11 +16744,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 9,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17481,15 +16770,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}}",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -17499,10 +16788,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Average Container Bandwidth by Workload: Transmitted",
+ "title": "Average Container Bandwidth by Pod: Transmitted",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17539,7 +16828,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Average Container Bandwidth by Pod",
"titleSize": "h6"
},
{
@@ -17556,11 +16845,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 10,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17579,15 +16871,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}}",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -17600,7 +16892,7 @@ items:
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17615,7 +16907,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -17631,19 +16923,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -17654,11 +16934,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 11,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17677,15 +16960,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}}",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -17698,7 +16981,7 @@ items:
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17713,7 +16996,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -17735,7 +17018,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -17752,11 +17035,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 12,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17775,15 +17061,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}}",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -17796,7 +17082,7 @@ items:
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17811,7 +17097,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -17827,19 +17113,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -17850,11 +17124,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 13,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17873,15 +17150,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{workload}}",
+ "legendFormat": "{{pod}}",
"legendLink": null,
"step": 10
}
@@ -17894,7 +17171,7 @@ items:
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17909,7 +17186,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -17931,7 +17208,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets Dropped",
"titleSize": "h6"
}
],
@@ -17948,7 +17225,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -17960,28 +17237,50 @@ items:
},
{
"allValue": null,
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
"current": {
- "text": "deployment",
- "value": "deployment"
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
},
"datasource": "$datasource",
- "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
- "name": "type",
+ "name": "namespace",
"options": [
],
- "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
- "refresh": 1,
+ "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
- "skipUrlSync": false,
- "sort": 0,
+ "sort": 1,
"tagValuesQuery": "",
"tags": [
@@ -17997,16 +17296,16 @@ items:
"value": ""
},
"datasource": "$datasource",
- "hide": 2,
+ "hide": 0,
"includeAll": false,
"label": null,
"multi": false,
- "name": "cluster",
+ "name": "type",
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -18028,12 +17327,12 @@ items:
"includeAll": false,
"label": null,
"multi": false,
- "name": "namespace",
+ "name": "workload",
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
- "refresh": 1,
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -18076,34 +17375,32 @@ items:
]
},
"timezone": "UTC",
- "title": "Kubernetes / Compute Resources / Namespace (Workloads)",
- "uid": "a87fb0d919ec0ea5f6543124e16c42a5",
+ "title": "Kubernetes / Compute Resources / Workload",
+ "uid": "a164a7f0339f99e89cea5cb47e9be617",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-k8s-resources-workloads-namespace
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-k8s-resources-workload
namespace: monitoring
- apiVersion: v1
data:
- kubelet.json: |-
+ k8s-resources-workloads-namespace.json: |-
{
- "__inputs": [
-
- ],
- "__requires": [
-
- ],
"annotations": {
"list": [
]
},
- "editable": false,
+ "editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
- "id": null,
"links": [
],
@@ -18111,575 +17408,414 @@ items:
"rows": [
{
"collapse": false,
- "collapsed": false,
+ "height": "250px",
"panels": [
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
+ "aliasColors": {
},
- "id": 2,
- "interval": null,
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "id": 1,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
{
- "name": "value to text",
- "value": 1
+ "alias": "quota - requests",
+ "color": "#F2495C",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
+ "alias": "quota - limits",
+ "color": "#FF9830",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
}
],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
"targets": [
{
- "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Up",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 3,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
+ "legendFormat": "{{workload}} - {{workload_type}}",
+ "legendLink": null,
+ "step": 10
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Running Pods",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "legendFormat": "quota - requests",
+ "legendLink": null,
+ "step": 10
+ },
{
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "quota - limits",
+ "legendLink": null,
+ "step": 10
}
],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
+ "thresholds": [
+
],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Usage",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
},
- "gridPos": {
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+ ]
},
- "id": 4,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
+ "yaxes": [
{
- "name": "value to text",
- "value": 1
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Running Container",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
}
- ],
- "valueName": "min"
- },
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU Usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
+ "aliasColors": {
},
- "id": 5,
- "interval": null,
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "id": 2,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "styles": [
{
- "name": "value to text",
- "value": 1
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "alias": "Running Pods",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 0,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
+ "alias": "CPU Usage",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
{
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Actual Volume Count",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "alias": "CPU Requests",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
+ "alias": "CPU Requests %",
+ "colorMode": null,
+ "colors": [
- },
- "id": 6,
- "interval": null,
- "links": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
- ],
- "mappingType": 1,
- "mappingTypes": [
+ ],
+ "type": "number",
+ "unit": "percentunit"
+ },
{
- "name": "value to text",
- "value": 1
+ "alias": "CPU Limits",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "alias": "CPU Limits %",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "percentunit"
+ },
{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
+ "alias": "Workload",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
+ "pattern": "workload",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
{
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Desired Volume Count",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "alias": "Workload Type",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "workload_type",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
- },
- "id": 7,
- "interval": null,
- "links": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+ ],
+ "type": "string",
+ "unit": "short"
+ }
],
- "mappingType": 1,
- "mappingTypes": [
+ "targets": [
{
- "name": "value to text",
- "value": 1
+ "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
{
- "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))",
- "format": "time_series",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Config Error Count",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 8,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
{
- "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
- "format": "time_series",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_type}}",
- "refId": "A"
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
}
],
"thresholds": [
@@ -18687,13 +17823,14 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Operation Rate",
+ "title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "type": "graph",
+ "transform": "table",
+ "type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -18705,23 +17842,35 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU Quota",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
"aliasColors": {
@@ -18730,48 +17879,82 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 9,
+ "fill": 10,
+ "id": 3,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
-
+ {
+ "alias": "quota - requests",
+ "color": "#F2495C",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ },
+ {
+ "alias": "quota - limits",
+ "color": "#FF9830",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ }
],
"spaceLength": 10,
- "span": 6,
- "stack": false,
+ "span": 12,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+ "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_type}}",
- "refId": "A"
+ "legendFormat": "{{workload}} - {{workload_type}}",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "quota - requests",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "quota - limits",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -18779,10 +17962,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Operation Error Rate",
+ "title": "Memory Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -18797,7 +17980,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "bytes",
"label": null,
"logBase": 1,
"max": null,
@@ -18805,12 +17988,12 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
}
@@ -18818,14 +18001,13 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "showTitle": true,
+ "title": "Memory Usage",
+ "titleSize": "h6"
},
{
"collapse": false,
- "collapsed": false,
+ "height": "250px",
"panels": [
{
"aliasColors": {
@@ -18836,33 +18018,29 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "gridPos": {
-
- },
- "id": 10,
+ "id": 4,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
@@ -18870,27 +18048,250 @@ items:
"span": 12,
"stack": false,
"steppedLine": false,
- "targets": [
+ "styles": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_type}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Operation duration 99th quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Running Pods",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 0,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Memory Usage",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "bytes"
+ },
+ {
+ "alias": "Memory Requests",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "bytes"
+ },
+ {
+ "alias": "Memory Requests %",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "percentunit"
+ },
+ {
+ "alias": "Memory Limits",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "bytes"
+ },
+ {
+ "alias": "Memory Limits %",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "percentunit"
+ },
+ {
+ "alias": "Workload",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
+ "pattern": "workload",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Workload Type",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "workload_type",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+
+ ],
+ "type": "string",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Quota",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
},
- "type": "graph",
+ "transform": "table",
+ "type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -18902,20 +18303,20 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
]
}
@@ -18923,14 +18324,13 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "showTitle": true,
+ "title": "Memory Quota",
+ "titleSize": "h6"
},
{
"collapse": false,
- "collapsed": false,
+ "height": "250px",
"panels": [
{
"aliasColors": {
@@ -18941,54 +18341,265 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "gridPos": {
-
- },
- "id": 11,
+ "id": 5,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 6,
+ "span": 12,
"stack": false,
"steppedLine": false,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Current Receive Bandwidth",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Current Transmit Bandwidth",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Rate of Received Packets",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Transmitted Packets",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Received Packets Dropped",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Transmitted Packets Dropped",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Workload",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down to pods",
+ "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type",
+ "pattern": "workload",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Workload Type",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "workload_type",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+
+ ],
+ "type": "string",
+ "unit": "short"
+ }
+ ],
"targets": [
{
- "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
- "format": "time_series",
+ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "{{instance}} pod",
- "refId": "A"
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
},
{
- "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
- "format": "time_series",
+ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "{{instance}} worker",
- "refId": "B"
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
}
],
"thresholds": [
@@ -18996,13 +18607,14 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Pod Start Rate",
+ "title": "Current Network Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
- "type": "graph",
+ "transform": "table",
+ "type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -19014,7 +18626,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -19022,15 +18634,27 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Current Network Usage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
"aliasColors": {
@@ -19039,55 +18663,45 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 12,
+ "fill": 10,
+ "id": 6,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} pod",
- "refId": "A"
- },
- {
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} worker",
- "refId": "B"
+ "legendFormat": "{{workload}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -19095,10 +18709,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Pod Start Duration",
+ "title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -19113,7 +18727,7 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -19121,28 +18735,15 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
+ },
{
"aliasColors": {
@@ -19151,50 +18752,45 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 13,
+ "fill": 10,
+ "id": 7,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
- "refId": "A"
+ "legendFormat": "{{workload}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -19202,10 +18798,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Storage Operation Rate",
+ "title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -19220,7 +18816,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -19228,15 +18824,27 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Bandwidth",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
"aliasColors": {
@@ -19245,50 +18853,45 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 14,
+ "fill": 10,
+ "id": 8,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+ "expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
- "refId": "A"
+ "legendFormat": "{{workload}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -19296,10 +18899,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Storage Operation Error Rate",
+ "title": "Average Container Bandwidth by Workload: Received",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -19314,7 +18917,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -19322,28 +18925,15 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
+ },
{
"aliasColors": {
@@ -19352,50 +18942,45 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 15,
+ "fill": 10,
+ "id": 9,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 12,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
+ "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
- "refId": "A"
+ "legendFormat": "{{workload}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -19403,10 +18988,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Storage Operation Duration 99th quantile",
+ "title": "Average Container Bandwidth by Workload: Transmitted",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -19421,7 +19006,7 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -19429,12 +19014,12 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
}
@@ -19442,14 +19027,13 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "showTitle": true,
+ "title": "Average Container Bandwidth by Workload",
+ "titleSize": "h6"
},
{
"collapse": false,
- "collapsed": false,
+ "height": "250px",
"panels": [
{
"aliasColors": {
@@ -19459,48 +19043,45 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 16,
+ "fill": 10,
+ "id": 10,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+ "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{operation_type}}",
- "refId": "A"
+ "legendFormat": "{{workload}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -19508,10 +19089,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Cgroup manager operation rate",
+ "title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -19526,7 +19107,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -19534,12 +19115,12 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
},
@@ -19551,48 +19132,45 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 17,
+ "fill": 10,
+ "id": 11,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_type}}",
- "refId": "A"
+ "legendFormat": "{{workload}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -19600,10 +19178,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Cgroup manager 99th quantile",
+ "title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -19618,7 +19196,7 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -19626,12 +19204,12 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
}
@@ -19639,14 +19217,13 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "showTitle": true,
+ "title": "Rate of Packets",
+ "titleSize": "h6"
},
{
"collapse": false,
- "collapsed": false,
+ "height": "250px",
"panels": [
{
"aliasColors": {
@@ -19656,49 +19233,45 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "description": "Pod lifecycle event generator",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 18,
+ "fill": 10,
+ "id": 12,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "legendFormat": "{{workload}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -19706,10 +19279,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "PLEG relist rate",
+ "title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -19724,7 +19297,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -19732,12 +19305,12 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
},
@@ -19749,48 +19322,45 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 19,
+ "fill": 10,
+ "id": 13,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "legendFormat": "{{workload}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -19798,10 +19368,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "PLEG relist interval",
+ "title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -19816,7 +19386,7 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -19824,12 +19394,12 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
}
@@ -19837,1268 +19407,749 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
+ "showTitle": true,
+ "title": "Rate of Packets Dropped",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "kubernetes-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
- },
- "id": 20,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "namespace",
+ "options": [
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ ],
+ "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "PLEG relist duration",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "deployment",
+ "value": "deployment"
+ },
+ "datasource": "$datasource",
+ "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "type",
+ "options": [
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
+ ],
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "UTC",
+ "title": "Kubernetes / Compute Resources / Namespace (Workloads)",
+ "uid": "a87fb0d919ec0ea5f6543124e16c42a5",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-k8s-resources-workloads-namespace
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ kubelet.json: |-
+ {
+ "__inputs": [
- },
- "id": 21,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "__requires": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
+ ],
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [
+
+ ],
+ "panels": [
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "2xx",
- "refId": "A"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "3xx",
- "refId": "B"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "4xx",
- "refId": "C"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "5xx",
- "refId": "D"
- }
- ],
- "thresholds": [
+ "mappings": [
],
- "timeFrom": null,
- "timeShift": null,
- "title": "RPC Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
]
},
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
+ "unit": "none"
}
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "links": [
+
],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Running Kubelets",
+ "transparent": false,
+ "type": "stat"
},
{
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 22,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
"links": [
],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{verb}} {{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ "mappings": [
],
- "timeFrom": null,
- "timeShift": null,
- "title": "Request duration 99th quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
]
},
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
+ "unit": "none"
}
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 3,
+ "links": [
+
],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Running Pods",
+ "transparent": false,
+ "type": "stat"
},
{
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
+ ],
+ "mappings": [
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+
+ ]
},
- "id": 23,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 4,
+ "links": [
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Running Containers",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ "mappings": [
],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
]
},
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 12,
+ "y": 0
+ },
+ "id": 5,
+ "links": [
+
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
},
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
{
- "aliasColors": {
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Actual Volume Count",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 24,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 25,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ ],
+ "mappings": [
],
- "timeFrom": null,
- "timeShift": null,
- "title": "Goroutines",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
]
},
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
+ "unit": "none"
}
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
- "kubernetes-mixin"
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 2,
- "includeAll": false,
- "label": "cluster",
- "multi": false,
- "name": "cluster",
- "options": [
-
- ],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": null,
- "multi": false,
- "name": "instance",
- "options": [
-
- ],
- "query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, instance)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "Kubernetes / Kubelet",
- "uid": "3138fa155d5915769fbded898ac09fd9",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-kubelet
- namespace: monitoring
-- apiVersion: v1
- data:
- kubernetes-cluster-dashboard.json: |-
- {
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- }
- ]
- },
- "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ",
- "editable": true,
- "gnetId": 162,
- "graphTooltip": 1,
- "links": [
-
- ],
- "panels": [
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
},
"gridPos": {
"h": 7,
- "w": 8,
- "x": 0,
+ "w": 4,
+ "x": 16,
"y": 0
},
- "id": 4,
- "interval": null,
- "isNew": true,
+ "id": 6,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
},
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
+ "textMode": "auto"
},
- "tableColumn": "",
+ "pluginVersion": "7",
"targets": [
{
- "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100",
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
"format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": "65, 90",
- "title": "Cluster memory usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
- "valueName": "current"
+ "title": "Desired Volume Count",
+ "transparent": false,
+ "type": "stat"
},
{
- "cacheTimeout": null,
- "colorBackground": true,
- "colorValue": false,
- "colors": [
- "rgba(0, 0, 0, 0)",
- "rgb(210, 1, 1)",
- "#890f02"
- ],
- "datasource": "prometheus",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
- "custom": {
+ "links": [
- }
- },
- "overrides": [
+ ],
+ "mappings": [
- ]
- },
- "format": "percentunit",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+
+ ]
+ },
+ "unit": "none"
+ }
},
"gridPos": {
- "h": 2,
- "w": 8,
- "x": 8,
+ "h": 7,
+ "w": 4,
+ "x": 20,
"y": 0
},
- "id": 23,
- "interval": null,
+ "id": 7,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
},
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
+ "textMode": "auto"
},
- "tableColumn": "{job=\"kubelet\"}",
+ "pluginVersion": "7",
"targets": [
{
- "expr": "avg(up{job=\"kubelet\"}) BY (job)",
+ "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
"refId": "A"
}
],
- "thresholds": "1.1",
- "title": "Up Nodes",
- "type": "singlestat",
- "valueFontSize": "120%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
+ "title": "Config Error Count",
+ "transparent": false,
+ "type": "stat"
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "prometheus",
- "decimals": 0,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
+ "aliasColors": {
- ]
- },
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
"h": 7,
- "w": 8,
- "x": 16,
- "y": 0
+ "w": 12,
+ "x": 0,
+ "y": 7
},
- "id": 6,
- "interval": null,
- "isNew": true,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))",
+ "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)",
"format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": "65, 90",
- "title": "Cluster CPU usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_type}}",
+ "refId": "A"
}
],
- "valueName": "current"
- },
- {
- "columns": [
+ "thresholds": [
],
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Operation Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- }
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
- "overrides": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
- ]
},
- "fontSize": "90%",
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 5,
- "w": 8,
- "x": 8,
- "y": 2
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 7
},
- "id": 25,
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 2,
- "desc": false
- },
- "styles": [
- {
- "alias": "Time",
- "align": "auto",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "link": false,
- "pattern": "Time",
- "type": "date"
- },
- {
- "alias": "Uptime",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Value",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "s"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/endpoint|job|namespace|pod|service/",
- "thresholds": [
-
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "instance",
- "preserveFormat": false,
- "sanitize": false,
- "thresholds": [
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "type": "string",
- "unit": "short"
- }
],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "(time() - node_boot_time_seconds)",
- "format": "table",
- "instant": true,
- "intervalFactor": 1,
+ "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_type}}",
"refId": "A"
}
],
- "title": "Node Uptime",
- "transform": "table",
- "transparent": true,
- "type": "table-old"
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 7
- },
- "id": 15,
- "panels": [
+ "thresholds": [
],
- "title": "Nodes",
- "type": "row"
- },
- {
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 0.85
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "B",
- "5m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "max"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "for": "0m",
- "frequency": "60s",
- "handler": 1,
- "name": "Memory Usage alert",
- "noDataState": "no_data",
- "notifications": [
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Operation Error Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
]
},
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
+ "datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
+ "h": 7,
+ "w": 24,
"x": 0,
- "y": 8
+ "y": 14
},
- "hiddenSeries": false,
"id": 10,
"legend": {
+ "alignAsTable": true,
"avg": false,
- "current": false,
+ "current": true,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
+ "sideWidth": null,
"total": false,
- "values": false
+ "values": true
},
"lines": true,
"linewidth": 1,
@@ -21106,15 +20157,11 @@ items:
],
"nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -21123,35 +20170,19 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{ instance }}",
+ "legendFormat": "{{instance}} {{operation_type}}",
"refId": "A"
- },
- {
- "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes",
- "format": "time_series",
- "hide": true,
- "intervalFactor": 1,
- "refId": "B"
}
],
"thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 0.85
- }
- ],
- "timeFrom": null,
- "timeRegions": [
],
+ "timeFrom": null,
"timeShift": null,
- "title": "Memory Usage",
+ "title": "Operation duration 99th quantile",
"tooltip": {
"shared": true,
"sort": 0,
@@ -21169,7 +20200,7 @@ items:
},
"yaxes": [
{
- "format": "decbytes",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
@@ -21177,92 +20208,43 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ ]
},
{
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 90
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "A",
- "15m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "max"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "CPU Usage alert",
- "noDataState": "no_data",
- "notifications": [
-
- ]
- },
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
+ "datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 9,
+ "h": 7,
"w": 12,
- "x": 12,
- "y": 8
+ "x": 0,
+ "y": 21
},
- "hiddenSeries": false,
"id": 11,
"legend": {
+ "alignAsTable": true,
"avg": false,
- "current": false,
+ "current": true,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
+ "sideWidth": null,
"total": false,
- "values": false
+ "values": true
},
"lines": true,
"linewidth": 1,
@@ -21270,15 +20252,11 @@ items:
],
"nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -21287,28 +20265,26 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)",
+ "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series",
- "intervalFactor": 3,
- "legendFormat": "{{instance}}",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} pod",
"refId": "A"
- }
- ],
- "thresholds": [
+ },
{
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 90
+ "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} worker",
+ "refId": "B"
}
],
- "timeFrom": null,
- "timeRegions": [
+ "thresholds": [
],
+ "timeFrom": null,
"timeShift": null,
- "title": "CPU Usage",
+ "title": "Pod Start Rate",
"tooltip": {
"shared": true,
"sort": 0,
@@ -21326,8 +20302,7 @@ items:
},
"yaxes": [
{
- "decimals": null,
- "format": "percent",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -21335,343 +20310,159 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ ]
},
{
- "columns": [
-
- ],
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
+ "aliasColors": {
- ]
},
- "fontSize": "100%",
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 9,
+ "h": 7,
"w": 12,
- "x": 0,
- "y": 17
+ "x": 12,
+ "y": 21
+ },
+ "id": 12,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
},
- "id": 31,
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 0,
- "desc": true
- },
- "styles": [
- {
- "alias": "Time",
- "align": "auto",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "link": false,
- "pattern": "Time",
- "type": "date"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "condition|container|daemonset|endpoint|namespace|node",
- "thresholds": [
-
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "type": "number",
- "unit": "short"
- }
],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "ALERTS{alertstate=\"firing\"}",
- "format": "table",
- "instant": true,
- "intervalFactor": 1,
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} pod",
"refId": "A"
},
{
- "expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}",
- "format": "table",
- "hide": true,
- "intervalFactor": 1,
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} worker",
"refId": "B"
}
],
- "title": "Active Alerts",
- "transform": "table",
- "type": "table-old"
- },
- {
- "dashboardFilter": "",
- "dashboardTags": [
+ "thresholds": [
],
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pod Start Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
]
},
- "folderId": null,
- "gridPos": {
- "h": 9,
- "w": 5,
- "x": 12,
- "y": 17
- },
- "id": 27,
- "limit": 10,
- "links": [
-
- ],
- "nameFilter": "",
- "onlyAlertsOnDashboard": false,
- "show": "current",
- "sortOrder": 1,
- "stateFilter": [
-
- ],
- "title": "Alarms",
- "type": "alertlist"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "prometheus",
- "decimals": null,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 7,
- "x": 17,
- "y": 17
- },
- "id": 7,
- "interval": null,
- "isNew": true,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
+ "yaxes": [
{
- "name": "value to text",
- "value": 1
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "metric": "",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": "65, 90",
- "title": "Cluster Filesystem usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
}
- ],
- "valueName": "current"
+ ]
},
{
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 1
- ],
- "type": "lt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "C",
- "5m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "avg"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "Node Down",
- "noDataState": "alerting",
- "notifications": [
-
- ]
- },
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
+ "datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 4,
- "w": 7,
- "x": 17,
- "y": 22
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 28
},
- "hiddenSeries": false,
- "id": 29,
+ "id": 13,
"legend": {
+ "alignAsTable": true,
"avg": false,
- "current": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
"max": false,
"min": false,
- "show": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
"total": false,
- "values": false
+ "values": true
},
"lines": true,
"linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
- "options": {
- "dataLinks": [
-
- ]
- },
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -21680,46 +20471,19 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(up{job=\"kubelet\"}) BY (job)",
+ "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
- "hide": true,
- "instant": false,
- "intervalFactor": 1,
- "legendFormat": "Up Nodes",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
"refId": "A"
- },
- {
- "expr": "count(up{job=\"kubelet\"})",
- "format": "time_series",
- "hide": true,
- "instant": false,
- "intervalFactor": 1,
- "legendFormat": "Total Nodes",
- "refId": "B"
- },
- {
- "expr": "avg(up{job=\"kubelet\"}) BY (job)",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 1,
- "refId": "C"
}
],
"thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "lt",
- "value": 1
- }
- ],
- "timeFrom": null,
- "timeRegions": [
],
+ "timeFrom": null,
"timeShift": null,
- "title": "Up Nodes",
+ "title": "Storage Operation Rate",
"tooltip": {
"shared": true,
"sort": 0,
@@ -21737,7 +20501,7 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -21745,92 +20509,45 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ ]
},
{
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 85
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "A",
- "1m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "avg"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "CPU Temperature alert",
- "noDataState": "no_data",
- "notifications": [
-
- ]
- },
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
+ "datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 6,
- "w": 24,
- "x": 0,
- "y": 26
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 28
},
- "hiddenSeries": false,
- "id": 13,
+ "id": 14,
"legend": {
+ "alignAsTable": true,
"avg": false,
- "current": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
+ "sideWidth": null,
"total": false,
- "values": false
+ "values": true
},
"lines": true,
"linewidth": 1,
@@ -21838,15 +20555,11 @@ items:
],
"nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -21855,28 +20568,19 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rpi_cpu_temperature_celsius",
+ "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
- "intervalFactor": 5,
- "legendFormat": "{{instance}}",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
"refId": "A"
}
],
"thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 85
- }
- ],
- "timeFrom": null,
- "timeRegions": [
],
+ "timeFrom": null,
"timeShift": null,
- "title": "CPU Temperature",
+ "title": "Storage Operation Error Rate",
"tooltip": {
"shared": true,
"sort": 0,
@@ -21894,7 +20598,7 @@ items:
},
"yaxes": [
{
- "format": "celsius",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -21902,34 +20606,14 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 32
- },
- "id": 17,
- "panels": [
-
- ],
- "title": "Pods",
- "type": "row"
+ ]
},
{
"aliasColors": {
@@ -21938,37 +20622,19 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "decimals": 0,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
+ "datasource": "$datasource",
+ "fill": 1,
"fillGradient": 0,
- "grid": {
-
- },
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
- "y": 33
+ "y": 35
},
- "hiddenSeries": false,
- "id": 3,
- "isNew": true,
+ "id": 15,
"legend": {
"alignAsTable": true,
- "avg": true,
+ "avg": false,
"current": true,
"hideEmpty": true,
"hideZero": true,
@@ -21976,27 +20642,21 @@ items:
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": 270,
- "sort": "current",
- "sortDesc": true,
+ "sideWidth": null,
"total": false,
"values": true
},
"lines": true,
- "linewidth": 2,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
- "options": {
- "dataLinks": [
-
- ]
- },
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -22005,31 +20665,23 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))",
+ "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))",
"format": "time_series",
- "instant": false,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "{{ pod}}",
- "metric": "container_cpu",
- "refId": "A",
- "step": 10
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
+ "refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Pod CPU usage",
+ "title": "Storage Operation Duration 99th quantile",
"tooltip": {
- "msResolution": true,
"shared": true,
- "sort": 2,
- "value_type": "cumulative"
+ "sort": 0,
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -22043,7 +20695,7 @@ items:
},
"yaxes": [
{
- "format": "percentunit",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
@@ -22051,18 +20703,14 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ ]
},
{
"aliasColors": {
@@ -22071,63 +20719,39 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
+ "datasource": "$datasource",
+ "fill": 1,
"fillGradient": 0,
- "grid": {
-
- },
"gridPos": {
"h": 7,
- "w": 24,
+ "w": 12,
"x": 0,
- "y": 40
+ "y": 42
},
- "hiddenSeries": false,
- "id": 2,
- "isNew": true,
+ "id": 16,
"legend": {
"alignAsTable": true,
- "avg": true,
+ "avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": 250,
- "sort": "avg",
- "sortDesc": true,
+ "sideWidth": null,
"total": false,
"values": true
},
"lines": true,
- "linewidth": 2,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "connected",
- "options": {
- "dataLinks": [
-
- ]
- },
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -22136,39 +20760,23 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))",
- "format": "time_series",
- "hide": true,
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "{{ pod }}",
- "metric": "container_memory_usage:sort_desc",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))",
+ "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
"format": "time_series",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "{{ pod }}",
- "refId": "B"
+ "intervalFactor": 2,
+ "legendFormat": "{{operation_type}}",
+ "refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Pod memory usage",
+ "title": "Cgroup manager operation rate",
"tooltip": {
- "msResolution": false,
"shared": true,
- "sort": 2,
- "value_type": "cumulative"
+ "sort": 0,
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -22182,7 +20790,7 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -22190,18 +20798,14 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ ]
},
{
"aliasColors": {
@@ -22210,63 +20814,39 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
+ "datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
- "grid": {
-
- },
"gridPos": {
"h": 7,
"w": 12,
- "x": 0,
- "y": 47
+ "x": 12,
+ "y": 42
},
- "hiddenSeries": false,
- "id": 19,
+ "id": 17,
"legend": {
"alignAsTable": true,
- "avg": true,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
+ "avg": false,
+ "current": true,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
- "sideWidth": 550,
- "sort": "avg",
- "sortDesc": true,
+ "sideWidth": null,
"total": false,
"values": true
},
"lines": true,
- "linewidth": 2,
+ "linewidth": 1,
"links": [
],
"nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -22275,39 +20855,23 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
"format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ pod_name }}",
- "refId": "A",
- "step": 240
- },
- {
- "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])",
- "format": "time_series",
- "hide": true,
- "interval": "",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
+ "legendFormat": "{{instance}} {{operation_type}}",
+ "refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Sent Network Traffic per Container",
+ "title": "Cgroup manager 99th quantile",
"tooltip": {
- "msResolution": true,
"shared": true,
- "sort": 2,
- "value_type": "cumulative"
+ "sort": 0,
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -22321,26 +20885,22 @@ items:
},
"yaxes": [
{
- "format": "Bps",
- "label": "",
+ "format": "s",
+ "label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
- "format": "short",
- "label": "",
- "logBase": 10,
- "max": 8,
- "min": 0,
- "show": false
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ ]
},
{
"aliasColors": {
@@ -22349,63 +20909,40 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
+ "datasource": "$datasource",
+ "description": "Pod lifecycle event generator",
"fill": 1,
"fillGradient": 0,
- "grid": {
-
- },
"gridPos": {
"h": 7,
"w": 12,
- "x": 12,
- "y": 47
+ "x": 0,
+ "y": 49
},
- "hiddenSeries": false,
- "id": 21,
+ "id": 18,
"legend": {
"alignAsTable": true,
- "avg": true,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
+ "avg": false,
+ "current": true,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
- "sideWidth": 150,
- "sort": "avg",
- "sortDesc": true,
+ "sideWidth": null,
"total": false,
"values": true
},
"lines": true,
- "linewidth": 2,
+ "linewidth": 1,
"links": [
],
"nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -22414,38 +20951,23 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{pod_name}}",
- "refId": "A",
- "step": 240
- },
- {
- "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])",
+ "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"format": "time_series",
- "hide": true,
"intervalFactor": 2,
- "legendFormat": "{{pod_name}}",
- "refId": "B",
- "step": 10
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Received Network Traffic per Container",
+ "title": "PLEG relist rate",
"tooltip": {
- "msResolution": true,
"shared": true,
- "sort": 2,
- "value_type": "cumulative"
+ "sort": 0,
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -22459,7 +20981,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -22467,18 +20989,14 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ ]
},
{
"aliasColors": {
@@ -22487,63 +21005,39 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
+ "datasource": "$datasource",
+ "fill": 1,
"fillGradient": 0,
- "grid": {
-
- },
"gridPos": {
"h": 7,
- "w": 24,
- "x": 0,
- "y": 54
+ "w": 12,
+ "x": 12,
+ "y": 49
},
- "hiddenSeries": false,
- "id": 8,
- "isNew": true,
+ "id": 19,
"legend": {
"alignAsTable": true,
- "avg": true,
+ "avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
- "sideWidth": 220,
- "sort": "current",
- "sortDesc": true,
+ "sideWidth": null,
"total": false,
"values": true
},
"lines": true,
- "linewidth": 2,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "connected",
- "options": {
- "dataLinks": [
-
- ]
- },
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -22552,40 +21046,23 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "Receive Traffic",
- "metric": "network",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "Transmit Traffic",
- "metric": "network",
- "refId": "B",
- "step": 10
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Pod Network i/o",
+ "title": "PLEG relist interval",
"tooltip": {
- "msResolution": false,
"shared": true,
"sort": 0,
- "value_type": "cumulative"
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -22599,8 +21076,7 @@ items:
},
"yaxes": [
{
- "$$hashKey": "object:1163",
- "format": "bytes",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
@@ -22608,700 +21084,356 @@ items:
"show": true
},
{
- "$$hashKey": "object:1164",
- "format": "short",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- }
- ],
- "refresh": "10s",
- "schemaVersion": 25,
- "style": "dark",
- "tags": [
- "custom"
- ],
- "templating": {
- "list": [
-
- ]
- },
- "time": {
- "from": "now-3h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "browser",
- "title": "Kubernetes cluster monitoring (via Prometheus)",
- "version": 1
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-kubernetes-cluster-dashboard
- namespace: monitoring
-- apiVersion: v1
- data:
- namespace-by-pod.json: |-
- {
- "__inputs": [
-
- ],
- "__requires": [
-
- ],
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- }
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "id": null,
- "links": [
-
- ],
- "panels": [
+ ]
+ },
{
- "collapse": false,
- "collapsed": false,
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 1,
+ "h": 7,
"w": 24,
"x": 0,
- "y": 0
+ "y": 56
},
- "id": 2,
- "panels": [
+ "id": 20,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
"repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Current Bandwidth",
- "titleSize": "h6",
- "type": "row"
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PLEG relist duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "decimals": 0,
- "format": "time_series",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "aliasColors": {
+
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
+ "h": 7,
+ "w": 24,
"x": 0,
- "y": 1
+ "y": 63
},
- "height": 9,
- "id": 3,
- "interval": null,
+ "id": 21,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
{
- "name": "value to text",
- "value": 1
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "2xx",
+ "refId": "A"
},
{
- "name": "range to text",
- "value": 2
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "3xx",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "4xx",
+ "refId": "C"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "5xx",
+ "refId": "D"
}
],
- "maxDataPoints": 100,
- "minSpan": 12,
- "nullPointMode": "connected",
- "nullText": null,
- "options": {
- "fieldOptions": {
- "calcs": [
- "last"
- ],
- "defaults": {
- "max": 10000000000,
- "min": 0,
- "title": "$namespace",
- "unit": "Bps"
- },
- "mappings": [
+ "thresholds": [
- ],
- "override": {
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "RPC Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- },
- "thresholds": [
- {
- "color": "dark-green",
- "index": 0,
- "value": null
- },
- {
- "color": "dark-yellow",
- "index": 1,
- "value": 5000000000
- },
- {
- "color": "dark-red",
- "index": 2,
- "value": 7000000000
- }
- ],
- "values": false
- }
+ ]
},
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "yaxes": [
{
- "from": "null",
- "text": "N/A",
- "to": "null"
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
}
- ],
- "span": 12,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
+ ]
+ },
+ {
+ "aliasColors": {
+
},
- "tableColumn": "",
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 70
+ },
+ "id": 22,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
"format": "time_series",
- "instant": null,
- "intervalFactor": 1,
- "legendFormat": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{verb}} {{url}}",
"refId": "A"
}
],
- "thresholds": "",
+ "thresholds": [
+
+ ],
"timeFrom": null,
"timeShift": null,
- "title": "Current Rate of Bytes Received",
- "type": "gauge",
- "valueFontSize": "80%",
- "valueMaps": [
+ "title": "Request duration 99th quantile",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
{
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
}
- ],
- "valueName": "current"
+ ]
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "decimals": 0,
- "format": "time_series",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "aliasColors": {
+
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 1
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 77
},
- "height": 9,
- "id": 4,
- "interval": null,
+ "id": 23,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "minSpan": 12,
- "nullPointMode": "connected",
- "nullText": null,
- "options": {
- "fieldOptions": {
- "calcs": [
- "last"
- ],
- "defaults": {
- "max": 10000000000,
- "min": 0,
- "title": "$namespace",
- "unit": "Bps"
- },
- "mappings": [
-
- ],
- "override": {
-
- },
- "thresholds": [
- {
- "color": "dark-green",
- "index": 0,
- "value": null
- },
- {
- "color": "dark-yellow",
- "index": 1,
- "value": 5000000000
- },
- {
- "color": "dark-red",
- "index": 2,
- "value": 7000000000
- }
- ],
- "values": false
- }
- },
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 12,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))",
- "format": "time_series",
- "instant": null,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "timeFrom": null,
- "timeShift": null,
- "title": "Current Rate of Bytes Transmitted",
- "type": "gauge",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "columns": [
- {
- "text": "Time",
- "value": "Time"
- },
- {
- "text": "Value #A",
- "value": "Value #A"
- },
- {
- "text": "Value #B",
- "value": "Value #B"
- },
- {
- "text": "Value #C",
- "value": "Value #C"
- },
- {
- "text": "Value #D",
- "value": "Value #D"
- },
- {
- "text": "Value #E",
- "value": "Value #E"
- },
- {
- "text": "Value #F",
- "value": "Value #F"
- },
- {
- "text": "pod",
- "value": "pod"
- }
- ],
- "datasource": "$datasource",
- "fill": 1,
- "fontSize": "100%",
- "gridPos": {
- "h": 9,
- "w": 24,
- "x": 0,
- "y": 10
- },
- "id": 5,
- "lines": true,
- "linewidth": 1,
- "minSpan": 24,
- "nullPointMode": "null as zero",
- "renderer": "flot",
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 0,
- "desc": false
- },
- "spaceLength": 10,
- "span": 24,
- "styles": [
- {
- "alias": "Time",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Time",
- "thresholds": [
-
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "Bandwidth Received",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #A",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "Bps"
- },
- {
- "alias": "Bandwidth Transmitted",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #B",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "Bps"
- },
- {
- "alias": "Rate of Received Packets",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #C",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Transmitted Packets",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #D",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Received Packets Dropped",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #E",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Transmitted Packets Dropped",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #F",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Pod",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": true,
- "linkTooltip": "Drill down",
- "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell",
- "pattern": "pod",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- }
- ],
- "targets": [
- {
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
- },
- {
- "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "C",
- "step": 10
- },
- {
- "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "D",
- "step": 10
- },
- {
- "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "E",
- "step": 10
- },
- {
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "F",
- "step": 10
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Current Status",
- "type": "table"
- },
- {
- "collapse": false,
- "collapsed": false,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 19
- },
- "id": 6,
- "panels": [
-
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Bandwidth",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 20
- },
- "id": 7,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
@@ -23311,17 +21443,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
- "stack": true,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
"format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{pod}}",
- "refId": "A",
- "step": 10
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -23329,10 +21459,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Receive Bandwidth",
+ "title": "Memory",
"tooltip": {
"shared": true,
- "sort": 2,
+ "sort": 0,
"value_type": "individual"
},
"type": "graph",
@@ -23347,19 +21477,19 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "bytes",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "Bps",
+ "format": "bytes",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
}
]
@@ -23372,20 +21502,19 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 2,
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 20
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 77
},
- "id": 8,
+ "id": 24,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
- "hideEmpty": true,
- "hideZero": true,
"max": false,
"min": false,
"rightSide": false,
@@ -23395,13 +21524,11 @@ items:
"values": false
},
"lines": true,
- "linewidth": 2,
+ "linewidth": 1,
"links": [
],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
@@ -23411,17 +21538,15 @@ items:
],
"spaceLength": 10,
- "span": 12,
- "stack": true,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{pod}}",
- "refId": "A",
- "step": 10
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -23429,5660 +21554,140 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Transmit Bandwidth",
+ "title": "CPU usage",
"tooltip": {
"shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "collapse": true,
- "collapsed": true,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 29
- },
- "id": 9,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 10,
- "w": 12,
- "x": 0,
- "y": 30
- },
- "id": 10,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{pod}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Received Packets",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 10,
- "w": 12,
- "x": 12,
- "y": 30
- },
- "id": 11,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{pod}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Transmitted Packets",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Packets",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": true,
- "collapsed": true,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 30
- },
- "id": 12,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 10,
- "w": 12,
- "x": 0,
- "y": 40
- },
- "id": 13,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{pod}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Received Packets Dropped",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 10,
- "w": 12,
- "x": 12,
- "y": 40
- },
- "id": 14,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{pod}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Transmitted Packets Dropped",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Errors",
- "titleSize": "h6",
- "type": "row"
- }
- ],
- "refresh": "10s",
- "rows": [
-
- ],
- "schemaVersion": 18,
- "style": "dark",
- "tags": [
- "kubernetes-mixin"
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": ".+",
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
- "current": {
- "text": "kube-system",
- "value": "kube-system"
- },
- "datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total, namespace)",
- "hide": 0,
- "includeAll": true,
- "label": null,
- "multi": false,
- "name": "namespace",
- "options": [
-
- ],
- "query": "label_values(container_network_receive_packets_total, namespace)",
- "refresh": 1,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
- "current": {
- "text": "5m",
- "value": "5m"
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "resolution",
- "options": [
- {
- "selected": false,
- "text": "30s",
- "value": "30s"
- },
- {
- "selected": true,
- "text": "5m",
- "value": "5m"
- },
- {
- "selected": false,
- "text": "1h",
- "value": "1h"
- }
- ],
- "query": "30s,5m,1h",
- "refresh": 2,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "interval",
- "useTags": false
- },
- {
- "allValue": null,
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
- "current": {
- "text": "5m",
- "value": "5m"
- },
- "datasource": "$datasource",
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "interval",
- "options": [
- {
- "selected": true,
- "text": "4h",
- "value": "4h"
- }
- ],
- "query": "4h",
- "refresh": 2,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "interval",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "Kubernetes / Networking / Namespace (Pods)",
- "uid": "8b7a8b326d7a6f1f04244066368c67af",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-namespace-by-pod
- namespace: monitoring
-- apiVersion: v1
- data:
- namespace-by-workload.json: |-
- {
- "__inputs": [
-
- ],
- "__requires": [
-
- ],
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- }
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "id": null,
- "links": [
-
- ],
- "panels": [
- {
- "collapse": false,
- "collapsed": false,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 0
- },
- "id": 2,
- "panels": [
-
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Current Bandwidth",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": true,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 1
- },
- "id": 3,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": false,
- "linewidth": 1,
- "links": [
-
- ],
- "minSpan": 24,
- "nullPointMode": "null",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 24,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{ workload }}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Current Rate of Bytes Received",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "series",
- "name": null,
- "show": false,
- "values": [
- "current"
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": true,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 1
- },
- "id": 4,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": false,
- "linewidth": 1,
- "links": [
-
- ],
- "minSpan": 24,
- "nullPointMode": "null",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 24,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{ workload }}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Current Rate of Bytes Transmitted",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "series",
- "name": null,
- "show": false,
- "values": [
- "current"
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "columns": [
- {
- "text": "Time",
- "value": "Time"
- },
- {
- "text": "Value #A",
- "value": "Value #A"
- },
- {
- "text": "Value #B",
- "value": "Value #B"
- },
- {
- "text": "Value #C",
- "value": "Value #C"
- },
- {
- "text": "Value #D",
- "value": "Value #D"
- },
- {
- "text": "Value #E",
- "value": "Value #E"
- },
- {
- "text": "Value #F",
- "value": "Value #F"
- },
- {
- "text": "Value #G",
- "value": "Value #G"
- },
- {
- "text": "Value #H",
- "value": "Value #H"
- },
- {
- "text": "workload",
- "value": "workload"
- }
- ],
- "datasource": "$datasource",
- "fill": 1,
- "fontSize": "90%",
- "gridPos": {
- "h": 9,
- "w": 24,
- "x": 0,
- "y": 10
- },
- "id": 5,
- "lines": true,
- "linewidth": 1,
- "minSpan": 24,
- "nullPointMode": "null as zero",
- "renderer": "flot",
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 0,
- "desc": false
- },
- "spaceLength": 10,
- "span": 24,
- "styles": [
- {
- "alias": "Time",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Time",
- "thresholds": [
-
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "Current Bandwidth Received",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #A",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "Bps"
- },
- {
- "alias": "Current Bandwidth Transmitted",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #B",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "Bps"
- },
- {
- "alias": "Average Bandwidth Received",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #C",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "Bps"
- },
- {
- "alias": "Average Bandwidth Transmitted",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #D",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "Bps"
- },
- {
- "alias": "Rate of Received Packets",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #E",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Transmitted Packets",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #F",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Received Packets Dropped",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #G",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Rate of Transmitted Packets Dropped",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #H",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "pps"
- },
- {
- "alias": "Workload",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": true,
- "linkTooltip": "Drill down",
- "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell",
- "pattern": "workload",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- }
- ],
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
- },
- {
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "C",
- "step": 10
- },
- {
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "D",
- "step": 10
- },
- {
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "E",
- "step": 10
- },
- {
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "F",
- "step": 10
- },
- {
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "G",
- "step": 10
- },
- {
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "H",
- "step": 10
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Current Status",
- "type": "table"
- },
- {
- "collapse": true,
- "collapsed": true,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 19
- },
- "id": 6,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": true,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 20
- },
- "id": 7,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": false,
- "linewidth": 1,
- "links": [
-
- ],
- "minSpan": 24,
- "nullPointMode": "null",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 24,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{ workload }}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Average Rate of Bytes Received",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "series",
- "name": null,
- "show": false,
- "values": [
- "current"
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": true,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 20
- },
- "id": 8,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": false,
- "linewidth": 1,
- "links": [
-
- ],
- "minSpan": 24,
- "nullPointMode": "null",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 24,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{ workload }}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Average Rate of Bytes Transmitted",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "series",
- "name": null,
- "show": false,
- "values": [
- "current"
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Average Bandwidth",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 29
- },
- "id": 9,
- "panels": [
-
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Bandwidth HIstory",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 38
- },
- "id": 10,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{workload}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Receive Bandwidth",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 38
- },
- "id": 11,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{workload}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Transmit Bandwidth",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "collapse": true,
- "collapsed": true,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 39
- },
- "id": 12,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 40
- },
- "id": 13,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{workload}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Received Packets",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 40
- },
- "id": 14,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{workload}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Transmitted Packets",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Packets",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": true,
- "collapsed": true,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 40
- },
- "id": 15,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 41
- },
- "id": 16,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{workload}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Received Packets Dropped",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 2,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 41
- },
- "id": 17,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "minSpan": 12,
- "nullPointMode": "connected",
- "paceLength": 10,
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "{{workload}}",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate of Transmitted Packets Dropped",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Errors",
- "titleSize": "h6",
- "type": "row"
- }
- ],
- "refresh": "10s",
- "rows": [
-
- ],
- "schemaVersion": 18,
- "style": "dark",
- "tags": [
- "kubernetes-mixin"
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
- "current": {
- "text": "kube-system",
- "value": "kube-system"
- },
- "datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total, namespace)",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "namespace",
- "options": [
-
- ],
- "query": "label_values(container_network_receive_packets_total, namespace)",
- "refresh": 1,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
- "current": {
- "text": "deployment",
- "value": "deployment"
- },
- "datasource": "$datasource",
- "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "type",
- "options": [
-
- ],
- "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
- "refresh": 1,
- "regex": "",
- "skipUrlSync": false,
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
- "current": {
- "text": "5m",
- "value": "5m"
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "resolution",
- "options": [
- {
- "selected": false,
- "text": "30s",
- "value": "30s"
- },
- {
- "selected": true,
- "text": "5m",
- "value": "5m"
- },
- {
- "selected": false,
- "text": "1h",
- "value": "1h"
- }
- ],
- "query": "30s,5m,1h",
- "refresh": 2,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "interval",
- "useTags": false
- },
- {
- "allValue": null,
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
- "current": {
- "text": "5m",
- "value": "5m"
- },
- "datasource": "$datasource",
- "hide": 2,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "interval",
- "options": [
- {
- "selected": true,
- "text": "4h",
- "value": "4h"
- }
- ],
- "query": "4h",
- "refresh": 2,
- "regex": "",
- "skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "interval",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "Kubernetes / Networking / Namespace (Workload)",
- "uid": "bbb2a765a623ae38130206c7d94a160f",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-namespace-by-workload
- namespace: monitoring
-- apiVersion: v1
- data:
- node-cluster-rsrc-use.json: |-
- {
- "annotations": {
- "list": [
-
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 1,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "(\n instance:node_cpu_utilisation:rate1m{job=\"node-exporter\"}\n*\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Saturation (load1 per CPU)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "CPU",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 3,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 4,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Saturation (Major Page Faults)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "rps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Memory",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/ Receive/",
- "stack": "A"
- },
- {
- "alias": "/ Transmit/",
- "stack": "B",
- "transform": "negative-Y"
- }
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} Receive",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- },
- {
- "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} Transmit",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Net Utilisation (Bytes Receive/Transmit)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 6,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/ Receive/",
- "stack": "A"
- },
- {
- "alias": "/ Transmit/",
- "stack": "B",
- "transform": "negative-Y"
- }
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} Receive",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- },
- {
- "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} Transmit",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Net Saturation (Drops Receive/Transmit)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "rps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 7,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{device}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk IO Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 8,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{device}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk IO Saturation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Disk IO",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 9,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum without (device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"} - node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n) \n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"})))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk Space Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Disk Space",
- "titleSize": "h6"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
-
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "USE Method / Cluster",
- "uid": "3e97d1d02672cdd0861f4c97c64f89b2",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-node-cluster-rsrc-use
- namespace: monitoring
-- apiVersion: v1
- data:
- node-rsrc-use.json: |-
- {
- "annotations": {
- "list": [
-
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 1,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_cpu_utilisation:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Utilisation",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Saturation",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Saturation (Load1 per CPU)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "CPU",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 3,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Memory",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 4,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Major page faults",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Saturation (Major Page Faults)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Memory",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/Receive/",
- "stack": "A"
- },
- {
- "alias": "/Transmit/",
- "stack": "B",
- "transform": "negative-Y"
- }
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Receive",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Transmit",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Net Utilisation (Bytes Receive/Transmit)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 6,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/Receive/",
- "stack": "A"
- },
- {
- "alias": "/Transmit/",
- "stack": "B",
- "transform": "negative-Y"
- }
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Receive drops",
- "legendLink": null,
- "step": 10
- },
- {
- "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Transmit drops",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Net Saturation (Drops Receive/Transmit)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "rps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Net",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 7,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{device}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk IO Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 8,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{device}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk IO Saturation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Disk IO",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 9,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "1 -\n(\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n/\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{device}}",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk Space Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Disk Space",
- "titleSize": "h6"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
-
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
- "text": "prod",
- "value": "prod"
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": "instance",
- "multi": false,
- "name": "instance",
- "options": [
-
- ],
- "query": "label_values(up{job=\"node-exporter\"}, instance)",
- "refresh": 1,
- "regex": "",
- "sort": 2,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "USE Method / Node",
- "uid": "fac67cfbe174d3ef53eb473d73d9212f",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-node-rsrc-use
- namespace: monitoring
-- apiVersion: v1
- data:
- nodes.json: |-
- {
- "__inputs": [
-
- ],
- "__requires": [
-
- ],
- "annotations": {
- "list": [
-
- ]
- },
- "editable": false,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "id": null,
- "links": [
-
- ],
- "refresh": "",
- "rows": [
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 2,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n",
- "format": "time_series",
- "interval": "1m",
- "intervalFactor": 5,
- "legendFormat": "{{cpu}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 0,
- "gridPos": {
-
- },
- "id": 3,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "1m load average",
- "refId": "A"
- },
- {
- "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "5m load average",
- "refId": "B"
- },
- {
- "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "15m load average",
- "refId": "C"
- },
- {
- "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "logical cores",
- "refId": "D"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Load Average",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 4,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 9,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "memory used",
- "refId": "A"
- },
- {
- "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "memory buffers",
- "refId": "B"
- },
- {
- "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "memory cached",
- "refId": "C"
- },
- {
- "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "memory free",
- "refId": "D"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "$datasource",
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 5,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "80, 90",
- "title": "Memory Usage",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 0,
- "gridPos": {
-
- },
- "id": 6,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
- {
- "alias": "/ read| written/",
- "yaxis": 1
- },
- {
- "alias": "/ io time/",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
- "format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
- "legendFormat": "{{device}} read",
- "refId": "A"
- },
- {
- "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
- "format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
- "legendFormat": "{{device}} written",
- "refId": "B"
- },
- {
- "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
- "format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
- "legendFormat": "{{device}} io time",
- "refId": "C"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk I/O",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 7,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
- {
- "alias": "used",
- "color": "#E0B400"
- },
- {
- "alias": "available",
- "color": "#73BF69"
- }
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "used",
- "refId": "A"
- },
- {
- "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "available",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk Space Usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 0,
- "gridPos": {
-
- },
- "id": 8,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
- "format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
- "legendFormat": "{{device}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Network Received",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 0,
- "gridPos": {
-
- },
- "id": 9,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
- "format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
- "legendFormat": "{{device}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Network Transmitted",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
-
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "Prometheus",
- "value": "Prometheus"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "instance",
- "options": [
-
- ],
- "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)",
- "refresh": 2,
- "regex": "",
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "Nodes",
- "uid": "fa49a4706d07a042595b664c87fb33ea",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-nodes
- namespace: monitoring
-- apiVersion: v1
- data:
- persistentvolumesusage.json: |-
- {
- "__inputs": [
-
- ],
- "__requires": [
-
- ],
- "annotations": {
- "list": [
-
- ]
- },
- "editable": false,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "id": null,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 2,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": true,
- "min": true,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 9,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "Used Space",
- "refId": "A"
- },
- {
- "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "Free Space",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Volume Space Usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "$datasource",
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 3,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "80, 90",
- "title": "Volume Space Usage",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 4,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": true,
- "min": true,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 9,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "Used inodes",
- "refId": "A"
- },
- {
- "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": " Free inodes",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Volume inodes Usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "$datasource",
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
- },
- "id": 5,
- "interval": null,
- "links": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 77
+ },
+ "id": 25,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "80, 90",
- "title": "Volume inodes Usage",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- }
],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
"repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Goroutines",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
+ ],
+ "refresh": "10s",
+ "rows": [
+
],
"schemaVersion": 14,
"style": "dark",
@@ -29097,7 +21702,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -29121,33 +21726,7 @@ items:
"options": [
],
- "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": "Namespace",
- "multi": false,
- "name": "namespace",
- "options": [
-
- ],
- "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)",
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -29166,14 +21745,14 @@ items:
},
"datasource": "$datasource",
"hide": 0,
- "includeAll": false,
- "label": "PersistentVolumeClaim",
+ "includeAll": true,
+ "label": "instance",
"multi": false,
- "name": "volume",
+ "name": "instance",
"options": [
],
- "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)",
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -29188,7 +21767,7 @@ items:
]
},
"time": {
- "from": "now-7d",
+ "from": "now-1h",
"to": "now"
},
"timepicker": {
@@ -29217,17 +21796,22 @@ items:
]
},
"timezone": "UTC",
- "title": "Kubernetes / Persistent Volumes",
- "uid": "919b92a8e8041bd567af9edab12c840c",
+ "title": "Kubernetes / Kubelet",
+ "uid": "3138fa155d5915769fbded898ac09fd9",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-persistentvolumesusage
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-kubelet
namespace: monitoring
- apiVersion: v1
data:
- pod-total.json: |-
+ namespace-by-pod.json: |-
{
"__inputs": [
@@ -29332,7 +21916,7 @@ items:
"defaults": {
"max": 10000000000,
"min": 0,
- "title": "$namespace: $pod",
+ "title": "$namespace",
"unit": "Bps"
},
"mappings": [
@@ -29382,7 +21966,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))",
"format": "time_series",
"instant": null,
"intervalFactor": 1,
@@ -29459,7 +22043,7 @@ items:
"defaults": {
"max": 10000000000,
"min": 0,
- "title": "$namespace: $pod",
+ "title": "$namespace",
"unit": "Bps"
},
"mappings": [
@@ -29509,7 +22093,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))",
"format": "time_series",
"instant": null,
"intervalFactor": 1,
@@ -29532,6 +22116,274 @@ items:
],
"valueName": "current"
},
+ {
+ "columns": [
+ {
+ "text": "Time",
+ "value": "Time"
+ },
+ {
+ "text": "Value #A",
+ "value": "Value #A"
+ },
+ {
+ "text": "Value #B",
+ "value": "Value #B"
+ },
+ {
+ "text": "Value #C",
+ "value": "Value #C"
+ },
+ {
+ "text": "Value #D",
+ "value": "Value #D"
+ },
+ {
+ "text": "Value #E",
+ "value": "Value #E"
+ },
+ {
+ "text": "Value #F",
+ "value": "Value #F"
+ },
+ {
+ "text": "pod",
+ "value": "pod"
+ }
+ ],
+ "datasource": "$datasource",
+ "fill": 1,
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 9,
+ "w": 24,
+ "x": 0,
+ "y": 10
+ },
+ "id": 5,
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "minSpan": 24,
+ "nullPointMode": "null as zero",
+ "renderer": "flot",
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": false
+ },
+ "spaceLength": 10,
+ "span": 24,
+ "styles": [
+ {
+ "alias": "Time",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Time",
+ "thresholds": [
+
+ ],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "Bandwidth Received",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Bandwidth Transmitted",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Rate of Received Packets",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Transmitted Packets",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Received Packets Dropped",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Rate of Transmitted Packets Dropped",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
+ {
+ "alias": "Pod",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTooltip": "Drill down",
+ "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell",
+ "pattern": "pod",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Status",
+ "type": "table"
+ },
{
"collapse": false,
"collapsed": false,
@@ -29539,9 +22391,9 @@ items:
"h": 1,
"w": 24,
"x": 0,
- "y": 10
+ "y": 19
},
- "id": 5,
+ "id": 6,
"panels": [
],
@@ -29562,13 +22414,14 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
- "y": 11
+ "y": 20
},
- "id": 6,
+ "id": 7,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -29605,7 +22458,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -29662,13 +22515,14 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
- "y": 11
+ "y": 20
},
- "id": 7,
+ "id": 8,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -29705,7 +22559,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -29760,9 +22614,9 @@ items:
"h": 1,
"w": 24,
"x": 0,
- "y": 20
+ "y": 29
},
- "id": 8,
+ "id": 9,
"panels": [
{
"aliasColors": {
@@ -29773,13 +22627,14 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
- "y": 21
+ "y": 30
},
- "id": 9,
+ "id": 10,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -29816,7 +22671,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -29873,13 +22728,14 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
- "y": 21
+ "y": 30
},
- "id": 10,
+ "id": 11,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -29916,7 +22772,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -29980,9 +22836,9 @@ items:
"h": 1,
"w": 24,
"x": 0,
- "y": 21
+ "y": 30
},
- "id": 11,
+ "id": 12,
"panels": [
{
"aliasColors": {
@@ -29993,13 +22849,14 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
- "y": 32
+ "y": 40
},
- "id": 12,
+ "id": 13,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -30036,7 +22893,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -30093,13 +22950,14 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
- "y": 32
+ "y": 40
},
- "id": 13,
+ "id": 14,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -30136,7 +22994,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -30211,7 +23069,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -30222,29 +23080,23 @@ items:
"type": "datasource"
},
{
- "allValue": ".+",
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
+ "allValue": null,
"current": {
- "text": "kube-system",
- "value": "kube-system"
+
},
"datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total, namespace)",
- "hide": 0,
- "includeAll": true,
+ "hide": 2,
+ "includeAll": false,
"label": null,
"multi": false,
- "name": "namespace",
+ "name": "cluster",
"options": [
],
- "query": "label_values(container_network_receive_packets_total, namespace)",
- "refresh": 1,
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
"regex": "",
- "skipUrlSync": false,
- "sort": 1,
+ "sort": 0,
"tagValuesQuery": "",
"tags": [
@@ -30259,21 +23111,21 @@ items:
"auto_count": 30,
"auto_min": "10s",
"current": {
- "text": "",
- "value": ""
+ "text": "kube-system",
+ "value": "kube-system"
},
"datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)",
+ "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"hide": 0,
- "includeAll": false,
+ "includeAll": true,
"label": null,
"multi": false,
- "name": "pod",
+ "name": "namespace",
"options": [
],
- "query": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)",
- "refresh": 1,
+ "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -30397,18 +23249,29 @@ items:
]
},
"timezone": "UTC",
- "title": "Kubernetes / Networking / Pod",
- "uid": "7a18067ce943a40ae25454675c19ff5c",
+ "title": "Kubernetes / Networking / Namespace (Pods)",
+ "uid": "8b7a8b326d7a6f1f04244066368c67af",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-pod-total
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-namespace-by-pod
namespace: monitoring
- apiVersion: v1
data:
- prometheus-dashboard.json: |-
+ namespace-by-workload.json: |-
{
+ "__inputs": [
+
+ ],
+ "__requires": [
+
+ ],
"annotations": {
"list": [
{
@@ -30419,607 +23282,922 @@ items:
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
- },
- {
- "datasource": "$datasource",
- "enable": true,
- "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)",
- "hide": false,
- "iconColor": "rgb(250, 44, 18)",
- "limit": 100,
- "name": "downage",
- "showIn": 0,
- "step": "30s",
- "tagKeys": "instance",
- "textFormat": "prometheus down",
- "titleFormat": "Downage",
- "type": "alert"
- },
- {
- "datasource": "$datasource",
- "enable": true,
- "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)",
- "hide": false,
- "iconColor": "#fceaca",
- "limit": 100,
- "name": "Reload",
- "showIn": 0,
- "step": "5m",
- "tagKeys": "instance",
- "tags": [
-
- ],
- "titleFormat": "Reload",
- "type": "tags"
}
]
},
- "description": "Dashboard for monitoring of Prometheus v2.x.x",
"editable": true,
- "gnetId": 3681,
- "graphTooltip": 1,
- "id": 4,
- "iteration": 1596721016726,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
"links": [
- {
- "icon": "info",
- "tags": [
-
- ],
- "targetBlank": true,
- "title": "Dashboard's Github ",
- "tooltip": "Github repo of this dashboard",
- "type": "link",
- "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard"
- },
- {
- "icon": "doc",
- "tags": [
- ],
- "targetBlank": true,
- "title": "Prometheus Docs",
- "tooltip": "",
- "type": "link",
- "url": "http://prometheus.io/docs/introduction/overview/"
- }
],
"panels": [
{
+ "collapse": false,
"collapsed": false,
- "datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
- "id": 55,
+ "id": 2,
"panels": [
],
"repeat": null,
- "title": "Header instance info",
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Current Bandwidth",
+ "titleSize": "h6",
"type": "row"
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#bf1b00"
- ],
- "datasource": "$datasource",
- "decimals": 1,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
+ "aliasColors": {
- ]
- },
- "format": "s",
- "gauge": {
- "maxValue": 1000000,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
},
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
"gridPos": {
- "h": 5,
- "w": 4,
+ "h": 9,
+ "w": 12,
"x": 0,
"y": 1
},
- "id": 41,
- "interval": null,
+ "id": 3,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": false,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
+ "minSpan": 24,
+ "nullPointMode": "null",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 24,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
{
- "name": "value to text",
- "value": 1
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ workload }}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Rate of Bytes Received",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "series",
+ "name": null,
+ "show": false,
+ "values": [
+ "current"
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
},
{
- "name": "range to text",
- "value": 2
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
}
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 1
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": false,
+ "linewidth": 1,
+ "links": [
+
],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "minSpan": 24,
+ "nullPointMode": "null",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 24,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
{
- "from": "null",
- "text": "N/A",
- "to": "null"
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ workload }}",
+ "refId": "A",
+ "step": 10
}
],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Rate of Bytes Transmitted",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
},
- "tableColumn": "",
- "targets": [
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "series",
+ "name": null,
+ "show": false,
+ "values": [
+ "current"
+ ]
+ },
+ "yaxes": [
{
- "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})",
- "format": "time_series",
- "instant": false,
- "intervalFactor": 2,
- "refId": "A"
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "columns": [
+ {
+ "text": "Time",
+ "value": "Time"
+ },
+ {
+ "text": "Value #A",
+ "value": "Value #A"
+ },
+ {
+ "text": "Value #B",
+ "value": "Value #B"
+ },
+ {
+ "text": "Value #C",
+ "value": "Value #C"
+ },
+ {
+ "text": "Value #D",
+ "value": "Value #D"
+ },
+ {
+ "text": "Value #E",
+ "value": "Value #E"
+ },
+ {
+ "text": "Value #F",
+ "value": "Value #F"
+ },
+ {
+ "text": "Value #G",
+ "value": "Value #G"
+ },
+ {
+ "text": "Value #H",
+ "value": "Value #H"
+ },
+ {
+ "text": "workload",
+ "value": "workload"
}
],
- "thresholds": "",
- "title": "Uptime",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "datasource": "$datasource",
+ "fill": 1,
+ "fontSize": "90%",
+ "gridPos": {
+ "h": 9,
+ "w": 24,
+ "x": 0,
+ "y": 10
+ },
+ "id": 5,
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "minSpan": 24,
+ "nullPointMode": "null as zero",
+ "renderer": "flot",
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": false
+ },
+ "spaceLength": 10,
+ "span": 24,
+ "styles": [
+ {
+ "alias": "Time",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Time",
+ "thresholds": [
+
+ ],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "Current Bandwidth Received",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Current Bandwidth Transmitted",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Average Bandwidth Received",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Average Bandwidth Transmitted",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": true,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#bf1b00"
- ],
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "alias": "Rate of Received Packets",
+ "colorMode": null,
+ "colors": [
- }
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
},
- "overrides": [
+ {
+ "alias": "Rate of Transmitted Packets",
+ "colorMode": null,
+ "colors": [
- ]
- },
- "format": "short",
- "gauge": {
- "maxValue": 1000000,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 8,
- "x": 4,
- "y": 1
- },
- "id": 42,
- "interval": null,
- "links": [
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
- ],
- "mappingType": 1,
- "mappingTypes": [
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
{
- "name": "value to text",
- "value": 1
+ "alias": "Rate of Received Packets Dropped",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #G",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "alias": "Rate of Transmitted Packets Dropped",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #H",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "pps"
+ },
{
- "from": "null",
- "text": "N/A",
- "to": "null"
+ "alias": "Workload",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTooltip": "Drill down",
+ "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell",
+ "pattern": "workload",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
}
],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "prometheus_tsdb_head_series{instance=\"localhost:9090\", job=\"prometheus\"}",
"targets": [
{
- "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}",
- "format": "time_series",
- "instant": false,
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "500000,800000,1000000",
- "title": "Total count of time series",
- "type": "singlestat",
- "valueFontSize": "150%",
- "valueMaps": [
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
},
- "overrides": [
-
- ]
- },
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 3,
- "x": 12,
- "y": 1
- },
- "id": 48,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
{
- "name": "value to text",
- "value": 1
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "version",
- "targets": [
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
{
- "expr": "prometheus_build_info{instance=\"$instance\"}",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Version",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
+ },
{
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "G",
+ "step": 10
+ },
+ {
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "H",
+ "step": 10
}
],
- "valueName": "first"
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Status",
+ "type": "table"
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "decimals": 2,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "ms",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
+ "collapse": true,
+ "collapsed": true,
"gridPos": {
- "h": 5,
- "w": 4,
- "x": 15,
- "y": 1
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 19
},
- "id": 49,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
+ "id": 6,
+ "panels": [
{
- "name": "value to text",
- "value": 1
+ "aliasColors": {
+
+ },
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 20
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": false,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "minSpan": 24,
+ "nullPointMode": "null",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 24,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ workload }}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Average Rate of Bytes Received",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "series",
+ "name": null,
+ "show": false,
+ "values": [
+ "current"
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "{instance=\"localhost:9090\", job=\"prometheus\"}",
- "targets": [
- {
- "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Actual head block length",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "aliasColors": {
+
+ },
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 20
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": false,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "minSpan": 24,
+ "nullPointMode": "null",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 24,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ workload }}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Average Rate of Bytes Transmitted",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "series",
+ "name": null,
+ "show": false,
+ "values": [
+ "current"
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
}
],
- "valueName": "current"
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Average Bandwidth",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "content": "
",
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
+ "collapse": false,
+ "collapsed": false,
"gridPos": {
- "h": 5,
- "w": 2,
- "x": 19,
- "y": 1
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 29
},
- "height": "",
- "id": 50,
- "links": [
+ "id": 9,
+ "panels": [
],
- "mode": "html",
- "options": {
- "content": "
",
- "mode": "html"
- },
- "pluginVersion": "7.1.0",
- "title": "",
- "transparent": true,
- "type": "text"
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Bandwidth History",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": true,
- "colors": [
- "#e6522c",
- "rgba(237, 129, 40, 0.89)",
- "#299c46"
- ],
- "datasource": "$datasource",
- "decimals": 1,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
+ "aliasColors": {
- ]
- },
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
"gridPos": {
- "h": 5,
- "w": 3,
- "x": 21,
- "y": 1
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 38
},
- "id": 52,
- "interval": null,
+ "id": 10,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
+ "minSpan": 12,
"nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
"targets": [
{
- "expr": "2",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
- "intervalFactor": 2,
- "refId": "A"
+ "intervalFactor": 1,
+ "legendFormat": "{{workload}}",
+ "refId": "A",
+ "step": 10
}
],
- "thresholds": "10,20",
- "title": "",
- "transparent": true,
- "type": "singlestat",
- "valueFontSize": "200%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
+ "thresholds": [
+
],
- "valueName": "avg"
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 6
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Receive Bandwidth",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
},
- "id": 56,
- "panels": [
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ],
- "repeat": null,
- "title": "Main info",
- "type": "row"
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
},
{
"aliasColors": {
@@ -31029,76 +24207,66 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
+ "fill": 2,
"fillGradient": 0,
"gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 7
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 38
},
- "hiddenSeries": false,
- "id": 15,
+ "id": 11,
"legend": {
- "avg": true,
+ "alignAsTable": false,
+ "avg": false,
"current": false,
+ "hideEmpty": true,
+ "hideZero": true,
"max": false,
"min": false,
- "show": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 2,
"links": [
],
- "nullPointMode": "null",
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
"percentage": false,
- "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
+ "span": 12,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "max duration for {{slice}}",
- "metric": "prometheus_local_storage_rushed_mode",
+ "legendFormat": "{{workload}}",
"refId": "A",
- "step": 900
+ "step": 10
}
],
"thresholds": [
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Query elapsed time",
+ "title": "Transmit Bandwidth",
"tooltip": {
- "msResolution": false,
"shared": true,
"sort": 2,
"value_type": "individual"
@@ -31115,3799 +24283,5214 @@ items:
},
"yaxes": [
{
- "format": "s",
- "label": "",
+ "format": "Bps",
+ "label": null,
"logBase": 1,
"max": null,
- "min": "0",
+ "min": 0,
"show": true
},
{
- "format": "short",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
}
+ ]
+ },
+ {
+ "collapse": true,
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 39
+ },
+ "id": 12,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 40
+ },
+ "id": 13,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+
+ ],
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{workload}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rate of Received Packets",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 40
+ },
+ "id": 14,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+
+ ],
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{workload}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rate of Transmitted Packets",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Packets",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
+ "collapse": true,
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 40
},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "id": 15,
+ "panels": [
+ {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 41
+ },
+ "id": 16,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 7
- },
- "hiddenSeries": false,
- "id": 17,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{workload}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "created on {{ instance }}",
- "metric": "prometheus_local_storage_maintain_series_duration_seconds_count",
- "refId": "A",
- "step": 1800
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rate of Received Packets Dropped",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
},
{
- "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "removed on {{ instance }}",
- "refId": "B"
- }
- ],
- "thresholds": [
+ "aliasColors": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 41
+ },
+ "id": 17,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
- ],
- "timeShift": null,
- "title": "Head series created/deleted",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{workload}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rate of Transmitted Packets Dropped",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Errors",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "refresh": "10s",
+ "rows": [
- }
+ ],
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [
+ "kubernetes-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
},
- "overrides": [
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 7
- },
- "hiddenSeries": false,
- "id": 13,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
},
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ {
+ "allValue": null,
+ "current": {
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "exceeded_sample_limit on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "duplicate_timestamp on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "B",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "out_of_bounds on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "C",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "out_of_order on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "D",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "rule_evaluation_failure on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "G",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "tsdb_compactions_failed on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "K",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "tsdb_reloads_failures on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "L",
- "step": 1800
},
- {
- "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "head_series_not_found on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "N",
- "step": 1800
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "kube-system",
+ "value": "kube-system"
},
- {
- "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "evaluator_iterations_missed on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "O",
- "step": 1800
+ "datasource": "$datasource",
+ "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "namespace",
+ "options": [
+
+ ],
+ "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "deployment",
+ "value": "deployment"
},
- {
- "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "evaluator_iterations_skipped on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "P",
- "step": 1800
- }
- ],
- "thresholds": [
+ "datasource": "$datasource",
+ "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "type",
+ "options": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
- ],
- "timeShift": null,
- "title": "Prometheus errors",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
},
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ {
+ "allValue": null,
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "5m",
+ "value": "5m"
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "resolution",
+ "options": [
+ {
+ "selected": false,
+ "text": "30s",
+ "value": "30s"
+ },
+ {
+ "selected": true,
+ "text": "5m",
+ "value": "5m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ }
+ ],
+ "query": "30s,5m,1h",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
- ]
+ ],
+ "tagsQuery": "",
+ "type": "interval",
+ "useTags": false
},
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
+ {
+ "allValue": null,
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "5m",
+ "value": "5m"
},
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "4h",
+ "value": "4h"
+ }
+ ],
+ "query": "4h",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "interval",
+ "useTags": false
}
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 14
- },
- "id": 57,
- "panels": [
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "UTC",
+ "title": "Kubernetes / Networking / Namespace (Workload)",
+ "uid": "bbb2a765a623ae38130206c7d94a160f",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-namespace-by-workload
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ node-cluster-rsrc-use.json: |-
+ {
+ "__inputs": [
- ],
- "repeat": null,
- "title": "Scrape & rule duration",
- "type": "row"
- },
- {
- "aliasColors": {
+ ],
+ "__requires": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "description": "",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ ],
+ "annotations": {
+ "list": [
- }
- },
- "overrides": [
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "grid": {
+ ],
+ "refresh": "30s",
+ "rows": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 15
- },
- "hiddenSeries": false,
- "id": 25,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": true,
- "min": false,
- "show": false,
- "sort": "max",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval",
- "format": "time_series",
- "interval": "2m",
- "intervalFactor": 1,
- "legendFormat": "{{instance}}",
- "metric": "",
- "refId": "A",
- "step": 300
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "((\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n *\n instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}\n) != 0 )\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}))\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Scrape delay (counts with 1m scrape interval)",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
},
{
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 15
- },
- "hiddenSeries": false,
- "id": 14,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "Queue length",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Queue length",
- "metric": "prometheus_local_storage_indexing_queue_length",
- "refId": "B",
- "step": 1800
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Rule evaulation duration",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Saturation (Load1 per CPU)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU",
+ "titleSize": "h6",
+ "type": "row"
},
{
+ "collapse": false,
"collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 22
- },
- "id": 58,
"panels": [
+ {
+ "aliasColors": {
- ],
- "repeat": null,
- "title": "Requests & queries",
- "type": "row"
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- }
- },
- "overrides": [
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 0,
- "y": 23
- },
- "hiddenSeries": false,
- "id": 18,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
{
- "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ handler }} on {{ instance }}",
- "metric": "",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ "aliasColors": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "timeShift": null,
- "title": "Request count",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "yaxes": [
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Saturation (Major Page Faults)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "rds",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "rds",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Memory",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 6,
- "y": 23
- },
- "hiddenSeries": false,
- "id": 16,
- "legend": {
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/Receive/",
+ "stack": "A"
+ },
+ {
+ "alias": "/Transmit/",
+ "stack": "B",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Receive",
+ "refId": "A"
+ },
+ {
+ "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Transmit",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Utilisation (Bytes Receive/Transmit)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
{
- "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "{{ handler }} on {{ instance }}",
- "refId": "B"
- }
- ],
- "thresholds": [
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/ Receive/",
+ "stack": "A"
+ },
+ {
+ "alias": "/ Transmit/",
+ "stack": "B",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Receive",
+ "refId": "A"
+ },
+ {
+ "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Transmit",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Request duration per handler",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Saturation (Drops Receive/Transmit)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "µs",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Network",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 23
- },
- "hiddenSeries": false,
- "id": 19,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "{{ handler }} in {{ instance }}",
- "refId": "B"
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Request size by handler",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IO Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Allocated bytes": "#F9BA8F",
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max count collector": "#bf1b00",
- "Max count harvester": "#bf1b00",
- "Max to persist": "#3F6833",
- "RSS": "#890F02"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 23
- },
- "hiddenSeries": false,
- "id": 8,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/Max.*/",
- "fill": 0,
- "linewidth": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Current count ",
- "metric": "last",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Max count",
- "metric": "last",
- "refId": "B",
- "step": 1800
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Cont of concurent queries",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IO Saturation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 30
- },
- "id": 59,
- "panels": [
-
],
"repeat": null,
- "title": "Alerting",
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk IO",
+ "titleSize": "h6",
"type": "row"
},
{
- "aliasColors": {
- "Alert queue capacity on o collector": "#bf1b00",
- "Alert queue capacity on o harvester": "#bf1b00",
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 31
- },
- "hiddenSeries": false,
- "id": 20,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/.*capacity.*/",
- "fill": 0,
- "linewidth": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Alert queue capacity ",
- "metric": "prometheus_local_storage_checkpoint_last_size_bytes",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Alert queue size on ",
- "metric": "prometheus_local_storage_checkpoint_last_size_bytes",
- "refId": "B",
- "step": 1800
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"})))\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Alert queue size",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Space Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk Space",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "node-exporter-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
},
- "overrides": [
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 31
- },
- "hiddenSeries": false,
- "id": 21,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
},
- "lines": true,
- "linewidth": 1,
- "links": [
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "query": "label_values(node_time_seconds, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Checkpoint chunks written/s",
- "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Node Exporter / USE Method / Cluster",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-node-cluster-rsrc-use
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ node-rsrc-use.json: |-
+ {
+ "__inputs": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "__requires": [
- ],
- "timeShift": null,
- "title": "Count of discovered alertmanagers",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "annotations": {
+ "list": [
- ]
- },
- "yaxes": [
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [
+
+ ],
+ "refresh": "30s",
+ "rows": [
{
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 31
- },
- "hiddenSeries": false,
- "id": 39,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Utilisation",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "notifications_dropped on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "F",
- "step": 1800
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
},
{
- "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "rule_evaluation_failures on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ "aliasColors": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "timeShift": null,
- "title": "Alerting errors",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 38
- },
- "id": 60,
- "panels": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Saturation",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Saturation (Load1 per CPU)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
],
"repeat": null,
- "title": "Service discovery",
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU",
+ "titleSize": "h6",
"type": "row"
},
{
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 0,
- "y": 39
- },
- "hiddenSeries": false,
- "id": 43,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Consul target sync count",
- "refId": "A",
- "step": 240
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Utilisation",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Consul SD sync count",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 6,
- "y": 39
- },
- "hiddenSeries": false,
- "id": 44,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ "aliasColors": {
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Marathon target sync count",
- "refId": "A",
- "step": 240
- }
- ],
- "thresholds": [
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeShift": null,
- "title": "Marathon SD sync count",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Major page Faults",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Saturation (Major Page Faults)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "rds",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "rds",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Memory",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "aliasColors": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- }
- },
- "overrides": [
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 39
- },
- "hiddenSeries": false,
- "id": 45,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/Receive/",
+ "stack": "A"
+ },
+ {
+ "alias": "/Transmit/",
+ "stack": "B",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Receive",
+ "refId": "A"
+ },
+ {
+ "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Transmit",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Utilisation (Bytes Receive/Transmit)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
{
- "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\"}[$aggregation_interval])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Count of target synces",
- "refId": "A",
- "step": 240
- }
- ],
- "thresholds": [
+ "aliasColors": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "timeShift": null,
- "title": "Kubernetes SD sync count",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/ Receive/",
+ "stack": "A"
+ },
+ {
+ "alias": "/ Transmit/",
+ "stack": "B",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Receive",
+ "refId": "A"
+ },
+ {
+ "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Transmit",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Saturation (Drops Receive/Transmit)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Network",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 39
- },
- "hiddenSeries": false,
- "id": 46,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "exceeded_sample_limit on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "sd_file_read_error on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "E",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "sd_consul_rpc_failure on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "H",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "sd_marathon_refresh_failure on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "I",
- "step": 1800
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IO Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
},
{
- "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "sd_openstack_refresh_failure on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "J",
- "step": 1800
- }
- ],
- "thresholds": [
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "timeShift": null,
- "title": "Service discovery errors",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IO Saturation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk IO",
+ "titleSize": "h6",
+ "type": "row"
},
{
+ "collapse": false,
"collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 46
- },
- "id": 61,
"panels": [
+ {
+ "aliasColors": {
- ],
- "repeat": null,
- "title": "TSDB stats",
- "type": "row"
- },
- {
- "aliasColors": {
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- }
- },
- "overrides": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 0,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 36,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(1 -\n (\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n /\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n ) != 0\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Space Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ instance }}",
- "refId": "A"
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Reloaded block from disk",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk Space",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "node-exporter-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
- ]
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
},
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
},
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
- }
- },
- "overrides": [
+ ],
+ "query": "label_values(node_time_seconds, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 6,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
},
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Loaded data blocks",
- "metric": "prometheus_local_storage_memory_chunkdescs",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ {
+ "allValue": null,
+ "current": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "instance",
+ "options": [
- ],
- "timeShift": null,
- "title": "Loaded data blocks",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "query": "label_values(node_exporter_build_info{job=\"node-exporter\", cluster=\"$cluster\"}, instance)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Node Exporter / USE Method / Node",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-node-rsrc-use
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ nodes-darwin.json: |-
+ {
+ "__inputs": [
- }
- },
- "overrides": [
+ ],
+ "__requires": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 3,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "annotations": {
+ "list": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
+ ],
+ "refresh": "30s",
+ "rows": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
{
- "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Time series count",
- "metric": "prometheus_local_storage_memory_series",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ "aliasColors": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "timeShift": null,
- "title": "Time series total count",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n",
+ "format": "time_series",
+ "intervalFactor": 5,
+ "legendFormat": "{{cpu}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": 0,
+ "show": true
+ }
+ ]
},
- "overrides": [
+ {
+ "aliasColors": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 1,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "samples/s {{instance}}",
- "metric": "prometheus_local_storage_ingested_samples_total",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "1m load average",
+ "refId": "A"
+ },
+ {
+ "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "5m load average",
+ "refId": "B"
+ },
+ {
+ "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "15m load average",
+ "refId": "C"
+ },
+ {
+ "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "logical cores",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Samples Appended per second",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load Average",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": "",
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU",
+ "titleSize": "h6",
+ "type": "row"
},
{
+ "collapse": false,
"collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 54
- },
- "id": 62,
"panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 9,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Physical Memory",
+ "refId": "A"
+ },
+ {
+ "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Memory Used",
+ "refId": "B"
+ },
+ {
+ "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "App Memory",
+ "refId": "C"
+ },
+ {
+ "expr": "node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Wired Memory",
+ "refId": "D"
+ },
+ {
+ "expr": "node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Compressed",
+ "refId": "E"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "max": 100,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)"
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 80
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 90
+ }
+ ]
+ },
+ "unit": "percent"
+ }
+ },
+ "gridPos": {
+ },
+ "id": 5,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "(\n (\n avg(node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"}) -\n avg(node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"})\n ) /\n avg(node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"})\n)\n*\n100\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": ""
+ }
+ ],
+ "title": "Memory Usage",
+ "transparent": false,
+ "type": "gauge"
+ }
],
"repeat": null,
- "title": "Head block stats",
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Memory",
+ "titleSize": "h6",
"type": "row"
},
{
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833",
- "To persist": "#9AC48A"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 55
- },
- "hiddenSeries": false,
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/Max.*/",
- "fill": 0
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
{
- "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "Head chunk count",
- "metric": "prometheus_local_storage_memory_chunks",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ "aliasColors": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "timeShift": null,
- "title": "Head chunks count",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/ read| written/",
+ "yaxis": 1
+ },
+ {
+ "alias": "/ io time/",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} read",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} written",
+ "refId": "B"
+ },
+ {
+ "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} io time",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk I/O",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- }
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
},
- "overrides": [
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 55
- },
- "hiddenSeries": false,
- "id": 35,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "yellow",
+ "value": 0.8
+ },
+ {
+ "color": "red",
+ "value": 0.9
+ }
+ ]
+ },
+ "unit": "decbytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Mounted on"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 260
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Size"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 93
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Used"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 72
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Available"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 88
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Used, %"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "percentunit"
+ },
+ {
+ "id": "custom.displayMode",
+ "value": "gradient-gauge"
+ },
+ {
+ "id": "max",
+ "value": 1
+ },
+ {
+ "id": "min",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "id": 7,
+ "span": 6,
+ "targets": [
+ {
+ "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": ""
+ },
+ {
+ "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": ""
+ }
+ ],
+ "title": "Disk Space Usage",
+ "transformations": [
+ {
+ "id": "groupBy",
+ "options": {
+ "fields": {
+ "Value #A": {
+ "aggregations": [
+ "lastNotNull"
+ ],
+ "operation": "aggregate"
+ },
+ "Value #B": {
+ "aggregations": [
+ "lastNotNull"
+ ],
+ "operation": "aggregate"
+ },
+ "mountpoint": {
+ "aggregations": [
+
+ ],
+ "operation": "groupby"
+ }
+ }
+ }
+ },
+ {
+ "id": "merge",
+ "options": {
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ }
+ },
+ {
+ "id": "calculateField",
+ "options": {
+ "alias": "Used",
+ "binary": {
+ "left": "Value #A (lastNotNull)",
+ "operator": "-",
+ "reducer": "sum",
+ "right": "Value #B (lastNotNull)"
+ },
+ "mode": "binary",
+ "reduce": {
+ "reducer": "sum"
+ }
+ }
+ },
+ {
+ "id": "calculateField",
+ "options": {
+ "alias": "Used, %",
+ "binary": {
+ "left": "Used",
+ "operator": "/",
+ "reducer": "sum",
+ "right": "Value #A (lastNotNull)"
+ },
+ "mode": "binary",
+ "reduce": {
+ "reducer": "sum"
+ }
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "indexByName": {
- ],
- "timeShift": null,
- "title": "Length of head block",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ },
+ "renameByName": {
+ "Value #A (lastNotNull)": "Size",
+ "Value #B (lastNotNull)": "Available",
+ "mountpoint": "Mounted on"
+ }
+ }
+ },
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {
- ]
- },
- "yaxes": [
- {
- "format": "ms",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ },
+ "sort": [
+ {
+ "field": "Mounted on"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": false,
+ "type": "table"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Network received (bits/s)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 55
- },
- "hiddenSeries": false,
- "id": 4,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "created on {{ instance }}",
- "refId": "B"
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Received",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
},
{
- "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "deleted on {{ instance }}",
- "refId": "C"
- }
- ],
- "thresholds": [
+ "aliasColors": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Network transmitted (bits/s)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "timeShift": null,
- "title": "Head Chunks Created/Deleted per second",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 62
- },
- "id": 63,
- "panels": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Transmitted",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
],
"repeat": null,
- "title": "Data maintenance",
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Network",
+ "titleSize": "h6",
"type": "row"
- },
- {
- "aliasColors": {
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "node-exporter-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
+ {
+ "allValue": null,
+ "current": {
- }
},
- "overrides": [
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Instance",
+ "multi": false,
+ "name": "instance",
+ "options": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 0,
- "y": 63
- },
- "hiddenSeries": false,
- "id": 33,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "query": "label_values(node_uname_info{job=\"node-exporter\", sysname=\"Darwin\"}, instance)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Node Exporter / MacOS",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-nodes-darwin
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ nodes.json: |-
+ {
+ "__inputs": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ instance }}",
- "refId": "B"
- }
- ],
- "thresholds": [
+ ],
+ "__requires": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "annotations": {
+ "list": [
- ],
- "timeShift": null,
- "title": "Compaction duration",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
+ ],
+ "refresh": "30s",
+ "rows": [
{
- "aliasColors": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
- }
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n",
+ "format": "time_series",
+ "intervalFactor": 5,
+ "legendFormat": "{{cpu}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": 0,
+ "show": true
+ }
+ ]
},
- "overrides": [
+ {
+ "aliasColors": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 6,
- "y": 63
- },
- "hiddenSeries": false,
- "id": 34,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ quantile }} on {{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "1m load average",
+ "refId": "A"
+ },
+ {
+ "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "5m load average",
+ "refId": "B"
+ },
+ {
+ "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "15m load average",
+ "refId": "C"
+ },
+ {
+ "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "logical cores",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "Go Garbage collection duration",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load Average",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 63
- },
- "hiddenSeries": false,
- "id": 37,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ quantile }} on {{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "spaceLength": 10,
+ "span": 9,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "memory used",
+ "refId": "A"
+ },
+ {
+ "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "memory buffers",
+ "refId": "B"
+ },
+ {
+ "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "memory cached",
+ "refId": "C"
+ },
+ {
+ "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "memory free",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [
- ],
- "timeShift": null,
- "title": "WAL truncate duration seconds",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "max": 100,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)"
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 80
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 90
+ }
+ ]
+ },
+ "unit": "percent"
+ }
+ },
+ "gridPos": {
+
+ },
+ "id": 5,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}) /\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": ""
+ }
+ ],
+ "title": "Memory Usage",
+ "transparent": false,
+ "type": "gauge"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Memory",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "aliasColors": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
- }
- },
- "overrides": [
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 63
- },
- "hiddenSeries": false,
- "id": 38,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/ read| written/",
+ "yaxis": 1
+ },
+ {
+ "alias": "/ io time/",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} read",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} written",
+ "refId": "B"
+ },
+ {
+ "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} io time",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk I/O",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "yellow",
+ "value": 0.8
+ },
+ {
+ "color": "red",
+ "value": 0.9
+ }
+ ]
+ },
+ "unit": "decbytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Mounted on"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 260
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Size"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 93
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Used"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 72
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Available"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 88
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Used, %"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "percentunit"
+ },
+ {
+ "id": "custom.displayMode",
+ "value": "gradient-gauge"
+ },
+ {
+ "id": "max",
+ "value": 1
+ },
+ {
+ "id": "min",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ quantile }} {{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ },
+ "id": 7,
+ "span": 6,
+ "targets": [
+ {
+ "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": ""
+ },
+ {
+ "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": ""
+ }
+ ],
+ "title": "Disk Space Usage",
+ "transformations": [
+ {
+ "id": "groupBy",
+ "options": {
+ "fields": {
+ "Value #A": {
+ "aggregations": [
+ "lastNotNull"
+ ],
+ "operation": "aggregate"
+ },
+ "Value #B": {
+ "aggregations": [
+ "lastNotNull"
+ ],
+ "operation": "aggregate"
+ },
+ "mountpoint": {
+ "aggregations": [
+
+ ],
+ "operation": "groupby"
+ }
+ }
+ }
+ },
+ {
+ "id": "merge",
+ "options": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ }
+ },
+ {
+ "id": "calculateField",
+ "options": {
+ "alias": "Used",
+ "binary": {
+ "left": "Value #A (lastNotNull)",
+ "operator": "-",
+ "reducer": "sum",
+ "right": "Value #B (lastNotNull)"
+ },
+ "mode": "binary",
+ "reduce": {
+ "reducer": "sum"
+ }
+ }
+ },
+ {
+ "id": "calculateField",
+ "options": {
+ "alias": "Used, %",
+ "binary": {
+ "left": "Used",
+ "operator": "/",
+ "reducer": "sum",
+ "right": "Value #A (lastNotNull)"
+ },
+ "mode": "binary",
+ "reduce": {
+ "reducer": "sum"
+ }
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
- ],
- "timeShift": null,
- "title": "WAL fsync duration seconds",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ },
+ "indexByName": {
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ },
+ "renameByName": {
+ "Value #A (lastNotNull)": "Size",
+ "Value #B (lastNotNull)": "Available",
+ "mountpoint": "Mounted on"
+ }
+ }
+ },
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {
+
+ },
+ "sort": [
+ {
+ "field": "Mounted on"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": false,
+ "type": "table"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk",
+ "titleSize": "h6",
+ "type": "row"
},
{
+ "collapse": false,
"collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 70
- },
- "id": 64,
"panels": [
+ {
+ "aliasColors": {
- ],
- "repeat": null,
- "title": "RAM&CPU",
- "type": "row"
- },
- {
- "aliasColors": {
- "Allocated bytes": "#7EB26D",
- "Allocated bytes - 1m max": "#BF1B00",
- "Allocated bytes - 1m min": "#BF1B00",
- "Allocated bytes - 5m max": "#BF1B00",
- "Allocated bytes - 5m min": "#BF1B00",
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833",
- "RSS": "#447EBC"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "decimals": null,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Network received (bits/s)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
- }
- },
- "overrides": [
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 71
- },
- "hiddenSeries": false,
- "id": 6,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/-/",
- "fill": 0
- },
- {
- "alias": "collector heap size",
- "color": "#E0752D",
- "fill": 0,
- "linewidth": 2
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Received",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
},
{
- "alias": "collector kubernetes memory limit",
- "color": "#BF1B00",
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Network transmitted (bits/s)",
"fill": 0,
- "linewidth": 3
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "Total resident memory - {{instance}}",
- "metric": "process_resident_memory_bytes",
- "refId": "B",
- "step": 1800
- },
- {
- "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "Total llocated bytes - {{instance}}",
- "metric": "go_memstats_alloc_bytes",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "timeFrom": null,
- "timeRegions": [
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "timeShift": null,
- "title": "Memory",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Transmitted",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Allocated bytes": "#F9BA8F",
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833",
- "RSS": "#890F02"
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Network",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "node-exporter-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ {
+ "allValue": null,
+ "current": {
- }
},
- "overrides": [
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Instance",
+ "multi": false,
+ "name": "instance",
+ "options": [
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 71
- },
- "hiddenSeries": false,
- "id": 7,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ ],
+ "query": "label_values(node_uname_info{job=\"node-exporter\", sysname!=\"Darwin\"}, instance)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Node Exporter / Nodes",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-nodes
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ persistentvolumesusage.json: |-
+ {
+ "__inputs": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Allocated Bytes/s",
- "metric": "go_memstats_alloc_bytes",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
+ ],
+ "__requires": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "annotations": {
+ "list": [
- ],
- "timeShift": null,
- "title": "Allocations per second",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
+ ],
+ "refresh": "10s",
+ "rows": [
{
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
- }
- },
- "overrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 71
- },
- "hiddenSeries": false,
- "id": 9,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "id": 2,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "CPU/s",
- "metric": "prometheus_local_storage_ingested_samples_total",
- "refId": "B",
- "step": 1800
- }
- ],
- "thresholds": [
+ ],
+ "spaceLength": 10,
+ "span": 9,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Used Space",
+ "refId": "A"
+ },
+ {
+ "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Free Space",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
- ],
- "timeFrom": null,
- "timeRegions": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Volume Space Usage",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ],
- "timeShift": null,
- "title": "CPU per second",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
- "avg"
- ]
- },
- "yaxes": [
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "$datasource",
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 3,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "80, 90",
+ "title": "Volume Space Usage",
+ "tooltip": {
+ "shared": false
+ },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
},
{
- "collapsed": true,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 78
- },
- "id": 68,
+ "collapse": false,
+ "collapsed": false,
"panels": [
{
"aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
+
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 79
+
},
- "hiddenSeries": false,
- "id": 47,
+ "id": 4,
+ "interval": "1m",
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
"total": false,
- "values": false
+ "values": true
},
"lines": true,
"linewidth": 1,
@@ -34916,187 +29499,212 @@ items:
],
"nullPointMode": "null",
"percentage": false,
- "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
+ "spaceLength": 10,
+ "span": 9,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Used inodes",
+ "refId": "A"
+ },
+ {
+ "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": " Free inodes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Volume inodes Usage",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "$datasource",
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+
+ },
+ "id": 5,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
"targets": [
{
- "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n",
"format": "time_series",
- "hide": false,
- "interval": "",
"intervalFactor": 2,
- "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "M",
- "step": 1800
+ "legendFormat": "",
+ "refId": "A"
}
],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Net errors",
+ "thresholds": "80, 90",
+ "title": "Volume inodes Usage",
"tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
+ "shared": false
},
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "valueName": "current"
}
],
"repeat": null,
- "title": "Contrac errors",
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
"type": "row"
}
],
- "refresh": "5m",
- "schemaVersion": 26,
+ "schemaVersion": 14,
"style": "dark",
"tags": [
- "custom"
+ "kubernetes-mixin"
],
"templating": {
"list": [
{
- "auto": true,
- "auto_count": 30,
- "auto_min": "2m",
"current": {
- "selected": false,
- "text": "auto",
- "value": "$__auto_interval_aggregation_interval"
+ "text": "default",
+ "value": "default"
},
"hide": 0,
- "label": "aggregation intarval",
- "name": "aggregation_interval",
+ "label": "Data Source",
+ "name": "datasource",
"options": [
- {
- "selected": true,
- "text": "auto",
- "value": "$__auto_interval_aggregation_interval"
- },
- {
- "selected": false,
- "text": "1m",
- "value": "1m"
- },
- {
- "selected": false,
- "text": "10m",
- "value": "10m"
- },
- {
- "selected": false,
- "text": "30m",
- "value": "30m"
- },
- {
- "selected": false,
- "text": "1h",
- "value": "1h"
- },
- {
- "selected": false,
- "text": "6h",
- "value": "6h"
- },
- {
- "selected": false,
- "text": "12h",
- "value": "12h"
- },
- {
- "selected": false,
- "text": "1d",
- "value": "1d"
- },
- {
- "selected": false,
- "text": "7d",
- "value": "7d"
- },
- {
- "selected": false,
- "text": "14d",
- "value": "14d"
- },
- {
- "selected": false,
- "text": "30d",
- "value": "30d"
- }
+
],
- "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
- "refresh": 2,
- "skipUrlSync": false,
- "type": "interval"
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
},
{
"allValue": null,
"current": {
- "selected": false,
- "text": "localhost:9090",
- "value": "localhost:9090"
+
},
"datasource": "$datasource",
- "definition": "",
- "hide": 0,
+ "hide": 2,
"includeAll": false,
- "label": "Instance",
+ "label": "cluster",
"multi": false,
- "name": "instance",
+ "name": "cluster",
"options": [
],
- "query": "label_values(prometheus_build_info, instance)",
+ "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)",
"refresh": 2,
"regex": "",
- "skipUrlSync": false,
- "sort": 2,
+ "sort": 1,
"tagValuesQuery": "",
"tags": [
@@ -35106,67 +29714,61 @@ items:
"useTags": false
},
{
+ "allValue": null,
"current": {
- "text": "60",
- "value": "60"
- },
- "hide": 0,
- "label": "Scrape interval seconds",
- "name": "scrape_interval",
- "options": [
- {
- "text": "60",
- "value": "60"
- }
- ],
- "query": "60",
- "skipUrlSync": false,
- "type": "constant"
- },
- {
- "current": {
- "selected": false,
- "text": "Prometheus",
- "value": "Prometheus"
+
},
+ "datasource": "$datasource",
"hide": 0,
"includeAll": false,
- "label": "Prometheus datasource",
+ "label": "Namespace",
"multi": false,
- "name": "datasource",
+ "name": "namespace",
"options": [
],
- "query": "prometheus",
- "refresh": 1,
+ "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)",
+ "refresh": 2,
"regex": "",
- "skipUrlSync": false,
- "type": "datasource"
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
},
{
+ "allValue": null,
"current": {
- "selected": false,
- "text": "No data sources found",
- "value": ""
+
},
+ "datasource": "$datasource",
"hide": 0,
"includeAll": false,
- "label": "InfluxDB datasource",
+ "label": "PersistentVolumeClaim",
"multi": false,
- "name": "influx_datasource",
+ "name": "volume",
"options": [
],
- "query": "influxdb",
- "refresh": 1,
+ "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)",
+ "refresh": 2,
"regex": "",
- "skipUrlSync": false,
- "type": "datasource"
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
]
},
"time": {
- "from": "now-3h",
+ "from": "now-7d",
"to": "now"
},
"timepicker": {
@@ -35194,18 +29796,23 @@ items:
"30d"
]
},
- "timezone": "browser",
- "title": "Prometheus Monitoring",
- "uid": "XmsJC9mRz",
- "version": 2
+ "timezone": "UTC",
+ "title": "Kubernetes / Persistent Volumes",
+ "uid": "919b92a8e8041bd567af9edab12c840c",
+ "version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-prometheus-dashboard
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-persistentvolumesusage
namespace: monitoring
- apiVersion: v1
data:
- prometheus-remote-write.json: |-
+ pod-total.json: |-
{
"__inputs": [
@@ -35215,7 +29822,15 @@ items:
],
"annotations": {
"list": [
-
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
]
},
"editable": true,
@@ -35226,695 +29841,515 @@ items:
"links": [
],
- "refresh": "",
- "rows": [
+ "panels": [
{
"collapse": false,
"collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
"panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 2,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Current Bandwidth",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "decimals": 0,
+ "format": "time_series",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 1
+ },
+ "height": 9,
+ "id": 3,
+ "interval": null,
+ "links": [
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "last"
],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ "defaults": {
+ "max": 10000000000,
+ "min": 0,
+ "title": "$namespace: $pod",
+ "unit": "Bps"
+ },
+ "mappings": [
],
- "timeFrom": null,
- "timeShift": null,
- "title": "Highest Timestamp In vs. Highest Timestamp Sent",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "override": {
- ]
},
- "yaxes": [
+ "thresholds": [
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "color": "dark-green",
+ "index": 0,
+ "value": null
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "color": "dark-yellow",
+ "index": 1,
+ "value": 5000000000
+ },
+ {
+ "color": "dark-red",
+ "index": 2,
+ "value": 7000000000
}
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 3,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
+ "values": false
+ }
+ },
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 12,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))",
+ "format": "time_series",
+ "instant": null,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Rate of Bytes Received",
+ "type": "gauge",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "decimals": 0,
+ "format": "time_series",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 1
+ },
+ "height": 9,
+ "id": 4,
+ "interval": null,
+ "links": [
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "last"
],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ "defaults": {
+ "max": 10000000000,
+ "min": 0,
+ "title": "$namespace: $pod",
+ "unit": "Bps"
+ },
+ "mappings": [
],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate[5m]",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "override": {
- ]
},
- "yaxes": [
+ "thresholds": [
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "color": "dark-green",
+ "index": 0,
+ "value": null
},
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "color": "dark-yellow",
+ "index": 1,
+ "value": 5000000000
+ },
+ {
+ "color": "dark-red",
+ "index": 2,
+ "value": 7000000000
}
- ]
+ ],
+ "values": false
+ }
+ },
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
}
],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Timestamps",
- "titleSize": "h6",
- "type": "row"
+ "span": 12,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))",
+ "format": "time_series",
+ "instant": null,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Rate of Bytes Transmitted",
+ "type": "gauge",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
},
{
"collapse": false,
"collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 10
+ },
+ "id": 5,
"panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 4,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Rate, in vs. succeeded or dropped [5m]",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- }
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Samples",
+ "title": "Bandwidth",
"titleSize": "h6",
"type": "row"
},
{
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 5,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "minSpan": 6,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Current Shards",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 6,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Max Shards",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 7,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ "aliasColors": {
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 11
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ ],
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Min Shards",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{pod}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Receive Bandwidth",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
},
{
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
- },
- "id": 8,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 11
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
+ ],
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{pod}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Desired Shards",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Transmit Bandwidth",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
}
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Shards",
- "titleSize": "h6",
- "type": "row"
+ ]
},
{
- "collapse": false,
- "collapsed": false,
+ "collapse": true,
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 20
+ },
+ "id": 8,
"panels": [
{
"aliasColors": {
@@ -35924,15 +30359,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
+ "fill": 2,
+ "fillGradient": 0,
"gridPos": {
-
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 21
},
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
+ "hideEmpty": true,
+ "hideZero": true,
"max": false,
"min": false,
"rightSide": false,
@@ -35942,11 +30383,13 @@ items:
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 2,
"links": [
],
- "nullPointMode": "null",
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
@@ -35956,16 +30399,17 @@ items:
],
"spaceLength": 10,
- "span": 6,
- "stack": false,
+ "span": 12,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}",
+ "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
+ "intervalFactor": 1,
+ "legendFormat": "{{pod}}",
+ "refId": "A",
+ "step": 10
}
],
"thresholds": [
@@ -35973,10 +30417,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Shard Capacity",
+ "title": "Rate of Received Packets",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -35991,19 +30435,19 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
}
]
@@ -36016,15 +30460,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
+ "fill": 2,
+ "fillGradient": 0,
"gridPos": {
-
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 21
},
"id": 10,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
+ "hideEmpty": true,
+ "hideZero": true,
"max": false,
"min": false,
"rightSide": false,
@@ -36034,11 +30484,13 @@ items:
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 2,
"links": [
],
- "nullPointMode": "null",
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
@@ -36048,16 +30500,17 @@ items:
],
"spaceLength": 10,
- "span": 6,
- "stack": false,
+ "span": 12,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}",
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
+ "intervalFactor": 1,
+ "legendFormat": "{{pod}}",
+ "refId": "A",
+ "step": 10
}
],
"thresholds": [
@@ -36065,10 +30518,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Pending Samples",
+ "title": "Rate of Transmitted Packets",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -36083,19 +30536,19 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
}
]
@@ -36105,13 +30558,20 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Shard Details",
+ "title": "Packets",
"titleSize": "h6",
"type": "row"
},
{
- "collapse": false,
- "collapsed": false,
+ "collapse": true,
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 21
+ },
+ "id": 11,
"panels": [
{
"aliasColors": {
@@ -36121,107 +30581,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 11,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "TSDB Current Segment",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
+ "fill": 2,
+ "fillGradient": 0,
"gridPos": {
-
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 32
},
"id": 12,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
+ "hideEmpty": true,
+ "hideZero": true,
"max": false,
"min": false,
"rightSide": false,
@@ -36231,11 +30605,13 @@ items:
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 2,
"links": [
],
- "nullPointMode": "null",
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
@@ -36245,16 +30621,17 @@ items:
],
"spaceLength": 10,
- "span": 6,
- "stack": false,
+ "span": 12,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{consumer}}",
- "refId": "A"
+ "intervalFactor": 1,
+ "legendFormat": "{{pod}}",
+ "refId": "A",
+ "step": 10
}
],
"thresholds": [
@@ -36262,10 +30639,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Remote Write Current Segment",
+ "title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -36280,36 +30657,23 @@ items:
},
"yaxes": [
{
- "format": "none",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Segments",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
+ },
{
"aliasColors": {
@@ -36318,15 +30682,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
+ "fill": 2,
+ "fillGradient": 0,
"gridPos": {
-
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 32
},
"id": 13,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
+ "hideEmpty": true,
+ "hideZero": true,
"max": false,
"min": false,
"rightSide": false,
@@ -36336,11 +30706,13 @@ items:
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 2,
"links": [
],
- "nullPointMode": "null",
+ "minSpan": 12,
+ "nullPointMode": "connected",
+ "paceLength": 10,
"percentage": false,
"pointradius": 5,
"points": false,
@@ -36350,16 +30722,17 @@ items:
],
"spaceLength": 10,
- "span": 3,
- "stack": false,
+ "span": 12,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
- "refId": "A"
+ "intervalFactor": 1,
+ "legendFormat": "{{pod}}",
+ "refId": "A",
+ "step": 10
}
],
"thresholds": [
@@ -36367,10 +30740,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Dropped Samples",
+ "title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -36385,23 +30758,304 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
- "format": "short",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
}
]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Errors",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "refresh": "10s",
+ "rows": [
+
+ ],
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [
+ "kubernetes-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "kube-system",
+ "value": "kube-system"
+ },
+ "datasource": "$datasource",
+ "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "namespace",
+ "options": [
+
+ ],
+ "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "pod",
+ "options": [
+
+ ],
+ "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "5m",
+ "value": "5m"
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "resolution",
+ "options": [
+ {
+ "selected": false,
+ "text": "30s",
+ "value": "30s"
+ },
+ {
+ "selected": true,
+ "text": "5m",
+ "value": "5m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ }
+ ],
+ "query": "30s,5m,1h",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "interval",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "5m",
+ "value": "5m"
},
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "4h",
+ "value": "4h"
+ }
+ ],
+ "query": "4h",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "interval",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "UTC",
+ "title": "Kubernetes / Networking / Pod",
+ "uid": "7a18067ce943a40ae25454675c19ff5c",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-pod-total
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ prometheus-remote-write.json: |-
+ {
+ "__inputs": [
+
+ ],
+ "__requires": [
+
+ ],
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [
+
+ ],
+ "refresh": "60s",
+ "rows": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
{
"aliasColors": {
@@ -36411,10 +31065,11 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 14,
+ "id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -36442,12 +31097,12 @@ items:
],
"spaceLength": 10,
- "span": 3,
+ "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
+ "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -36459,7 +31114,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Failed Samples",
+ "title": "Highest Timestamp In vs. Highest Timestamp Sent",
"tooltip": {
"shared": true,
"sort": 0,
@@ -36503,10 +31158,11 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 15,
+ "id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -36534,12 +31190,12 @@ items:
],
"spaceLength": 10,
- "span": 3,
+ "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
+ "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n, 0)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -36551,7 +31207,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Retried Samples",
+ "title": "Rate[5m]",
"tooltip": {
"shared": true,
"sort": 0,
@@ -36585,7 +31241,20 @@ items:
"show": true
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Timestamps",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
{
"aliasColors": {
@@ -36595,10 +31264,11 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 16,
+ "id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -36626,12 +31296,12 @@ items:
],
"spaceLength": 10,
- "span": 3,
+ "span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
+ "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -36643,7 +31313,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Enqueue Retries",
+ "title": "Rate, in vs. succeeded or dropped [5m]",
"tooltip": {
"shared": true,
"sort": 0,
@@ -36683,186 +31353,13 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Misc. Rates",
- "titleSize": "h6",
- "type": "row"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
-
- ],
- "templating": {
- "list": [
- {
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
- "text": {
- "selected": true,
- "text": "All",
- "value": "$__all"
- },
- "value": {
- "selected": true,
- "text": "All",
- "value": "$__all"
- }
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": null,
- "multi": false,
- "name": "instance",
- "options": [
-
- ],
- "query": "label_values(prometheus_build_info, instance)",
- "refresh": 2,
- "regex": "",
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "text": {
- "selected": true,
- "text": "All",
- "value": "$__all"
- },
- "value": {
- "selected": true,
- "text": "All",
- "value": "$__all"
- }
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": null,
- "multi": false,
- "name": "cluster",
- "options": [
-
- ],
- "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)",
- "refresh": 2,
- "regex": "",
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": null,
- "multi": false,
- "name": "url",
- "options": [
-
- ],
- "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)",
- "refresh": 2,
- "regex": "",
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-6h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "browser",
- "title": "Prometheus Remote Write",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-prometheus-remote-write
- namespace: monitoring
-- apiVersion: v1
- data:
- prometheus.json: |-
- {
- "annotations": {
- "list": [
-
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
+ "title": "Samples",
+ "titleSize": "h6",
+ "type": "row"
+ },
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"aliasColors": {
@@ -36873,13 +31370,20 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "id": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 5,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -36888,11 +31392,13 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "minSpan": 6,
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -36900,137 +31406,106 @@ items:
"span": 12,
"stack": false,
"steppedLine": false,
- "styles": [
- {
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
- "type": "hidden"
- },
- {
- "alias": "Count",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #A",
- "thresholds": [
-
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "Uptime",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "Value #B",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- },
+ "targets": [
{
- "alias": "Instance",
- "colorMode": null,
- "colors": [
+ "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "instance",
- "thresholds": [
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Shards",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
- ],
- "type": "number",
- "unit": "short"
- },
+ ]
+ },
+ "yaxes": [
{
- "alias": "Job",
- "colorMode": null,
- "colors": [
-
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "job",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
{
- "alias": "Version",
- "colorMode": null,
- "colors": [
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "link": false,
- "linkTooltip": "Drill down",
- "linkUrl": "",
- "pattern": "version",
- "thresholds": [
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
- ],
- "type": "number",
- "unit": "short"
- },
- {
- "alias": "",
- "colorMode": null,
- "colors": [
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
- ],
- "type": "string",
- "unit": "short"
- }
],
+ "spaceLength": 10,
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "count by (job, instance, version) (prometheus_build_info{job=~\"$job\", instance=~\"$instance\"})",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})",
- "format": "table",
- "instant": true,
+ "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}",
+ "format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37038,14 +31513,13 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Prometheus Stats",
+ "title": "Max Shards",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
- "transform": "table",
- "type": "table",
+ "type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -37061,7 +31535,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37070,22 +31544,10 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Prometheus Stats",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -37095,13 +31557,20 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "id": 2,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 7,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -37110,26 +31579,26 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 6,
+ "span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3",
+ "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{scrape_job}}",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37137,7 +31606,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Target Sync",
+ "title": "Min Shards",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37155,11 +31624,11 @@ items:
},
"yaxes": [
{
- "format": "ms",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37168,7 +31637,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -37180,42 +31649,49 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 3,
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 8,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 6,
- "stack": true,
+ "span": 4,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})",
+ "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "Targets",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37223,7 +31699,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Targets",
+ "title": "Desired Shards",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37245,7 +31721,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37254,7 +31730,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
}
@@ -37263,12 +31739,13 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Discovery",
- "titleSize": "h6"
+ "title": "Shards",
+ "titleSize": "h6",
+ "type": "row"
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"aliasColors": {
@@ -37279,13 +31756,20 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "id": 4,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 9,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -37294,26 +31778,26 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 4,
+ "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3",
+ "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{interval}} configured",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37321,7 +31805,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Average Scrape Interval Duration",
+ "title": "Shard Capacity",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37339,11 +31823,11 @@ items:
},
"yaxes": [
{
- "format": "ms",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37352,7 +31836,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -37364,66 +31848,155 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 5,
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 10,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 4,
- "stack": true,
+ "span": 6,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))",
+ "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "exceeded sample limit: {{job}}",
- "legendLink": null,
- "step": 10
- },
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pending Samples",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
{
- "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "duplicate timestamp: {{job}}",
- "legendLink": null,
- "step": 10
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
{
- "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "out of bounds: {{job}}",
- "legendLink": null,
- "step": 10
- },
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Shard Details",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 11,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
{
- "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))",
+ "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "out of order: {{job}}",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37431,7 +32004,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Scrape failures",
+ "title": "TSDB Current Segment",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37449,11 +32022,11 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "none",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37462,7 +32035,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -37474,42 +32047,49 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 6,
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 12,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 4,
- "stack": true,
+ "span": 6,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])",
+ "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{job}} {{instance}}",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{consumer}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37517,7 +32097,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Appended Samples",
+ "title": "Remote Write Current Segment",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37535,11 +32115,11 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "none",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37548,7 +32128,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
}
@@ -37557,12 +32137,13 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Retrieval",
- "titleSize": "h6"
+ "title": "Segments",
+ "titleSize": "h6",
+ "type": "row"
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"aliasColors": {
@@ -37572,42 +32153,49 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 7,
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 13,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 6,
- "stack": true,
+ "span": 3,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}",
+ "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{job}} {{instance}} head series",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37615,7 +32203,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Head Series",
+ "title": "Dropped Samples",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37637,7 +32225,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37646,7 +32234,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -37658,42 +32246,49 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 8,
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 14,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 6,
- "stack": true,
+ "span": 3,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}",
+ "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{job}} {{instance}} head chunks",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37701,7 +32296,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Head Chunks",
+ "title": "Failed Samples",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37723,7 +32318,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37732,22 +32327,10 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Storage",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -37756,42 +32339,49 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 9,
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 15,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 6,
- "stack": true,
+ "span": 3,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])",
+ "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{job}} {{instance}}",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37799,7 +32389,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Query Rate",
+ "title": "Retried Samples",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37821,7 +32411,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37830,7 +32420,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -37842,42 +32432,49 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 10,
- "id": 10,
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 16,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": true,
+ "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 6,
- "stack": true,
+ "span": 3,
+ "stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3",
+ "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{slice}}",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -37885,7 +32482,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Stage Duration",
+ "title": "Enqueue Retries",
"tooltip": {
"shared": true,
"sort": 0,
@@ -37903,11 +32500,11 @@ items:
},
"yaxes": [
{
- "format": "ms",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -37916,7 +32513,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
}
@@ -37925,22 +32522,19 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Query",
- "titleSize": "h6"
+ "title": "Misc. Rates",
+ "titleSize": "h6",
+ "type": "row"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
-
+ "prometheus-mixin"
],
"templating": {
"list": [
{
- "current": {
- "text": "default",
- "value": "default"
- },
"hide": 0,
"label": null,
"name": "datasource",
@@ -37955,23 +32549,30 @@ items:
{
"allValue": null,
"current": {
- "selected": true,
- "text": "All",
- "value": "$__all"
+ "text": {
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
+ },
+ "value": {
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
+ }
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
+ "label": null,
+ "multi": false,
+ "name": "cluster",
"options": [
],
- "query": "label_values(prometheus_build_info, job)",
- "refresh": 1,
+ "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)",
+ "refresh": 2,
"regex": "",
- "sort": 2,
+ "sort": 0,
"tagValuesQuery": "",
"tags": [
@@ -37983,23 +32584,56 @@ items:
{
"allValue": null,
"current": {
- "selected": true,
- "text": "All",
- "value": "$__all"
+ "text": {
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
+ },
+ "value": {
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
+ }
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
- "label": "instance",
- "multi": true,
+ "label": null,
+ "multi": false,
"name": "instance",
"options": [
],
- "query": "label_values(prometheus_build_info, instance)",
- "refresh": 1,
+ "query": "label_values(prometheus_build_info{cluster=~\"$cluster\"}, instance)",
+ "refresh": 2,
"regex": "",
- "sort": 2,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "url",
+ "options": [
+
+ ],
+ "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
"tagValuesQuery": "",
"tags": [
@@ -38011,7 +32645,7 @@ items:
]
},
"time": {
- "from": "now-1h",
+ "from": "now-6h",
"to": "now"
},
"timepicker": {
@@ -38039,128 +32673,41 @@ items:
"30d"
]
},
- "timezone": "utc",
- "title": "Prometheus",
- "uid": "",
+ "timezone": "browser",
+ "title": "Prometheus / Remote Write",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-prometheus
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-prometheus-remote-write
namespace: monitoring
- apiVersion: v1
data:
- proxy.json: |-
+ prometheus.json: |-
{
- "__inputs": [
-
- ],
- "__requires": [
-
- ],
"annotations": {
"list": [
]
},
- "editable": false,
+ "editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
- "id": null,
"links": [
],
- "refresh": "10s",
+ "refresh": "60s",
"rows": [
{
"collapse": false,
- "collapsed": false,
+ "height": "250px",
"panels": [
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 2,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(up{job=\"kube-proxy\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Up",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
{
"aliasColors": {
@@ -38170,19 +32717,13 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "gridPos": {
-
- },
- "id": 3,
+ "id": 1,
"legend": {
- "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
"show": true,
- "sideWidth": null,
"total": false,
"values": false
},
@@ -38191,26 +32732,154 @@ items:
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 5,
+ "span": 12,
"stack": false,
"steppedLine": false,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Count",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": "Uptime",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "s"
+ },
+ {
+ "alias": "Instance",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "instance",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Job",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "job",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Version",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "version",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+
+ ],
+ "type": "string",
+ "unit": "short"
+ }
+ ],
"targets": [
{
- "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
- "format": "time_series",
+ "expr": "count by (job, instance, version) (prometheus_build_info{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "rate",
- "refId": "A"
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
}
],
"thresholds": [
@@ -38218,13 +32887,14 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rules Sync Rate",
+ "title": "Prometheus Stats",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
- "type": "graph",
+ "transform": "table",
+ "type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
@@ -38236,7 +32906,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -38244,15 +32914,27 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "max": null,
+ "min": null,
+ "show": false
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Prometheus Stats",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
"aliasColors": {
@@ -38262,47 +32944,41 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "gridPos": {
-
- },
- "id": 4,
+ "id": 2,
"legend": {
- "alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
- "rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 5,
+ "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
+ "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "legendFormat": "{{scrape_job}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -38310,10 +32986,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Rule Sync Latency 99th Quantile",
+ "title": "Target Sync",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -38328,7 +33004,7 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "ms",
"label": null,
"logBase": 1,
"max": null,
@@ -38336,28 +33012,15 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
+ },
{
"aliasColors": {
@@ -38366,48 +33029,42 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 5,
+ "fill": 10,
+ "id": 3,
"legend": {
- "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
"show": true,
- "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
+ "expr": "sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "rate",
- "refId": "A"
+ "legendFormat": "Targets",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -38415,10 +33072,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Network Programming Rate",
+ "title": "Targets",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -38433,7 +33090,7 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -38441,15 +33098,27 @@ items:
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Discovery",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
"aliasColors": {
@@ -38459,47 +33128,41 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
- "gridPos": {
-
- },
- "id": 6,
+ "id": 4,
"legend": {
- "alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
- "rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 6,
+ "span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "legendFormat": "{{interval}} configured",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -38507,10 +33170,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Network Programming Latency 99th Quantile",
+ "title": "Average Scrape Interval Duration",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -38525,7 +33188,7 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "ms",
"label": null,
"logBase": 1,
"max": null,
@@ -38533,28 +33196,15 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
+ },
{
"aliasColors": {
@@ -38563,69 +33213,74 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 7,
+ "fill": 10,
+ "id": 5,
"legend": {
- "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
"show": true,
- "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 4,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+ "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "2xx",
- "refId": "A"
+ "legendFormat": "exceeded body size limit: {{job}}",
+ "legendLink": null,
+ "step": 10
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+ "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "3xx",
- "refId": "B"
+ "legendFormat": "exceeded sample limit: {{job}}",
+ "legendLink": null,
+ "step": 10
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+ "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "4xx",
- "refId": "C"
+ "legendFormat": "duplicate timestamp: {{job}}",
+ "legendLink": null,
+ "step": 10
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+ "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "5xx",
- "refId": "D"
+ "legendFormat": "out of bounds: {{job}}",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "out of order: {{job}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -38633,10 +33288,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Kube API Request Rate",
+ "title": "Scrape failures",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -38651,20 +33306,20 @@ items:
},
"yaxes": [
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
- "format": "ops",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
]
},
@@ -38676,48 +33331,42 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 8,
+ "fill": 10,
+ "id": 6,
"legend": {
- "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
"show": true,
- "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 8,
- "stack": false,
+ "span": 4,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))",
+ "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{verb}} {{url}}",
- "refId": "A"
+ "legendFormat": "{{job}} {{instance}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -38725,10 +33374,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Post Request Latency 99th Quantile",
+ "title": "Appended Samples",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -38743,7 +33392,7 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -38751,12 +33400,12 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
}
@@ -38764,14 +33413,13 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "showTitle": true,
+ "title": "Retrieval",
+ "titleSize": "h6"
},
{
"collapse": false,
- "collapsed": false,
+ "height": "250px",
"panels": [
{
"aliasColors": {
@@ -38781,48 +33429,42 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 9,
+ "fill": 10,
+ "id": 7,
"legend": {
- "alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
- "rightSide": true,
"show": true,
- "sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 12,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
+ "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{verb}} {{url}}",
- "refId": "A"
+ "legendFormat": "{{job}} {{instance}} head series",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -38830,10 +33472,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Get Request Latency 99th Quantile",
+ "title": "Head Series",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -38848,7 +33490,7 @@ items:
},
"yaxes": [
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -38856,28 +33498,15 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
+ },
{
"aliasColors": {
@@ -38886,48 +33515,42 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 10,
+ "fill": 10,
+ "id": 8,
"legend": {
- "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
"show": true,
- "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 4,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{job=\"kube-proxy\",instance=~\"$instance\"}",
+ "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "legendFormat": "{{job}} {{instance}} head chunks",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -38935,10 +33558,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory",
+ "title": "Head Chunks",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -38953,23 +33576,35 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
- "format": "bytes",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
]
- },
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Storage",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
{
"aliasColors": {
@@ -38978,48 +33613,42 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 11,
+ "fill": 10,
+ "id": 9,
"legend": {
- "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
"show": true,
- "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 4,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{job=\"kube-proxy\",instance=~\"$instance\"}[5m])",
+ "expr": "rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "legendFormat": "{{job}} {{instance}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -39027,10 +33656,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "CPU usage",
+ "title": "Query Rate",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -39057,8 +33686,8 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
+ "min": null,
+ "show": false
}
]
},
@@ -39070,48 +33699,42 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 12,
+ "fill": 10,
+ "id": 10,
"legend": {
- "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
"show": true,
- "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 1,
+ "linewidth": 0,
"links": [
],
- "nullPointMode": "null",
+ "nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
- "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
- "span": 4,
- "stack": false,
+ "span": 6,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{job=\"kube-proxy\",instance=~\"$instance\"}",
+ "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "legendFormat": "{{slice}}",
+ "legendLink": null,
+ "step": 10
}
],
"thresholds": [
@@ -39119,10 +33742,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Goroutines",
+ "title": "Stage Duration",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -39137,11 +33760,11 @@ items:
},
"yaxes": [
{
- "format": "short",
+ "format": "ms",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": 0,
"show": true
},
{
@@ -39150,7 +33773,7 @@ items:
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
]
}
@@ -39158,16 +33781,15 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
+ "showTitle": true,
+ "title": "Query",
+ "titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
- "kubernetes-mixin"
+ "prometheus-mixin"
],
"templating": {
"list": [
@@ -39177,7 +33799,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -39188,23 +33810,53 @@ items:
"type": "datasource"
},
{
- "allValue": null,
+ "allValue": ".+",
"current": {
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [
+
+ ],
+ "query": "label_values(prometheus_build_info{job=\"prometheus-k8s\",namespace=\"monitoring\"}, job)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 2,
+ "tagValuesQuery": "",
+ "tags": [
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": {
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
- "label": null,
- "multi": false,
+ "label": "instance",
+ "multi": true,
"name": "instance",
"options": [
],
- "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\"}, instance)",
- "refresh": 2,
+ "query": "label_values(prometheus_build_info{job=~\"$job\"}, instance)",
+ "refresh": 1,
"regex": "",
- "sort": 1,
+ "sort": 2,
"tagValuesQuery": "",
"tags": [
@@ -39244,18 +33896,23 @@ items:
"30d"
]
},
- "timezone": "UTC",
- "title": "Kubernetes / Proxy",
- "uid": "632e265de029684c40b21cb76bca4f94",
+ "timezone": "utc",
+ "title": "Prometheus / Overview",
+ "uid": "",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-proxy
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-prometheus
namespace: monitoring
- apiVersion: v1
data:
- scheduler.json: |-
+ proxy.json: |-
{
"__inputs": [
@@ -39304,7 +33961,11 @@ items:
},
"id": 2,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -39343,7 +34004,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(up{job=\"kube-scheduler\"})",
+ "expr": "sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -39375,21 +34036,23 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 3,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
- "current": true,
+ "current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
- "values": true
+ "values": false
},
"lines": true,
"linewidth": 1,
@@ -39411,32 +34074,11 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} e2e",
+ "legendFormat": "rate",
"refId": "A"
- },
- {
- "expr": "sum(rate(scheduler_binding_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} binding",
- "refId": "B"
- },
- {
- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} scheduling algorithm",
- "refId": "C"
- },
- {
- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} volume",
- "refId": "D"
}
],
"thresholds": [
@@ -39444,7 +34086,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Scheduling Rate",
+ "title": "Rules Sync Rate",
"tooltip": {
"shared": false,
"sort": 0,
@@ -39488,10 +34130,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 4,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -39524,32 +34168,212 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} e2e",
+ "legendFormat": "{{instance}}",
"refId": "A"
- },
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rule Sync Latency 99th Quantile",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} binding",
- "refId": "B"
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 5,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} scheduling algorithm",
- "refId": "C"
+ "legendFormat": "rate",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Programming Rate",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 6,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} volume",
- "refId": "D"
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -39557,7 +34381,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Scheduling latency 99th Quantile",
+ "title": "Network Programming Latency 99th Quantile",
"tooltip": {
"shared": false,
"sort": 0,
@@ -39614,17 +34438,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 5,
+ "id": 7,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -39650,28 +34476,28 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@@ -39705,7 +34531,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
@@ -39713,7 +34539,7 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
}
]
@@ -39727,17 +34553,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 6,
+ "id": 8,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -39763,7 +34591,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -39832,10 +34660,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 7,
+ "id": 9,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -39868,7 +34698,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -39937,17 +34767,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 8,
+ "id": 10,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -39973,7 +34805,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{job=\"kube-scheduler\", instance=~\"$instance\"}",
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -40029,17 +34861,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 9,
+ "id": 11,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -40065,7 +34899,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -40095,7 +34929,7 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -40103,7 +34937,7 @@ items:
"show": true
},
{
- "format": "bytes",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -40121,17 +34955,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 10,
+ "id": 12,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -40157,7 +34993,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{job=\"kube-scheduler\",instance=~\"$instance\"}",
+ "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -40227,7 +35063,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -40241,6 +35077,32 @@ items:
"allValue": null,
"current": {
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kube-proxy\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+
},
"datasource": "$datasource",
"hide": 0,
@@ -40251,7 +35113,7 @@ items:
"options": [
],
- "query": "label_values(process_cpu_seconds_total{job=\"kube-scheduler\"}, instance)",
+ "query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -40295,17 +35157,22 @@ items:
]
},
"timezone": "UTC",
- "title": "Kubernetes / Scheduler",
- "uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
+ "title": "Kubernetes / Proxy",
+ "uid": "632e265de029684c40b21cb76bca4f94",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-scheduler
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-proxy
namespace: monitoring
- apiVersion: v1
data:
- statefulset.json: |-
+ scheduler.json: |-
{
"__inputs": [
@@ -40326,7 +35193,7 @@ items:
"links": [
],
- "refresh": "",
+ "refresh": "10s",
"rows": [
{
"collapse": false,
@@ -40354,7 +35221,11 @@ items:
},
"id": 2,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -40372,7 +35243,7 @@ items:
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
- "postfix": "cores",
+ "postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
@@ -40383,16 +35254,17 @@ items:
"to": "null"
}
],
- "span": 4,
+ "span": 2,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
"lineColor": "rgb(31, 120, 193)",
- "show": true
+ "show": false
},
"tableColumn": "",
"targets": [
{
- "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))",
+ "expr": "sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -40400,7 +35272,7 @@ items:
}
],
"thresholds": "",
- "title": "CPU",
+ "title": "Up",
"tooltip": {
"shared": false
},
@@ -40409,177 +35281,241 @@ items:
"valueMaps": [
{
"op": "=",
- "text": "0",
+ "text": "N/A",
"value": "null"
}
],
- "valueName": "current"
+ "valueName": "min"
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "aliasColors": {
+
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 3,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 5,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
{
- "name": "value to text",
- "value": 1
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{cluster}} {{instance}} e2e",
+ "refId": "A"
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "GB",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{cluster}} {{instance}} binding",
+ "refId": "B"
+ },
{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 4,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
+ "refId": "C"
+ },
{
- "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
+ "legendFormat": "{{cluster}} {{instance}} volume",
+ "refId": "D"
}
],
- "thresholds": "",
- "title": "Memory",
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Scheduling Rate",
"tooltip": {
- "shared": false
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
},
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
{
- "op": "=",
- "text": "0",
- "value": "null"
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
}
- ],
- "valueName": "current"
+ ]
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "aliasColors": {
+
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 4,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 5,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
{
- "name": "value to text",
- "value": 1
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{cluster}} {{instance}} e2e",
+ "refId": "A"
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "Bps",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{cluster}} {{instance}} binding",
+ "refId": "B"
+ },
{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 4,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
+ "refId": "C"
+ },
{
- "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
+ "legendFormat": "{{cluster}} {{instance}} volume",
+ "refId": "D"
}
],
- "thresholds": "",
- "title": "Network",
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Scheduling latency 99th Quantile",
"tooltip": {
- "shared": false
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
},
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
{
- "op": "=",
- "text": "0",
- "value": "null"
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
}
- ],
- "valueName": "current"
+ ]
}
],
"repeat": null,
@@ -40593,343 +35529,322 @@ items:
{
"collapse": false,
"collapsed": false,
- "height": "100px",
"panels": [
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "aliasColors": {
+
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 5,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
{
- "name": "value to text",
- "value": 1
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "2xx",
+ "refId": "A"
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "3xx",
+ "refId": "B"
+ },
{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "4xx",
+ "refId": "C"
+ },
{
- "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
+ "legendFormat": "5xx",
+ "refId": "D"
}
],
- "thresholds": "",
- "title": "Desired Replicas",
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Kube API Request Rate",
"tooltip": {
- "shared": false
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
},
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
{
- "op": "=",
- "text": "0",
- "value": "null"
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
}
- ],
- "valueName": "current"
+ ]
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "aliasColors": {
+
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 6,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
+ "spaceLength": 10,
+ "span": 8,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
+ "legendFormat": "{{verb}} {{url}}",
"refId": "A"
}
],
- "thresholds": "",
- "title": "Replicas of current version",
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Post Request Latency 99th Quantile",
"tooltip": {
- "shared": false
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
},
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
{
- "op": "=",
- "text": "0",
- "value": "null"
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
}
- ],
- "valueName": "current"
- },
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "aliasColors": {
+
},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 7,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
+ "legendFormat": "{{verb}} {{url}}",
"refId": "A"
}
],
- "thresholds": "",
- "title": "Observed Generation",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
+ "thresholds": [
+
],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Get Request Latency 99th Quantile",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
},
- "gridPos": {
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+ ]
},
- "id": 8,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
+ "yaxes": [
{
- "name": "value to text",
- "value": 1
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Metadata Generation",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
}
- ],
- "valueName": "current"
+ ]
}
],
"repeat": null,
@@ -40953,17 +35868,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 9,
+ "id": 8,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -40984,43 +35901,204 @@ items:
],
"spaceLength": 10,
+ "span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "replicas specified",
+ "legendFormat": "{{instance}}",
"refId": "A"
- },
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
{
- "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "replicas created",
- "refId": "B"
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
{
- "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "ready",
- "refId": "C"
- },
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 9,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
{
- "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "replicas of current version",
- "refId": "D"
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU usage",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
},
{
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 10,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 4,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "updated",
- "refId": "E"
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -41028,7 +36106,7 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Replicas",
+ "title": "Goroutines",
"tooltip": {
"shared": false,
"sort": 0,
@@ -41086,7 +36164,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -41110,33 +36188,7 @@ items:
"options": [
],
- "query": "label_values(kube_statefulset_metadata_generation, cluster)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": "Namespace",
- "multi": false,
- "name": "namespace",
- "options": [
-
- ],
- "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "query": "label_values(up{job=\"kube-scheduler\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -41155,14 +36207,14 @@ items:
},
"datasource": "$datasource",
"hide": 0,
- "includeAll": false,
- "label": "Name",
+ "includeAll": true,
+ "label": null,
"multi": false,
- "name": "statefulset",
+ "name": "instance",
"options": [
],
- "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)",
+ "query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -41206,13 +36258,18 @@ items:
]
},
"timezone": "UTC",
- "title": "Kubernetes / StatefulSets",
- "uid": "a31c1f46e6f727cb37c0d731a7245005",
+ "title": "Kubernetes / Scheduler",
+ "uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-statefulset
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-scheduler
namespace: monitoring
- apiVersion: v1
data:
@@ -41276,6 +36333,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41321,7 +36379,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@@ -41378,6 +36436,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41423,7 +36482,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@@ -41491,6 +36550,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41536,7 +36596,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@@ -41593,6 +36653,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41638,7 +36699,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@@ -41725,6 +36786,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41768,7 +36830,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -41825,6 +36887,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41868,7 +36931,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -41936,6 +36999,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41979,7 +37043,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -42036,6 +37100,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -42079,7 +37144,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -42156,6 +37221,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -42199,7 +37265,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -42256,6 +37322,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -42299,7 +37366,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -42374,7 +37441,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -42384,6 +37451,32 @@ items:
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": ".+",
"auto": false,
@@ -42394,7 +37487,7 @@ items:
"value": "kube-system"
},
"datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total, namespace)",
+ "definition": "label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"label": null,
@@ -42403,8 +37496,8 @@ items:
"options": [
],
- "query": "label_values(container_network_receive_packets_total, namespace)",
- "refresh": 1,
+ "query": "label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -42426,7 +37519,7 @@ items:
"value": ""
},
"datasource": "$datasource",
- "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)",
+ "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)",
"hide": 0,
"includeAll": false,
"label": null,
@@ -42435,8 +37528,8 @@ items:
"options": [
],
- "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)",
- "refresh": 1,
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -42458,7 +37551,7 @@ items:
"value": "deployment"
},
"datasource": "$datasource",
- "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)",
+ "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)",
"hide": 0,
"includeAll": false,
"label": null,
@@ -42467,8 +37560,8 @@ items:
"options": [
],
- "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)",
- "refresh": 1,
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
@@ -42598,6 +37691,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-workload-total
namespace: monitoring
kind: ConfigMapList
diff --git a/manifests/grafana-dashboardSources.yaml b/manifests/grafana-dashboardSources.yaml
index fffec98..8fc6d8e 100644
--- a/manifests/grafana-dashboardSources.yaml
+++ b/manifests/grafana-dashboardSources.yaml
@@ -6,6 +6,7 @@ data:
"providers": [
{
"folder": "Default",
+ "folderUid": "",
"name": "0",
"options": {
"path": "/grafana-dashboard-definitions/0"
@@ -17,5 +18,10 @@ data:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboards
namespace: monitoring
diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml
index ed55fa9..eca41b6 100644
--- a/manifests/grafana-deployment.yaml
+++ b/manifests/grafana-deployment.yaml
@@ -2,22 +2,35 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
+ annotations:
+ checksum/grafana-config: adbde4cde1aa3ca57c408943af53e6f7
+ checksum/grafana-dashboardproviders: d8fb24844314114bed088b83042b1bdb
+ checksum/grafana-datasources: 0800bab7ea1e2d8ad5c09586d089e033
labels:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
spec:
+ automountServiceAccountToken: false
containers:
- env: []
- image: grafana/grafana:7.0.3
+ image: grafana/grafana:9.3.2
name: grafana
ports:
- containerPort: 3000
@@ -33,6 +46,12 @@ spec:
requests:
cpu: 100m
memory: 100Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
volumeMounts:
- mountPath: /var/lib/grafana
name: grafana-storage
@@ -43,6 +62,12 @@ spec:
- mountPath: /etc/grafana/provisioning/dashboards
name: grafana-dashboards
readOnly: false
+ - mountPath: /tmp
+ name: tmp-plugins
+ readOnly: false
+ - mountPath: /grafana-dashboard-definitions/0/alertmanager-overview
+ name: grafana-dashboard-alertmanager-overview
+ readOnly: false
- mountPath: /grafana-dashboard-definitions/0/apiserver
name: grafana-dashboard-apiserver
readOnly: false
@@ -52,8 +77,8 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/controller-manager
name: grafana-dashboard-controller-manager
readOnly: false
- - mountPath: /grafana-dashboard-definitions/0/coredns-dashboard
- name: grafana-dashboard-coredns-dashboard
+ - mountPath: /grafana-dashboard-definitions/0/grafana-overview
+ name: grafana-dashboard-grafana-overview
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
@@ -76,9 +101,6 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/kubelet
name: grafana-dashboard-kubelet
readOnly: false
- - mountPath: /grafana-dashboard-definitions/0/kubernetes-cluster-dashboard
- name: grafana-dashboard-kubernetes-cluster-dashboard
- readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
name: grafana-dashboard-namespace-by-pod
readOnly: false
@@ -91,6 +113,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
name: grafana-dashboard-node-rsrc-use
readOnly: false
+ - mountPath: /grafana-dashboard-definitions/0/nodes-darwin
+ name: grafana-dashboard-nodes-darwin
+ readOnly: false
- mountPath: /grafana-dashboard-definitions/0/nodes
name: grafana-dashboard-nodes
readOnly: false
@@ -100,9 +125,6 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/pod-total
name: grafana-dashboard-pod-total
readOnly: false
- - mountPath: /grafana-dashboard-definitions/0/prometheus-dashboard
- name: grafana-dashboard-prometheus-dashboard
- readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
readOnly: false
@@ -115,9 +137,6 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/scheduler
name: grafana-dashboard-scheduler
readOnly: false
- - mountPath: /grafana-dashboard-definitions/0/statefulset
- name: grafana-dashboard-statefulset
- readOnly: false
- mountPath: /grafana-dashboard-definitions/0/workload-total
name: grafana-dashboard-workload-total
readOnly: false
@@ -125,8 +144,9 @@ spec:
name: grafana-config
readOnly: false
nodeSelector:
- beta.kubernetes.io/os: linux
+ kubernetes.io/os: linux
securityContext:
+ fsGroup: 65534
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: grafana
@@ -139,6 +159,12 @@ spec:
- configMap:
name: grafana-dashboards
name: grafana-dashboards
+ - emptyDir:
+ medium: Memory
+ name: tmp-plugins
+ - configMap:
+ name: grafana-dashboard-alertmanager-overview
+ name: grafana-dashboard-alertmanager-overview
- configMap:
name: grafana-dashboard-apiserver
name: grafana-dashboard-apiserver
@@ -149,8 +175,8 @@ spec:
name: grafana-dashboard-controller-manager
name: grafana-dashboard-controller-manager
- configMap:
- name: grafana-dashboard-coredns-dashboard
- name: grafana-dashboard-coredns-dashboard
+ name: grafana-dashboard-grafana-overview
+ name: grafana-dashboard-grafana-overview
- configMap:
name: grafana-dashboard-k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
@@ -172,9 +198,6 @@ spec:
- configMap:
name: grafana-dashboard-kubelet
name: grafana-dashboard-kubelet
- - configMap:
- name: grafana-dashboard-kubernetes-cluster-dashboard
- name: grafana-dashboard-kubernetes-cluster-dashboard
- configMap:
name: grafana-dashboard-namespace-by-pod
name: grafana-dashboard-namespace-by-pod
@@ -187,6 +210,9 @@ spec:
- configMap:
name: grafana-dashboard-node-rsrc-use
name: grafana-dashboard-node-rsrc-use
+ - configMap:
+ name: grafana-dashboard-nodes-darwin
+ name: grafana-dashboard-nodes-darwin
- configMap:
name: grafana-dashboard-nodes
name: grafana-dashboard-nodes
@@ -196,9 +222,6 @@ spec:
- configMap:
name: grafana-dashboard-pod-total
name: grafana-dashboard-pod-total
- - configMap:
- name: grafana-dashboard-prometheus-dashboard
- name: grafana-dashboard-prometheus-dashboard
- configMap:
name: grafana-dashboard-prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
@@ -211,9 +234,6 @@ spec:
- configMap:
name: grafana-dashboard-scheduler
name: grafana-dashboard-scheduler
- - configMap:
- name: grafana-dashboard-statefulset
- name: grafana-dashboard-statefulset
- configMap:
name: grafana-dashboard-workload-total
name: grafana-dashboard-workload-total
diff --git a/manifests/grafana-networkPolicy.yaml b/manifests/grafana-networkPolicy.yaml
new file mode 100644
index 0000000..cab676c
--- /dev/null
+++ b/manifests/grafana-networkPolicy.yaml
@@ -0,0 +1,29 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 3000
+ protocol: TCP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/grafana-prometheusRule.yaml b/manifests/grafana-prometheusRule.yaml
new file mode 100644
index 0000000..7ac2cfc
--- /dev/null
+++ b/manifests/grafana-prometheusRule.yaml
@@ -0,0 +1,33 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ prometheus: k8s
+ role: alert-rules
+ name: grafana-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: GrafanaAlerts
+ rules:
+ - alert: GrafanaRequestsFailing
+ annotations:
+ message: '{{ $labels.namespace }}/{{ $labels.job }}/{{ $labels.handler }} is experiencing {{ $value | humanize }}% errors'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/grafana/grafanarequestsfailing
+ expr: |
+ 100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."}
+ / ignoring (status_code)
+ sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"})
+ > 50
+ for: 5m
+ labels:
+ severity: warning
+ - name: grafana_rules
+ rules:
+ - expr: |
+ sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))
+ record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m
diff --git a/manifests/grafana-service.yaml b/manifests/grafana-service.yaml
index 3acdf1e..ce95f07 100644
--- a/manifests/grafana-service.yaml
+++ b/manifests/grafana-service.yaml
@@ -2,7 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana
namespace: monitoring
spec:
@@ -11,4 +14,6 @@ spec:
port: 3000
targetPort: http
selector:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/grafana-serviceAccount.yaml b/manifests/grafana-serviceAccount.yaml
index 3ed3e03..2f87aca 100644
--- a/manifests/grafana-serviceAccount.yaml
+++ b/manifests/grafana-serviceAccount.yaml
@@ -1,5 +1,11 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana
namespace: monitoring
diff --git a/manifests/grafana-serviceMonitor.yaml b/manifests/grafana-serviceMonitor.yaml
index 7ede266..f13c73c 100644
--- a/manifests/grafana-serviceMonitor.yaml
+++ b/manifests/grafana-serviceMonitor.yaml
@@ -1,6 +1,11 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana
namespace: monitoring
spec:
@@ -9,4 +14,4 @@ spec:
port: http
selector:
matchLabels:
- app: grafana
+ app.kubernetes.io/name: grafana
diff --git a/manifests/kube-state-metrics-clusterRole.yaml b/manifests/kube-state-metrics-clusterRole.yaml
index 8c72322..1c1f60a 100644
--- a/manifests/kube-state-metrics-clusterRole.yaml
+++ b/manifests/kube-state-metrics-clusterRole.yaml
@@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
rules:
- apiGroups:
@@ -14,6 +16,7 @@ rules:
- nodes
- pods
- services
+ - serviceaccounts
- resourcequotas
- replicationcontrollers
- limitranges
@@ -24,16 +27,6 @@ rules:
verbs:
- list
- watch
-- apiGroups:
- - extensions
- resources:
- - daemonsets
- - deployments
- - replicasets
- - ingresses
- verbs:
- - list
- - watch
- apiGroups:
- apps
resources:
@@ -85,6 +78,13 @@ rules:
verbs:
- list
- watch
+- apiGroups:
+ - discovery.k8s.io
+ resources:
+ - endpointslices
+ verbs:
+ - list
+ - watch
- apiGroups:
- storage.k8s.io
resources:
@@ -105,6 +105,8 @@ rules:
- networking.k8s.io
resources:
- networkpolicies
+ - ingressclasses
+ - ingresses
verbs:
- list
- watch
@@ -115,3 +117,13 @@ rules:
verbs:
- list
- watch
+- apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - clusterrolebindings
+ - clusterroles
+ - rolebindings
+ - roles
+ verbs:
+ - list
+ - watch
diff --git a/manifests/kube-state-metrics-clusterRoleBinding.yaml b/manifests/kube-state-metrics-clusterRoleBinding.yaml
index 750ff09..88c5faf 100644
--- a/manifests/kube-state-metrics-clusterRoleBinding.yaml
+++ b/manifests/kube-state-metrics-clusterRoleBinding.yaml
@@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
diff --git a/manifests/kube-state-metrics-deployment.yaml b/manifests/kube-state-metrics-deployment.yaml
index 787d86e..ec95b5c 100644
--- a/manifests/kube-state-metrics-deployment.yaml
+++ b/manifests/kube-state-metrics-deployment.yaml
@@ -2,55 +2,104 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: kube-state-metrics
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
spec:
+ automountServiceAccountToken: true
containers:
- args:
- --host=127.0.0.1
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
- image: carlosedp/kube-state-metrics:v1.9.6
+ image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0
name: kube-state-metrics
+ resources:
+ limits:
+ cpu: 100m
+ memory: 250Mi
+ requests:
+ cpu: 10m
+ memory: 190Mi
securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
runAsUser: 65534
- args:
- --logtostderr
- --secure-listen-address=:8443
- - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
+ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8081/
- image: carlosedp/kube-rbac-proxy:v0.5.0
+ image: quay.io/brancz/kube-rbac-proxy:v0.14.0
name: kube-rbac-proxy-main
ports:
- containerPort: 8443
name: https-main
+ resources:
+ limits:
+ cpu: 40m
+ memory: 40Mi
+ requests:
+ cpu: 20m
+ memory: 20Mi
securityContext:
- runAsUser: 65534
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ runAsGroup: 65532
+ runAsNonRoot: true
+ runAsUser: 65532
- args:
- --logtostderr
- --secure-listen-address=:9443
- - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
+ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8082/
- image: carlosedp/kube-rbac-proxy:v0.5.0
+ image: quay.io/brancz/kube-rbac-proxy:v0.14.0
name: kube-rbac-proxy-self
ports:
- containerPort: 9443
name: https-self
+ resources:
+ limits:
+ cpu: 20m
+ memory: 40Mi
+ requests:
+ cpu: 10m
+ memory: 20Mi
securityContext:
- runAsUser: 65534
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ runAsGroup: 65532
+ runAsNonRoot: true
+ runAsUser: 65532
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: kube-state-metrics
diff --git a/manifests/kube-state-metrics-networkPolicy.yaml b/manifests/kube-state-metrics-networkPolicy.yaml
new file mode 100644
index 0000000..9815df8
--- /dev/null
+++ b/manifests/kube-state-metrics-networkPolicy.yaml
@@ -0,0 +1,31 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
+ name: kube-state-metrics
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 8443
+ protocol: TCP
+ - port: 9443
+ protocol: TCP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/kube-state-metrics-prometheusRule.yaml b/manifests/kube-state-metrics-prometheusRule.yaml
new file mode 100644
index 0000000..5bfcc43
--- /dev/null
+++ b/manifests/kube-state-metrics-prometheusRule.yaml
@@ -0,0 +1,65 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
+ prometheus: k8s
+ role: alert-rules
+ name: kube-state-metrics-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: kube-state-metrics
+ rules:
+ - alert: KubeStateMetricsListErrors
+ annotations:
+ description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors
+ summary: kube-state-metrics is experiencing errors in list operations.
+ expr: |
+ (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
+ /
+ sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
+ > 0.01
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeStateMetricsWatchErrors
+ annotations:
+ description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors
+ summary: kube-state-metrics is experiencing errors in watch operations.
+ expr: |
+ (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
+ /
+ sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
+ > 0.01
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeStateMetricsShardingMismatch
+ annotations:
+ description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch
+ summary: kube-state-metrics sharding is misconfigured.
+ expr: |
+ stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeStateMetricsShardsMissing
+ annotations:
+ description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing
+ summary: kube-state-metrics shards are missing.
+ expr: |
+ 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1
+ -
+ sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) )
+ != 0
+ for: 15m
+ labels:
+ severity: critical
diff --git a/manifests/kube-state-metrics-service.yaml b/manifests/kube-state-metrics-service.yaml
index 7e07515..e349fe7 100644
--- a/manifests/kube-state-metrics-service.yaml
+++ b/manifests/kube-state-metrics-service.yaml
@@ -2,8 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
spec:
@@ -16,4 +18,6 @@ spec:
port: 9443
targetPort: https-self
selector:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/kube-state-metrics-serviceAccount.yaml b/manifests/kube-state-metrics-serviceAccount.yaml
index 6f856d9..be116f6 100644
--- a/manifests/kube-state-metrics-serviceAccount.yaml
+++ b/manifests/kube-state-metrics-serviceAccount.yaml
@@ -1,8 +1,11 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
diff --git a/manifests/kube-state-metrics-serviceMonitor.yaml b/manifests/kube-state-metrics-serviceMonitor.yaml
index ad7a643..1dde4b0 100644
--- a/manifests/kube-state-metrics-serviceMonitor.yaml
+++ b/manifests/kube-state-metrics-serviceMonitor.yaml
@@ -2,8 +2,10 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
spec:
@@ -11,6 +13,11 @@ spec:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
interval: 30s
+ metricRelabelings:
+ - action: drop
+ regex: kube_endpoint_address_not_ready|kube_endpoint_address_available
+ sourceLabels:
+ - __name__
port: https-main
relabelings:
- action: labeldrop
@@ -28,4 +35,6 @@ spec:
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/node-exporter-clusterRole.yaml b/manifests/node-exporter-clusterRole.yaml
index ad783ae..e4a8b76 100644
--- a/manifests/node-exporter-clusterRole.yaml
+++ b/manifests/node-exporter-clusterRole.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
+ namespace: monitoring
rules:
- apiGroups:
- authentication.k8s.io
diff --git a/manifests/node-exporter-clusterRoleBinding.yaml b/manifests/node-exporter-clusterRoleBinding.yaml
index a5a2050..ba3594e 100644
--- a/manifests/node-exporter-clusterRoleBinding.yaml
+++ b/manifests/node-exporter-clusterRoleBinding.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
+ namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml
index afe2901..34dca7a 100644
--- a/manifests/node-exporter-daemonset.yaml
+++ b/manifests/node-exporter-daemonset.yaml
@@ -2,30 +2,41 @@ apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
- app.kubernetes.io/version: v0.18.1
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
namespace: monitoring
spec:
selector:
matchLabels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: node-exporter
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
- app.kubernetes.io/version: v0.18.1
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
spec:
+ automountServiceAccountToken: true
containers:
- args:
- --web.listen-address=127.0.0.1:9100
- - --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
+ - --path.udev.data=/host/root/run/udev/data
- --no-collector.wifi
- --no-collector.hwmon
- - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- image: prom/node-exporter:v0.18.1
+ - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
+ - --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$
+ - --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$
+ image: quay.io/prometheus/node-exporter:v1.5.0
name: node-exporter
resources:
limits:
@@ -34,13 +45,19 @@ spec:
requests:
cpu: 102m
memory: 180Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ add:
+ - SYS_TIME
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
volumeMounts:
- - mountPath: /host/proc
- name: proc
- readOnly: false
- mountPath: /host/sys
+ mountPropagation: HostToContainer
name: sys
- readOnly: false
+ readOnly: true
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
@@ -48,14 +65,14 @@ spec:
- args:
- --logtostderr
- --secure-listen-address=[$(IP)]:9100
- - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
+ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:9100/
env:
- name: IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- image: carlosedp/kube-rbac-proxy:v0.5.0
+ image: quay.io/brancz/kube-rbac-proxy:v0.14.0
name: kube-rbac-proxy
ports:
- containerPort: 9100
@@ -68,10 +85,20 @@ spec:
requests:
cpu: 10m
memory: 20Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ runAsGroup: 65532
+ runAsNonRoot: true
+ runAsUser: 65532
hostNetwork: true
hostPID: true
nodeSelector:
kubernetes.io/os: linux
+ priorityClassName: system-cluster-critical
securityContext:
runAsNonRoot: true
runAsUser: 65534
@@ -79,12 +106,13 @@ spec:
tolerations:
- operator: Exists
volumes:
- - hostPath:
- path: /proc
- name: proc
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root
+ updateStrategy:
+ rollingUpdate:
+ maxUnavailable: 10%
+ type: RollingUpdate
diff --git a/manifests/node-exporter-networkPolicy.yaml b/manifests/node-exporter-networkPolicy.yaml
new file mode 100644
index 0000000..c03fdd4
--- /dev/null
+++ b/manifests/node-exporter-networkPolicy.yaml
@@ -0,0 +1,29 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
+ name: node-exporter
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 9100
+ protocol: TCP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/node-exporter-prometheusRule.yaml b/manifests/node-exporter-prometheusRule.yaml
new file mode 100644
index 0000000..5e5e52c
--- /dev/null
+++ b/manifests/node-exporter-prometheusRule.yaml
@@ -0,0 +1,316 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
+ prometheus: k8s
+ role: alert-rules
+ name: node-exporter-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: node-exporter
+ rules:
+ - alert: NodeFilesystemSpaceFillingUp
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
+ summary: Filesystem is predicted to run out of space within the next 24 hours.
+ expr: |
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15
+ and
+ predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemSpaceFillingUp
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
+ summary: Filesystem is predicted to run out of space within the next 4 hours.
+ expr: |
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10
+ and
+ predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeFilesystemAlmostOutOfSpace
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
+ summary: Filesystem has less than 5% space left.
+ expr: |
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 30m
+ labels:
+ severity: warning
+ - alert: NodeFilesystemAlmostOutOfSpace
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
+ summary: Filesystem has less than 3% space left.
+ expr: |
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 30m
+ labels:
+ severity: critical
+ - alert: NodeFilesystemFilesFillingUp
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
+ summary: Filesystem is predicted to run out of inodes within the next 24 hours.
+ expr: |
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40
+ and
+ predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemFilesFillingUp
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
+ summary: Filesystem is predicted to run out of inodes within the next 4 hours.
+ expr: |
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20
+ and
+ predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeFilesystemAlmostOutOfFiles
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
+ summary: Filesystem has less than 5% inodes left.
+ expr: |
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemAlmostOutOfFiles
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
+ summary: Filesystem has less than 3% inodes left.
+ expr: |
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeNetworkReceiveErrs
+ annotations:
+ description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
+ summary: Network interface is reporting many receive errors.
+ expr: |
+ rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeNetworkTransmitErrs
+ annotations:
+ description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
+ summary: Network interface is reporting many transmit errors.
+ expr: |
+ rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeHighNumberConntrackEntriesUsed
+ annotations:
+ description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
+ summary: Number of conntrack entries is getting close to the limit.
+ expr: |
+ (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
+ labels:
+ severity: warning
+ - alert: NodeTextFileCollectorScrapeError
+ annotations:
+ description: Node Exporter text file collector failed to scrape.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
+ summary: Node Exporter text file collector failed to scrape.
+ expr: |
+ node_textfile_scrape_error{job="node-exporter"} == 1
+ labels:
+ severity: warning
+ - alert: NodeClockSkewDetected
+ annotations:
+ description: Clock on {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
+ summary: Clock skew detected.
+ expr: |
+ (
+ node_timex_offset_seconds{job="node-exporter"} > 0.05
+ and
+ deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
+ )
+ or
+ (
+ node_timex_offset_seconds{job="node-exporter"} < -0.05
+ and
+ deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
+ )
+ for: 10m
+ labels:
+ severity: warning
+ - alert: NodeClockNotSynchronising
+ annotations:
+ description: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
+ summary: Clock not synchronising.
+ expr: |
+ min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
+ and
+ node_timex_maxerror_seconds{job="node-exporter"} >= 16
+ for: 10m
+ labels:
+ severity: warning
+ - alert: NodeRAIDDegraded
+ annotations:
+ description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
+ summary: RAID Array is degraded
+ expr: |
+ node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0
+ for: 15m
+ labels:
+ severity: critical
+ - alert: NodeRAIDDiskFailure
+ annotations:
+ description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
+ summary: Failed device in RAID array
+ expr: |
+ node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0
+ labels:
+ severity: warning
+ - alert: NodeFileDescriptorLimit
+ annotations:
+ description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
+ summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |
+ (
+ node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
+ )
+ for: 15m
+ labels:
+ severity: warning
+ - alert: NodeFileDescriptorLimit
+ annotations:
+ description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
+ summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |
+ (
+ node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
+ )
+ for: 15m
+ labels:
+ severity: critical
+ - name: node-exporter.rules
+ rules:
+ - expr: |
+ count without (cpu, mode) (
+ node_cpu_seconds_total{job="node-exporter",mode="idle"}
+ )
+ record: instance:node_num_cpu:sum
+ - expr: |
+ 1 - avg without (cpu) (
+ sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m]))
+ )
+ record: instance:node_cpu_utilisation:rate5m
+ - expr: |
+ (
+ node_load1{job="node-exporter"}
+ /
+ instance:node_num_cpu:sum{job="node-exporter"}
+ )
+ record: instance:node_load1_per_cpu:ratio
+ - expr: |
+ 1 - (
+ (
+ node_memory_MemAvailable_bytes{job="node-exporter"}
+ or
+ (
+ node_memory_Buffers_bytes{job="node-exporter"}
+ +
+ node_memory_Cached_bytes{job="node-exporter"}
+ +
+ node_memory_MemFree_bytes{job="node-exporter"}
+ +
+ node_memory_Slab_bytes{job="node-exporter"}
+ )
+ )
+ /
+ node_memory_MemTotal_bytes{job="node-exporter"}
+ )
+ record: instance:node_memory_utilisation:ratio
+ - expr: |
+ rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])
+ record: instance:node_vmstat_pgmajfault:rate5m
+ - expr: |
+ rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
+ record: instance_device:node_disk_io_time_seconds:rate5m
+ - expr: |
+ rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
+ record: instance_device:node_disk_io_time_weighted_seconds:rate5m
+ - expr: |
+ sum without (device) (
+ rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m])
+ )
+ record: instance:node_network_receive_bytes_excluding_lo:rate5m
+ - expr: |
+ sum without (device) (
+ rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m])
+ )
+ record: instance:node_network_transmit_bytes_excluding_lo:rate5m
+ - expr: |
+ sum without (device) (
+ rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m])
+ )
+ record: instance:node_network_receive_drop_excluding_lo:rate5m
+ - expr: |
+ sum without (device) (
+ rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m])
+ )
+ record: instance:node_network_transmit_drop_excluding_lo:rate5m
diff --git a/manifests/node-exporter-service.yaml b/manifests/node-exporter-service.yaml
index 7dfbef6..7f3b270 100644
--- a/manifests/node-exporter-service.yaml
+++ b/manifests/node-exporter-service.yaml
@@ -2,8 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
- app.kubernetes.io/version: v0.18.1
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
namespace: monitoring
spec:
@@ -13,4 +15,6 @@ spec:
port: 9100
targetPort: https
selector:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/node-exporter-serviceAccount.yaml b/manifests/node-exporter-serviceAccount.yaml
index 8a03ac1..b3d72ad 100644
--- a/manifests/node-exporter-serviceAccount.yaml
+++ b/manifests/node-exporter-serviceAccount.yaml
@@ -1,5 +1,11 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
namespace: monitoring
diff --git a/manifests/node-exporter-serviceMonitor.yaml b/manifests/node-exporter-serviceMonitor.yaml
index 357164d..00081b2 100644
--- a/manifests/node-exporter-serviceMonitor.yaml
+++ b/manifests/node-exporter-serviceMonitor.yaml
@@ -2,8 +2,10 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
- app.kubernetes.io/version: v0.18.1
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
namespace: monitoring
spec:
@@ -24,4 +26,6 @@ spec:
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-adapter-apiService.yaml b/manifests/prometheus-adapter-apiService.yaml
index a215efe..bf17b25 100644
--- a/manifests/prometheus-adapter-apiService.yaml
+++ b/manifests/prometheus-adapter-apiService.yaml
@@ -1,6 +1,11 @@
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: v1beta1.metrics.k8s.io
spec:
group: metrics.k8s.io
diff --git a/manifests/prometheus-adapter-clusterRole.yaml b/manifests/prometheus-adapter-clusterRole.yaml
index a02d2bb..57e4925 100644
--- a/manifests/prometheus-adapter-clusterRole.yaml
+++ b/manifests/prometheus-adapter-clusterRole.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
+ namespace: monitoring
rules:
- apiGroups:
- ""
diff --git a/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml b/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml
index 9f0dbb3..b1f3175 100644
--- a/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml
+++ b/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml
@@ -2,10 +2,15 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
rbac.authorization.k8s.io/aggregate-to-admin: "true"
rbac.authorization.k8s.io/aggregate-to-edit: "true"
rbac.authorization.k8s.io/aggregate-to-view: "true"
name: system:aggregated-metrics-reader
+ namespace: monitoring
rules:
- apiGroups:
- metrics.k8s.io
diff --git a/manifests/prometheus-adapter-clusterRoleBinding.yaml b/manifests/prometheus-adapter-clusterRoleBinding.yaml
index 7e8f3da..749fc66 100644
--- a/manifests/prometheus-adapter-clusterRoleBinding.yaml
+++ b/manifests/prometheus-adapter-clusterRoleBinding.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
+ namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
diff --git a/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml b/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml
index 4295b50..eb8fc9e 100644
--- a/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml
+++ b/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: resource-metrics:system:auth-delegator
+ namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
diff --git a/manifests/prometheus-adapter-clusterRoleServerResources.yaml b/manifests/prometheus-adapter-clusterRoleServerResources.yaml
index fcb914c..73d78b0 100644
--- a/manifests/prometheus-adapter-clusterRoleServerResources.yaml
+++ b/manifests/prometheus-adapter-clusterRoleServerResources.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: resource-metrics-server-resources
+ namespace: monitoring
rules:
- apiGroups:
- metrics.k8s.io
diff --git a/manifests/prometheus-adapter-configMap.yaml b/manifests/prometheus-adapter-configMap.yaml
index b2bde3c..a1690bb 100644
--- a/manifests/prometheus-adapter-configMap.yaml
+++ b/manifests/prometheus-adapter-configMap.yaml
@@ -4,8 +4,26 @@ data:
"resourceRules":
"cpu":
"containerLabel": "container"
- "containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
- "nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
+ "containerQuery": |
+ sum by (<<.GroupBy>>) (
+ irate (
+ container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s]
+ )
+ )
+ "nodeQuery": |
+ sum by (<<.GroupBy>>) (
+ 1 - irate(
+ node_cpu_seconds_total{mode="idle"}[60s]
+ )
+ * on(namespace, pod) group_left(node) (
+ node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}
+ )
+ )
+ or sum by (<<.GroupBy>>) (
+ 1 - irate(
+ windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[4m]
+ )
+ )
"resources":
"overrides":
"namespace":
@@ -16,8 +34,21 @@ data:
"resource": "pod"
"memory":
"containerLabel": "container"
- "containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
- "nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
+ "containerQuery": |
+ sum by (<<.GroupBy>>) (
+ container_memory_working_set_bytes{<<.LabelMatchers>>,container!="",pod!=""}
+ )
+ "nodeQuery": |
+ sum by (<<.GroupBy>>) (
+ node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>}
+ -
+ node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}
+ )
+ or sum by (<<.GroupBy>>) (
+ windows_cs_physical_memory_bytes{job="windows-exporter",<<.LabelMatchers>>}
+ -
+ windows_memory_available_bytes{job="windows-exporter",<<.LabelMatchers>>}
+ )
"resources":
"overrides":
"instance":
@@ -29,5 +60,10 @@ data:
"window": "5m"
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: adapter-config
namespace: monitoring
diff --git a/manifests/prometheus-adapter-deployment.yaml b/manifests/prometheus-adapter-deployment.yaml
index b95f07d..a4048ac 100644
--- a/manifests/prometheus-adapter-deployment.yaml
+++ b/manifests/prometheus-adapter-deployment.yaml
@@ -1,22 +1,46 @@
apiVersion: apps/v1
kind: Deployment
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
namespace: monitoring
spec:
- replicas: 1
+ replicas: 2
selector:
matchLabels:
- name: prometheus-adapter
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
strategy:
rollingUpdate:
maxSurge: 1
- maxUnavailable: 0
+ maxUnavailable: 1
template:
metadata:
labels:
- name: prometheus-adapter
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
spec:
+ affinity:
+ podAntiAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - podAffinityTerm:
+ labelSelector:
+ matchLabels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ namespaces:
+ - monitoring
+ topologyKey: kubernetes.io/hostname
+ weight: 100
+ automountServiceAccountToken: true
containers:
- args:
- --cert-dir=/var/run/serving-cert
@@ -25,10 +49,41 @@ spec:
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
- --secure-port=6443
- image: directxman12/k8s-prometheus-adapter:v0.7.0
+ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
+ image: registry.k8s.io/prometheus-adapter/prometheus-adapter:v0.10.0
+ livenessProbe:
+ failureThreshold: 5
+ httpGet:
+ path: /livez
+ port: https
+ scheme: HTTPS
+ initialDelaySeconds: 30
+ periodSeconds: 5
name: prometheus-adapter
ports:
- containerPort: 6443
+ name: https
+ readinessProbe:
+ failureThreshold: 5
+ httpGet:
+ path: /readyz
+ port: https
+ scheme: HTTPS
+ initialDelaySeconds: 30
+ periodSeconds: 5
+ resources:
+ limits:
+ cpu: 250m
+ memory: 180Mi
+ requests:
+ cpu: 102m
+ memory: 180Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
volumeMounts:
- mountPath: /tmp
name: tmpfs
diff --git a/manifests/prometheus-adapter-networkPolicy.yaml b/manifests/prometheus-adapter-networkPolicy.yaml
new file mode 100644
index 0000000..29a11fd
--- /dev/null
+++ b/manifests/prometheus-adapter-networkPolicy.yaml
@@ -0,0 +1,23 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
+ name: prometheus-adapter
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - {}
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/prometheus-adapter-podDisruptionBudget.yaml b/manifests/prometheus-adapter-podDisruptionBudget.yaml
new file mode 100644
index 0000000..033fcaa
--- /dev/null
+++ b/manifests/prometheus-adapter-podDisruptionBudget.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
+ name: prometheus-adapter
+ namespace: monitoring
+spec:
+ minAvailable: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-adapter-roleBindingAuthReader.yaml b/manifests/prometheus-adapter-roleBindingAuthReader.yaml
index 48c8f32..5d0d903 100644
--- a/manifests/prometheus-adapter-roleBindingAuthReader.yaml
+++ b/manifests/prometheus-adapter-roleBindingAuthReader.yaml
@@ -1,6 +1,11 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: resource-metrics-auth-reader
namespace: kube-system
roleRef:
diff --git a/manifests/prometheus-adapter-service.yaml b/manifests/prometheus-adapter-service.yaml
index e786e01..c984828 100644
--- a/manifests/prometheus-adapter-service.yaml
+++ b/manifests/prometheus-adapter-service.yaml
@@ -2,7 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
- name: prometheus-adapter
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
namespace: monitoring
spec:
@@ -11,4 +14,6 @@ spec:
port: 443
targetPort: 6443
selector:
- name: prometheus-adapter
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-adapter-serviceAccount.yaml b/manifests/prometheus-adapter-serviceAccount.yaml
index d7e7050..bb1058b 100644
--- a/manifests/prometheus-adapter-serviceAccount.yaml
+++ b/manifests/prometheus-adapter-serviceAccount.yaml
@@ -1,5 +1,11 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
namespace: monitoring
diff --git a/manifests/prometheus-adapter-serviceMonitor.yaml b/manifests/prometheus-adapter-serviceMonitor.yaml
new file mode 100644
index 0000000..d894145
--- /dev/null
+++ b/manifests/prometheus-adapter-serviceMonitor.yaml
@@ -0,0 +1,28 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
+ name: prometheus-adapter
+ namespace: monitoring
+spec:
+ endpoints:
+ - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+ interval: 30s
+ metricRelabelings:
+ - action: drop
+ regex: (apiserver_client_certificate_.*|apiserver_envelope_.*|apiserver_flowcontrol_.*|apiserver_storage_.*|apiserver_webhooks_.*|workqueue_.*)
+ sourceLabels:
+ - __name__
+ port: https
+ scheme: https
+ tlsConfig:
+ insecureSkipVerify: true
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-clusterRole.yaml b/manifests/prometheus-clusterRole.yaml
index d5c4598..e8e866b 100644
--- a/manifests/prometheus-clusterRole.yaml
+++ b/manifests/prometheus-clusterRole.yaml
@@ -1,6 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
rules:
- apiGroups:
diff --git a/manifests/prometheus-clusterRoleBinding.yaml b/manifests/prometheus-clusterRoleBinding.yaml
index 554bb6f..abd9d64 100644
--- a/manifests/prometheus-clusterRoleBinding.yaml
+++ b/manifests/prometheus-clusterRoleBinding.yaml
@@ -1,6 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
roleRef:
apiGroup: rbac.authorization.k8s.io
diff --git a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml
deleted file mode 100644
index 9506973..0000000
--- a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- labels:
- k8s-app: kube-controller-manager
- name: kube-controller-manager-prometheus-discovery
- namespace: kube-system
-spec:
- clusterIP: None
- ports:
- - name: http-metrics
- port: 10252
- targetPort: 10252
- selector:
- component: kube-controller-manager
diff --git a/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml
deleted file mode 100644
index 34e746c..0000000
--- a/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- labels:
- k8s-app: kube-dns
- name: kube-dns-prometheus-discovery
- namespace: kube-system
-spec:
- clusterIP: None
- ports:
- - name: metrics
- port: 9153
- targetPort: 9153
- selector:
- k8s-app: kube-dns
diff --git a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml
deleted file mode 100644
index b4843c7..0000000
--- a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- labels:
- k8s-app: kube-scheduler
- name: kube-scheduler-prometheus-discovery
- namespace: kube-system
-spec:
- clusterIP: None
- ports:
- - name: http-metrics
- port: 10251
- targetPort: 10251
- selector:
- component: kube-scheduler
diff --git a/manifests/prometheus-networkPolicy.yaml b/manifests/prometheus-networkPolicy.yaml
new file mode 100644
index 0000000..7fbb293
--- /dev/null
+++ b/manifests/prometheus-networkPolicy.yaml
@@ -0,0 +1,40 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
+ name: prometheus-k8s
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 9090
+ protocol: TCP
+ - port: 8080
+ protocol: TCP
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: grafana
+ ports:
+ - port: 9090
+ protocol: TCP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/prometheus-operator-serviceMonitor.yaml b/manifests/prometheus-operator-serviceMonitor.yaml
index 39e48aa..50d6cd8 100644
--- a/manifests/prometheus-operator-serviceMonitor.yaml
+++ b/manifests/prometheus-operator-serviceMonitor.yaml
@@ -4,7 +4,8 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.40.0
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.61.1
name: prometheus-operator
namespace: monitoring
spec:
@@ -19,4 +20,5 @@ spec:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.40.0
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.61.1
diff --git a/manifests/prometheus-podDisruptionBudget.yaml b/manifests/prometheus-podDisruptionBudget.yaml
new file mode 100644
index 0000000..7e3e656
--- /dev/null
+++ b/manifests/prometheus-podDisruptionBudget.yaml
@@ -0,0 +1,19 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
+ name: prometheus-k8s
+ namespace: monitoring
+spec:
+ minAvailable: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-prometheus.yaml b/manifests/prometheus-prometheus.yaml
index 2280b93..6e1d307 100644
--- a/manifests/prometheus-prometheus.yaml
+++ b/manifests/prometheus-prometheus.yaml
@@ -2,7 +2,11 @@ apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
labels:
- prometheus: k8s
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: k8s
namespace: monitoring
spec:
@@ -11,35 +15,45 @@ spec:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
- matchExpressions:
- - key: prometheus
- operator: In
- values:
- - k8s
+ matchLabels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
namespaces:
- monitoring
topologyKey: kubernetes.io/hostname
weight: 100
alerting:
alertmanagers:
- - name: alertmanager-main
+ - apiVersion: v2
+ name: alertmanager-main
namespace: monitoring
port: web
+ enableFeatures: []
+ externalLabels: {}
externalUrl: http://prometheus.192.168.1.15.nip.io
- image: prom/prometheus:v2.19.1
+ image: quay.io/prometheus/prometheus:v2.41.0
nodeSelector:
kubernetes.io/os: linux
+ podMetadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
podMonitorNamespaceSelector: {}
podMonitorSelector: {}
+ probeNamespaceSelector: {}
+ probeSelector: {}
replicas: 1
resources:
requests:
memory: 400Mi
retention: 15d
- ruleSelector:
- matchLabels:
- prometheus: k8s
- role: alert-rules
+ ruleNamespaceSelector: {}
+ ruleSelector: {}
scrapeInterval: 30s
scrapeTimeout: 30s
securityContext:
@@ -49,4 +63,4 @@ spec:
serviceAccountName: prometheus-k8s
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
- version: v2.19.1
+ version: 2.41.0
diff --git a/manifests/prometheus-prometheusRule.yaml b/manifests/prometheus-prometheusRule.yaml
new file mode 100644
index 0000000..34600e6
--- /dev/null
+++ b/manifests/prometheus-prometheusRule.yaml
@@ -0,0 +1,280 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
+ prometheus: k8s
+ role: alert-rules
+ name: prometheus-k8s-prometheus-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: prometheus
+ rules:
+ - alert: PrometheusBadConfig
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig
+ summary: Failed Prometheus configuration reload.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0
+ for: 10m
+ labels:
+ severity: critical
+ - alert: PrometheusNotificationQueueRunningFull
+ annotations:
+ description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull
+ summary: Prometheus alert notification queue predicted to run full in less than 30m.
+ expr: |
+ # Without min_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30)
+ >
+ min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m])
+ )
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
+ annotations:
+ description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers
+ summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
+ expr: |
+ (
+ rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
+ /
+ rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
+ )
+ * 100
+ > 1
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusNotConnectedToAlertmanagers
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers
+ summary: Prometheus is not connected to any Alertmanagers.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBReloadsFailing
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing
+ summary: Prometheus has issues reloading blocks from disk.
+ expr: |
+ increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBCompactionsFailing
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing
+ summary: Prometheus has issues compacting blocks.
+ expr: |
+ increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusNotIngestingSamples
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples
+ summary: Prometheus is not ingesting samples.
+ expr: |
+ (
+ rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
+ and
+ (
+ sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
+ or
+ sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
+ )
+ )
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusDuplicateTimestamps
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps
+ summary: Prometheus is dropping samples with duplicate timestamps.
+ expr: |
+ rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusOutOfOrderTimestamps
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps
+ summary: Prometheus drops samples with out-of-order timestamps.
+ expr: |
+ rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusRemoteStorageFailures
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures
+ summary: Prometheus fails to send samples to remote storage.
+ expr: |
+ (
+ (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
+ /
+ (
+ (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
+ +
+ (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
+ )
+ )
+ * 100
+ > 1
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusRemoteWriteBehind
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind
+ summary: Prometheus remote write is behind.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
+ - ignoring(remote_name, url) group_right
+ max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
+ )
+ > 120
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusRemoteWriteDesiredShards
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards
+ summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
+ >
+ max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
+ )
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusRuleFailures
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures
+ summary: Prometheus is failing rule evaluations.
+ expr: |
+ increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusMissingRuleEvaluations
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations
+ summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
+ expr: |
+ increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusTargetLimitHit
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit
+ summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
+ expr: |
+ increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusLabelLimitHit
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit
+ summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
+ expr: |
+ increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusScrapeBodySizeLimitHit
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit
+ summary: Prometheus has dropped some targets that exceeded body size limit.
+ expr: |
+ increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusScrapeSampleLimitHit
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit
+ summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
+ expr: |
+ increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusTargetSyncFailure
+ annotations:
+ description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure
+ summary: Prometheus has failed to sync targets.
+ expr: |
+ increase(prometheus_target_sync_failed_total{job="prometheus-k8s",namespace="monitoring"}[30m]) > 0
+ for: 5m
+ labels:
+ severity: critical
+ - alert: PrometheusHighQueryLoad
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload
+ summary: Prometheus is reaching its maximum capacity serving concurrent requests.
+ expr: |
+ avg_over_time(prometheus_engine_queries{job="prometheus-k8s",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.8
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
+ annotations:
+ description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager
+ summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+ expr: |
+ min without (alertmanager) (
+ rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
+ /
+ rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
+ )
+ * 100
+ > 3
+ for: 15m
+ labels:
+ severity: critical
diff --git a/manifests/prometheus-roleBindingConfig.yaml b/manifests/prometheus-roleBindingConfig.yaml
index ec0129d..27685aa 100644
--- a/manifests/prometheus-roleBindingConfig.yaml
+++ b/manifests/prometheus-roleBindingConfig.yaml
@@ -1,6 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s-config
namespace: monitoring
roleRef:
diff --git a/manifests/prometheus-roleBindingSpecificNamespaces.yaml b/manifests/prometheus-roleBindingSpecificNamespaces.yaml
index c7527f6..6658f10 100644
--- a/manifests/prometheus-roleBindingSpecificNamespaces.yaml
+++ b/manifests/prometheus-roleBindingSpecificNamespaces.yaml
@@ -3,6 +3,12 @@ items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: default
roleRef:
@@ -16,6 +22,12 @@ items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: kube-system
roleRef:
@@ -29,6 +41,12 @@ items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: monitoring
roleRef:
diff --git a/manifests/prometheus-roleConfig.yaml b/manifests/prometheus-roleConfig.yaml
index 5f1cd04..311bfbf 100644
--- a/manifests/prometheus-roleConfig.yaml
+++ b/manifests/prometheus-roleConfig.yaml
@@ -1,6 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s-config
namespace: monitoring
rules:
diff --git a/manifests/prometheus-roleSpecificNamespaces.yaml b/manifests/prometheus-roleSpecificNamespaces.yaml
index b920b88..cbdf10a 100644
--- a/manifests/prometheus-roleSpecificNamespaces.yaml
+++ b/manifests/prometheus-roleSpecificNamespaces.yaml
@@ -3,6 +3,12 @@ items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: default
rules:
@@ -16,9 +22,31 @@ items:
- get
- list
- watch
+ - apiGroups:
+ - extensions
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - networking.k8s.io
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: kube-system
rules:
@@ -32,9 +60,31 @@ items:
- get
- list
- watch
+ - apiGroups:
+ - extensions
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - networking.k8s.io
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: monitoring
rules:
@@ -48,4 +98,20 @@ items:
- get
- list
- watch
+ - apiGroups:
+ - extensions
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - networking.k8s.io
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
kind: RoleList
diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml
deleted file mode 100644
index 722a797..0000000
--- a/manifests/prometheus-rules.yaml
+++ /dev/null
@@ -1,1759 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: PrometheusRule
-metadata:
- labels:
- prometheus: k8s
- role: alert-rules
- name: prometheus-k8s-rules
- namespace: monitoring
-spec:
- groups:
- - name: node-exporter.rules
- rules:
- - expr: |
- count without (cpu) (
- count without (mode) (
- node_cpu_seconds_total{job="node-exporter"}
- )
- )
- record: instance:node_num_cpu:sum
- - expr: |
- 1 - avg without (cpu, mode) (
- rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
- )
- record: instance:node_cpu_utilisation:rate1m
- - expr: |
- (
- node_load1{job="node-exporter"}
- /
- instance:node_num_cpu:sum{job="node-exporter"}
- )
- record: instance:node_load1_per_cpu:ratio
- - expr: |
- 1 - (
- node_memory_MemAvailable_bytes{job="node-exporter"}
- /
- node_memory_MemTotal_bytes{job="node-exporter"}
- )
- record: instance:node_memory_utilisation:ratio
- - expr: |
- rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
- record: instance:node_vmstat_pgmajfault:rate1m
- - expr: |
- rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
- record: instance_device:node_disk_io_time_seconds:rate1m
- - expr: |
- rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
- record: instance_device:node_disk_io_time_weighted_seconds:rate1m
- - expr: |
- sum without (device) (
- rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
- )
- record: instance:node_network_receive_bytes_excluding_lo:rate1m
- - expr: |
- sum without (device) (
- rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
- )
- record: instance:node_network_transmit_bytes_excluding_lo:rate1m
- - expr: |
- sum without (device) (
- rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
- )
- record: instance:node_network_receive_drop_excluding_lo:rate1m
- - expr: |
- sum without (device) (
- rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
- )
- record: instance:node_network_transmit_drop_excluding_lo:rate1m
- - name: kube-apiserver.rules
- rules:
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
- labels:
- verb: read
- record: apiserver_request:burnrate1d
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
- labels:
- verb: read
- record: apiserver_request:burnrate1h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
- labels:
- verb: read
- record: apiserver_request:burnrate2h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
- labels:
- verb: read
- record: apiserver_request:burnrate30m
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
- labels:
- verb: read
- record: apiserver_request:burnrate3d
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
- labels:
- verb: read
- record: apiserver_request:burnrate5m
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
- labels:
- verb: read
- record: apiserver_request:burnrate6h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
- labels:
- verb: write
- record: apiserver_request:burnrate1d
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
- labels:
- verb: write
- record: apiserver_request:burnrate1h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
- labels:
- verb: write
- record: apiserver_request:burnrate2h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
- labels:
- verb: write
- record: apiserver_request:burnrate30m
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
- labels:
- verb: write
- record: apiserver_request:burnrate3d
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
- labels:
- verb: write
- record: apiserver_request:burnrate5m
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
- labels:
- verb: write
- record: apiserver_request:burnrate6h
- - expr: |
- sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
- labels:
- verb: read
- record: code_resource:apiserver_request_total:rate5m
- - expr: |
- sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
- labels:
- verb: write
- record: code_resource:apiserver_request_total:rate5m
- - expr: |
- histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
- labels:
- quantile: "0.99"
- verb: read
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
- labels:
- quantile: "0.99"
- verb: write
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: |
- sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
- /
- sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
- record: cluster:apiserver_request_duration_seconds:mean5m
- - expr: |
- histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
- labels:
- quantile: "0.99"
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
- labels:
- quantile: "0.9"
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
- labels:
- quantile: "0.5"
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - interval: 3m
- name: kube-apiserver-availability.rules
- rules:
- - expr: |
- 1 - (
- (
- # write too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
- -
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
- ) +
- (
- # read too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
- -
- (
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
- )
- ) +
- # errors
- sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
- )
- /
- sum(code:apiserver_request_total:increase30d)
- labels:
- verb: all
- record: apiserver_request:availability30d
- - expr: |
- 1 - (
- sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
- -
- (
- # too slow
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
- )
- +
- # errors
- sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
- )
- /
- sum(code:apiserver_request_total:increase30d{verb="read"})
- labels:
- verb: read
- record: apiserver_request:availability30d
- - expr: |
- 1 - (
- (
- # too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
- -
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
- )
- +
- # errors
- sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
- )
- /
- sum(code:apiserver_request_total:increase30d{verb="write"})
- labels:
- verb: write
- record: apiserver_request:availability30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
- labels:
- verb: read
- record: code:apiserver_request_total:increase30d
- - expr: |
- sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
- labels:
- verb: write
- record: code:apiserver_request_total:increase30d
- - name: k8s.rules
- rules:
- - expr: |
- sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace)
- record: namespace:container_cpu_usage_seconds_total:sum_rate
- - expr: |
- sum by (cluster, namespace, pod, container) (
- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
- ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
- 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- - expr: |
- container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
- max by(namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_memory_working_set_bytes
- - expr: |
- container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
- max by(namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_memory_rss
- - expr: |
- container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
- max by(namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_memory_cache
- - expr: |
- container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
- max by(namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_memory_swap
- - expr: |
- sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
- record: namespace:container_memory_usage_bytes:sum
- - expr: |
- sum by (namespace) (
- sum by (namespace, pod) (
- max by (namespace, pod, container) (
- kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
- ) * on(namespace, pod) group_left() max by (namespace, pod) (
- kube_pod_status_phase{phase=~"Pending|Running"} == 1
- )
- )
- )
- record: namespace:kube_pod_container_resource_requests_memory_bytes:sum
- - expr: |
- sum by (namespace) (
- sum by (namespace, pod) (
- max by (namespace, pod, container) (
- kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
- ) * on(namespace, pod) group_left() max by (namespace, pod) (
- kube_pod_status_phase{phase=~"Pending|Running"} == 1
- )
- )
- )
- record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
- - expr: |
- max by (cluster, namespace, workload, pod) (
- label_replace(
- label_replace(
- kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
- "replicaset", "$1", "owner_name", "(.*)"
- ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
- 1, max by (replicaset, namespace, owner_name) (
- kube_replicaset_owner{job="kube-state-metrics"}
- )
- ),
- "workload", "$1", "owner_name", "(.*)"
- )
- )
- labels:
- workload_type: deployment
- record: mixin_pod_workload
- - expr: |
- max by (cluster, namespace, workload, pod) (
- label_replace(
- kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
- "workload", "$1", "owner_name", "(.*)"
- )
- )
- labels:
- workload_type: daemonset
- record: mixin_pod_workload
- - expr: |
- max by (cluster, namespace, workload, pod) (
- label_replace(
- kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
- "workload", "$1", "owner_name", "(.*)"
- )
- )
- labels:
- workload_type: statefulset
- record: mixin_pod_workload
- - name: kube-scheduler.rules
- rules:
- - expr: |
- histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.99"
- record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.99"
- record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.99"
- record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.9"
- record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.9"
- record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.9"
- record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.5"
- record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.5"
- record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.5"
- record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- - name: node.rules
- rules:
- - expr: |
- sum(min(kube_pod_info{node!=""}) by (cluster, node))
- record: ':kube_pod_info_node_count:'
- - expr: |
- topk by(namespace, pod) (1,
- max by (node, namespace, pod) (
- label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)")
- ))
- record: 'node_namespace_pod:kube_pod_info:'
- - expr: |
- count by (cluster, node) (sum by (node, cpu) (
- node_cpu_seconds_total{job="node-exporter"}
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- ))
- record: node:node_num_cpu:sum
- - expr: |
- sum(
- node_memory_MemAvailable_bytes{job="node-exporter"} or
- (
- node_memory_Buffers_bytes{job="node-exporter"} +
- node_memory_Cached_bytes{job="node-exporter"} +
- node_memory_MemFree_bytes{job="node-exporter"} +
- node_memory_Slab_bytes{job="node-exporter"}
- )
- ) by (cluster)
- record: :node_memory_MemAvailable_bytes:sum
- - name: kubelet.rules
- rules:
- - expr: |
- histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- labels:
- quantile: "0.99"
- record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- labels:
- quantile: "0.9"
- record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- labels:
- quantile: "0.5"
- record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- - name: kube-prometheus-node-recording.rules
- rules:
- - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
- record: instance:node_cpu:rate:sum
- - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
- record: instance:node_filesystem_usage:sum
- - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
- record: instance:node_network_receive_bytes:rate:sum
- - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
- record: instance:node_network_transmit_bytes:rate:sum
- - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
- record: instance:node_cpu:ratio
- - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
- record: cluster:node_cpu:sum_rate5m
- - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
- record: cluster:node_cpu:ratio
- - name: kube-prometheus-general.rules
- rules:
- - expr: count without(instance, pod, node) (up == 1)
- record: count:up1
- - expr: count without(instance, pod, node) (up == 0)
- record: count:up0
- - name: kube-state-metrics
- rules:
- - alert: KubeStateMetricsListErrors
- annotations:
- message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
- expr: |
- (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
- /
- sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
- > 0.01
- for: 15m
- labels:
- severity: critical
- - alert: KubeStateMetricsWatchErrors
- annotations:
- message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors
- expr: |
- (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
- /
- sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
- > 0.01
- for: 15m
- labels:
- severity: critical
- - name: node-exporter
- rules:
- - alert: NodeFilesystemSpaceFillingUp
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
- summary: Filesystem is predicted to run out of space within the next 24 hours.
- expr: |
- (
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
- and
- predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: warning
- - alert: NodeFilesystemSpaceFillingUp
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
- summary: Filesystem is predicted to run out of space within the next 4 hours.
- expr: |
- (
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
- and
- predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: critical
- - alert: NodeFilesystemAlmostOutOfSpace
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
- summary: Filesystem has less than 5% space left.
- expr: |
- (
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: warning
- - alert: NodeFilesystemAlmostOutOfSpace
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
- summary: Filesystem has less than 3% space left.
- expr: |
- (
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: critical
- - alert: NodeFilesystemFilesFillingUp
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
- summary: Filesystem is predicted to run out of inodes within the next 24 hours.
- expr: |
- (
- node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
- and
- predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: warning
- - alert: NodeFilesystemFilesFillingUp
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
- summary: Filesystem is predicted to run out of inodes within the next 4 hours.
- expr: |
- (
- node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
- and
- predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: critical
- - alert: NodeFilesystemAlmostOutOfFiles
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
- summary: Filesystem has less than 5% inodes left.
- expr: |
- (
- node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: warning
- - alert: NodeFilesystemAlmostOutOfFiles
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
- summary: Filesystem has less than 3% inodes left.
- expr: |
- (
- node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: critical
- - alert: NodeNetworkReceiveErrs
- annotations:
- description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
- summary: Network interface is reporting many receive errors.
- expr: |
- increase(node_network_receive_errs_total[2m]) > 10
- for: 1h
- labels:
- severity: warning
- - alert: NodeNetworkTransmitErrs
- annotations:
- description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
- summary: Network interface is reporting many transmit errors.
- expr: |
- increase(node_network_transmit_errs_total[2m]) > 10
- for: 1h
- labels:
- severity: warning
- - alert: NodeHighNumberConntrackEntriesUsed
- annotations:
- description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
- summary: Number of conntrack are getting close to the limit.
- expr: |
- (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
- labels:
- severity: warning
- - alert: NodeTextFileCollectorScrapeError
- annotations:
- description: Node Exporter text file collector failed to scrape.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror
- summary: Node Exporter text file collector failed to scrape.
- expr: |
- node_textfile_scrape_error{job="node-exporter"} == 1
- labels:
- severity: warning
- - alert: NodeClockSkewDetected
- annotations:
- message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
- summary: Clock skew detected.
- expr: |
- (
- node_timex_offset_seconds > 0.05
- and
- deriv(node_timex_offset_seconds[5m]) >= 0
- )
- or
- (
- node_timex_offset_seconds < -0.05
- and
- deriv(node_timex_offset_seconds[5m]) <= 0
- )
- for: 10m
- labels:
- severity: warning
- - alert: NodeClockNotSynchronising
- annotations:
- message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
- summary: Clock not synchronising.
- expr: |
- min_over_time(node_timex_sync_status[5m]) == 0
- for: 10m
- labels:
- severity: warning
- - name: kubernetes-apps
- rules:
- - alert: KubePodCrashLooping
- annotations:
- message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
- expr: |
- rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
- for: 15m
- labels:
- severity: warning
- - alert: KubePodNotReady
- annotations:
- message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
- expr: |
- sum by (namespace, pod) (
- max by(namespace, pod) (
- kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}
- ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (
- 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})
- )
- ) > 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeDeploymentGenerationMismatch
- annotations:
- message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
- expr: |
- kube_deployment_status_observed_generation{job="kube-state-metrics"}
- !=
- kube_deployment_metadata_generation{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: warning
- - alert: KubeDeploymentReplicasMismatch
- annotations:
- message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
- expr: |
- (
- kube_deployment_spec_replicas{job="kube-state-metrics"}
- !=
- kube_deployment_status_replicas_available{job="kube-state-metrics"}
- ) and (
- changes(kube_deployment_status_replicas_updated{job="kube-state-metrics"}[5m])
- ==
- 0
- )
- for: 15m
- labels:
- severity: warning
- - alert: KubeStatefulSetReplicasMismatch
- annotations:
- message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
- expr: |
- (
- kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
- !=
- kube_statefulset_status_replicas{job="kube-state-metrics"}
- ) and (
- changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m])
- ==
- 0
- )
- for: 15m
- labels:
- severity: warning
- - alert: KubeStatefulSetGenerationMismatch
- annotations:
- message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
- expr: |
- kube_statefulset_status_observed_generation{job="kube-state-metrics"}
- !=
- kube_statefulset_metadata_generation{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: warning
- - alert: KubeStatefulSetUpdateNotRolledOut
- annotations:
- message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
- expr: |
- (
- max without (revision) (
- kube_statefulset_status_current_revision{job="kube-state-metrics"}
- unless
- kube_statefulset_status_update_revision{job="kube-state-metrics"}
- )
- *
- (
- kube_statefulset_replicas{job="kube-state-metrics"}
- !=
- kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
- )
- ) and (
- changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m])
- ==
- 0
- )
- for: 15m
- labels:
- severity: warning
- - alert: KubeDaemonSetRolloutStuck
- annotations:
- message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
- expr: |
- kube_daemonset_status_number_ready{job="kube-state-metrics"}
- /
- kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
- for: 15m
- labels:
- severity: warning
- - alert: KubeContainerWaiting
- annotations:
- message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
- expr: |
- sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
- for: 1h
- labels:
- severity: warning
- - alert: KubeDaemonSetNotScheduled
- annotations:
- message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
- expr: |
- kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
- -
- kube_daemonset_status_current_number_scheduled{job="kube-state-metrics"} > 0
- for: 10m
- labels:
- severity: warning
- - alert: KubeDaemonSetMisScheduled
- annotations:
- message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
- expr: |
- kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeCronJobRunning
- annotations:
- message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
- expr: |
- time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
- for: 1h
- labels:
- severity: warning
- - alert: KubeJobCompletion
- annotations:
- message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than one hour to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
- expr: |
- kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
- for: 1h
- labels:
- severity: warning
- - alert: KubeJobFailed
- annotations:
- message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
- expr: |
- kube_job_failed{job="kube-state-metrics"} > 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeHpaReplicasMismatch
- annotations:
- message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
- expr: |
- (kube_hpa_status_desired_replicas{job="kube-state-metrics"}
- !=
- kube_hpa_status_current_replicas{job="kube-state-metrics"})
- and
- changes(kube_hpa_status_current_replicas[15m]) == 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeHpaMaxedOut
- annotations:
- message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
- expr: |
- kube_hpa_status_current_replicas{job="kube-state-metrics"}
- ==
- kube_hpa_spec_max_replicas{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: warning
- - name: kubernetes-resources
- rules:
- - alert: KubeCPUOvercommit
- annotations:
- message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
- expr: |
- sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
- /
- sum(kube_node_status_allocatable_cpu_cores)
- >
- (count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores)
- for: 5m
- labels:
- severity: warning
- - alert: KubeMemoryOvercommit
- annotations:
- message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
- expr: |
- sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
- /
- sum(kube_node_status_allocatable_memory_bytes)
- >
- (count(kube_node_status_allocatable_memory_bytes)-1)
- /
- count(kube_node_status_allocatable_memory_bytes)
- for: 5m
- labels:
- severity: warning
- - alert: KubeCPUQuotaOvercommit
- annotations:
- message: Cluster has overcommitted CPU resource requests for Namespaces.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit
- expr: |
- sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
- /
- sum(kube_node_status_allocatable_cpu_cores)
- > 1.5
- for: 5m
- labels:
- severity: warning
- - alert: KubeMemoryQuotaOvercommit
- annotations:
- message: Cluster has overcommitted memory resource requests for Namespaces.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit
- expr: |
- sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
- /
- sum(kube_node_status_allocatable_memory_bytes{job="node-exporter"})
- > 1.5
- for: 5m
- labels:
- severity: warning
- - alert: KubeQuotaExceeded
- annotations:
- message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
- expr: |
- kube_resourcequota{job="kube-state-metrics", type="used"}
- / ignoring(instance, job, type)
- (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
- > 0.90
- for: 15m
- labels:
- severity: warning
- - alert: CPUThrottlingHigh
- annotations:
- message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
- expr: |
- sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
- /
- sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)
- > ( 25 / 100 )
- for: 15m
- labels:
- severity: warning
- - name: kubernetes-storage
- rules:
- - alert: KubePersistentVolumeFillingUp
- annotations:
- message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
- expr: |
- kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
- /
- kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
- < 0.03
- for: 1m
- labels:
- severity: critical
- - alert: KubePersistentVolumeFillingUp
- annotations:
- message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
- expr: |
- (
- kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
- /
- kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
- ) < 0.15
- and
- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
- for: 1h
- labels:
- severity: warning
- - alert: KubePersistentVolumeErrors
- annotations:
- message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
- expr: |
- kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
- for: 5m
- labels:
- severity: critical
- - name: kubernetes-system
- rules:
- - alert: KubeVersionMismatch
- annotations:
- message: There are {{ $value }} different semantic versions of Kubernetes components running.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
- expr: |
- count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
- for: 15m
- labels:
- severity: warning
- - alert: KubeClientErrors
- annotations:
- message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
- expr: |
- (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
- /
- sum(rate(rest_client_requests_total[5m])) by (instance, job))
- > 0.01
- for: 15m
- labels:
- severity: warning
- - name: kube-apiserver-slos
- rules:
- - alert: KubeAPIErrorBudgetBurn
- annotations:
- message: The API server is burning too much error budget
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
- expr: |
- sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
- and
- sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
- for: 2m
- labels:
- long: 1h
- severity: critical
- short: 5m
- - alert: KubeAPIErrorBudgetBurn
- annotations:
- message: The API server is burning too much error budget
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
- expr: |
- sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
- and
- sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
- for: 15m
- labels:
- long: 6h
- severity: critical
- short: 30m
- - alert: KubeAPIErrorBudgetBurn
- annotations:
- message: The API server is burning too much error budget
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
- expr: |
- sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
- and
- sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
- for: 1h
- labels:
- long: 1d
- severity: warning
- short: 2h
- - alert: KubeAPIErrorBudgetBurn
- annotations:
- message: The API server is burning too much error budget
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
- expr: |
- sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
- and
- sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
- for: 3h
- labels:
- long: 3d
- severity: warning
- short: 6h
- - name: kubernetes-system-apiserver
- rules:
- - alert: KubeAPILatencyHigh
- annotations:
- message: The API server has an abnormal latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
- expr: |
- cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
- >
- 1
- and on (verb,resource)
- (
- cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
- >
- on (verb) group_left()
- (
- avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
- +
- 2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
- )
- ) > on (verb) group_left()
- 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
- for: 5m
- labels:
- severity: warning
- - alert: KubeAPIErrorsHigh
- annotations:
- message: API server is returning errors for {{ $value | humanizePercentage }} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
- expr: |
- sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
- /
- sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05
- for: 10m
- labels:
- severity: warning
- - alert: KubeClientCertificateExpiration
- annotations:
- message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
- expr: |
- apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
- labels:
- severity: warning
- - alert: KubeClientCertificateExpiration
- annotations:
- message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
- expr: |
- apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
- labels:
- severity: critical
- - alert: AggregatedAPIErrors
- annotations:
- message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
- expr: |
- sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
- labels:
- severity: warning
- - alert: AggregatedAPIDown
- annotations:
- message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down. It has not been available at least for the past five minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
- expr: |
- sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0
- for: 5m
- labels:
- severity: warning
- - alert: KubeAPIDown
- annotations:
- message: KubeAPI has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
- expr: |
- absent(up{job="apiserver"} == 1)
- for: 15m
- labels:
- severity: critical
- - name: kubernetes-system-kubelet
- rules:
- - alert: KubeNodeNotReady
- annotations:
- message: '{{ $labels.node }} has been unready for more than 15 minutes.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
- expr: |
- kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeNodeUnreachable
- annotations:
- message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
- expr: |
- (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key="ToBeDeletedByClusterAutoscaler"}) == 1
- labels:
- severity: warning
- - alert: KubeletTooManyPods
- annotations:
- message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
- expr: |
- max(max(kubelet_running_pod_count{job="kubelet", metrics_path="/metrics"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"} != 1) by(node) > 0.95
- for: 15m
- labels:
- severity: warning
- - alert: KubeNodeReadinessFlapping
- annotations:
- message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
- expr: |
- sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
- for: 15m
- labels:
- severity: warning
- - alert: KubeletPlegDurationHigh
- annotations:
- message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
- expr: |
- node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
- for: 5m
- labels:
- severity: warning
- - alert: KubeletPodStartUpLatencyHigh
- annotations:
- message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
- expr: |
- histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
- for: 15m
- labels:
- severity: warning
- - alert: KubeletDown
- annotations:
- message: Kubelet has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
- expr: |
- absent(up{job="kubelet", metrics_path="/metrics"} == 1)
- for: 15m
- labels:
- severity: critical
- - name: kubernetes-system-scheduler
- rules:
- - alert: KubeSchedulerDown
- annotations:
- message: KubeScheduler has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
- expr: |
- absent(up{job="kube-scheduler"} == 1)
- for: 15m
- labels:
- severity: critical
- - name: kubernetes-system-controller-manager
- rules:
- - alert: KubeControllerManagerDown
- annotations:
- message: KubeControllerManager has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
- expr: |
- absent(up{job="kube-controller-manager"} == 1)
- for: 15m
- labels:
- severity: critical
- - name: prometheus
- rules:
- - alert: PrometheusBadConfig
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
- summary: Failed Prometheus configuration reload.
- expr: |
- # Without max_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0
- for: 10m
- labels:
- severity: critical
- - alert: PrometheusNotificationQueueRunningFull
- annotations:
- description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
- summary: Prometheus alert notification queue predicted to run full in less than 30m.
- expr: |
- # Without min_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- (
- predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30)
- >
- min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- for: 15m
- labels:
- severity: warning
- - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
- annotations:
- description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
- summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
- expr: |
- (
- rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- /
- rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- * 100
- > 1
- for: 15m
- labels:
- severity: warning
- - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
- annotations:
- description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
- summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
- expr: |
- min without(alertmanager) (
- rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- /
- rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- * 100
- > 3
- for: 15m
- labels:
- severity: critical
- - alert: PrometheusNotConnectedToAlertmanagers
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
- summary: Prometheus is not connected to any Alertmanagers.
- expr: |
- # Without max_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusTSDBReloadsFailing
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
- summary: Prometheus has issues reloading blocks from disk.
- expr: |
- increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
- for: 4h
- labels:
- severity: warning
- - alert: PrometheusTSDBCompactionsFailing
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
- summary: Prometheus has issues compacting blocks.
- expr: |
- increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
- for: 4h
- labels:
- severity: warning
- - alert: PrometheusNotIngestingSamples
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
- summary: Prometheus is not ingesting samples.
- expr: |
- rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusDuplicateTimestamps
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
- summary: Prometheus is dropping samples with duplicate timestamps.
- expr: |
- rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusOutOfOrderTimestamps
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
- summary: Prometheus drops samples with out-of-order timestamps.
- expr: |
- rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusRemoteStorageFailures
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
- summary: Prometheus fails to send samples to remote storage.
- expr: |
- (
- rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- /
- (
- rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- +
- rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- )
- * 100
- > 1
- for: 15m
- labels:
- severity: critical
- - alert: PrometheusRemoteWriteBehind
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
- summary: Prometheus remote write is behind.
- expr: |
- # Without max_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- (
- max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
- - on(job, instance) group_right
- max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- > 120
- for: 15m
- labels:
- severity: critical
- - alert: PrometheusRemoteWriteDesiredShards
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
- summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
- expr: |
- # Without max_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- (
- max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
- >
- max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- for: 15m
- labels:
- severity: warning
- - alert: PrometheusRuleFailures
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
- summary: Prometheus is failing rule evaluations.
- expr: |
- increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
- for: 15m
- labels:
- severity: critical
- - alert: PrometheusMissingRuleEvaluations
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
- summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
- expr: |
- increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
- for: 15m
- labels:
- severity: warning
- - name: alertmanager.rules
- rules:
- - alert: AlertmanagerConfigInconsistent
- annotations:
- message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.
- expr: |
- count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
- for: 5m
- labels:
- severity: critical
- - alert: AlertmanagerFailedReload
- annotations:
- message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.
- expr: |
- alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
- for: 10m
- labels:
- severity: warning
- - alert: AlertmanagerMembersInconsistent
- annotations:
- message: Alertmanager has not found all other members of the cluster.
- expr: |
- alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}
- != on (service) GROUP_LEFT()
- count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"})
- for: 5m
- labels:
- severity: critical
- - name: general.rules
- rules:
- - alert: TargetDown
- annotations:
- message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
- expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
- for: 10m
- labels:
- severity: warning
- - alert: Watchdog
- annotations:
- message: |
- This is an alert meant to ensure that the entire alerting pipeline is functional.
- This alert is always firing, therefore it should always be firing in Alertmanager
- and always fire against a receiver. There are integrations with various notification
- mechanisms that send a notification when this alert is not firing. For example the
- "DeadMansSnitch" integration in PagerDuty.
- expr: vector(1)
- labels:
- severity: none
- - name: node-network
- rules:
- - alert: NodeNetworkInterfaceFlapping
- annotations:
- message: Network interface "{{ $labels.device }}" changing it's up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
- expr: |
- changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
- for: 2m
- labels:
- severity: warning
- - name: prometheus-operator
- rules:
- - alert: PrometheusOperatorReconcileErrors
- annotations:
- message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.
- expr: |
- rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusOperatorNodeLookupErrors
- annotations:
- message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
- expr: |
- rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
- for: 10m
- labels:
- severity: warning
diff --git a/manifests/prometheus-service.yaml b/manifests/prometheus-service.yaml
index 4f61e88..23ea918 100644
--- a/manifests/prometheus-service.yaml
+++ b/manifests/prometheus-service.yaml
@@ -2,7 +2,11 @@ apiVersion: v1
kind: Service
metadata:
labels:
- prometheus: k8s
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: monitoring
spec:
@@ -10,7 +14,12 @@ spec:
- name: web
port: 9090
targetPort: web
+ - name: reloader-web
+ port: 8080
+ targetPort: reloader-web
selector:
- app: prometheus
- prometheus: k8s
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
sessionAffinity: ClientIP
diff --git a/manifests/prometheus-serviceAccount.yaml b/manifests/prometheus-serviceAccount.yaml
index 3e55fad..2a4ada1 100644
--- a/manifests/prometheus-serviceAccount.yaml
+++ b/manifests/prometheus-serviceAccount.yaml
@@ -1,5 +1,12 @@
apiVersion: v1
+automountServiceAccountToken: true
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: monitoring
diff --git a/manifests/prometheus-serviceMonitor.yaml b/manifests/prometheus-serviceMonitor.yaml
index b7605db..936b449 100644
--- a/manifests/prometheus-serviceMonitor.yaml
+++ b/manifests/prometheus-serviceMonitor.yaml
@@ -2,13 +2,22 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
- k8s-app: prometheus
- name: prometheus
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
+ name: prometheus-k8s
namespace: monitoring
spec:
endpoints:
- interval: 30s
port: web
+ - interval: 30s
+ port: reloader-web
selector:
matchLabels:
- prometheus: k8s
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-serviceMonitorApiserver.yaml b/manifests/prometheus-serviceMonitorApiserver.yaml
deleted file mode 100644
index 500c0d3..0000000
--- a/manifests/prometheus-serviceMonitorApiserver.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: apiserver
- name: kube-apiserver
- namespace: monitoring
-spec:
- endpoints:
- - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- interval: 30s
- metricRelabelings:
- - action: drop
- regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
- sourceLabels:
- - __name__
- - action: drop
- regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
- sourceLabels:
- - __name__
- - action: drop
- regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
- sourceLabels:
- - __name__
- - action: drop
- regex: transformation_(transformation_latencies_microseconds|failures_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|A
vailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(debugging|disk|request|server).*
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_admission_controller_admission_latencies_seconds_.*
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_admission_step_admission_latencies_seconds_.*
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
- sourceLabels:
- - __name__
- - le
- port: https
- scheme: https
- tlsConfig:
- caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- serverName: kubernetes
- jobLabel: component
- namespaceSelector:
- matchNames:
- - default
- selector:
- matchLabels:
- component: apiserver
- provider: kubernetes
diff --git a/manifests/prometheus-serviceMonitorCoreDNS.yaml b/manifests/prometheus-serviceMonitorCoreDNS.yaml
deleted file mode 100644
index 633aa18..0000000
--- a/manifests/prometheus-serviceMonitorCoreDNS.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: coredns
- name: coredns
- namespace: monitoring
-spec:
- endpoints:
- - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- interval: 15s
- port: metrics
- jobLabel: k8s-app
- namespaceSelector:
- matchNames:
- - kube-system
- selector:
- matchLabels:
- k8s-app: kube-dns
diff --git a/manifests/prometheus-serviceMonitorKubeControllerManager.yaml b/manifests/prometheus-serviceMonitorKubeControllerManager.yaml
deleted file mode 100644
index 7f20fce..0000000
--- a/manifests/prometheus-serviceMonitorKubeControllerManager.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: kube-controller-manager
- name: kube-controller-manager
- namespace: monitoring
-spec:
- endpoints:
- - interval: 30s
- metricRelabelings:
- - action: drop
- regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
- sourceLabels:
- - __name__
- - action: drop
- regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
- sourceLabels:
- - __name__
- - action: drop
- regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
- sourceLabels:
- - __name__
- - action: drop
- regex: transformation_(transformation_latencies_microseconds|failures_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|A
vailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(debugging|disk|request|server).*
- sourceLabels:
- - __name__
- port: http-metrics
- jobLabel: k8s-app
- namespaceSelector:
- matchNames:
- - kube-system
- selector:
- matchLabels:
- k8s-app: kube-controller-manager
diff --git a/manifests/prometheus-serviceMonitorKubeScheduler.yaml b/manifests/prometheus-serviceMonitorKubeScheduler.yaml
deleted file mode 100644
index f00db0e..0000000
--- a/manifests/prometheus-serviceMonitorKubeScheduler.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: kube-scheduler
- name: kube-scheduler
- namespace: monitoring
-spec:
- endpoints:
- - interval: 30s
- port: http-metrics
- jobLabel: k8s-app
- namespaceSelector:
- matchNames:
- - kube-system
- selector:
- matchLabels:
- k8s-app: kube-scheduler
diff --git a/manifests/prometheus-serviceMonitorKubelet.yaml b/manifests/prometheus-serviceMonitorKubelet.yaml
deleted file mode 100644
index 6ee73fd..0000000
--- a/manifests/prometheus-serviceMonitorKubelet.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: kubelet
- name: kubelet
- namespace: monitoring
-spec:
- endpoints:
- - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- honorLabels: true
- interval: 30s
- metricRelabelings:
- - action: drop
- regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
- sourceLabels:
- - __name__
- - action: drop
- regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
- sourceLabels:
- - __name__
- - action: drop
- regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
- sourceLabels:
- - __name__
- - action: drop
- regex: transformation_(transformation_latencies_microseconds|failures_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|A
vailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
- sourceLabels:
- - __name__
- port: https-metrics
- relabelings:
- - sourceLabels:
- - __metrics_path__
- targetLabel: metrics_path
- scheme: https
- tlsConfig:
- insecureSkipVerify: true
- - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- honorLabels: true
- interval: 30s
- metricRelabelings:
- - action: drop
- regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
- sourceLabels:
- - __name__
- path: /metrics/cadvisor
- port: https-metrics
- relabelings:
- - sourceLabels:
- - __metrics_path__
- targetLabel: metrics_path
- scheme: https
- tlsConfig:
- insecureSkipVerify: true
- jobLabel: k8s-app
- namespaceSelector:
- matchNames:
- - kube-system
- selector:
- matchLabels:
- k8s-app: kubelet
diff --git a/manifests/setup/0namespace-prometheusRule.yaml b/manifests/setup/0namespace-prometheusRule.yaml
new file mode 100644
index 0000000..0facf21
--- /dev/null
+++ b/manifests/setup/0namespace-prometheusRule.yaml
@@ -0,0 +1,83 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: kube-prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ prometheus: k8s
+ role: alert-rules
+ name: kube-prometheus-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: general.rules
+ rules:
+ - alert: TargetDown
+ annotations:
+ description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
+ summary: One or more targets are unreachable.
+ expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
+ for: 10m
+ labels:
+ severity: warning
+ - alert: Watchdog
+ annotations:
+ description: |
+ This is an alert meant to ensure that the entire alerting pipeline is functional.
+ This alert is always firing, therefore it should always be firing in Alertmanager
+ and always fire against a receiver. There are integrations with various notification
+ mechanisms that send a notification when this alert is not firing. For example the
+ "DeadMansSnitch" integration in PagerDuty.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
+ summary: An alert that should always be firing to certify that Alertmanager is working properly.
+ expr: vector(1)
+ labels:
+ severity: none
+ - alert: InfoInhibitor
+ annotations:
+ description: |
+ This is an alert that is used to inhibit info alerts.
+ By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
+ other alerts.
+ This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
+ severity of 'warning' or 'critical' starts firing on the same namespace.
+ This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
+ summary: Info-level alert inhibition.
+ expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
+ labels:
+ severity: none
+ - name: node-network
+ rules:
+ - alert: NodeNetworkInterfaceFlapping
+ annotations:
+ description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping
+ summary: Network interface is often changing its status
+ expr: |
+ changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
+ for: 2m
+ labels:
+ severity: warning
+ - name: kube-prometheus-node-recording.rules
+ rules:
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)
+ record: instance:node_cpu:rate:sum
+ - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
+ record: instance:node_network_receive_bytes:rate:sum
+ - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
+ record: instance:node_network_transmit_bytes:rate:sum
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
+ record: instance:node_cpu:ratio
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
+ record: cluster:node_cpu:sum_rate5m
+ - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
+ record: cluster:node_cpu:ratio
+ - name: kube-prometheus-general.rules
+ rules:
+ - expr: count without(instance, pod, node) (up == 1)
+ record: count:up1
+ - expr: count without(instance, pod, node) (up == 0)
+ record: count:up0
diff --git a/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml
new file mode 100644
index 0000000..ad8336d
--- /dev/null
+++ b/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml
@@ -0,0 +1,3401 @@
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.9.2
+ creationTimestamp: null
+ name: alertmanagerconfigs.monitoring.coreos.com
+spec:
+ group: monitoring.coreos.com
+ names:
+ categories:
+ - prometheus-operator
+ kind: AlertmanagerConfig
+ listKind: AlertmanagerConfigList
+ plural: alertmanagerconfigs
+ shortNames:
+ - amcfg
+ singular: alertmanagerconfig
+ scope: Namespaced
+ versions:
+ - name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: AlertmanagerConfig defines a namespaced AlertmanagerConfig to be aggregated across multiple namespaces configuring one Alertmanager cluster.
+ properties:
+ apiVersion:
+ description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ type: string
+ kind:
+ description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: AlertmanagerConfigSpec is a specification of the desired behavior of the Alertmanager configuration. By definition, the Alertmanager configuration only applies to alerts for which the `namespace` label is equal to the namespace of the AlertmanagerConfig resource.
+ properties:
+ inhibitRules:
+ description: List of inhibition rules. The rules will only apply to alerts matching the resource's namespace.
+ items:
+ description: InhibitRule defines an inhibition rule that allows to mute alerts when other alerts are already firing. See https://prometheus.io/docs/alerting/latest/configuration/#inhibit_rule
+ properties:
+ equal:
+ description: Labels that must have an equal value in the source and target alert for the inhibition to take effect.
+ items:
+ type: string
+ type: array
+ sourceMatch:
+ description: Matchers for which one or more alerts have to exist for the inhibition to take effect. The operator enforces that the alert matches the resource's namespace.
+ items:
+ description: Matcher defines how to match on alert's labels.
+ properties:
+ matchType:
+ description: Match operation available with AlertManager >= v0.22.0 and takes precedence over Regex (deprecated) if non-empty.
+ enum:
+ - '!='
+ - =
+ - =~
+ - '!~'
+ type: string
+ name:
+ description: Label to match.
+ minLength: 1
+ type: string
+ regex:
+ description: Whether to match on equality (false) or regular-expression (true). Deprecated as of AlertManager >= v0.22.0 where a user should use MatchType instead.
+ type: boolean
+ value:
+ description: Label value to match.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ targetMatch:
+ description: Matchers that have to be fulfilled in the alerts to be muted. The operator enforces that the alert matches the resource's namespace.
+ items:
+ description: Matcher defines how to match on alert's labels.
+ properties:
+ matchType:
+ description: Match operation available with AlertManager >= v0.22.0 and takes precedence over Regex (deprecated) if non-empty.
+ enum:
+ - '!='
+ - =
+ - =~
+ - '!~'
+ type: string
+ name:
+ description: Label to match.
+ minLength: 1
+ type: string
+ regex:
+ description: Whether to match on equality (false) or regular-expression (true). Deprecated as of AlertManager >= v0.22.0 where a user should use MatchType instead.
+ type: boolean
+ value:
+ description: Label value to match.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ type: array
+ muteTimeIntervals:
+ description: List of MuteTimeInterval specifying when the routes should be muted.
+ items:
+ description: MuteTimeInterval specifies the periods in time when notifications will be muted
+ properties:
+ name:
+ description: Name of the time interval
+ type: string
+ timeIntervals:
+ description: TimeIntervals is a list of TimeInterval
+ items:
+ description: TimeInterval describes intervals of time
+ properties:
+ daysOfMonth:
+ description: DaysOfMonth is a list of DayOfMonthRange
+ items:
+ description: DayOfMonthRange is an inclusive range of days of the month beginning at 1
+ properties:
+ end:
+ description: End of the inclusive range
+ maximum: 31
+ minimum: -31
+ type: integer
+ start:
+ description: Start of the inclusive range
+ maximum: 31
+ minimum: -31
+ type: integer
+ type: object
+ type: array
+ months:
+ description: Months is a list of MonthRange
+ items:
+ description: MonthRange is an inclusive range of months of the year beginning in January Months can be specified by name (e.g 'January') by numerical month (e.g '1') or as an inclusive range (e.g 'January:March', '1:3', '1:March')
+ pattern: ^((?i)january|february|march|april|may|june|july|august|september|october|november|december|[1-12])(?:((:((?i)january|february|march|april|may|june|july|august|september|october|november|december|[1-12]))$)|$)
+ type: string
+ type: array
+ times:
+ description: Times is a list of TimeRange
+ items:
+ description: TimeRange defines a start and end time in 24hr format
+ properties:
+ endTime:
+ description: EndTime is the end time in 24hr format.
+ pattern: ^((([01][0-9])|(2[0-3])):[0-5][0-9])$|(^24:00$)
+ type: string
+ startTime:
+ description: StartTime is the start time in 24hr format.
+ pattern: ^((([01][0-9])|(2[0-3])):[0-5][0-9])$|(^24:00$)
+ type: string
+ type: object
+ type: array
+ weekdays:
+ description: Weekdays is a list of WeekdayRange
+ items:
+ description: WeekdayRange is an inclusive range of days of the week beginning on Sunday Days can be specified by name (e.g 'Sunday') or as an inclusive range (e.g 'Monday:Friday')
+ pattern: ^((?i)sun|mon|tues|wednes|thurs|fri|satur)day(?:((:(sun|mon|tues|wednes|thurs|fri|satur)day)$)|$)
+ type: string
+ type: array
+ years:
+ description: Years is a list of YearRange
+ items:
+ description: YearRange is an inclusive range of years
+ pattern: ^2\d{3}(?::2\d{3}|$)
+ type: string
+ type: array
+ type: object
+ type: array
+ type: object
+ type: array
+ receivers:
+ description: List of receivers.
+ items:
+ description: Receiver defines one or more notification integrations.
+ properties:
+ emailConfigs:
+ description: List of Email configurations.
+ items:
+ description: EmailConfig configures notifications via Email.
+ properties:
+ authIdentity:
+ description: The identity to use for authentication.
+ type: string
+ authPassword:
+ description: The secret's key that contains the password to use for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ authSecret:
+ description: The secret's key that contains the CRAM-MD5 secret. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ authUsername:
+ description: The username to use for authentication.
+ type: string
+ from:
+ description: The sender address.
+ type: string
+ headers:
+ description: Further headers email header key/value pairs. Overrides any headers previously set by the notification implementation.
+ items:
+ description: KeyValue defines a (key, value) tuple.
+ properties:
+ key:
+ description: Key of the tuple.
+ minLength: 1
+ type: string
+ value:
+ description: Value of the tuple.
+ type: string
+ required:
+ - key
+ - value
+ type: object
+ type: array
+ hello:
+ description: The hostname to identify to the SMTP server.
+ type: string
+ html:
+ description: The HTML body of the email notification.
+ type: string
+ requireTLS:
+ description: The SMTP TLS requirement. Note that Go does not support unencrypted connections to remote SMTP endpoints.
+ type: boolean
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ smarthost:
+ description: The SMTP host and port through which emails are sent. E.g. example.com:25
+ type: string
+ text:
+ description: The text body of the email notification.
+ type: string
+ tlsConfig:
+ description: TLS configuration
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ to:
+ description: The email address to send notifications to.
+ type: string
+ type: object
+ type: array
+ name:
+ description: Name of the receiver. Must be unique across all items from the list.
+ minLength: 1
+ type: string
+ opsgenieConfigs:
+ description: List of OpsGenie configurations.
+ items:
+ description: OpsGenieConfig configures notifications via OpsGenie. See https://prometheus.io/docs/alerting/latest/configuration/#opsgenie_config
+ properties:
+ actions:
+ description: Comma separated list of actions that will be available for the alert.
+ type: string
+ apiKey:
+ description: The secret's key that contains the OpsGenie API key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ apiURL:
+ description: The URL to send OpsGenie API requests to.
+ type: string
+ description:
+ description: Description of the incident.
+ type: string
+ details:
+ description: A set of arbitrary key/value pairs that provide further detail about the incident.
+ items:
+ description: KeyValue defines a (key, value) tuple.
+ properties:
+ key:
+ description: Key of the tuple.
+ minLength: 1
+ type: string
+ value:
+ description: Value of the tuple.
+ type: string
+ required:
+ - key
+ - value
+ type: object
+ type: array
+ entity:
+ description: Optional field that can be used to specify which domain alert is related to.
+ type: string
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: Alert text limited to 130 characters.
+ type: string
+ note:
+ description: Additional alert note.
+ type: string
+ priority:
+ description: Priority level of alert. Possible values are P1, P2, P3, P4, and P5.
+ type: string
+ responders:
+ description: List of responders responsible for notifications.
+ items:
+ description: OpsGenieConfigResponder defines a responder to an incident. One of `id`, `name` or `username` has to be defined.
+ properties:
+ id:
+ description: ID of the responder.
+ type: string
+ name:
+ description: Name of the responder.
+ type: string
+ type:
+ description: Type of responder.
+ enum:
+ - team
+ - teams
+ - user
+ - escalation
+ - schedule
+ minLength: 1
+ type: string
+ username:
+ description: Username of the responder.
+ type: string
+ required:
+ - type
+ type: object
+ type: array
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ source:
+ description: Backlink to the sender of the notification.
+ type: string
+ tags:
+ description: Comma separated list of tags attached to the notifications.
+ type: string
+ updateAlerts:
+ description: Whether to update message and description of the alert in OpsGenie if it already exists. By default, the alert is never updated in OpsGenie; the new message only appears in the activity log.
+ type: boolean
+ type: object
+ type: array
+ pagerdutyConfigs:
+ description: List of PagerDuty configurations.
+ items:
+ description: PagerDutyConfig configures notifications via PagerDuty. See https://prometheus.io/docs/alerting/latest/configuration/#pagerduty_config
+ properties:
+ class:
+ description: The class/type of the event.
+ type: string
+ client:
+ description: Client identification.
+ type: string
+ clientURL:
+ description: Backlink to the sender of notification.
+ type: string
+ component:
+ description: The part or component of the affected system that is broken.
+ type: string
+ description:
+ description: Description of the incident.
+ type: string
+ details:
+ description: Arbitrary key/value pairs that provide further detail about the incident.
+ items:
+ description: KeyValue defines a (key, value) tuple.
+ properties:
+ key:
+ description: Key of the tuple.
+ minLength: 1
+ type: string
+ value:
+ description: Value of the tuple.
+ type: string
+ required:
+ - key
+ - value
+ type: object
+ type: array
+ group:
+ description: A cluster or grouping of sources.
+ type: string
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ pagerDutyImageConfigs:
+ description: A list of image details to attach that provide further detail about an incident.
+ items:
+ description: PagerDutyImageConfig attaches images to an incident
+ properties:
+ alt:
+ description: Alt is the optional alternative text for the image.
+ type: string
+ href:
+ description: Optional URL; makes the image a clickable link.
+ type: string
+ src:
+ description: Src of the image being attached to the incident
+ type: string
+ type: object
+ type: array
+ pagerDutyLinkConfigs:
+ description: A list of link details to attach that provide further detail about an incident.
+ items:
+ description: PagerDutyLinkConfig attaches text links to an incident
+ properties:
+ alt:
+ description: Text that describes the purpose of the link, and can be used as the link's text.
+ type: string
+ href:
+ description: Href is the URL of the link to be attached
+ type: string
+ type: object
+ type: array
+ routingKey:
+ description: The secret's key that contains the PagerDuty integration key (when using Events API v2). Either this field or `serviceKey` needs to be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ serviceKey:
+ description: The secret's key that contains the PagerDuty service key (when using integration type "Prometheus"). Either this field or `routingKey` needs to be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ severity:
+ description: Severity of the incident.
+ type: string
+ url:
+ description: The URL to send requests to.
+ type: string
+ type: object
+ type: array
+ pushoverConfigs:
+ description: List of Pushover configurations.
+ items:
+ description: PushoverConfig configures notifications via Pushover. See https://prometheus.io/docs/alerting/latest/configuration/#pushover_config
+ properties:
+ expire:
+ description: How long your notification will continue to be retried for, unless the user acknowledges the notification.
+ pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$
+ type: string
+ html:
+ description: Whether notification message is HTML or plain text.
+ type: boolean
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: Notification message.
+ type: string
+ priority:
+ description: Priority, see https://pushover.net/api#priority
+ type: string
+ retry:
+ description: How often the Pushover servers will send the same notification to the user. Must be at least 30 seconds.
+ pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ sound:
+ description: The name of one of the sounds supported by device clients to override the user's default sound choice
+ type: string
+ title:
+ description: Notification title.
+ type: string
+ token:
+ description: The secret's key that contains the registered application's API token, see https://pushover.net/apps. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ url:
+ description: A supplementary URL shown alongside the message.
+ type: string
+ urlTitle:
+ description: A title for the supplementary URL; otherwise just the URL is shown.
+ type: string
+ userKey:
+ description: The secret's key that contains the recipient user's user key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ type: object
+ type: array
+ slackConfigs:
+ description: List of Slack configurations.
+ items:
+ description: SlackConfig configures notifications via Slack. See https://prometheus.io/docs/alerting/latest/configuration/#slack_config
+ properties:
+ actions:
+ description: A list of Slack actions that are sent with each notification.
+ items:
+ description: SlackAction configures a single Slack action that is sent with each notification. See https://api.slack.com/docs/message-attachments#action_fields and https://api.slack.com/docs/message-buttons for more information.
+ properties:
+ confirm:
+ description: SlackConfirmationField protects users from destructive actions or particularly distinguished decisions by asking them to confirm their button click one more time. See https://api.slack.com/docs/interactive-message-field-guide#confirmation_fields for more information.
+ properties:
+ dismissText:
+ type: string
+ okText:
+ type: string
+ text:
+ minLength: 1
+ type: string
+ title:
+ type: string
+ required:
+ - text
+ type: object
+ name:
+ type: string
+ style:
+ type: string
+ text:
+ minLength: 1
+ type: string
+ type:
+ minLength: 1
+ type: string
+ url:
+ type: string
+ value:
+ type: string
+ required:
+ - text
+ - type
+ type: object
+ type: array
+ apiURL:
+ description: The secret's key that contains the Slack webhook URL. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ callbackId:
+ type: string
+ channel:
+ description: The channel or user to send notifications to.
+ type: string
+ color:
+ type: string
+ fallback:
+ type: string
+ fields:
+ description: A list of Slack fields that are sent with each notification.
+ items:
+ description: SlackField configures a single Slack field that is sent with each notification. Each field must contain a title, value, and optionally, a boolean value to indicate if the field is short enough to be displayed next to other fields designated as short. See https://api.slack.com/docs/message-attachments#fields for more information.
+ properties:
+ short:
+ type: boolean
+ title:
+ minLength: 1
+ type: string
+ value:
+ minLength: 1
+ type: string
+ required:
+ - title
+ - value
+ type: object
+ type: array
+ footer:
+ type: string
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ iconEmoji:
+ type: string
+ iconURL:
+ type: string
+ imageURL:
+ type: string
+ linkNames:
+ type: boolean
+ mrkdwnIn:
+ items:
+ type: string
+ type: array
+ pretext:
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ shortFields:
+ type: boolean
+ text:
+ type: string
+ thumbURL:
+ type: string
+ title:
+ type: string
+ titleLink:
+ type: string
+ username:
+ type: string
+ type: object
+ type: array
+ snsConfigs:
+ description: List of SNS configurations
+ items:
+ description: SNSConfig configures notifications via AWS SNS. See https://prometheus.io/docs/alerting/latest/configuration/#sns_configs
+ properties:
+ apiURL:
+ description: The SNS API URL i.e. https://sns.us-east-2.amazonaws.com. If not specified, the SNS API URL from the SNS SDK will be used.
+ type: string
+ attributes:
+ additionalProperties:
+ type: string
+ description: SNS message attributes.
+ type: object
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: The message content of the SNS notification.
+ type: string
+ phoneNumber:
+ description: Phone number if message is delivered via SMS in E.164 format. If you don't specify this value, you must specify a value for the TopicARN or TargetARN.
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ sigv4:
+ description: Configures AWS's Signature Verification 4 signing process to sign requests.
+ properties:
+ accessKey:
+ description: AccessKey is the AWS API key. If blank, the environment variable `AWS_ACCESS_KEY_ID` is used.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ profile:
+ description: Profile is the named AWS profile used to authenticate.
+ type: string
+ region:
+ description: Region is the AWS region. If blank, the region from the default credentials chain used.
+ type: string
+ roleArn:
+ description: RoleArn is the named AWS profile used to authenticate.
+ type: string
+ secretKey:
+ description: SecretKey is the AWS API secret. If blank, the environment variable `AWS_SECRET_ACCESS_KEY` is used.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ subject:
+ description: Subject line when the message is delivered to email endpoints.
+ type: string
+ targetARN:
+ description: The mobile platform endpoint ARN if message is delivered via mobile notifications. If you don't specify this value, you must specify a value for the topic_arn or PhoneNumber.
+ type: string
+ topicARN:
+ description: SNS topic ARN, i.e. arn:aws:sns:us-east-2:698519295917:My-Topic If you don't specify this value, you must specify a value for the PhoneNumber or TargetARN.
+ type: string
+ type: object
+ type: array
+ telegramConfigs:
+ description: List of Telegram configurations.
+ items:
+ description: TelegramConfig configures notifications via Telegram. See https://prometheus.io/docs/alerting/latest/configuration/#telegram_config
+ properties:
+ apiURL:
+ description: The Telegram API URL i.e. https://api.telegram.org. If not specified, default API URL will be used.
+ type: string
+ botToken:
+ description: Telegram bot token The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ chatID:
+ description: The Telegram chat ID.
+ format: int64
+ type: integer
+ disableNotifications:
+ description: Disable telegram notifications
+ type: boolean
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: Message template
+ type: string
+ parseMode:
+ description: Parse mode for telegram message
+ enum:
+ - MarkdownV2
+ - Markdown
+ - HTML
+ type: string
+ sendResolved:
+ description: Whether to notify about resolved alerts.
+ type: boolean
+ type: object
+ type: array
+ victoropsConfigs:
+ description: List of VictorOps configurations.
+ items:
+ description: VictorOpsConfig configures notifications via VictorOps. See https://prometheus.io/docs/alerting/latest/configuration/#victorops_config
+ properties:
+ apiKey:
+ description: The secret's key that contains the API key to use when talking to the VictorOps API. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ apiUrl:
+ description: The VictorOps API URL.
+ type: string
+ customFields:
+ description: Additional custom fields for notification.
+ items:
+ description: KeyValue defines a (key, value) tuple.
+ properties:
+ key:
+ description: Key of the tuple.
+ minLength: 1
+ type: string
+ value:
+ description: Value of the tuple.
+ type: string
+ required:
+ - key
+ - value
+ type: object
+ type: array
+ entityDisplayName:
+ description: Contains summary of the alerted problem.
+ type: string
+ httpConfig:
+ description: The HTTP client's configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ messageType:
+ description: Describes the behavior of the alert (CRITICAL, WARNING, INFO).
+ type: string
+ monitoringTool:
+ description: The monitoring tool the state message is from.
+ type: string
+ routingKey:
+ description: A key used to map the alert to a team.
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ stateMessage:
+ description: Contains long explanation of the alerted problem.
+ type: string
+ type: object
+ type: array
+ webhookConfigs:
+ description: List of webhook configurations.
+ items:
+ description: WebhookConfig configures notifications via a generic receiver supporting the webhook payload. See https://prometheus.io/docs/alerting/latest/configuration/#webhook_config
+ properties:
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ maxAlerts:
+ description: Maximum number of alerts to be sent per webhook message. When 0, all alerts are included.
+ format: int32
+ minimum: 0
+ type: integer
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ url:
+ description: The URL to send HTTP POST requests to. `urlSecret` takes precedence over `url`. One of `urlSecret` and `url` should be defined.
+ type: string
+ urlSecret:
+ description: The secret's key that contains the webhook URL to send HTTP requests to. `urlSecret` takes precedence over `url`. One of `urlSecret` and `url` should be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ type: object
+ type: array
+ wechatConfigs:
+ description: List of WeChat configurations.
+ items:
+ description: WeChatConfig configures notifications via WeChat. See https://prometheus.io/docs/alerting/latest/configuration/#wechat_config
+ properties:
+ agentID:
+ type: string
+ apiSecret:
+ description: The secret's key that contains the WeChat API key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ apiURL:
+ description: The WeChat API URL.
+ type: string
+ corpID:
+ description: The corp id for authentication.
+ type: string
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: API request data as defined by the WeChat API.
+ type: string
+ messageType:
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ toParty:
+ type: string
+ toTag:
+ type: string
+ toUser:
+ type: string
+ type: object
+ type: array
+ required:
+ - name
+ type: object
+ type: array
+ route:
+ description: The Alertmanager route definition for alerts matching the resource's namespace. If present, it will be added to the generated Alertmanager configuration as a first-level route.
+ properties:
+ activeTimeIntervals:
+ description: ActiveTimeIntervals is a list of MuteTimeInterval names when this route should be active.
+ items:
+ type: string
+ type: array
+ continue:
+ description: Boolean indicating whether an alert should continue matching subsequent sibling nodes. It will always be overridden to true for the first-level route by the Prometheus operator.
+ type: boolean
+ groupBy:
+ description: List of labels to group by. Labels must not be repeated (unique list). Special label "..." (aggregate by all possible labels), if provided, must be the only element in the list.
+ items:
+ type: string
+ type: array
+ groupInterval:
+ description: 'How long to wait before sending an updated notification. Must match the regular expression`^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$` Example: "5m"'
+ type: string
+ groupWait:
+ description: 'How long to wait before sending the initial notification. Must match the regular expression`^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$` Example: "30s"'
+ type: string
+ matchers:
+ description: 'List of matchers that the alert''s labels should match. For the first level route, the operator removes any existing equality and regexp matcher on the `namespace` label and adds a `namespace: