From 436cd8015877d550715d9ed39b657c33d3904e23 Mon Sep 17 00:00:00 2001 From: haopeng <657407891@qq.com> Date: Thu, 16 Jan 2025 21:52:16 +0800 Subject: [PATCH] BIGTOP-4325: Add the dashboard of the host to Grafana (#147) --- .../configuration/grafana-dashboard.xml | 1036 ++++++++++++++++- .../infra/v1_0_0/grafana/GrafanaParams.java | 34 +- .../infra/v1_0_0/grafana/GrafanaSetup.java | 2 +- 3 files changed, 1042 insertions(+), 30 deletions(-) diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/configuration/grafana-dashboard.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/configuration/grafana-dashboard.xml index 3a835c59..2a50b52d 100644 --- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/configuration/grafana-dashboard.xml +++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/configuration/grafana-dashboard.xml @@ -41,8 +41,8 @@ providers: - bm_agent_dashboard - This is the dashboard configuration file for BM agent + bm_agent_cluster_dashboard + This is the dashboard configuration file for BM agent cluster =~\"${cluster}\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "interval": "", "legendFormat": "CPU Usage", "range": true, @@ -169,7 +169,7 @@ providers: { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "uid4prometheus" }, "fieldConfig": { "defaults": { @@ -265,7 +265,7 @@ providers: "disableTextWrap": false, "editorMode": "code", "exemplar": true, - "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", cluster=\"$cluster\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", cluster=\"$cluster\"}\n )\n)", + "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "$resolution", @@ -281,7 +281,7 @@ providers: { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "uid4prometheus" }, "fieldConfig": { "defaults": { @@ -334,7 +334,7 @@ providers: }, "editorMode": "code", "exemplar": true, - "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_1\", cluster=\"$cluster\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_1\", cluster=\"$cluster\"}\n )\n)", + "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_1\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_1\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "interval": "", "legendFormat": "Min_1", "range": true, @@ -347,7 +347,7 @@ providers: }, "editorMode": "code", "exemplar": true, - "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_5\", cluster=\"$cluster\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_5\", cluster=\"$cluster\"}\n )\n)", + "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_5\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_5\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "interval": "", "legendFormat": "Min_5", "range": true, @@ -360,7 +360,7 @@ providers: }, "editorMode": "code", "exemplar": true, - "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_15\", cluster=\"$cluster\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_15\", cluster=\"$cluster\"}\n )\n)", + "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_15\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_15\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "interval": "", "legendFormat": "Min_15", "range": true, @@ -451,11 +451,11 @@ providers: { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "uid4prometheus" }, "editorMode": "code", "exemplar": true, - "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", <#noparse>cluster=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", <#noparse>cluster=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", <#noparse>cluster=~\"${cluster}\"}\n )\n)", + "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "interval": "", "legendFormat": "Real", "range": true, @@ -559,11 +559,11 @@ providers: { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "uid4prometheus" }, "editorMode": "code", "exemplar": true, - "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", <#noparse>cluster=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", <#noparse>cluster=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", <#noparse>cluster=~\"${cluster}\"}\n )\n)", + "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "interval": "$resolution", "legendFormat": "Memory usage in %", "range": true, @@ -625,11 +625,11 @@ providers: { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "uid4prometheus" }, "editorMode": "code", "exemplar": true, - "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", <#noparse>cluster=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", <#noparse>cluster=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", <#noparse>cluster=~\"${cluster}\"}\n )\n)", + "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "interval": "", "legendFormat": "Real", "range": true, @@ -655,7 +655,7 @@ providers: { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "uid4prometheus" }, "fieldConfig": { "defaults": { @@ -713,7 +713,7 @@ providers: "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "(sum(\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", <#noparse>cluster=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_disk{diskUsage=\"diskTotalSpace\", <#noparse>cluster=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", <#noparse>cluster=~\"${cluster}\"}\n )\n)", + "expr": "(sum(\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_disk{diskUsage=\"diskTotalSpace\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -730,7 +730,7 @@ providers: { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "uid4prometheus" }, "fieldConfig": { "defaults": { @@ -818,7 +818,7 @@ providers: "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "(sum(\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", <#noparse>cluster=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_disk{diskUsage=\"diskTotalSpace\", <#noparse>cluster=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", <#noparse>cluster=~\"${cluster}\"}\n )\n)", + "expr": "(sum(\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n) / sum(\n agent_host_monitoring_disk{diskUsage=\"diskTotalSpace\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", ${cluster_label}<#noparse>=~\"${cluster}\"}\n )\n)", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -928,4 +928,1000 @@ providers: longtext + + bm_agent_host_dashboard + This is the dashboard configuration file for BM agent host + =~\"$cluster\", hostname=~\"$host\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "interval": "", + "legendFormat": "CPU Usage", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "bargauge" + }, + { + "id": 88, + "type": "stat", + "title": "CPU Core", + "gridPos": { + "x": 12, + "y": 1, + "h": 3, + "w": 12 + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] + }, + "noValue": "0", + "unit": "none" + }, + "overrides": [] + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "max by (value) (\n label_replace(\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"},\n \"value\", \"$1\", \"physical_cores\", \"(.*)\"\n )\n)", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "Physical: {{value}}", + "range": true, + "refId": "B", + "useBackend": false, + "format": "time_series" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": true, + "expr": "max by (value) (\n label_replace(\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"},\n \"value\", \"$1\", \"logical_cores\", \"(.*)\"\n )\n)", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "Logical: {{value}}", + "range": true, + "refId": "C", + "useBackend": false, + "format": "time_series" + } + ], + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "min" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "name", + "wideLayout": true, + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "showPercentChange": false, + "percentChangeColorMode": "standard", + "text": {} + } + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd", + "seriesBy": "last" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "CPU %", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.5 + }, + { + "color": "red", + "value": 0.7 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 72, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuUsage\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "$resolution", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "CPU Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 37, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_1\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_1\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "interval": "", + "legendFormat": "Min_1", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_5\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_5\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "interval": "", + "legendFormat": "Min_5", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_15\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / count(\n count by (instance) (\n agent_host_monitoring_cpu{cpuUsage=\"cpuLoadAvgMin_15\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "interval": "", + "legendFormat": "Min_15", + "range": true, + "refId": "C" + } + ], + "title": "CPU LoadAvg", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 79, + "panels": [], + "title": "Memory", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 78, + "options": { + "displayMode": "lcd", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "interval": "", + "legendFormat": "Real", + "range": true, + "refId": "A" + } + ], + "title": "RAM Usage", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "MEMORY", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.5 + }, + { + "color": "red", + "value": 0.7 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 55, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "interval": "$resolution", + "legendFormat": "Memory usage in %", + "range": true, + "refId": "A" + } + ], + "title": "Memory Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 39, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(sum(\n agent_host_monitoring_mem{memUsage=\"memIdle\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / sum(\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_mem{memUsage=\"memTotal\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "interval": "", + "legendFormat": "Real", + "range": true, + "refId": "A" + } + ], + "title": "RAM Usage", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 81, + "panels": [], + "title": "Storage", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 87, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "(sum(\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / sum(\n agent_host_monitoring_disk{diskUsage=\"diskTotalSpace\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "$resolution", + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Persistent Volumes - Usage in %", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 82, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "(sum(\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n) / sum(\n agent_host_monitoring_disk{diskUsage=\"diskTotalSpace\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n)) /\ncount(\n count by (instance) (\n agent_host_monitoring_disk{diskUsage=\"diskFreeSpace\", ${cluster_label}<#noparse>=~\"$cluster\", hostname=~\"$host\"}\n )\n)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "$resolution", + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Persistent Volumes - Usage in %", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "prometheus", + "value": "uid4prometheus" + }, + "includeAll": false, + "label": "Datasource", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "current": { + "text": "${default_cluster_name}", + "value": "${default_cluster_name}" + }, + "datasource": { + "type": "prometheus", + "uid": "uid4prometheus" + }, + "includeAll": false, + "label": "Cluster", + "name": "${cluster_label}", + "options": [], + "query": "label_values(${cluster_label})", + "refresh": 2, + "type": "query" + }, + { + "current": { + "text": "${default_host_name}", + "value": "${default_host_name}" + }, + "definition": "label_values({${cluster_label}<#noparse>=~\"$cluster\"},hostname)", + "label": "Host", + "name": "host", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({${cluster_label}<#noparse>=~\"$cluster\"},hostname)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "1m", + "value": "1m" + }, + "includeAll": false, + "label": "Resolution", + "name": "resolution", + "options": [ + { + "selected": false, + "text": "1s", + "value": "1s" + }, + { + "selected": false, + "text": "5s", + "value": "5s" + }, + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + } + ], + "query": "1s, 5s, 1m, 3m, 5m", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "${dashboard_name}", + "uid": "dbuid_${dashboard_name}", + "version": 19, + "weekStart": "" +} + ]]> + + + longtext + + \ No newline at end of file diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java index dfe03fc9..a4a4cdc9 100644 --- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java +++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java @@ -45,7 +45,8 @@ public class GrafanaParams extends InfraParams { private String grafanaContent; private String grafanaDashboardContent; private String prometheusDashboardPath; - private String bmAgentDashboardConfig; + private String bmAgentClusterDashboardConfig; + private String bmAgentHostDashboardConfig; private String grafanaPort; private String grafanaLogLevel; private String dataSourceContent; @@ -124,7 +125,8 @@ public Map prometheus() { public Map dashboards() { Map configuration = LocalSettings.configurations(getServiceName(), "grafana-dashboard"); grafanaDashboardContent = (String) configuration.get("content"); - bmAgentDashboardConfig = (String) configuration.get("bm_agent_dashboard"); + bmAgentClusterDashboardConfig = (String) configuration.get("bm_agent_cluster_dashboard"); + bmAgentHostDashboardConfig = (String) configuration.get("bm_agent_host_dashboard"); prometheusDashboardPath = MessageFormat.format("{0}/prometheus", dashboardsDir()); return configuration; } @@ -141,7 +143,6 @@ public List getClusters() { return new ArrayList<>(getClusterHosts().keySet()); } - // TODO: add host dashboard public void setDashboards() { dashboards = new ArrayList<>(); @@ -151,15 +152,30 @@ public void setDashboards() { clusterDashboard.put("path", dashboardConfigDir("Cluster")); // Used for dashboard json configuration - List clusters = getClusters(); - if (clusters != null && !clusters.isEmpty()) { - clusterDashboard.put("default_cluster_name", getClusters().get(0)); - } else { - clusterDashboard.put("default_cluster_name", ""); - } clusterDashboard.put("cluster_label", PrometheusParams.AGENT_TARGET_LABEL); clusterDashboard.put("dashboard_name", "Cluster"); + clusterDashboard.put("dashboard_config_content", bmAgentClusterDashboardConfig); + + Map hostDashboard = new HashMap<>(); + hostDashboard.put("name", "Host"); + hostDashboard.put("path", dashboardConfigDir("Host")); + + hostDashboard.put("cluster_label", PrometheusParams.AGENT_TARGET_LABEL); + hostDashboard.put("dashboard_name", "Host"); + hostDashboard.put("dashboard_config_content", bmAgentHostDashboardConfig); + + Map> clusterHost = getClusterHosts(); + if (clusterHost != null && !clusterHost.isEmpty()) { + String defaultCluster = clusterHost.keySet().iterator().next(); + String defaultHost = clusterHost.get(defaultCluster).isEmpty() + ? "" + : clusterHost.get(defaultCluster).get(0); + clusterDashboard.put("default_cluster_name", defaultCluster); + hostDashboard.put("default_cluster_name", defaultCluster); + hostDashboard.put("default_host_name", defaultHost); + } dashboards.add(clusterDashboard); + dashboards.add(hostDashboard); } } diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaSetup.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaSetup.java index 6f9ab66e..8170e140 100644 --- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaSetup.java +++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaSetup.java @@ -77,7 +77,7 @@ public static ShellResult config(Params params) { LinuxFileUtils.createDirectories(confPath, user, group, Constants.PERMISSION_755, true); LinuxFileUtils.toFileByTemplate( - grafanaParams.getBmAgentDashboardConfig(), + (String) dashboard.get("dashboard_config_content"), MessageFormat.format("{0}/{1}.json", confPath, dashboard.get("name")), user, group,