diff --git a/grafana/dcgm-exporter-dashboard.json b/grafana/dcgm-exporter-dashboard.json
index ad3d0b2..ed2be0a 100644
--- a/grafana/dcgm-exporter-dashboard.json
+++ b/grafana/dcgm-exporter-dashboard.json
@@ -100,8 +100,8 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "DCGM_FI_DEV_GPU_TEMP",
-          "instant": true,
+          "expr": "DCGM_FI_DEV_GPU_TEMP{instance=~\"${instance}\", gpu=~\"${gpu}\"}",
+          "instant": false,
           "interval": "",
           "legendFormat": "GPU {{gpu}}",
           "refId": "A"
@@ -130,8 +130,8 @@
           "format": "celsius",
           "label": null,
           "logBase": 1,
-          "max": "100",
-          "min": "0",
+          "max": null,
+          "min": null,
           "show": true
         },
         {
@@ -198,7 +198,7 @@
       "pluginVersion": "6.7.3",
       "targets": [
         {
-          "expr": "avg(DCGM_FI_DEV_GPU_TEMP)",
+          "expr": "avg(DCGM_FI_DEV_GPU_TEMP{instance=~\"${instance}\", gpu=~\"${gpu}\"})",
           "interval": "",
           "legendFormat": "",
           "refId": "A"
@@ -253,7 +253,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "DCGM_FI_DEV_POWER_USAGE",
+          "expr": "DCGM_FI_DEV_POWER_USAGE{instance=~\"${instance}\", gpu=~\"${gpu}\"}",
           "interval": "",
           "legendFormat": "GPU {{gpu}}",
           "refId": "A"
@@ -283,7 +283,7 @@
           "label": null,
           "logBase": 1,
           "max": null,
-          "min": "0",
+          "min": null,
           "show": true
         },
         {
@@ -320,24 +320,7 @@
             "color": {
               "mode": "thresholds"
             },
-            "mappings": [
-              {
-                "id": 0,
-                "op": "=",
-                "text": "value to text",
-                "type": 1,
-                "value": "1"
-              },
-              {
-                "from": "",
-                "id": 1,
-                "operator": "",
-                "text": "range to text",
-                "to": "",
-                "type": 1,
-                "value": "2"
-              }
-            ],
+            "mappings": [],
             "max": 2400,
             "min": 0,
             "nullValueMode": "connected",
@@ -370,7 +353,7 @@
       "pluginVersion": "6.7.3",
       "targets": [
         {
-          "expr": "sum(DCGM_FI_DEV_POWER_USAGE)",
+          "expr": "sum(DCGM_FI_DEV_POWER_USAGE{instance=~\"${instance}\", gpu=~\"${gpu}\"})",
           "interval": "",
           "legendFormat": "",
           "refId": "A"
@@ -426,7 +409,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "DCGM_FI_DEV_SM_CLOCK",
+          "expr": "DCGM_FI_DEV_SM_CLOCK{instance=~\"${instance}\", gpu=~\"${gpu}\"} * 1000000",
           "format": "time_series",
           "interval": "",
           "intervalFactor": 1,
@@ -458,8 +441,8 @@
           "format": "hertz",
           "label": "",
           "logBase": 1,
-          "max": "100",
-          "min": "0",
+          "max": null,
+          "min": null,
           "show": true
         },
         {
@@ -519,7 +502,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "DCGM_FI_DEV_MEM_CLOCK",
+          "expr": "DCGM_FI_DEV_MEM_CLOCK{instance=~\"${instance}\", gpu=~\"${gpu}\"} * 1000000",
           "interval": "",
           "legendFormat": "GPU {{gpu}}",
           "refId": "A"
@@ -548,8 +531,8 @@
           "format": "hertz",
           "label": null,
           "logBase": 1,
-          "max": "100",
-          "min": "0",
+          "max": null,
+          "min": null,
           "show": true
         },
         {
@@ -609,7 +592,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "DCGM_FI_DEV_GPU_UTIL",
+          "expr": "DCGM_FI_DEV_GPU_UTIL{instance=~\"${instance}\", gpu=~\"${gpu}\"}",
           "interval": "",
           "legendFormat": "GPU {{gpu}}",
           "refId": "A"
@@ -699,7 +682,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "DCGM_FI_DEV_MEM_COPY_UTIL",
+          "expr": "DCGM_FI_DEV_MEM_COPY_UTIL{instance=~\"${instance}\", gpu=~\"${gpu}\"}",
           "interval": "",
           "legendFormat": "GPU {{gpu}}",
           "refId": "A"
@@ -788,7 +771,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "DCGM_FI_DEV_FB_USED",
+          "expr": "DCGM_FI_DEV_FB_USED{instance=~\"${instance}\", gpu=~\"${gpu}\"}",
           "interval": "",
           "legendFormat": "GPU {{gpu}}",
           "refId": "A"
@@ -814,11 +797,11 @@
       },
       "yaxes": [
         {
-          "format": "decbytes",
+          "format": "mbytes",
           "label": null,
           "logBase": 1,
-          "max": "100",
-          "min": "0",
+          "max": null,
+          "min": null,
           "show": true
         },
         {
@@ -878,7 +861,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "DCGM_FI_DEV_FB_USED",
+          "expr": "DCGM_FI_DEV_FB_FREE{instance=~\"${instance}\", gpu=~\"${gpu}\"}",
           "interval": "",
           "legendFormat": "GPU {{gpu}}",
           "refId": "A"
@@ -904,11 +887,11 @@
       },
       "yaxes": [
         {
-          "format": "decbytes",
+          "format": "mbytes",
           "label": null,
           "logBase": 1,
-          "max": "100",
-          "min": "0",
+          "max": null,
+          "min": null,
           "show": true
         },
         {
@@ -936,12 +919,34 @@
         "allValue": null,
         "current": {},
         "datasource": "${DS_PROMETHEUS}",
-        "definition": "label_values(gpu)",
+        "definition": "label_values(DCGM_FI_DEV_GPU_TEMP, instance)",
         "hide": 0,
         "includeAll": false,
+        "label": null,
+        "multi": true,
+        "name": "instance",
+        "options": [],
+        "query": "label_values(DCGM_FI_DEV_GPU_TEMP, instance)",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 0,
+        "tagValuesQuery": "",
+        "tags": [],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "allValue": null,
+        "current": {},
+        "datasource": "${DS_PROMETHEUS}",
+        "definition": "label_values(gpu)",
+        "hide": 0,
+        "includeAll": true,
         "index": -1,
         "label": null,
-        "multi": false,
+        "multi": true,
         "name": "gpu",
         "options": [],
         "query": "label_values(gpu)",
@@ -982,4 +987,4 @@
     "list": []
   },
   "version": 1
-}
\ No newline at end of file
+}