From 94f4d08af8c497bd8c36b7bf4506a62da64e6173 Mon Sep 17 00:00:00 2001 From: Renaud Gaubert Date: Fri, 10 Jul 2020 02:17:26 +0000 Subject: [PATCH] Release rc 12 Signed-off-by: Renaud Gaubert --- README.md | 10 +++++----- dcgm-exporter.yaml | 10 +++++----- service-monitor.yaml | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 0c2647d..11e05a4 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,10 @@ Note: Consider using the [NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-ope Ensure you have already setup your cluster with the [default runtime as NVIDIA](https://github.com/NVIDIA/nvidia-container-runtime#docker-engine-setup). To gather metrics on your GPU nodes you can deploy the daemonset: ``` -$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/gpu-monitoring-tools/2.0.0-rc.11/dcgm-exporter.yaml +$ kubectl create -f https://raw.githubusercontent.com/NVIDIA/gpu-monitoring-tools/2.0.0-rc.12/dcgm-exporter.yaml # Let's get the output of a random pod: -$ NAME=$(kubectl get pods -l "app.kubernetes.io/name=dcgm-exporter, app.kubernetes.io/version=2.0.0-rc.11" \ +$ NAME=$(kubectl get pods -l "app.kubernetes.io/name=dcgm-exporter, app.kubernetes.io/version=2.0.0-rc.12" \ -o "jsonpath={ .items[0].metadata.name}") $ kubectl port-forward $NAME 8080:9400 & @@ -68,7 +68,7 @@ $ helm repo add stable https://kubernetes-charts.storage.googleapis.com $ helm install stable/prometheus-operator --generate-name \ --set "prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false" $ kubectl create -f \ - https://raw.githubusercontent.com/NVIDIA/gpu-monitoring-tools/2.0.0-rc.11/service-monitor.yaml + https://raw.githubusercontent.com/NVIDIA/gpu-monitoring-tools/2.0.0-rc.12/service-monitor.yaml # Note might take ~1-2 minutes for prometheus to pickup the metrics and display them # You can also check in the WebUI the servce-discovery tab (in the Status category) @@ -133,7 +133,7 @@ DCGM_FI_DEV_MEMORY_TEMP{gpu="0", UUID="GPU-604ac76c-d9cf-fef3-62e9-d92044ab6e52" ### Changing the Metrics With dcgm-exporter 2.0 you can configure which fields are collected by specifying a custom CSV file. -You will find the [default CSV file here](https://github.com/NVIDIA/gpu-monitoring-tools/blob/2.0.0-rc.11/etc/dcgm-exporter/default-counters.csv) and on your system or container at /etc/dcgm-exporter/default-counters.csv +You will find the [default CSV file here](https://github.com/NVIDIA/gpu-monitoring-tools/blob/2.0.0-rc.12/etc/dcgm-exporter/default-counters.csv) and on your system or container at /etc/dcgm-exporter/default-counters.csv The format of this file is pretty straightforward: ``` @@ -159,7 +159,7 @@ Notes: You can find the official NVIDIA dcgm-exporter dashboard here: https://grafana.com/grafana/dashboards/12239 -You will also find the json file on this repo: https://github.com/NVIDIA/gpu-monitoring-tools/blob/2.0.0-rc.11/grafana/dcgm-exporter-dashboard.json +You will also find the json file on this repo: https://github.com/NVIDIA/gpu-monitoring-tools/blob/2.0.0-rc.12/grafana/dcgm-exporter-dashboard.json Pull requests are accepted! diff --git a/dcgm-exporter.yaml b/dcgm-exporter.yaml index 8e5f822..3ce5389 100644 --- a/dcgm-exporter.yaml +++ b/dcgm-exporter.yaml @@ -18,19 +18,19 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "2.0.0-rc.11" + app.kubernetes.io/version: "2.0.0-rc.12" spec: updateStrategy: type: RollingUpdate selector: matchLabels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "2.0.0-rc.11" + app.kubernetes.io/version: "2.0.0-rc.12" template: metadata: labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "2.0.0-rc.11" + app.kubernetes.io/version: "2.0.0-rc.12" name: "dcgm-exporter" spec: containers: @@ -64,11 +64,11 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "2.0.0-rc.11" + app.kubernetes.io/version: "2.0.0-rc.12" spec: selector: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "2.0.0-rc.11" + app.kubernetes.io/version: "2.0.0-rc.12" ports: - name: "metrics" port: 9400 diff --git a/service-monitor.yaml b/service-monitor.yaml index 0e18f7a..7e39764 100644 --- a/service-monitor.yaml +++ b/service-monitor.yaml @@ -18,12 +18,12 @@ metadata: name: "dcgm-exporter" labels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "2.0.0-rc.11" + app.kubernetes.io/version: "2.0.0-rc.12" spec: selector: matchLabels: app.kubernetes.io/name: "dcgm-exporter" - app.kubernetes.io/version: "2.0.0-rc.11" + app.kubernetes.io/version: "2.0.0-rc.12" endpoints: - port: "metrics" path: "/metrics"