From 2ce0c832db0ec470de8e6aa29013b0cee39e3a5f Mon Sep 17 00:00:00 2001 From: Renaud Gaubert Date: Fri, 1 May 2020 23:58:47 -0700 Subject: [PATCH] [README] Update Prometheus instructions Signed-off-by: Renaud Gaubert --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 0bac324..ca22752 100644 --- a/README.md +++ b/README.md @@ -63,11 +63,14 @@ DCGM_FI_DEV_MEMORY_TEMP{gpu="0" UUID="GPU-604ac76c-d9cf-fef3-62e9-d92044ab6e52"} # Note on exporters here: # https://github.com/coreos/prometheus-operator/blob/release-0.38/Documentation/user-guides/running-exporters.md +$ helm repo add stable https://kubernetes-charts.storage.googleapis.com $ helm install stable/prometheus-operator --generate-name \ --set "prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false" $ kubectl create -f \ https://raw.githubusercontent.com/NVIDIA/gpu-monitoring-tools/2.0.0-rc.8/service-monitor.yaml +# Note: it might take ~1-2 minutes for Prometheus to pick up the metrics and display them +# You can also check the service-discovery tab in the WebUI (in the Status category) $ NAME=$(kubectl get svc -l app=prometheus-operator-prometheus -o jsonpath='{.items[0].metadata.name}') $ curl "$BASE/services/$NAME:9090/proxy/api/v1/query?query=DCGM_FI_DEV_MEMORY_TEMP" {