Skip to content

Commit

Permalink
Add a sample REST API for DCGM
Browse files Browse the repository at this point in the history
  • Loading branch information
guptaNswati committed Jul 10, 2018
1 parent 7688df9 commit 6f965de
Show file tree
Hide file tree
Showing 6 changed files with 574 additions and 0 deletions.
65 changes: 65 additions & 0 deletions bindings/go/samples/dcgm/restApi/handlers/byIds.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package handlers

import (
"net/http"
)

// DeviceInfo is the HTTP handler that reports static information about a
// single GPU.
//
// The device is resolved by getDeviceInfo. A nil result means the lookup did
// not produce a device (getDeviceInfo sends the HTTP error itself when the
// DCGM call fails), so nothing more is written here. Otherwise the device is
// passed to encode when isJson(req) is true, and to print together with
// deviceInfo when it is not.
func DeviceInfo(resp http.ResponseWriter, req *http.Request) {
	dev := getDeviceInfo(resp, req)
	switch {
	case dev == nil:
		// Nothing to render.
	case isJson(req):
		encode(resp, req, dev)
	default:
		print(resp, req, dev, deviceInfo)
	}
}

// DeviceStatus is the HTTP handler that reports the current status of a
// single GPU.
//
// The status is obtained from getDeviceStatus. A nil result ends the request
// without further output (getDeviceStatus sends the HTTP error itself when
// the DCGM call fails). Otherwise the status is passed to encode when
// isJson(req) is true, and to print together with deviceStatus when it is not.
func DeviceStatus(resp http.ResponseWriter, req *http.Request) {
	status := getDeviceStatus(resp, req)
	switch {
	case status == nil:
		// Nothing to render.
	case isJson(req):
		encode(resp, req, status)
	default:
		print(resp, req, status, deviceStatus)
	}
}

// ProcessInfo is the HTTP handler that reports per-process GPU statistics.
//
// The entries come from getProcessInfo. When the returned slice is empty the
// handler writes nothing further. Otherwise the entries are passed to encode
// when isJson(req) is true, and to processPrint when it is not.
func ProcessInfo(resp http.ResponseWriter, req *http.Request) {
	procs := getProcessInfo(resp, req)
	switch {
	case len(procs) == 0:
		// Nothing to render.
	case isJson(req):
		encode(resp, req, procs)
	default:
		processPrint(resp, req, procs)
	}
}

// Health is the HTTP handler that reports the health check result of a
// single GPU.
//
// The result is obtained from getHealth. A nil result ends the request
// without further output (getHealth sends the HTTP error itself when the DCGM
// call fails). Otherwise the result is passed to encode when isJson(req) is
// true, and to print together with healthStatus when it is not.
func Health(resp http.ResponseWriter, req *http.Request) {
	health := getHealth(resp, req)
	switch {
	case health == nil:
		// Nothing to render.
	case isJson(req):
		encode(resp, req, health)
	default:
		print(resp, req, health, healthStatus)
	}
}

// DcgmStatus is the HTTP handler that reports the status returned by DCGM
// introspection.
//
// The status is obtained from getDcgmStatus, which sends the HTTP error
// itself and returns nil on failure. A non-nil status is passed to encode
// when isJson(req) is true, and to print together with hostengine when it is
// not.
func DcgmStatus(resp http.ResponseWriter, req *http.Request) {
	status := getDcgmStatus(resp, req)
	switch {
	case status == nil:
		// Error response already sent by getDcgmStatus.
	case isJson(req):
		encode(resp, req, status)
	default:
		print(resp, req, status, hostengine)
	}
}
65 changes: 65 additions & 0 deletions bindings/go/samples/dcgm/restApi/handlers/byUuids.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package handlers

import (
"log"
"net/http"

"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/dcgm"
)

// uuids maps a GPU UUID to its DCGM device id. It is (re)built by
// DevicesUuids, which stores each device's UUID against the index used to
// query it.
var uuids map[string]uint

// DevicesUuids rebuilds the package-level uuids map from the devices DCGM
// currently reports.
//
// The map is replaced with a fresh one before any DCGM call is made. If the
// device count cannot be read, or the information of any device cannot be
// fetched, the error is logged and the function returns immediately, leaving
// the map with only the entries collected so far.
func DevicesUuids() {
	uuids = make(map[string]uint)

	total, err := dcgm.GetAllDeviceCount()
	if err != nil {
		log.Printf("(DCGM) Error getting devices: %s", err)
		return
	}

	for gpu := uint(0); gpu < total; gpu++ {
		info, err := dcgm.GetDeviceInfo(gpu)
		if err != nil {
			log.Printf("(DCGM) Error getting device information: %s", err)
			return
		}
		uuids[info.UUID] = gpu
	}
}

// DeviceInfoByUuid is the HTTP handler that reports static information about
// a single GPU on the UUID-based route.
//
// It resolves the device through getDeviceInfo, which accepts either an "id"
// or a "uuid" route variable. A nil result ends the request without further
// output. Otherwise the device goes to encode when isJson(req) is true, and
// to print with deviceInfo when it is not.
func DeviceInfoByUuid(resp http.ResponseWriter, req *http.Request) {
	dev := getDeviceInfo(resp, req)
	if dev == nil {
		return
	}

	if !isJson(req) {
		print(resp, req, dev, deviceInfo)
		return
	}
	encode(resp, req, dev)
}

// DeviceStatusByUuid is the HTTP handler that reports the current status of a
// single GPU on the UUID-based route.
//
// It obtains the status through getDeviceStatus, which accepts either an "id"
// or a "uuid" route variable. A nil result ends the request without further
// output. Otherwise the status goes to encode when isJson(req) is true, and
// to print with deviceStatus when it is not.
func DeviceStatusByUuid(resp http.ResponseWriter, req *http.Request) {
	status := getDeviceStatus(resp, req)
	if status == nil {
		return
	}

	if !isJson(req) {
		print(resp, req, status, deviceStatus)
		return
	}
	encode(resp, req, status)
}

// HealthByUuid is the HTTP handler that reports the health check result of a
// single GPU on the UUID-based route.
//
// It obtains the result through getHealth, which accepts either an "id" or a
// "uuid" route variable. A nil result ends the request without further
// output. Otherwise the result goes to encode when isJson(req) is true, and
// to print with healthStatus when it is not.
func HealthByUuid(resp http.ResponseWriter, req *http.Request) {
	health := getHealth(resp, req)
	if health == nil {
		return
	}

	if !isJson(req) {
		print(resp, req, health, healthStatus)
		return
	}
	encode(resp, req, health)
}
136 changes: 136 additions & 0 deletions bindings/go/samples/dcgm/restApi/handlers/dcgm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package handlers

import (
"log"
"math"
"net/http"
"time"

"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/dcgm"
"github.com/gorilla/mux"
)

// getDcgmStatus runs dcgm.Introspect and returns a pointer to its result.
//
// If introspection fails, the error text is sent to the client with status
// 500, the failure is logged together with the request host and URL, and nil
// is returned.
func getDcgmStatus(resp http.ResponseWriter, req *http.Request) (status *dcgm.DcgmStatus) {
	result, err := dcgm.Introspect()
	if err != nil {
		http.Error(resp, err.Error(), http.StatusInternalServerError)
		log.Printf("error: %v%v: %v", req.Host, req.URL, err.Error())
		return nil
	}
	return &result
}

// getDeviceInfo resolves the GPU addressed by the request and returns its
// DCGM device information.
//
// The device id is taken from the "id" route variable (via getId) or the
// "uuid" route variable (via getIdByUuid). If neither variable is present the
// id keeps its zero value.
//
// nil is returned when the resolved id equals math.MaxUint32 (presumably the
// failure sentinel of getId/getIdByUuid, defined elsewhere), when isValidId
// rejects the id, or when dcgm.GetDeviceInfo fails. In the last case the
// error is sent to the client with status 500 and logged with the request
// host and URL.
func getDeviceInfo(resp http.ResponseWriter, req *http.Request) (device *dcgm.Device) {
	var gpu uint
	for name, value := range mux.Vars(req) {
		if name == "id" {
			gpu = getId(resp, req, value)
		} else if name == "uuid" {
			gpu = getIdByUuid(resp, req, value)
		}
	}

	// Short-circuit keeps isValidId from running on the sentinel value.
	if gpu == math.MaxUint32 || !isValidId(gpu, resp, req) {
		return nil
	}

	info, err := dcgm.GetDeviceInfo(gpu)
	if err != nil {
		http.Error(resp, err.Error(), http.StatusInternalServerError)
		log.Printf("error: %v%v: %v", req.Host, req.URL, err.Error())
		return nil
	}
	return &info
}

// getDeviceStatus resolves the GPU addressed by the request and returns its
// current DCGM status.
//
// The device id is taken from the "id" route variable (via getId) or the
// "uuid" route variable (via getIdByUuid). If neither variable is present the
// id keeps its zero value.
//
// nil is returned when the resolved id equals math.MaxUint32 (presumably the
// failure sentinel of getId/getIdByUuid, defined elsewhere), when isValidId
// or isDcgmSupported rejects the id, or when dcgm.GetDeviceStatus fails. In
// the last case the error is sent to the client with status 500 and logged
// with the request host and URL.
func getDeviceStatus(resp http.ResponseWriter, req *http.Request) (status *dcgm.DeviceStatus) {
	var gpu uint
	for name, value := range mux.Vars(req) {
		if name == "id" {
			gpu = getId(resp, req, value)
		} else if name == "uuid" {
			gpu = getIdByUuid(resp, req, value)
		}
	}

	// The checks run left to right and stop at the first one that fails.
	if gpu == math.MaxUint32 || !isValidId(gpu, resp, req) || !isDcgmSupported(gpu, resp, req) {
		return nil
	}

	current, err := dcgm.GetDeviceStatus(gpu)
	if err != nil {
		http.Error(resp, err.Error(), http.StatusInternalServerError)
		log.Printf("error: %v%v: %v", req.Host, req.URL, err.Error())
		return nil
	}
	return &current
}

// getHealth resolves the GPU addressed by the request and returns the result
// of its DCGM health check.
//
// The device id is taken from the "id" route variable (via getId) or the
// "uuid" route variable (via getIdByUuid). If neither variable is present the
// id keeps its zero value.
//
// nil is returned when the resolved id equals math.MaxUint32 (presumably the
// failure sentinel of getId/getIdByUuid, defined elsewhere), when isValidId
// rejects the id, or when dcgm.HealthCheckByGpuId fails. In the last case the
// error is sent to the client with status 500 and logged with the request
// host and URL.
func getHealth(resp http.ResponseWriter, req *http.Request) (health *dcgm.DeviceHealth) {
	var gpu uint
	for name, value := range mux.Vars(req) {
		if name == "id" {
			gpu = getId(resp, req, value)
		} else if name == "uuid" {
			gpu = getIdByUuid(resp, req, value)
		}
	}

	// Short-circuit keeps isValidId from running on the sentinel value.
	if gpu == math.MaxUint32 || !isValidId(gpu, resp, req) {
		return nil
	}

	result, err := dcgm.HealthCheckByGpuId(gpu)
	if err != nil {
		http.Error(resp, err.Error(), http.StatusInternalServerError)
		log.Printf("error: %v%v: %v", req.Host, req.URL, err.Error())
		return nil
	}
	return &result
}

// getProcessInfo returns the DCGM statistics of the process whose PID is
// given by the "pid" route variable.
//
// The PID is parsed by getId; a result of math.MaxUint32 is treated as a
// failed parse (presumably already reported by getId, which is defined
// elsewhere) and yields nil. PID watches are then enabled with
// dcgm.WatchPidFields, the function waits three seconds for them to start
// collecting, and finally dcgm.GetProcessInfo is queried.
//
// nil is returned on every failure path. DCGM errors are sent to the client
// with status 500 and logged with the request host and URL. If the request is
// cancelled while waiting (for example, the client disconnected), the
// cancellation is logged and nil is returned without querying DCGM.
func getProcessInfo(resp http.ResponseWriter, req *http.Request) (pInfo []dcgm.ProcessInfo) {
	params := mux.Vars(req)
	pid := getId(resp, req, params["pid"])
	if pid == math.MaxUint32 {
		return nil
	}

	group, err := dcgm.WatchPidFields()
	if err != nil {
		http.Error(resp, err.Error(), http.StatusInternalServerError)
		log.Printf("error: %v%v: %v", req.Host, req.URL, err.Error())
		return nil
	}

	// wait for watches to be enabled, but stop waiting as soon as the request
	// is cancelled so an abandoned request does not hold the handler for the
	// full delay.
	log.Printf("Enabling DCGM watches to start collecting process stats. This may take a few seconds....")
	wait := time.NewTimer(3000 * time.Millisecond)
	defer wait.Stop()
	select {
	case <-wait.C:
	case <-req.Context().Done():
		log.Printf("error: %v%v: %v", req.Host, req.URL, req.Context().Err())
		return nil
	}

	pInfo, err = dcgm.GetProcessInfo(group, pid)
	if err != nil {
		http.Error(resp, err.Error(), http.StatusInternalServerError)
		log.Printf("error: %v%v: %v", req.Host, req.URL, err.Error())
		// Never hand back a partial result once an error response has been
		// written; the caller would otherwise append a body to it.
		return nil
	}
	return pInfo
}
Loading

0 comments on commit 6f965de

Please sign in to comment.