Skip to content

Commit

Permalink
nvml Init
Browse files Browse the repository at this point in the history
Signed-off-by: keyangxie <[email protected]>
  • Loading branch information
xiekeyang committed Jan 22, 2021
1 parent 437dd3d commit a92610b
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 0 deletions.
45 changes: 45 additions & 0 deletions bindings/go/nvml/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package nvml

import (
"fmt"
"os"
"sync"
)

var (
// nvmlInitCounter is incremented by InitCounter and decremented by
// ShutdownCounter. It can become negative when ShutdownCounter is called
// more often than InitCounter.
nvmlInitCounter int
// mux is held by InitCounter and ShutdownCounter while they read and
// update nvmlInitCounter.
mux sync.Mutex
)

// InitCounter is a reference-counted wrapper around Init. The first
// successful call (counter at zero) invokes Init(); later calls only bump the
// counter.
//
// On success it returns a cleanup function that calls ShutdownCounter and
// writes any shutdown error to stderr. On failure it returns the error
// together with a non-nil no-op cleanup, so callers may defer the cleanup
// before checking err without unbalancing the counter.
//
// An error is returned when Init() fails, or when ShutdownCounter has been
// called more often than InitCounter (negative counter).
func InitCounter() (cleanup func(), err error) {
	mux.Lock()
	// Deferred so the mutex is released even if Init() panics.
	defer mux.Unlock()

	// Handed back on every failure path: nothing was acquired, so there is
	// nothing to release.
	noop := func() {}

	switch {
	case nvmlInitCounter < 0:
		// The counter is negative, so its negation is the number of surplus
		// ShutdownCounter() calls. This call offsets one of them.
		err = fmt.Errorf("ShutdownCounter() is called %d times, before InitCounter()", -nvmlInitCounter)
		nvmlInitCounter++
		return noop, err
	case nvmlInitCounter == 0:
		if err = Init(); err != nil {
			// Leave the counter at zero so that a later call retries Init()
			// instead of assuming the library is already initialized.
			return noop, err
		}
	}
	nvmlInitCounter++

	return func() {
		if err := ShutdownCounter(); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to shutdown NVML with error: `%v`\n", err)
		}
	}, nil
}

// ShutdownCounter is the reference-counted counterpart of InitCounter. It
// calls Shutdown() only when the counter is exactly one, i.e. when the last
// outstanding reference is released.
//
// It returns an error when the counter is already zero or negative, or the
// error reported by Shutdown(). The counter is decremented in every case,
// including the error cases, so it can go negative; InitCounter checks for a
// negative counter and reports it.
func ShutdownCounter() (err error) {
	mux.Lock()
	// Deferred so the mutex is released even if Shutdown() panics.
	defer mux.Unlock()

	switch {
	case nvmlInitCounter <= 0:
		err = fmt.Errorf("Init() needs to be called before Shutdown()")
	case nvmlInitCounter == 1:
		err = Shutdown()
	}
	nvmlInitCounter--

	return err
}
8 changes: 8 additions & 0 deletions pkg/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"

"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/dcgm"
"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
Expand Down Expand Up @@ -103,6 +104,13 @@ restart:
}
logrus.Info("DCGM successfully initialized!")

nvmlCleanup, err := nvml.InitCounter()
defer nvmlCleanup()
if err != nil {
logrus.Fatal(err)
}
logrus.Info("DCGM successfully initialized!")

_, err = dcgm.GetSupportedMetricGroups(0)
if err != nil {
config.CollectDCP = false
Expand Down

0 comments on commit a92610b

Please sign in to comment.