Skip to content

Commit 7426080

Browse files
committed
Create device nodes in JIT-CDI mode
Signed-off-by: Evan Lezar <[email protected]>
1 parent 76b6d4d commit 7426080

File tree

11 files changed

+77
-48
lines changed

11 files changed

+77
-48
lines changed

Diff for: internal/info/proc/devices/builder.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func New(opts ...Option) Devices {
4545
type Option func(*builder)
4646

4747
// WithDeviceToMajor specifies an explicit device name to major number map.
48-
func WithDeviceToMajor(deviceToMajor map[string]int) Option {
48+
func WithDeviceToMajor(deviceToMajor map[string]uint32) Option {
4949
return func(b *builder) {
5050
b.asMap = make(devices)
5151
for name, major := range deviceToMajor {

Diff for: internal/info/proc/devices/devices.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ const (
4545
type Name string
4646

4747
// Major represents a device major as specified under /proc/devices
48-
type Major int
48+
type Major uint32
4949

5050
// Devices represents the set of devices under /proc/devices
5151
//
@@ -130,8 +130,8 @@ func nvidiaDeviceFrom(reader io.Reader) (Devices, error) {
130130
return nvidiaDevices, nil
131131
}
132132

133-
func devicesFrom(reader io.Reader) map[string]int {
134-
allDevices := make(map[string]int)
133+
func devicesFrom(reader io.Reader) map[string]uint32 {
134+
allDevices := make(map[string]uint32)
135135
scanner := bufio.NewScanner(reader)
136136
for scanner.Scan() {
137137
device, major, err := processProcDeviceLine(scanner.Text())
@@ -143,11 +143,11 @@ func devicesFrom(reader io.Reader) map[string]int {
143143
return allDevices
144144
}
145145

146-
func processProcDeviceLine(line string) (string, int, error) {
146+
func processProcDeviceLine(line string) (string, uint32, error) {
147147
trimmed := strings.TrimSpace(line)
148148

149149
var name string
150-
var major int
150+
var major uint32
151151

152152
n, _ := fmt.Sscanf(trimmed, "%d %s", &major, &name)
153153
if n == 2 {

Diff for: internal/modifier/cdi.go

+30-1
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,15 @@ import (
2222

2323
"tags.cncf.io/container-device-interface/pkg/parser"
2424

25+
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
26+
2527
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
2628
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
2729
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2830
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
2931
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
3032
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
33+
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
3134
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
3235
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
3336
)
@@ -198,12 +201,14 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, drive
198201
logger.Warningf("Ignoring error(s) loading kernel modules: %v", err)
199202
}
200203

201-
identifiers := []string{}
204+
var identifiers []string
202205
for _, device := range devices {
203206
_, _, id := parser.ParseDevice(device)
204207
identifiers = append(identifiers, id)
205208
}
206209

210+
tryCreateDeviceNodes(logger, driver, identifiers...)
211+
207212
deviceSpecs, err := cdilib.GetDeviceSpecsByID(identifiers...)
208213
if err != nil {
209214
return nil, fmt.Errorf("failed to get CDI device specs: %w", err)
@@ -221,3 +226,27 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, drive
221226
spec.WithClass("gpu"),
222227
)
223228
}
229+
230+
// tryCreateDeviceNodes makes a best-effort attempt to create the NVIDIA device
// nodes needed for the requested identifiers.
//
// It constructs an nvdevices interface using the supplied logger and
// driver.Root as the dev root, then asks it to create the NVIDIA control
// devices, the nvidia-caps control device nodes, and the device nodes for each
// entry in identifiers (converted to device.Identifier).
//
// The function returns nothing: every failure is reported through
// logger.Warningf. If the nvdevices interface cannot be constructed it returns
// early; a failure in any later step does not stop the remaining steps.
//
// driver is dereferenced without a nil check, so callers must pass a non-nil
// driver.
func tryCreateDeviceNodes(logger logger.Interface, driver *root.Driver, identifiers ...string) {
231+
devices, err := nvdevices.New(
232+
nvdevices.WithLogger(logger),
233+
nvdevices.WithDevRoot(driver.Root),
234+
)
235+
if err != nil {
236+
logger.Warningf("Failed to create devices library: %v", err)
237+
return
238+
}
239+
if err := devices.CreateNVIDIAControlDevices(); err != nil {
240+
logger.Warningf("Failed to create control devices: %v", err)
241+
}
242+
if err := devices.CreateNVIDIACapsControlDeviceNodes(); err != nil {
243+
logger.Warningf("Failed to create nvidia-caps control devices: %v", err)
244+
}
245+
246+
for _, id := range identifiers {
247+
identifier := device.Identifier(id)
248+
if err := devices.CreateDeviceNodes(identifier); err != nil {
249+
logger.Warningf("Error creating device nodes for %v: %v", identifier, err)
250+
}
251+
}
252+
}

Diff for: internal/nvcaps/nvcaps.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ const (
3737
)
3838

3939
// MigMinor represents the minor number of a MIG device
40-
type MigMinor int
40+
type MigMinor uint32
4141

4242
// MigCap represents the path to a MIG cap file.
4343
// These are listed in /proc/driver/nvidia-caps/mig-minors and have one of the
@@ -144,7 +144,7 @@ func processMigMinorsLine(line string) (MigCap, MigMinor, error) {
144144
return "", 0, fmt.Errorf("invalid MIG minors line: '%v'", line)
145145
}
146146

147-
minor, err := strconv.Atoi(parts[1])
147+
minor, err := strconv.ParseUint(parts[1], 10, 32)
148148
if err != nil {
149149
return "", 0, fmt.Errorf("error reading MIG minor from '%v': %v", line, err)
150150
}

Diff for: internal/oci/spec_mock.go

+1-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: internal/system/nvdevices/control-device-nodes.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ func (m *Interface) CreateNVIDIACapsControlDeviceNodes() error {
5959
continue
6060
}
6161
deviceNodePath := migMinor.DevicePath()
62-
if err := m.createDeviceNode(deviceNodePath, int(capsMajor), int(migMinor)); err != nil {
62+
if err := m.createDeviceNode(deviceNodePath, capsMajor, uint32(migMinor)); err != nil {
6363
errs = errors.Join(errs, fmt.Errorf("failed to create nvidia-caps device node %v: %w", deviceNodePath, err))
6464
}
6565
}
@@ -82,12 +82,12 @@ func (m *Interface) createControlDeviceNode(node controlDeviceNode) error {
8282
return fmt.Errorf("failed to determine minor: %w", err)
8383
}
8484

85-
return m.createDeviceNode(node.path(), int(major), int(minor))
85+
return m.createDeviceNode(node.path(), major, minor)
8686
}
8787

8888
// controlDeviceNodeMajor returns the major number for the specified NVIDIA control device node.
8989
// If the device node is not supported, an error is returned.
90-
func (m *Interface) controlDeviceNodeMajor(node controlDeviceNode) (int64, error) {
90+
func (m *Interface) controlDeviceNodeMajor(node controlDeviceNode) (devices.Major, error) {
9191
var valid bool
9292
var major devices.Major
9393
switch node {
@@ -98,15 +98,15 @@ func (m *Interface) controlDeviceNodeMajor(node controlDeviceNode) (int64, error
9898
}
9999

100100
if valid {
101-
return int64(major), nil
101+
return major, nil
102102
}
103103

104104
return 0, errInvalidDeviceNode
105105
}
106106

107107
// controlDeviceNodeMinor returns the minor number for the specified NVIDIA control device node.
108108
// If the device node is not supported, an error is returned.
109-
func (m *Interface) controlDeviceNodeMinor(node controlDeviceNode) (int64, error) {
109+
func (m *Interface) controlDeviceNodeMinor(node controlDeviceNode) (uint32, error) {
110110
switch node {
111111
case "nvidia-modeset":
112112
return devices.NVIDIAModesetMinor, nil

Diff for: internal/system/nvdevices/devices.go

+7-7
Original file line numberDiff line numberDiff line change
@@ -89,25 +89,25 @@ func New(opts ...Option) (*Interface, error) {
8989
func (m *Interface) CreateDeviceNodes(id device.Identifier) error {
9090
switch {
9191
case id.IsGpuIndex():
92-
index, err := strconv.Atoi(string(id))
92+
index, err := strconv.ParseUint(string(id), 10, 32)
9393
if err != nil {
9494
return fmt.Errorf("invalid GPU index: %v", id)
9595
}
96-
return m.createGPUDeviceNode(index)
96+
return m.createGPUDeviceNode(int(index))
9797
case id.IsMigIndex():
9898
indices := strings.Split(string(id), ":")
9999
if len(indices) != 2 {
100100
return fmt.Errorf("invalid MIG index %v", id)
101101
}
102-
gpuIndex, err := strconv.Atoi(indices[0])
102+
gpuIndex, err := strconv.ParseUint(indices[0], 10, 32)
103103
if err != nil {
104104
return fmt.Errorf("invalid parent index %v: %w", indices[0], err)
105105
}
106-
if err := m.createGPUDeviceNode(gpuIndex); err != nil {
106+
if err := m.createGPUDeviceNode(int(gpuIndex)); err != nil {
107107
return fmt.Errorf("failed to create parent device node: %w", err)
108108
}
109109

110-
return m.createMigDeviceNodes(gpuIndex)
110+
return m.createMigDeviceNodes(int(gpuIndex))
111111
case id.IsGpuUUID(), id.IsMigUUID(), id == "all":
112112
return m.createAllGPUDeviceNodes()
113113
default:
@@ -117,7 +117,7 @@ func (m *Interface) CreateDeviceNodes(id device.Identifier) error {
117117

118118
// createDeviceNode creates the specified device node with the required major and minor numbers.
119119
// If a devRoot is configured, this is prepended to the path.
120-
func (m *Interface) createDeviceNode(path string, major int, minor int) error {
120+
func (m *Interface) createDeviceNode(path string, major devices.Major, minor uint32) error {
121121
path = filepath.Join(m.devRoot, path)
122-
return m.Mknode(path, major, minor)
122+
return m.Mknode(path, uint32(major), minor)
123123
}

Diff for: internal/system/nvdevices/gpu-device-nodes.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func (m *Interface) createGPUDeviceNode(gpuIndex int) error {
3333
}
3434

3535
deviceNodePath := fmt.Sprintf("/dev/nvidia%d", gpuIndex)
36-
if err := m.createDeviceNode(deviceNodePath, int(major), gpuIndex); err != nil {
36+
if err := m.createDeviceNode(deviceNodePath, major, uint32(gpuIndex)); err != nil {
3737
return fmt.Errorf("failed to create device node %v: %w", deviceNodePath, err)
3838
}
3939
return nil
@@ -47,7 +47,7 @@ func (m *Interface) createMigDeviceNodes(gpuIndex int) error {
4747
var errs error
4848
for _, capsDeviceMinor := range m.migCaps.FilterForGPU(gpuIndex) {
4949
capDevicePath := capsDeviceMinor.DevicePath()
50-
err := m.createDeviceNode(capDevicePath, int(capsMajor), int(capsDeviceMinor))
50+
err := m.createDeviceNode(capDevicePath, capsMajor, uint32(capsDeviceMinor))
5151
errs = errors.Join(errs, fmt.Errorf("failed to create %v: %w", capDevicePath, err))
5252
}
5353
return errs

Diff for: internal/system/nvdevices/mknod.go

+7-5
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,18 @@ import (
2525
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2626
)
2727

28-
//go:generate moq -stub -out mknod_mock.go . mknoder
28+
type mint uint32
29+
30+
//go:generate moq -fmt=goimports -rm -stub -out mknod_mock.go . mknoder
2931
type mknoder interface {
30-
Mknode(string, int, int) error
32+
Mknode(string, uint32, uint32) error
3133
}
3234

3335
type mknodLogger struct {
3436
logger.Interface
3537
}
3638

37-
func (m *mknodLogger) Mknode(path string, major, minor int) error {
39+
// Mknode logs, at info level, the mknod command line for a character device at
// path with the given major and minor numbers. This implementation only logs
// the command; it performs no other action and always returns nil.
func (m *mknodLogger) Mknode(path string, major uint32, minor uint32) error {
3840
m.Infof("Running: mknod --mode=0666 %s c %d %d", path, major, minor)
4042
return nil
4143
}
@@ -43,7 +45,7 @@ type mknodUnix struct {
4345
logger logger.Interface
4446
}
4547

46-
func (m *mknodUnix) Mknode(path string, major, minor int) error {
48+
func (m *mknodUnix) Mknode(path string, major uint32, minor uint32) error {
4749
// TODO: Ensure that the existing device node has the correct properties.
4850
if _, err := os.Stat(path); err == nil {
4951
m.logger.Infof("Skipping: %s already exists", path)
@@ -52,7 +54,7 @@ func (m *mknodUnix) Mknode(path string, major, minor int) error {
5254
return fmt.Errorf("failed to stat %s: %v", path, err)
5355
}
5456

55-
err := unix.Mknod(path, unix.S_IFCHR, int(unix.Mkdev(uint32(major), uint32(minor))))
57+
err := unix.Mknod(path, unix.S_IFCHR, int(unix.Mkdev(major, minor)))
5658
if err != nil {
5759
return err
5860
}

Diff for: internal/system/nvdevices/mknod_mock.go

+16-16
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: pkg/nvcdi/namer_nvml_mock.go

+1-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)