diff --git a/cmd/nvidia-dra-plugin/cdi.go b/cmd/nvidia-dra-plugin/cdi.go
index 548f624d..bad73ed5 100644
--- a/cmd/nvidia-dra-plugin/cdi.go
+++ b/cmd/nvidia-dra-plugin/cdi.go
@@ -48,16 +48,16 @@ const (
 )
 
 type CDIHandler struct {
-	logger           *logrus.Logger
-	nvml             nvml.Interface
-	nvdevice         nvdevice.Interface
-	nvcdiDevice      nvcdi.Interface
-	nvcdiClaim       nvcdi.Interface
-	cache            *cdiapi.Cache
-	driverRoot       string
-	devRoot          string
-	targetDriverRoot string
-	nvidiaCTKPath    string
+	logger            *logrus.Logger
+	nvml              nvml.Interface
+	nvdevice          nvdevice.Interface
+	nvcdiDevice       nvcdi.Interface
+	nvcdiClaim        nvcdi.Interface
+	cache             *cdiapi.Cache
+	driverRoot        string
+	devRoot           string
+	targetDriverRoot  string
+	nvidiaCDIHookPath string
 
 	cdiRoot     string
 	vendor      string
@@ -103,7 +103,7 @@ func NewCDIHandler(opts ...cdiOption) (*CDIHandler, error) {
 			nvcdi.WithMode("nvml"),
 			nvcdi.WithVendor(h.vendor),
 			nvcdi.WithClass(h.deviceClass),
-			nvcdi.WithNVIDIACDIHookPath(h.nvidiaCTKPath),
+			nvcdi.WithNVIDIACDIHookPath(h.nvidiaCDIHookPath),
 		)
 		if err != nil {
 			return nil, fmt.Errorf("unable to create CDI library for devices: %w", err)
@@ -120,7 +120,7 @@ func NewCDIHandler(opts ...cdiOption) (*CDIHandler, error) {
 			nvcdi.WithMode("nvml"),
 			nvcdi.WithVendor(h.vendor),
 			nvcdi.WithClass(h.claimClass),
-			nvcdi.WithNVIDIACDIHookPath(h.nvidiaCTKPath),
+			nvcdi.WithNVIDIACDIHookPath(h.nvidiaCDIHookPath),
 		)
 		if err != nil {
 			return nil, fmt.Errorf("unable to create CDI library for claims: %w", err)
diff --git a/cmd/nvidia-dra-plugin/cdioptions.go b/cmd/nvidia-dra-plugin/cdioptions.go
index 4e7b9916..b7497fa9 100644
--- a/cmd/nvidia-dra-plugin/cdioptions.go
+++ b/cmd/nvidia-dra-plugin/cdioptions.go
@@ -52,10 +52,10 @@ func WithCDIRoot(cdiRoot string) cdiOption {
 	}
 }
 
-// WithNvidiaCTKPath provides an cdiOption to set the nvidia-ctk path used by the 'cdi' interface.
-func WithNvidiaCTKPath(path string) cdiOption {
+// WithNvidiaCDIHookPath provides a cdiOption to set the nvidia-cdi-hook path used by the 'cdi' interface.
+func WithNvidiaCDIHookPath(path string) cdiOption {
 	return func(c *CDIHandler) {
-		c.nvidiaCTKPath = path
+		c.nvidiaCDIHookPath = path
 	}
 }
 
diff --git a/cmd/nvidia-dra-plugin/device_state.go b/cmd/nvidia-dra-plugin/device_state.go
index 22879d83..ae71124d 100644
--- a/cmd/nvidia-dra-plugin/device_state.go
+++ b/cmd/nvidia-dra-plugin/device_state.go
@@ -76,7 +76,7 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
 		WithDriverRoot(string(containerDriverRoot)),
 		WithDevRoot(devRoot),
 		WithTargetDriverRoot(hostDriverRoot),
-		WithNvidiaCTKPath(config.flags.nvidiaCTKPath),
+		WithNvidiaCDIHookPath(config.flags.nvidiaCDIHookPath),
 		WithCDIRoot(config.flags.cdiRoot),
 		WithVendor(cdiVendor),
 	)
diff --git a/cmd/nvidia-dra-plugin/main.go b/cmd/nvidia-dra-plugin/main.go
index ab5bd585..36faec13 100644
--- a/cmd/nvidia-dra-plugin/main.go
+++ b/cmd/nvidia-dra-plugin/main.go
@@ -50,7 +50,7 @@ type Flags struct {
 	cdiRoot             string
 	containerDriverRoot string
 	hostDriverRoot      string
-	nvidiaCTKPath       string
+	nvidiaCDIHookPath   string
 	deviceClasses       sets.Set[string]
 }
 
@@ -108,11 +108,11 @@ func newApp() *cli.App {
 			EnvVars:     []string{"CONTAINER_DRIVER_ROOT"},
 		},
 		&cli.StringFlag{
-			Name:        "nvidia-ctk-path",
-			Value:       "/usr/bin/nvidia-ctk",
-			Usage:       "the path to use for the nvidia-ctk in the generated CDI specification. Note that this represents the path on the host.",
-			Destination: &flags.nvidiaCTKPath,
-			EnvVars:     []string{"NVIDIA_CTK_PATH"},
+			Name:        "nvidia-cdi-hook-path",
+			Value:       "/usr/bin/nvidia-cdi-hook",
+			Usage:       "the path to use for the nvidia-cdi-hook in the generated CDI specification. Note that this represents the path on the host.",
+			Destination: &flags.nvidiaCDIHookPath,
+			EnvVars:     []string{"NVIDIA_CDI_HOOK_PATH", "NVIDIA_CTK_PATH"},
 		},
 		&cli.StringSliceFlag{
 			Name:        "device-classes",
diff --git a/demo/clusters/kind/install-dra-driver.sh b/demo/clusters/kind/install-dra-driver.sh
index ece8cdf1..35c46fa7 100755
--- a/demo/clusters/kind/install-dra-driver.sh
+++ b/demo/clusters/kind/install-dra-driver.sh
@@ -27,7 +27,7 @@ kubectl label node -l node-role.x-k8s.io/worker --overwrite nvidia.com/gpu.prese
 deviceClasses=${1:-"gpu,mig,imex"}
 helm upgrade -i --create-namespace --namespace nvidia nvidia-dra-driver ${PROJECT_DIR}/deployments/helm/k8s-dra-driver \
   --set deviceClasses="{${deviceClasses}}" \
-  ${NVIDIA_CTK_PATH:+--set nvidiaCtkPath=${NVIDIA_CTK_PATH}} \
+  ${NVIDIA_CDI_HOOK_PATH:+--set nvidiaCDIHookPath=${NVIDIA_CDI_HOOK_PATH}} \
  ${NVIDIA_DRIVER_ROOT:+--set nvidiaDriverRoot=${NVIDIA_DRIVER_ROOT}} \
   ${MASK_NVIDIA_DRIVER_PARAMS:+--set maskNvidiaDriverParams=${MASK_NVIDIA_DRIVER_PARAMS}} \
   --wait
diff --git a/demo/clusters/kind/scripts/kind-cluster-config.yaml b/demo/clusters/kind/scripts/kind-cluster-config.yaml
index b5af76c2..e245adff 100644
--- a/demo/clusters/kind/scripts/kind-cluster-config.yaml
+++ b/demo/clusters/kind/scripts/kind-cluster-config.yaml
@@ -59,13 +59,12 @@ nodes:
   # in `/etc/nvidia-container-runtime/config.toml`
   - hostPath: /dev/null
     containerPath: /var/run/nvidia-container-devices/cdi/runtime.nvidia.com/gpu/all
-  # The generated CDI specification assumes that `nvidia-ctk` is available on a
-  # node -- specifically for the `nvidia-ctk hook` subcommand. As a workaround,
-  # we mount it from the host.
-  # TODO: Remove this once we have a more stable solution to make `nvidia-ctk`
+  # The generated CDI specification assumes that `nvidia-cdi-hook` is available on a
+  # node
+  # TODO: Remove this once we have a more stable solution to make `nvidia-cdi-hook`
   # on the kind nodes.
-  - hostPath: /usr/bin/nvidia-ctk
-    containerPath: /usr/bin/nvidia-ctk
+  - hostPath: /usr/bin/nvidia-cdi-hook
+    containerPath: /usr/bin/nvidia-cdi-hook
   # We need to inject the fabricmanager socket to support MIG with toolkit 1.16.2
   # TODO: Remove this once we have a version of the toolkit where this is not required
   - hostPath: /run/nvidia-fabricmanager/socket
diff --git a/demo/clusters/nvkind/scripts/kind-cluster-config.yaml b/demo/clusters/nvkind/scripts/kind-cluster-config.yaml
index adf15356..af9ee67a 100644
--- a/demo/clusters/nvkind/scripts/kind-cluster-config.yaml
+++ b/demo/clusters/nvkind/scripts/kind-cluster-config.yaml
@@ -60,13 +60,12 @@ nodes:
   # in `/etc/nvidia-container-runtime/config.toml`
   - hostPath: /dev/null
     containerPath: /var/run/nvidia-container-devices/cdi/runtime.nvidia.com/gpu/{{ $gpu }}
-  # The generated CDI specification assumes that `nvidia-ctk` is available on a
-  # node -- specifically for the `nvidia-ctk hook` subcommand. As a workaround,
-  # we mount it from the host.
-  # TODO: Remove this once we have a more stable solution to make `nvidia-ctk`
+  # The generated CDI specification assumes that `nvidia-cdi-hook` is available on a
+  # node
+  # TODO: Remove this once we have a more stable solution to make `nvidia-cdi-hook`
   # on the kind nodes.
-  - hostPath: /usr/bin/nvidia-ctk
-    containerPath: /usr/bin/nvidia-ctk
+  - hostPath: /usr/bin/nvidia-cdi-hook
+    containerPath: /usr/bin/nvidia-cdi-hook
   # We need to inject the fabricmanager socket to support MIG with toolkit 1.16.2
   # TODO: Remove this once we have a version of the toolkit where this is not required
   - hostPath: /run/nvidia-fabricmanager/socket
diff --git a/deployments/helm/k8s-dra-driver/templates/kubeletplugin.yaml b/deployments/helm/k8s-dra-driver/templates/kubeletplugin.yaml
index 0b9b09b0..7472b61a 100644
--- a/deployments/helm/k8s-dra-driver/templates/kubeletplugin.yaml
+++ b/deployments/helm/k8s-dra-driver/templates/kubeletplugin.yaml
@@ -71,8 +71,8 @@ spec:
         env:
         - name: MASK_NVIDIA_DRIVER_PARAMS
           value: "{{ .Values.maskNvidiaDriverParams }}"
-        - name: NVIDIA_CTK_PATH
-          value: "{{ .Values.nvidiaCtkPath }}"
+        - name: NVIDIA_CDI_HOOK_PATH
+          value: "{{ .Values.nvidiaCDIHookPath }}"
         - name: NVIDIA_DRIVER_ROOT
           value: "{{ .Values.nvidiaDriverRoot }}"
        - name: NVIDIA_VISIBLE_DEVICES
diff --git a/deployments/helm/k8s-dra-driver/values.yaml b/deployments/helm/k8s-dra-driver/values.yaml
index 76ff38ca..d062b36b 100644
--- a/deployments/helm/k8s-dra-driver/values.yaml
+++ b/deployments/helm/k8s-dra-driver/values.yaml
@@ -22,10 +22,10 @@
 # For driver installed directly on a host, a value of `/` is used.
 nvidiaDriverRoot: /
 
-# Specify the path of CTK binary (nvidia-ctk) on the host,
+# Specify the path of cdi hook (nvidia-cdi-hook) on the host,
 # as it should appear in the the generated CDI specification.
 # The path depends on the system that runs on the node.
-nvidiaCtkPath: /usr/bin/nvidia-ctk
+nvidiaCDIHookPath: /usr/bin/nvidia-cdi-hook
 
 nameOverride: ""
 fullnameOverride: ""