From 13bbf71ead2452a7d6e4317c148c5d452beacac5 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 28 Feb 2025 14:13:30 +0200 Subject: [PATCH 1/6] Move ContainerRoot type to oci package Thsi change moves the ContainerRoot type to the oci package and updates state.GetContainerRootDirPath to return a variable of type ContainerRoot. This enabled better reuse between hooks. Signed-off-by: Evan Lezar --- cmd/nvidia-cdi-hook/chmod/chmod.go | 5 +- .../create-symlinks/create-symlinks.go | 4 +- cmd/nvidia-cdi-hook/cudacompat/cudacompat.go | 6 +- .../update-ldcache/container-root.go | 46 ----------- .../update-ldcache/update-ldcache.go | 18 ++--- internal/oci/container-root.go | 77 +++++++++++++++++++ internal/oci/state.go | 14 ++-- 7 files changed, 101 insertions(+), 69 deletions(-) delete mode 100644 cmd/nvidia-cdi-hook/update-ldcache/container-root.go create mode 100644 internal/oci/container-root.go diff --git a/cmd/nvidia-cdi-hook/chmod/chmod.go b/cmd/nvidia-cdi-hook/chmod/chmod.go index 9a4ee6562..7253b44d9 100644 --- a/cmd/nvidia-cdi-hook/chmod/chmod.go +++ b/cmd/nvidia-cdi-hook/chmod/chmod.go @@ -113,7 +113,7 @@ func (m command) run(c *cli.Context, cfg *config) error { return fmt.Errorf("failed to load container state: %v", err) } - containerRoot, err := s.GetContainerRoot() + containerRoot, err := s.GetContainerRootDirPath() if err != nil { return fmt.Errorf("failed to determined container root: %v", err) } @@ -121,7 +121,7 @@ func (m command) run(c *cli.Context, cfg *config) error { return fmt.Errorf("empty container root detected") } - paths := m.getPaths(containerRoot, cfg.paths.Value(), cfg.mode) + paths := m.getPaths(string(containerRoot), cfg.paths.Value(), cfg.mode) if len(paths) == 0 { m.logger.Debugf("No paths specified; exiting") return nil @@ -140,6 +140,7 @@ func (m command) run(c *cli.Context, cfg *config) error { } // getPaths updates the specified paths relative to the root. +// TODO(elezar): This function should be updated to make use of the oci.ContainerRoot type. func (m command) getPaths(root string, paths []string, desiredMode fs.FileMode) []string { var pathsInRoot []string for _, f := range paths { diff --git a/cmd/nvidia-cdi-hook/create-symlinks/create-symlinks.go b/cmd/nvidia-cdi-hook/create-symlinks/create-symlinks.go index 28b0626e3..001c3a91e 100644 --- a/cmd/nvidia-cdi-hook/create-symlinks/create-symlinks.go +++ b/cmd/nvidia-cdi-hook/create-symlinks/create-symlinks.go @@ -84,7 +84,7 @@ func (m command) run(c *cli.Context, cfg *config) error { return fmt.Errorf("failed to load container state: %v", err) } - containerRoot, err := s.GetContainerRoot() + containerRoot, err := s.GetContainerRootDirPath() if err != nil { return fmt.Errorf("failed to determined container root: %v", err) } @@ -100,7 +100,7 @@ func (m command) run(c *cli.Context, cfg *config) error { return fmt.Errorf("invalid symlink specification %v", l) } - err := m.createLink(containerRoot, parts[0], parts[1]) + err := m.createLink(string(containerRoot), parts[0], parts[1]) if err != nil { return fmt.Errorf("failed to create link %v: %w", parts, err) } diff --git a/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go b/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go index 0cecd6c17..be1bb6426 100644 --- a/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go +++ b/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go @@ -103,12 +103,12 @@ func (m command) run(_ *cli.Context, cfg *options) error { return fmt.Errorf("failed to load container state: %w", err) } - containerRootDir, err := s.GetContainerRoot() + containerRootDirPath, err := s.GetContainerRootDirPath() if err != nil { return fmt.Errorf("failed to determined container root: %w", err) } - containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.hostDriverVersion) + containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDirPath), cfg.hostDriverVersion) if err != nil { return fmt.Errorf("failed to get container forward compat directory: %w", err) } @@ -116,7 +116,7 @@ func (m command) run(_ *cli.Context, cfg *options) error { return nil } - return m.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir) + return m.createLdsoconfdFile(containerRoot(containerRootDirPath), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir) } func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) { diff --git a/cmd/nvidia-cdi-hook/update-ldcache/container-root.go b/cmd/nvidia-cdi-hook/update-ldcache/container-root.go deleted file mode 100644 index 71a494691..000000000 --- a/cmd/nvidia-cdi-hook/update-ldcache/container-root.go +++ /dev/null @@ -1,46 +0,0 @@ -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package ldcache - -import ( - "os" - "path/filepath" - - "github.com/moby/sys/symlink" -) - -// A containerRoot represents the root filesystem of a container. -type containerRoot string - -// hasPath checks whether the specified path exists in the root. -func (r containerRoot) hasPath(path string) bool { - resolved, err := r.resolve(path) - if err != nil { - return false - } - if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) { - return false - } - return true -} - -// resolve returns the absolute path including root path. -// Symlinks are resolved, but are guaranteed to resolve in the root. -func (r containerRoot) resolve(path string) (string, error) { - absolute := filepath.Clean(filepath.Join(string(r), path)) - return symlink.FollowSymlinkInScope(absolute, string(r)) -} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go index e35da8ae0..c39d8cbad 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go @@ -108,8 +108,8 @@ func (m command) run(c *cli.Context, cfg *options) error { return fmt.Errorf("failed to load container state: %v", err) } - containerRootDir, err := s.GetContainerRoot() - if err != nil || containerRootDir == "" || containerRootDir == "/" { + containerRootDirPath, err := s.GetContainerRootDirPath() + if err != nil || containerRootDirPath == "" || containerRootDirPath == "/" { return fmt.Errorf("failed to determined container root: %v", err) } @@ -117,7 +117,7 @@ func (m command) run(c *cli.Context, cfg *options) error { args := []string{ filepath.Base(ldconfigPath), // Run ldconfig in the container root directory on the host. - "-r", containerRootDir, + "-r", string(containerRootDirPath), // Explicitly specify using /etc/ld.so.conf since the host's ldconfig may // be configured to use a different config file by default. // Note that since we apply the `-r {{ .containerRootDir }}` argument, /etc/ld.so.conf is @@ -125,9 +125,7 @@ func (m command) run(c *cli.Context, cfg *options) error { "-f", "/etc/ld.so.conf", } - containerRoot := containerRoot(containerRootDir) - - if containerRoot.hasPath("/etc/ld.so.cache") { + if containerRootDirPath.HasPath("/etc/ld.so.cache") { args = append(args, "-C", "/etc/ld.so.cache") } else { m.logger.Debugf("No ld.so.cache found, skipping update") @@ -135,8 +133,8 @@ func (m command) run(c *cli.Context, cfg *options) error { } folders := cfg.folders.Value() - if containerRoot.hasPath("/etc/ld.so.conf.d") { - err := m.createLdsoconfdFile(containerRoot, ldsoconfdFilenamePattern, folders...) + if containerRootDirPath.HasPath("/etc/ld.so.conf.d") { + err := m.createLdsoconfdFile(containerRootDirPath, ldsoconfdFilenamePattern, folders...) if err != nil { return fmt.Errorf("failed to update ld.so.conf.d: %v", err) } @@ -157,13 +155,13 @@ func (m command) resolveLDConfigPath(path string) string { // createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root. // The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and // contains the specified directories on each line. -func (m command) createLdsoconfdFile(in containerRoot, pattern string, dirs ...string) error { +func (m command) createLdsoconfdFile(in oci.ContainerRoot, pattern string, dirs ...string) error { if len(dirs) == 0 { m.logger.Debugf("No directories to add to /etc/ld.so.conf") return nil } - ldsoconfdDir, err := in.resolve("/etc/ld.so.conf.d") + ldsoconfdDir, err := in.Resolve("/etc/ld.so.conf.d") if err != nil { return err } diff --git a/internal/oci/container-root.go b/internal/oci/container-root.go new file mode 100644 index 000000000..13703ff5f --- /dev/null +++ b/internal/oci/container-root.go @@ -0,0 +1,77 @@ +/** +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package oci + +import ( + "os" + "path/filepath" + + "github.com/moby/sys/symlink" +) + +// A ContainerRoot represents the root directory of a container's filesystem. +type ContainerRoot string + +// GlobFiles matches the specified pattern in the container root. +// The files that match must be regular files. Symlinks and directories are ignored. +func (r ContainerRoot) GlobFiles(pattern string) ([]string, error) { + patternPath, err := r.Resolve(pattern) + if err != nil { + return nil, err + } + matches, err := filepath.Glob(patternPath) + if err != nil { + return nil, err + } + var files []string + for _, match := range matches { + info, err := os.Lstat(match) + if err != nil { + return nil, err + } + // Ignore symlinks. + if info.Mode()&os.ModeSymlink != 0 { + continue + } + // Ignore directories. + if info.IsDir() { + continue + } + files = append(files, match) + } + return files, nil +} + +// HasPath checks whether the specified path exists in the root. +func (r ContainerRoot) HasPath(path string) bool { + resolved, err := r.Resolve(path) + if err != nil { + return false + } + if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) { + return false + } + return true +} + +// Resolve returns the absolute path including root path. +// Symlinks are resolved, but are guaranteed to resolve in the root. +func (r ContainerRoot) Resolve(path string) (string, error) { + absolute := filepath.Clean(filepath.Join(string(r), path)) + return symlink.FollowSymlinkInScope(absolute, string(r)) +} diff --git a/internal/oci/state.go b/internal/oci/state.go index 2bb4e6e53..1e7609597 100644 --- a/internal/oci/state.go +++ b/internal/oci/state.go @@ -72,9 +72,11 @@ func (s *State) LoadSpec() (*specs.Spec, error) { return spec, nil } -// GetContainerRoot returns the root for the container from the associated spec. If the spec is not yet loaded, it is -// loaded and cached. -func (s *State) GetContainerRoot() (string, error) { +// GetContainerRootDirPath returns the root for the container from the associated spec. +// If the spec is not yet loaded, it is loaded and cached. +// The container root directory is the absolute path to the directory containing the root +// of the container filesystem on the host. +func (s *State) GetContainerRootDirPath() (ContainerRoot, error) { spec, err := s.LoadSpec() if err != nil { return "", err @@ -85,9 +87,9 @@ func (s *State) GetContainerRoot() (string, error) { containerRoot = spec.Root.Path } - if filepath.IsAbs(containerRoot) { - return containerRoot, nil + if !filepath.IsAbs(containerRoot) { + containerRoot = filepath.Join(s.Bundle, containerRoot) } - return filepath.Join(s.Bundle, containerRoot), nil + return ContainerRoot(containerRoot), nil } From 5d2f48cd4286055a46f7290d043ba8807ae26bbc Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 28 Feb 2025 14:15:37 +0200 Subject: [PATCH 2/6] Use oci.ContainerRoot from enable-cuda-compat This change updates the enable-cuda-compat implementation to also use oci.ContainerRoot. Signed-off-by: Evan Lezar --- .../cudacompat/container-root.go | 76 ------------------- cmd/nvidia-cdi-hook/cudacompat/cudacompat.go | 34 +++++---- .../cudacompat/cudacompat_test.go | 6 +- internal/oci/container-root.go | 14 ++++ 4 files changed, 38 insertions(+), 92 deletions(-) delete mode 100644 cmd/nvidia-cdi-hook/cudacompat/container-root.go diff --git a/cmd/nvidia-cdi-hook/cudacompat/container-root.go b/cmd/nvidia-cdi-hook/cudacompat/container-root.go deleted file mode 100644 index 8bb3b3c85..000000000 --- a/cmd/nvidia-cdi-hook/cudacompat/container-root.go +++ /dev/null @@ -1,76 +0,0 @@ -/** -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -**/ - -package cudacompat - -import ( - "os" - "path/filepath" - - "github.com/moby/sys/symlink" -) - -// A containerRoot represents the root filesystem of a container. -type containerRoot string - -// hasPath checks whether the specified path exists in the root. -func (r containerRoot) hasPath(path string) bool { - resolved, err := r.resolve(path) - if err != nil { - return false - } - if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) { - return false - } - return true -} - -// globFiles matches the specified pattern in the root. -// The files that match must be regular files. -func (r containerRoot) globFiles(pattern string) ([]string, error) { - patternPath, err := r.resolve(pattern) - if err != nil { - return nil, err - } - matches, err := filepath.Glob(patternPath) - if err != nil { - return nil, err - } - var files []string - for _, match := range matches { - info, err := os.Lstat(match) - if err != nil { - return nil, err - } - // Ignore symlinks. - if info.Mode()&os.ModeSymlink != 0 { - continue - } - // Ignore directories. - if info.IsDir() { - continue - } - files = append(files, match) - } - return files, nil -} - -// resolve returns the absolute path including root path. -// Symlinks are resolved, but are guaranteed to resolve in the root. -func (r containerRoot) resolve(path string) (string, error) { - absolute := filepath.Clean(filepath.Join(string(r), path)) - return symlink.FollowSymlinkInScope(absolute, string(r)) -} diff --git a/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go b/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go index be1bb6426..f149d2bd5 100644 --- a/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go +++ b/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go @@ -108,7 +108,7 @@ func (m command) run(_ *cli.Context, cfg *options) error { return fmt.Errorf("failed to determined container root: %w", err) } - containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDirPath), cfg.hostDriverVersion) + containerForwardCompatDir, err := m.getContainerForwardCompatDirPathInContainer(containerRootDirPath, cfg.hostDriverVersion) if err != nil { return fmt.Errorf("failed to get container forward compat directory: %w", err) } @@ -116,40 +116,44 @@ func (m command) run(_ *cli.Context, cfg *options) error { return nil } - return m.createLdsoconfdFile(containerRoot(containerRootDirPath), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir) + return m.createLdsoconfdFile(containerRootDirPath, cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir) } -func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) { +// getContainerForwardCompatDirPathInContainer returns the path to the directory containing +// the CUDA Forward Compatibility libraries in the container. +func (m command) getContainerForwardCompatDirPathInContainer(containerRootDirPath oci.ContainerRoot, hostDriverVersion string) (string, error) { if hostDriverVersion == "" { m.logger.Debugf("Host driver version not specified") return "", nil } - if !containerRoot.hasPath(cudaCompatPath) { + if !containerRootDirPath.HasPath(cudaCompatPath) { m.logger.Debugf("No CUDA forward compatibility libraries directory in container") return "", nil } - if !containerRoot.hasPath("/etc/ld.so.cache") { + if !containerRootDirPath.HasPath("/etc/ld.so.cache") { m.logger.Debugf("The container does not have an LDCache") return "", nil } - libs, err := containerRoot.globFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*")) + cudaForwardCompatLibPaths, err := containerRootDirPath.GlobFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*")) if err != nil { m.logger.Warningf("Failed to find CUDA compat library: %w", err) return "", nil } - if len(libs) == 0 { + if len(cudaForwardCompatLibPaths) == 0 { m.logger.Debugf("No CUDA forward compatibility libraries container") return "", nil } - if len(libs) != 1 { - m.logger.Warningf("Unexpected number of CUDA compat libraries in container: %v", libs) + if len(cudaForwardCompatLibPaths) != 1 { + m.logger.Warningf("Unexpected number of CUDA compat libraries in container: %v", cudaForwardCompatLibPaths) return "", nil } - compatDriverVersion := strings.TrimPrefix(filepath.Base(libs[0]), "libcuda.so.") + cudaForwardCompatLibPath := cudaForwardCompatLibPaths[0] + + compatDriverVersion := strings.TrimPrefix(filepath.Base(cudaForwardCompatLibPath), "libcuda.so.") compatMajor, err := extractMajorVersion(compatDriverVersion) if err != nil { return "", fmt.Errorf("failed to extract major version from %q: %v", compatDriverVersion, err) @@ -165,20 +169,22 @@ func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostD return "", nil } - resolvedCompatDir := strings.TrimPrefix(filepath.Dir(libs[0]), string(containerRoot)) - return resolvedCompatDir, nil + cudaForwardCompatLibDirPath := filepath.Dir(cudaForwardCompatLibPath) + resolvedCompatDirPathInContainer := containerRootDirPath.ToContainerPath(cudaForwardCompatLibDirPath) + + return resolvedCompatDirPathInContainer, nil } // createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root. // The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and // contains the specified directories on each line. -func (m command) createLdsoconfdFile(in containerRoot, pattern string, dirs ...string) error { +func (m command) createLdsoconfdFile(in oci.ContainerRoot, pattern string, dirs ...string) error { if len(dirs) == 0 { m.logger.Debugf("No directories to add to /etc/ld.so.conf") return nil } - ldsoconfdDir, err := in.resolve("/etc/ld.so.conf.d") + ldsoconfdDir, err := in.Resolve("/etc/ld.so.conf.d") if err != nil { return err } diff --git a/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go b/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go index 0422fe76c..a795a3d52 100644 --- a/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go +++ b/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go @@ -24,6 +24,8 @@ import ( testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" ) func TestCompatLibs(t *testing.T) { @@ -130,7 +132,7 @@ func TestCompatLibs(t *testing.T) { c := command{ logger: logger, } - containerForwardCompatDir, err := c.getContainerForwardCompatDir(containerRoot(containerRootDir), tc.hostDriverVersion) + containerForwardCompatDir, err := c.getContainerForwardCompatDirPathInContainer(oci.ContainerRoot(containerRootDir), tc.hostDriverVersion) require.NoError(t, err) require.EqualValues(t, tc.expectedContainerForwardCompatDir, containerForwardCompatDir) }) @@ -160,7 +162,7 @@ func TestUpdateLdconfig(t *testing.T) { c := command{ logger: logger, } - err := c.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, tc.folders...) + err := c.createLdsoconfdFile(oci.ContainerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, tc.folders...) require.NoError(t, err) matches, err := filepath.Glob(filepath.Join(containerRootDir, "/etc/ld.so.conf.d/00-compat-*.conf")) diff --git a/internal/oci/container-root.go b/internal/oci/container-root.go index 13703ff5f..2613fab57 100644 --- a/internal/oci/container-root.go +++ b/internal/oci/container-root.go @@ -20,6 +20,7 @@ package oci import ( "os" "path/filepath" + "strings" "github.com/moby/sys/symlink" ) @@ -75,3 +76,16 @@ func (r ContainerRoot) Resolve(path string) (string, error) { absolute := filepath.Clean(filepath.Join(string(r), path)) return symlink.FollowSymlinkInScope(absolute, string(r)) } + +// ToContainerPath converts the specified path to a path in the container. +// Relative paths and absolute paths that are in the container root are returned as is. +func (r ContainerRoot) ToContainerPath(path string) string { + if !filepath.IsAbs(path) { + return path + } + if !strings.HasPrefix(path, string(r)) { + return path + } + + return strings.TrimPrefix(path, string(r)) +} From 715e09fbd4c7ec11e559e3671cd6cf7e7cf4234f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 28 Feb 2025 14:29:52 +0200 Subject: [PATCH 3/6] Move CreateLdsoconfdFile to ContainerRoot Since the creation of a .conf file in /etc/ld.so.conf.d is shared by both the update-ldcache and enable-cuda-compat hooks, this is moved to the ContainerRoot type. Signed-off-by: Evan Lezar --- cmd/nvidia-cdi-hook/cudacompat/cudacompat.go | 48 +------------- .../cudacompat/cudacompat_test.go | 44 ------------- .../update-ldcache/update-ldcache.go | 48 +------------- internal/oci/container-root.go | 43 ++++++++++++ internal/oci/container-root_test.go | 66 +++++++++++++++++++ 5 files changed, 111 insertions(+), 138 deletions(-) create mode 100644 internal/oci/container-root_test.go diff --git a/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go b/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go index f149d2bd5..bc5cad4ec 100644 --- a/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go +++ b/cmd/nvidia-cdi-hook/cudacompat/cudacompat.go @@ -18,7 +18,6 @@ package cudacompat import ( "fmt" - "os" "path/filepath" "strconv" "strings" @@ -116,7 +115,7 @@ func (m command) run(_ *cli.Context, cfg *options) error { return nil } - return m.createLdsoconfdFile(containerRootDirPath, cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir) + return containerRootDirPath.CreateLdsoconfdFile(cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir) } // getContainerForwardCompatDirPathInContainer returns the path to the directory containing @@ -175,51 +174,6 @@ func (m command) getContainerForwardCompatDirPathInContainer(containerRootDirPat return resolvedCompatDirPathInContainer, nil } -// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root. -// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and -// contains the specified directories on each line. -func (m command) createLdsoconfdFile(in oci.ContainerRoot, pattern string, dirs ...string) error { - if len(dirs) == 0 { - m.logger.Debugf("No directories to add to /etc/ld.so.conf") - return nil - } - - ldsoconfdDir, err := in.Resolve("/etc/ld.so.conf.d") - if err != nil { - return err - } - if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { - return fmt.Errorf("failed to create ld.so.conf.d: %w", err) - } - - configFile, err := os.CreateTemp(ldsoconfdDir, pattern) - if err != nil { - return fmt.Errorf("failed to create config file: %w", err) - } - defer configFile.Close() - - m.logger.Debugf("Adding directories %v to %v", dirs, configFile.Name()) - - added := make(map[string]bool) - for _, dir := range dirs { - if added[dir] { - continue - } - _, err = configFile.WriteString(fmt.Sprintf("%s\n", dir)) - if err != nil { - return fmt.Errorf("failed to update config file: %w", err) - } - added[dir] = true - } - - // The created file needs to be world readable for the cases where the container is run as a non-root user. - if err := configFile.Chmod(0644); err != nil { - return fmt.Errorf("failed to chmod config file: %w", err) - } - - return nil -} - // extractMajorVersion parses a version string and returns the major version as an int. func extractMajorVersion(version string) (int, error) { majorString := strings.SplitN(version, ".", 2)[0] diff --git a/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go b/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go index a795a3d52..1916ac730 100644 --- a/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go +++ b/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go @@ -138,47 +138,3 @@ func TestCompatLibs(t *testing.T) { }) } } - -func TestUpdateLdconfig(t *testing.T) { - logger, _ := testlog.NewNullLogger() - testCases := []struct { - description string - folders []string - expectedContents string - }{ - { - description: "no folders; have no contents", - }, - { - description: "single folder is added", - folders: []string{"/usr/local/cuda/compat"}, - expectedContents: "/usr/local/cuda/compat\n", - }, - } - - for _, tc := range testCases { - t.Run(tc.description, func(t *testing.T) { - containerRootDir := t.TempDir() - c := command{ - logger: logger, - } - err := c.createLdsoconfdFile(oci.ContainerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, tc.folders...) - require.NoError(t, err) - - matches, err := filepath.Glob(filepath.Join(containerRootDir, "/etc/ld.so.conf.d/00-compat-*.conf")) - require.NoError(t, err) - - if tc.expectedContents == "" { - require.Empty(t, matches) - return - } - - require.Len(t, matches, 1) - contents, err := os.ReadFile(matches[0]) - require.NoError(t, err) - - require.EqualValues(t, tc.expectedContents, string(contents)) - }) - } - -} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go index c39d8cbad..df0bd6c62 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go @@ -19,7 +19,6 @@ package ldcache import ( "errors" "fmt" - "os" "path/filepath" "strings" @@ -134,7 +133,7 @@ func (m command) run(c *cli.Context, cfg *options) error { folders := cfg.folders.Value() if containerRootDirPath.HasPath("/etc/ld.so.conf.d") { - err := m.createLdsoconfdFile(containerRootDirPath, ldsoconfdFilenamePattern, folders...) + err := containerRootDirPath.CreateLdsoconfdFile(ldsoconfdFilenamePattern, folders...) if err != nil { return fmt.Errorf("failed to update ld.so.conf.d: %v", err) } @@ -151,48 +150,3 @@ func (m command) run(c *cli.Context, cfg *options) error { func (m command) resolveLDConfigPath(path string) string { return strings.TrimPrefix(config.NormalizeLDConfigPath("@"+path), "@") } - -// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root. -// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and -// contains the specified directories on each line. -func (m command) createLdsoconfdFile(in oci.ContainerRoot, pattern string, dirs ...string) error { - if len(dirs) == 0 { - m.logger.Debugf("No directories to add to /etc/ld.so.conf") - return nil - } - - ldsoconfdDir, err := in.Resolve("/etc/ld.so.conf.d") - if err != nil { - return err - } - if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { - return fmt.Errorf("failed to create ld.so.conf.d: %w", err) - } - - configFile, err := os.CreateTemp(ldsoconfdDir, pattern) - if err != nil { - return fmt.Errorf("failed to create config file: %w", err) - } - defer configFile.Close() - - m.logger.Debugf("Adding directories %v to %v", dirs, configFile.Name()) - - added := make(map[string]bool) - for _, dir := range dirs { - if added[dir] { - continue - } - _, err = configFile.WriteString(fmt.Sprintf("%s\n", dir)) - if err != nil { - return fmt.Errorf("failed to update config file: %w", err) - } - added[dir] = true - } - - // The created file needs to be world readable for the cases where the container is run as a non-root user. - if err := configFile.Chmod(0644); err != nil { - return fmt.Errorf("failed to chmod config file: %w", err) - } - - return nil -} diff --git a/internal/oci/container-root.go b/internal/oci/container-root.go index 2613fab57..ef2f9db7a 100644 --- a/internal/oci/container-root.go +++ b/internal/oci/container-root.go @@ -18,6 +18,7 @@ package oci import ( + "fmt" "os" "path/filepath" "strings" @@ -28,6 +29,48 @@ import ( // A ContainerRoot represents the root directory of a container's filesystem. type ContainerRoot string +// CreateLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified container root. +// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and +// contains the specified directories on each line. +func (r ContainerRoot) CreateLdsoconfdFile(pattern string, dirs ...string) error { + if len(dirs) == 0 { + return nil + } + + ldsoconfdDir, err := r.Resolve("/etc/ld.so.conf.d") + if err != nil { + return err + } + if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { + return fmt.Errorf("failed to create ld.so.conf.d: %w", err) + } + + configFile, err := os.CreateTemp(ldsoconfdDir, pattern) + if err != nil { + return fmt.Errorf("failed to create config file: %w", err) + } + defer configFile.Close() + + added := make(map[string]bool) + for _, dir := range dirs { + if added[dir] { + continue + } + _, err = configFile.WriteString(fmt.Sprintf("%s\n", dir)) + if err != nil { + return fmt.Errorf("failed to update config file: %w", err) + } + added[dir] = true + } + + // The created file needs to be world readable for the cases where the container is run as a non-root user. + if err := configFile.Chmod(0644); err != nil { + return fmt.Errorf("failed to chmod config file: %w", err) + } + + return nil +} + // GlobFiles matches the specified pattern in the container root. // The files that match must be regular files. Symlinks and directories are ignored. func (r ContainerRoot) GlobFiles(pattern string) ([]string, error) { diff --git a/internal/oci/container-root_test.go b/internal/oci/container-root_test.go new file mode 100644 index 000000000..93e5453b9 --- /dev/null +++ b/internal/oci/container-root_test.go @@ -0,0 +1,66 @@ +/** +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package oci + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUpdateLdconfig(t *testing.T) { + testCases := []struct { + description string + folders []string + expectedContents string + }{ + { + description: "no folders; have no contents", + }, + { + description: "single folder is added", + folders: []string{"/usr/local/cuda/compat"}, + expectedContents: "/usr/local/cuda/compat\n", + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + containerRootDir := t.TempDir() + c := ContainerRoot(containerRootDir) + err := c.CreateLdsoconfdFile("00-test-*.conf", tc.folders...) + require.NoError(t, err) + + matches, err := filepath.Glob(filepath.Join(containerRootDir, "/etc/ld.so.conf.d/00-test-*.conf")) + require.NoError(t, err) + + if tc.expectedContents == "" { + require.Empty(t, matches) + return + } + + require.Len(t, matches, 1) + contents, err := os.ReadFile(matches[0]) + require.NoError(t, err) + + require.EqualValues(t, tc.expectedContents, string(contents)) + }) + } + +} From 4088430fc1bf1bdab1d96e0167c2edf13e919f7c Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 28 Feb 2025 14:38:02 +0200 Subject: [PATCH 4/6] Move SafeExec logic to internal safeexec package Signed-off-by: Evan Lezar --- .../update-ldcache/update-ldcache.go | 5 ++- internal/safe-exec/safe-exec.go | 37 +++++++++++++++++++ .../safe-exec}/safe-exec_linux.go | 8 ++-- .../safe-exec}/safe-exec_other.go | 7 ++-- 4 files changed, 49 insertions(+), 8 deletions(-) create mode 100644 internal/safe-exec/safe-exec.go rename {cmd/nvidia-cdi-hook/update-ldcache => internal/safe-exec}/safe-exec_linux.go (86%) rename {cmd/nvidia-cdi-hook/update-ldcache => internal/safe-exec}/safe-exec_other.go (75%) diff --git a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go index df0bd6c62..5d152c1a3 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go @@ -27,6 +27,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" + safeexec "github.com/NVIDIA/nvidia-container-toolkit/internal/safe-exec" ) const ( @@ -39,6 +40,7 @@ const ( ) type command struct { + safeexec.Execer logger logger.Interface } @@ -52,6 +54,7 @@ type options struct { func NewCommand(logger logger.Interface) *cli.Command { c := command{ logger: logger, + Execer: safeexec.New(logger), } return c.build() } @@ -141,7 +144,7 @@ func (m command) run(c *cli.Context, cfg *options) error { args = append(args, folders...) } - return m.SafeExec(ldconfigPath, args, nil) + return m.Exec(ldconfigPath, args, nil) } // resolveLDConfigPath determines the LDConfig path to use for the system. diff --git a/internal/safe-exec/safe-exec.go b/internal/safe-exec/safe-exec.go new file mode 100644 index 000000000..8c2f68cff --- /dev/null +++ b/internal/safe-exec/safe-exec.go @@ -0,0 +1,37 @@ +/** +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package safeexec + +import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + +// An Execer implements an Exec function. +type Execer interface { + Exec(string, []string, []string) error +} + +// A safeExecer is used to Exec an application from a memfd to prevent possible +// tampering. +type safeExecer struct { + logger logger.Interface +} + +// New creates a safe Execer with the specified logger. +func New(logger logger.Interface) Execer { + return &safeExecer{ + logger: logger, + } +} diff --git a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go b/internal/safe-exec/safe-exec_linux.go similarity index 86% rename from cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go rename to internal/safe-exec/safe-exec_linux.go index c1c655b49..9f56216e8 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_linux.go +++ b/internal/safe-exec/safe-exec_linux.go @@ -14,7 +14,7 @@ # limitations under the License. **/ -package ldcache +package safeexec import ( "fmt" @@ -25,11 +25,11 @@ import ( "github.com/opencontainers/runc/libcontainer/dmz" ) -// SafeExec attempts to clone the specified binary (as an memfd, for example) before executing it. -func (m command) SafeExec(path string, args []string, envv []string) error { +// Exec attempts to clone the specified binary (as an memfd, for example) before executing it. +func (s *safeExecer) Exec(path string, args []string, envv []string) error { safeExe, err := cloneBinary(path) if err != nil { - m.logger.Warningf("Failed to clone binary %q: %v; falling back to Exec", path, err) + s.logger.Warningf("Failed to clone binary %q: %v; falling back to Exec", path, err) //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection return syscall.Exec(path, args, envv) } diff --git a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go b/internal/safe-exec/safe-exec_other.go similarity index 75% rename from cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go rename to internal/safe-exec/safe-exec_other.go index dff11dd39..82e1f7418 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/safe-exec_other.go +++ b/internal/safe-exec/safe-exec_other.go @@ -17,13 +17,14 @@ # limitations under the License. **/ -package ldcache +package safeexec import "syscall" -// SafeExec is not implemented on non-linux systems and forwards directly to the +// Exec is not implemented on non-linux systems and forwards directly to the // Exec syscall. -func (m *command) SafeExec(path string, args []string, envv []string) error { +func (s *safeExecer) Exec(path string, args []string, envv []string) error { + s.logger.Warningf("Cloning binary not implemented for binary %q; falling back to Exec", path) //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection return syscall.Exec(path, args, envv) } From 15645e6cd53626a8f32bb8a5e0f9540292f51ee8 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 28 Feb 2025 14:44:22 +0200 Subject: [PATCH 5/6] Move resolution of host LDConfig to config package Signed-off-by: Evan Lezar --- cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go | 10 +--------- internal/config/cli.go | 7 +++++++ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go index 5d152c1a3..65dbc2700 100644 --- a/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go +++ b/cmd/nvidia-cdi-hook/update-ldcache/update-ldcache.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "path/filepath" - "strings" "github.com/urfave/cli/v2" @@ -115,7 +114,7 @@ func (m command) run(c *cli.Context, cfg *options) error { return fmt.Errorf("failed to determined container root: %v", err) } - ldconfigPath := m.resolveLDConfigPath(cfg.ldconfigPath) + ldconfigPath := config.ResolveLDConfigPathOnHost(cfg.ldconfigPath) args := []string{ filepath.Base(ldconfigPath), // Run ldconfig in the container root directory on the host. @@ -146,10 +145,3 @@ func (m command) run(c *cli.Context, cfg *options) error { return m.Exec(ldconfigPath, args, nil) } - -// resolveLDConfigPath determines the LDConfig path to use for the system. -// On systems such as Ubuntu where `/sbin/ldconfig` is a wrapper around -// /sbin/ldconfig.real, the latter is returned. -func (m command) resolveLDConfigPath(path string) string { - return strings.TrimPrefix(config.NormalizeLDConfigPath("@"+path), "@") -} diff --git a/internal/config/cli.go b/internal/config/cli.go index 3621df25a..6fb4cc665 100644 --- a/internal/config/cli.go +++ b/internal/config/cli.go @@ -94,3 +94,10 @@ func (p ldconfigPath) normalize() ldconfigPath { func NormalizeLDConfigPath(path string) string { return string(ldconfigPath(path).normalize()) } + +// ResolveLDConfigPathOnHost determines the LDConfig path to use for the system. +// The ldconfig path is normalized (i.e. /sbin/ldconfig or /sbin/ldconfig.real is used as required) +// and is guaranteed to resolve on the host. +func ResolveLDConfigPathOnHost(path string) string { + return strings.TrimPrefix(NormalizeLDConfigPath("@"+path), "@") +} From bdfaea4e9e395d79cbb63ccb50820dbf572d77e3 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 27 Feb 2025 14:38:37 +0200 Subject: [PATCH 6/6] Add create-soname-symlinks hook This change adds a create-soname-symlinks hook that can be used to ensure that the soname symlinks for injected libraries exist in a container. This is done by calling ldconfig -n -N for the folders containing the injected libraries. This also ensures that libcuda.so is present in the ldcache when the update-ldcache hook is run. Signed-off-by: Evan Lezar --- cmd/nvidia-cdi-hook/commands/commands.go | 2 + .../create-soname-symlinks/soname-symlinks.go | 132 ++++++++++++++++++ .../container/toolkit/toolkit_test.go | 7 + cmd/nvidia-ctk/cdi/generate/generate_test.go | 7 + internal/discover/ldconfig.go | 27 ++-- internal/discover/ldconfig_test.go | 66 +++++++-- tests/e2e/nvidia-container-toolkit_test.go | 22 +++ 7 files changed, 238 insertions(+), 25 deletions(-) create mode 100644 cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go diff --git a/cmd/nvidia-cdi-hook/commands/commands.go b/cmd/nvidia-cdi-hook/commands/commands.go index 3f80ba9be..7ad34ca9f 100644 --- a/cmd/nvidia-cdi-hook/commands/commands.go +++ b/cmd/nvidia-cdi-hook/commands/commands.go @@ -20,6 +20,7 @@ import ( "github.com/urfave/cli/v2" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod" + soname "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-soname-symlinks" symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat" ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache" @@ -34,5 +35,6 @@ func New(logger logger.Interface) []*cli.Command { symlinks.NewCommand(logger), chmod.NewCommand(logger), cudacompat.NewCommand(logger), + soname.NewCommand(logger), } } diff --git a/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go new file mode 100644 index 000000000..bd44b6601 --- /dev/null +++ b/cmd/nvidia-cdi-hook/create-soname-symlinks/soname-symlinks.go @@ -0,0 +1,132 @@ +/** +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package soname + +import ( + "errors" + "fmt" + "path/filepath" + + "github.com/urfave/cli/v2" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" + safeexec "github.com/NVIDIA/nvidia-container-toolkit/internal/safe-exec" +) + +type command struct { + logger logger.Interface + safeexec.Execer +} + +type options struct { + folders cli.StringSlice + ldconfigPath string + containerSpec string +} + +// NewCommand constructs an create-soname-symlinks command with the specified logger +func NewCommand(logger logger.Interface) *cli.Command { + c := command{ + logger: logger, + Execer: safeexec.New(logger), + } + return c.build() +} + +// build the create-soname-symlinks command +func (m command) build() *cli.Command { + cfg := options{} + + // Create the 'create-soname-symlinks' command + c := cli.Command{ + Name: "create-soname-symlinks", + Usage: "Create soname symlinks for the specified folders using ldconfig -n -N", + Before: func(c *cli.Context) error { + return m.validateFlags(c, &cfg) + }, + Action: func(c *cli.Context) error { + return m.run(c, &cfg) + }, + } + + c.Flags = []cli.Flag{ + &cli.StringSliceFlag{ + Name: "folder", + Usage: "Specify a folder to search for shared libraries for which soname symlinks need to be created", + Destination: &cfg.folders, + }, + &cli.StringFlag{ + Name: "ldconfig-path", + Usage: "Specify the path to the ldconfig program", + Destination: &cfg.ldconfigPath, + Value: "/sbin/ldconfig", + }, + &cli.StringFlag{ + Name: "container-spec", + Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN", + Destination: &cfg.containerSpec, + }, + } + + return &c +} + +func (m command) validateFlags(c *cli.Context, cfg *options) error { + if cfg.ldconfigPath == "" { + return errors.New("ldconfig-path must be specified") + } + return nil +} + +func (m command) run(c *cli.Context, cfg *options) error { + s, err := oci.LoadContainerState(cfg.containerSpec) + if err != nil { + return fmt.Errorf("failed to load container state: %v", err) + } + + containerRoot, err := s.GetContainerRootDirPath() + if err != nil { + return fmt.Errorf("failed to determined container root: %v", err) + } + if containerRoot == "" { + m.logger.Warningf("No container root detected") + return nil + } + + dirs := cfg.folders.Value() + if len(dirs) == 0 { + return nil + } + + ldconfigPath := config.ResolveLDConfigPathOnHost(cfg.ldconfigPath) + args := []string{filepath.Base(ldconfigPath)} + + args = append(args, + // Specify the containerRoot to use. + "-r", string(containerRoot), + // Specify -n to only process the specified folders. + "-n", + // Explicitly disable updating the LDCache. + "-N", + ) + // Explicitly specific the directories to add. + args = append(args, dirs...) + + return m.Exec(ldconfigPath, args, nil) +} diff --git a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go index 4aad36c52..707053958 100644 --- a/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/container/toolkit/toolkit_test.go @@ -95,6 +95,13 @@ containerEdits: - create-symlinks - --link - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so + - hookName: createContainer + path: {{ .toolkitRoot }}/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu - hookName: createContainer path: {{ .toolkitRoot }}/nvidia-cdi-hook args: diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index c74cff23e..eb69ede65 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -97,6 +97,13 @@ containerEdits: - nvidia-cdi-hook - enable-cuda-compat - --host-driver-version=999.88.77 + - hookName: createContainer + path: /usr/bin/nvidia-cdi-hook + args: + - nvidia-cdi-hook + - create-soname-symlinks + - --folder + - /lib/x86_64-linux-gnu - hookName: createContainer path: /usr/bin/nvidia-cdi-hook args: diff --git a/internal/discover/ldconfig.go b/internal/discover/ldconfig.go index b81b9be59..db0526ecd 100644 --- a/internal/discover/ldconfig.go +++ b/internal/discover/ldconfig.go @@ -50,16 +50,16 @@ func (d ldconfig) Hooks() ([]Hook, error) { if err != nil { return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) } - h := CreateLDCacheUpdateHook( + hooks := CreateLDCacheUpdateHooks( d.nvidiaCDIHookPath, d.ldconfigPath, getLibraryPaths(mounts), ) - return []Hook{h}, nil + return hooks, nil } -// CreateLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache -func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []string) Hook { +// CreateLDCacheUpdateHooks locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache +func CreateLDCacheUpdateHooks(executable string, ldconfig string, libraries []string) []Hook { var args []string if ldconfig != "" { @@ -70,13 +70,20 @@ func CreateLDCacheUpdateHook(executable string, ldconfig string, libraries []str args = append(args, "--folder", f) } - hook := CreateNvidiaCDIHook( - executable, - "update-ldcache", - args..., - ) + hooks := []Hook{ + CreateNvidiaCDIHook( + executable, + "create-soname-symlinks", + args..., + ), + CreateNvidiaCDIHook( + executable, + "update-ldcache", + args..., + ), + } - return hook + return hooks } // getLibraryPaths extracts the library dirs from the specified mounts diff --git a/internal/discover/ldconfig_test.go b/internal/discover/ldconfig_test.go index 0b214c77b..2c2da46c4 100644 --- a/internal/discover/ldconfig_test.go +++ b/internal/discover/ldconfig_test.go @@ -38,11 +38,22 @@ func TestLDCacheUpdateHook(t *testing.T) { mounts []Mount mountError error expectedError error - expectedArgs []string + expectedHooks []Hook }{ { - description: "empty mounts", - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache"}, + description: "empty mounts", + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache"}, + }, + }, }, { description: "mount error", @@ -65,7 +76,18 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: "/usr/local/lib/libbar.so", }, }, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"}, + }, + }, }, { description: "host paths are ignored", @@ -75,12 +97,34 @@ func TestLDCacheUpdateHook(t *testing.T) { Path: "/usr/local/lib/libfoo.so", }, }, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib"}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"}, + }, + }, }, { description: "explicit ldconfig path is passed", ldconfigPath: testLdconfigPath, - expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + expectedHooks: []Hook{ + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--ldconfig-path", testLdconfigPath}, + }, + { + Lifecycle: "createContainer", + Path: testNvidiaCDIHookPath, + Args: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath}, + }, + }, }, } @@ -91,12 +135,6 @@ func TestLDCacheUpdateHook(t *testing.T) { return tc.mounts, tc.mountError }, } - expectedHook := Hook{ - Path: testNvidiaCDIHookPath, - Args: tc.expectedArgs, - Lifecycle: "createContainer", - } - d, err := NewLDCacheUpdateHook(logger, mountMock, testNvidiaCDIHookPath, tc.ldconfigPath) require.NoError(t, err) @@ -110,9 +148,7 @@ func TestLDCacheUpdateHook(t *testing.T) { } require.NoError(t, err) - require.Len(t, hooks, 1) - - require.EqualValues(t, hooks[0], expectedHook) + require.EqualValues(t, tc.expectedHooks, hooks) devices, err := d.Devices() require.NoError(t, err) diff --git a/tests/e2e/nvidia-container-toolkit_test.go b/tests/e2e/nvidia-container-toolkit_test.go index 5948014b6..cde77be7e 100644 --- a/tests/e2e/nvidia-container-toolkit_test.go +++ b/tests/e2e/nvidia-container-toolkit_test.go @@ -215,4 +215,26 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() { Expect(ldconfigOut).To(ContainSubstring("/usr/lib64")) }) }) + + When("A container is run using CDI", Ordered, func() { + BeforeAll(func(ctx context.Context) { + _, _, err := r.Run("docker pull ubuntu") + Expect(err).ToNot(HaveOccurred()) + }) + + It("should include libcuda.so in the ldcache", func(ctx context.Context) { + ldcacheOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu bash -c \"ldconfig -p | grep 'libcuda.so'\"") + Expect(err).ToNot(HaveOccurred()) + Expect(ldcacheOutput).ToNot(BeEmpty()) + + ldcacheLines := strings.Split(ldcacheOutput, "\n") + var libs []string + for _, line := range ldcacheLines { + parts := strings.SplitN(line, " (", 2) + libs = append(libs, strings.TrimSpace(parts[0])) + } + + Expect(libs).To(ContainElements([]string{"libcuda.so", "libcuda.so.1"})) + }) + }) })