-
Notifications
You must be signed in to change notification settings - Fork 320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Ensure that libcuda.so is in the ldcache #947
base: main
Are you sure you want to change the base?
Changes from all commits
597359e
d54a7f5
e91a729
8124ab0
ff49044
035abe0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
/** | ||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
**/ | ||
|
||
package soname | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"path/filepath" | ||
|
||
"github.com/urfave/cli/v2" | ||
|
||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/utils" | ||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger" | ||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci" | ||
) | ||
|
||
type command struct { | ||
logger logger.Interface | ||
utils.SafeExecer | ||
} | ||
|
||
type options struct { | ||
folders cli.StringSlice | ||
ldconfigPath string | ||
containerSpec string | ||
} | ||
|
||
// NewCommand constructs an create-soname-symlinks command with the specified logger | ||
func NewCommand(logger logger.Interface) *cli.Command { | ||
c := command{ | ||
logger: logger, | ||
SafeExecer: utils.NewSafeExecer(logger), | ||
} | ||
return c.build() | ||
} | ||
|
||
// build the create-soname-symlinks command | ||
func (m command) build() *cli.Command { | ||
cfg := options{} | ||
|
||
// Create the 'create-soname-symlinks' command | ||
c := cli.Command{ | ||
Name: "create-soname-symlinks", | ||
Usage: "Create soname symlinks for the specified folders using ldconfig -n -N", | ||
Before: func(c *cli.Context) error { | ||
return m.validateFlags(c, &cfg) | ||
}, | ||
Action: func(c *cli.Context) error { | ||
return m.run(c, &cfg) | ||
}, | ||
} | ||
|
||
c.Flags = []cli.Flag{ | ||
&cli.StringSliceFlag{ | ||
Name: "folder", | ||
Usage: "Specify a folder to search for shared libraries for which soname symlinks need to be created", | ||
Destination: &cfg.folders, | ||
}, | ||
&cli.StringFlag{ | ||
Name: "ldconfig-path", | ||
Usage: "Specify the path to the ldconfig program", | ||
Destination: &cfg.ldconfigPath, | ||
Value: "/sbin/ldconfig", | ||
}, | ||
&cli.StringFlag{ | ||
Name: "container-spec", | ||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN", | ||
Destination: &cfg.containerSpec, | ||
}, | ||
} | ||
|
||
return &c | ||
} | ||
|
||
func (m command) validateFlags(c *cli.Context, cfg *options) error { | ||
if cfg.ldconfigPath == "" { | ||
return errors.New("ldconfig-path must be specified") | ||
} | ||
return nil | ||
} | ||
|
||
func (m command) run(c *cli.Context, cfg *options) error { | ||
s, err := oci.LoadContainerState(cfg.containerSpec) | ||
if err != nil { | ||
return fmt.Errorf("failed to load container state: %v", err) | ||
} | ||
|
||
containerRoot, err := s.GetContainerRoot() | ||
if err != nil { | ||
return fmt.Errorf("failed to determined container root: %v", err) | ||
} | ||
if containerRoot == "" { | ||
m.logger.Warningf("No container root detected") | ||
return nil | ||
} | ||
|
||
dirs := cfg.folders.Value() | ||
if len(dirs) == 0 { | ||
return nil | ||
} | ||
|
||
ldconfigPath := utils.ResolveHostLDConfigPath(cfg.ldconfigPath) | ||
args := []string{filepath.Base(ldconfigPath)} | ||
|
||
args = append(args, | ||
// Specify the containerRoot to use. | ||
"-r", containerRoot, | ||
// Specify -n to only process the specified folders. | ||
"-n", | ||
// Explicitly disable updating the LDCache. | ||
"-N", | ||
) | ||
// Explicitly specific the directories to add. | ||
args = append(args, dirs...) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question -- does this create There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not the |
||
|
||
return m.Exec(ldconfigPath, args, nil) | ||
} |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,13 +18,13 @@ package cudacompat | |
|
||
import ( | ||
"fmt" | ||
"os" | ||
"path/filepath" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/urfave/cli/v2" | ||
|
||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/utils" | ||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger" | ||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci" | ||
) | ||
|
@@ -107,33 +107,34 @@ func (m command) run(_ *cli.Context, cfg *options) error { | |
if err != nil { | ||
return fmt.Errorf("failed to determined container root: %w", err) | ||
} | ||
containerRoot := utils.ContainerRoot(containerRootDir) | ||
|
||
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot(containerRootDir), cfg.hostDriverVersion) | ||
containerForwardCompatDir, err := m.getContainerForwardCompatDir(containerRoot, cfg.hostDriverVersion) | ||
if err != nil { | ||
return fmt.Errorf("failed to get container forward compat directory: %w", err) | ||
} | ||
if containerForwardCompatDir == "" { | ||
return nil | ||
} | ||
|
||
return m.createLdsoconfdFile(containerRoot(containerRootDir), cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir) | ||
return containerRoot.CreateLdsoconfdFile(cudaCompatLdsoconfdFilenamePattern, containerForwardCompatDir) | ||
} | ||
|
||
func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostDriverVersion string) (string, error) { | ||
func (m command) getContainerForwardCompatDir(containerRoot utils.ContainerRoot, hostDriverVersion string) (string, error) { | ||
if hostDriverVersion == "" { | ||
m.logger.Debugf("Host driver version not specified") | ||
return "", nil | ||
} | ||
if !containerRoot.hasPath(cudaCompatPath) { | ||
if !containerRoot.HasPath(cudaCompatPath) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have picked a more or less arbitrary place in the code to put the following comment. Having more expressive variable names would make review for me easier. You can postpone looking at this comment until after merge, that's fine with me! In the spirit of learning what we do here however I'd appreciate an answer at some point :) In my software engineering life I have always deeply cared about file system terminology and operations, and about making related code readable and self-expressive. For example,
What is
What are some properties about
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for calling this out. I think we could do a pass through this and improve readability greatly. There is no rush on this particular PR (I've elected to hold the changes back from the v1.17.5 release) and as such, I think we can address these concerns here instead of as a follow-up. The The |
||
m.logger.Debugf("No CUDA forward compatibility libraries directory in container") | ||
return "", nil | ||
} | ||
if !containerRoot.hasPath("/etc/ld.so.cache") { | ||
if !containerRoot.HasPath("/etc/ld.so.cache") { | ||
m.logger.Debugf("The container does not have an LDCache") | ||
return "", nil | ||
} | ||
|
||
libs, err := containerRoot.globFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*")) | ||
libs, err := containerRoot.GlobFiles(filepath.Join(cudaCompatPath, "libcuda.so.*.*")) | ||
if err != nil { | ||
m.logger.Warningf("Failed to find CUDA compat library: %w", err) | ||
return "", nil | ||
|
@@ -169,51 +170,6 @@ func (m command) getContainerForwardCompatDir(containerRoot containerRoot, hostD | |
return resolvedCompatDir, nil | ||
} | ||
|
||
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/ in the specified root. | ||
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and | ||
// contains the specified directories on each line. | ||
func (m command) createLdsoconfdFile(in containerRoot, pattern string, dirs ...string) error { | ||
if len(dirs) == 0 { | ||
m.logger.Debugf("No directories to add to /etc/ld.so.conf") | ||
return nil | ||
} | ||
|
||
ldsoconfdDir, err := in.resolve("/etc/ld.so.conf.d") | ||
if err != nil { | ||
return err | ||
} | ||
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil { | ||
return fmt.Errorf("failed to create ld.so.conf.d: %w", err) | ||
} | ||
|
||
configFile, err := os.CreateTemp(ldsoconfdDir, pattern) | ||
if err != nil { | ||
return fmt.Errorf("failed to create config file: %w", err) | ||
} | ||
defer configFile.Close() | ||
|
||
m.logger.Debugf("Adding directories %v to %v", dirs, configFile.Name()) | ||
|
||
added := make(map[string]bool) | ||
for _, dir := range dirs { | ||
if added[dir] { | ||
continue | ||
} | ||
_, err = configFile.WriteString(fmt.Sprintf("%s\n", dir)) | ||
if err != nil { | ||
return fmt.Errorf("failed to update config file: %w", err) | ||
} | ||
added[dir] = true | ||
} | ||
|
||
// The created file needs to be world readable for the cases where the container is run as a non-root user. | ||
if err := configFile.Chmod(0644); err != nil { | ||
return fmt.Errorf("failed to chmod config file: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// extractMajorVersion parses a version string and returns the major version as an int. | ||
func extractMajorVersion(version string) (int, error) { | ||
majorString := strings.SplitN(version, ".", 2)[0] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@klueska I elected to add a new hook entirely instead of modifying the existing
update-ldcache
. This is in keeping with "purpose-built hooks" and also means that the hook name can be used to indicate the intent.