Skip to content

Commit 78d6cdc

Browse files
Merge pull request #876 from ArangoGutierrez/reg_test02
Add remote-test option for E2E
2 parents df4c87b + 6164059 commit 78d6cdc

File tree

111 files changed

+17890
-43
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+17890
-43
lines changed

tests/e2e/Makefile

+20-1
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,28 @@ include $(CURDIR)/versions.mk
1818

1919
E2E_RUNTIME ?= docker
2020

21+
E2E_INSTALL_CTK ?= false
22+
23+
# Default the target distribution when DIST has no value.
# The condition must test DIST itself: the previous $($(DIST)) expanded the
# variable *named by* DIST's value (e.g. $(ubuntu20.04)), which is normally
# undefined, so the check only ever passed by accident.
ifeq ($(DIST),)
DIST ?= ubuntu20.04
endif
26+
IMAGE_TAG ?= $(VERSION)-$(DIST)
27+
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
28+
29+
E2E_SSH_KEY ?=
30+
E2E_SSH_USER ?=
31+
E2E_SSH_HOST ?=
32+
E2E_SSH_PORT ?= 22
33+
2134
.PHONY: test
2235
test:
2336
cd $(CURDIR)/tests/e2e && $(GO_CMD) test -v . -args \
2437
-ginkgo.focus="$(E2E_RUNTIME)" \
2538
-test.timeout=1h \
26-
-ginkgo.v
39+
-ginkgo.v \
40+
-install-ctk=$(E2E_INSTALL_CTK) \
41+
-toolkit-image=$(IMAGE) \
42+
-ssh-key=$(E2E_SSH_KEY) \
43+
-ssh-user=$(E2E_SSH_USER) \
44+
-remote-host=$(E2E_SSH_HOST) \
45+
-remote-port=$(E2E_SSH_PORT)

tests/e2e/e2e_test.go

+19-25
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,8 @@
1717
package e2e
1818

1919
import (
20-
"bytes"
2120
"context"
22-
"fmt"
23-
"os/exec"
21+
"flag"
2422
"testing"
2523

2624
. "github.com/onsi/ginkgo/v2"
@@ -30,8 +28,26 @@ import (
3028
// Test context
3129
var (
	// ctx is the suite-wide context; it is assigned in BeforeSuite.
	ctx context.Context

	// installCTK is bound to the -install-ctk flag.
	installCTK bool

	// image is bound to the -toolkit-image flag.
	image string

	// Remote login settings, each bound to the flag named alongside it.
	sshKey  string // -ssh-key
	sshUser string // -ssh-user
	host    string // -remote-host
	sshPort string // -ssh-port
)
3441

42+
func init() {
43+
flag.BoolVar(&installCTK, "install-ctk", false, "Install the NVIDIA Container Toolkit")
44+
flag.StringVar(&image, "toolkit-image", "", "Repository of the image to test")
45+
flag.StringVar(&sshKey, "ssh-key", "", "SSH key to use for remote login")
46+
flag.StringVar(&sshUser, "ssh-user", "", "SSH user to use for remote login")
47+
flag.StringVar(&host, "remote-host", "", "Hostname of the remote machine")
48+
flag.StringVar(&sshPort, "ssh-port", "22", "SSH port to use for remote login")
49+
}
50+
3551
func TestMain(t *testing.T) {
3652
suiteName := "NVIDIA Container Toolkit E2E"
3753

@@ -45,25 +61,3 @@ func TestMain(t *testing.T) {
4561
// Assign the shared suite context once, in the BeforeSuite hook.
var _ = BeforeSuite(func() {
	ctx = context.Background()
})
48-
49-
func runScript(script string) (string, error) {
50-
// Create a command to run the script using bash
51-
cmd := exec.Command("bash", "-c", script)
52-
53-
// Buffer to capture standard output
54-
var stdout bytes.Buffer
55-
cmd.Stdout = &stdout
56-
57-
// Buffer to capture standard error
58-
var stderr bytes.Buffer
59-
cmd.Stderr = &stderr
60-
61-
// Run the command
62-
err := cmd.Run()
63-
if err != nil {
64-
return "", fmt.Errorf("script execution failed: %v\nSTDOUT: %s\nSTDERR: %s", err, stdout.String(), stderr.String())
65-
}
66-
67-
// Return the captured stdout and nil error
68-
return stdout.String(), nil
69-
}

tests/e2e/installer.go

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package e2e
18+
19+
import (
20+
"bytes"
21+
"fmt"
22+
"text/template"
23+
)
24+
25+
// dockerInstallTemplate is a template for installing the NVIDIA Container Toolkit
26+
// on a host using Docker.
27+
var dockerInstallTemplate = `
28+
#! /usr/bin/env bash
29+
set -xe
30+
31+
: ${IMAGE:={{.Image}}}
32+
33+
# Create a temporary directory
34+
TEMP_DIR="/tmp/ctk_e2e.$(date +%s)_$RANDOM"
35+
mkdir -p "$TEMP_DIR"
36+
37+
# Given that docker has an init function that checks for the existence of the
38+
# nvidia-container-toolkit, we need to create a symlink to the nvidia-container-runtime-hook
39+
# in the /usr/bin directory.
40+
# See https://github.com/moby/moby/blob/20a05dabf44934447d1a66cdd616cc803b81d4e2/daemon/nvidia_linux.go#L32-L46
41+
sudo rm -f /usr/bin/nvidia-container-runtime-hook
42+
sudo ln -s "$TEMP_DIR/toolkit/nvidia-container-runtime-hook" /usr/bin/nvidia-container-runtime-hook
43+
44+
docker run --pid=host --rm -i --privileged \
45+
-v /:/host \
46+
-v /var/run/docker.sock:/var/run/docker.sock \
47+
-v "$TEMP_DIR:$TEMP_DIR" \
48+
-v /etc/docker:/config-root \
49+
${IMAGE} \
50+
--root "$TEMP_DIR" \
51+
--runtime=docker \
52+
--config=/config-root/daemon.json \
53+
--driver-root=/ \
54+
--no-daemon \
55+
--restart-mode=systemd
56+
`
57+
58+
type ToolkitInstaller struct {
59+
runner Runner
60+
template string
61+
62+
Image string
63+
}
64+
65+
// installerOption is a functional option that mutates a ToolkitInstaller;
// NewToolkitInstaller applies each supplied option in order.
type installerOption func(*ToolkitInstaller)
66+
67+
func WithRunner(r Runner) installerOption {
68+
return func(i *ToolkitInstaller) {
69+
i.runner = r
70+
}
71+
}
72+
73+
func WithImage(image string) installerOption {
74+
return func(i *ToolkitInstaller) {
75+
i.Image = image
76+
}
77+
}
78+
79+
func WithTemplate(template string) installerOption {
80+
return func(i *ToolkitInstaller) {
81+
i.template = template
82+
}
83+
}
84+
85+
func NewToolkitInstaller(opts ...installerOption) (*ToolkitInstaller, error) {
86+
i := &ToolkitInstaller{
87+
runner: localRunner{},
88+
template: dockerInstallTemplate,
89+
}
90+
91+
for _, opt := range opts {
92+
opt(i)
93+
}
94+
95+
if i.Image == "" {
96+
return nil, fmt.Errorf("image is required")
97+
}
98+
99+
return i, nil
100+
}
101+
102+
func (i *ToolkitInstaller) Install() error {
103+
// Parse the combined template
104+
tmpl, err := template.New("installScript").Parse(i.template)
105+
if err != nil {
106+
return fmt.Errorf("error parsing template: %w", err)
107+
}
108+
109+
// Execute the template
110+
var renderedScript bytes.Buffer
111+
err = tmpl.Execute(&renderedScript, i)
112+
if err != nil {
113+
return fmt.Errorf("error executing template: %w", err)
114+
}
115+
116+
_, _, err = i.runner.Run(renderedScript.String())
117+
return err
118+
}

tests/e2e/nvidia-container-toolkit_test.go

+39-17
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,29 @@ import (
2424
)
2525

2626
// Integration tests for Docker runtime
27-
var _ = Describe("docker", func() {
27+
var _ = Describe("docker", Ordered, func() {
28+
var r Runner
29+
30+
// Install the NVIDIA Container Toolkit
31+
BeforeAll(func(ctx context.Context) {
32+
r = NewRunner(
33+
WithHost(host),
34+
WithPort(sshPort),
35+
WithSshKey(sshKey),
36+
WithSshUser(sshUser),
37+
)
38+
if installCTK {
39+
installer, err := NewToolkitInstaller(
40+
WithRunner(r),
41+
WithImage(image),
42+
WithTemplate(dockerInstallTemplate),
43+
)
44+
Expect(err).ToNot(HaveOccurred())
45+
err = installer.Install()
46+
Expect(err).ToNot(HaveOccurred())
47+
}
48+
})
49+
2850
// GPUs are accessible in a container: Running nvidia-smi -L inside the
2951
// container shows the same output inside the container as outside the
3052
// container. This means that the following commands must all produce
@@ -33,33 +55,33 @@ var _ = Describe("docker", func() {
3355
var hostOutput string
3456

3557
BeforeAll(func(ctx context.Context) {
36-
_, err := runScript("docker pull ubuntu")
58+
_, _, err := r.Run("docker pull ubuntu")
3759
Expect(err).ToNot(HaveOccurred())
3860

39-
hostOutput, err = runScript("nvidia-smi -L")
61+
hostOutput, _, err = r.Run("nvidia-smi -L")
4062
Expect(err).ToNot(HaveOccurred())
4163
})
4264

4365
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
44-
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
66+
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu nvidia-smi -L")
4567
Expect(err).ToNot(HaveOccurred())
4668
Expect(containerOutput).To(Equal(hostOutput))
4769
})
4870

4971
It("should support automatic CDI spec generation", func(ctx context.Context) {
50-
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
72+
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all ubuntu nvidia-smi -L")
5173
Expect(err).ToNot(HaveOccurred())
5274
Expect(containerOutput).To(Equal(hostOutput))
5375
})
5476

5577
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
56-
containerOutput, err := runScript("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
78+
containerOutput, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all ubuntu nvidia-smi -L")
5779
Expect(err).ToNot(HaveOccurred())
5880
Expect(containerOutput).To(Equal(hostOutput))
5981
})
6082

6183
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
62-
containerOutput, err := runScript("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
84+
containerOutput, _, err := r.Run("docker run --rm -i --gpus all ubuntu nvidia-smi -L")
6385
Expect(err).ToNot(HaveOccurred())
6486
Expect(containerOutput).To(Equal(hostOutput))
6587
})
@@ -69,34 +91,34 @@ var _ = Describe("docker", func() {
6991
// The following should all produce the same result.
7092
When("Running the cuda-vectorAdd sample", Ordered, func() {
7193
BeforeAll(func(ctx context.Context) {
72-
_, err := runScript("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
94+
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
7395
Expect(err).ToNot(HaveOccurred())
7496
})
7597

7698
var referenceOutput string
7799

78100
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
79101
var err error
80-
referenceOutput, err = runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
102+
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
81103
Expect(err).ToNot(HaveOccurred())
82104

83105
Expect(referenceOutput).To(ContainSubstring("Test PASSED"))
84106
})
85107

86108
It("should support automatic CDI spec generation", func(ctx context.Context) {
87-
out2, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
109+
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
88110
Expect(err).ToNot(HaveOccurred())
89111
Expect(referenceOutput).To(Equal(out2))
90112
})
91113

92114
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
93-
out3, err := runScript("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
115+
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
94116
Expect(err).ToNot(HaveOccurred())
95117
Expect(referenceOutput).To(Equal(out3))
96118
})
97119

98120
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
99-
out4, err := runScript("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
121+
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0")
100122
Expect(err).ToNot(HaveOccurred())
101123
Expect(referenceOutput).To(Equal(out4))
102124
})
@@ -106,34 +128,34 @@ var _ = Describe("docker", func() {
106128
// The following should all produce the same result.
107129
When("Running the cuda-deviceQuery sample", Ordered, func() {
108130
BeforeAll(func(ctx context.Context) {
109-
_, err := runScript("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
131+
_, _, err := r.Run("docker pull nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
110132
Expect(err).ToNot(HaveOccurred())
111133
})
112134

113135
var referenceOutput string
114136

115137
It("should support NVIDIA_VISIBLE_DEVICES", func(ctx context.Context) {
116138
var err error
117-
referenceOutput, err = runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
139+
referenceOutput, _, err = r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
118140
Expect(err).ToNot(HaveOccurred())
119141

120142
Expect(referenceOutput).To(ContainSubstring("Result = PASS"))
121143
})
122144

123145
It("should support automatic CDI spec generation", func(ctx context.Context) {
124-
out2, err := runScript("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
146+
out2, _, err := r.Run("docker run --rm -i --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
125147
Expect(err).ToNot(HaveOccurred())
126148
Expect(referenceOutput).To(Equal(out2))
127149
})
128150

129151
It("should support the --gpus flag using the nvidia-container-runtime", func(ctx context.Context) {
130-
out3, err := runScript("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
152+
out3, _, err := r.Run("docker run --rm -i --runtime=nvidia --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
131153
Expect(err).ToNot(HaveOccurred())
132154
Expect(referenceOutput).To(Equal(out3))
133155
})
134156

135157
It("should support the --gpus flag using the nvidia-container-runtime-hook", func(ctx context.Context) {
136-
out4, err := runScript("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
158+
out4, _, err := r.Run("docker run --rm -i --gpus all nvcr.io/nvidia/k8s/cuda-sample:devicequery-cuda12.5.0")
137159
Expect(err).ToNot(HaveOccurred())
138160
Expect(referenceOutput).To(Equal(out4))
139161
})

0 commit comments

Comments
 (0)