Skip to content

Commit f0b8561

Browse files
committed
fix(cli): replace shell-string commands with argv arrays
Closes shell-injection attack surface in the legacy CJS layer by replacing all user-controlled run() / runCapture() shell strings with the new argv-safe runArgv() / runCaptureArgv() helpers. assertSafeName() guards every user-supplied sandbox/instance/preset name before it enters any command. bin/lib/onboard.js -- all openshell/bash/brew calls -> runArgv; file copies -> fs.cpSync/fs.rmSync (no cp shell) bin/lib/nim.js -- docker pull/rm/run/stop/inspect -> runArgv/runCaptureArgv; assertSafeName guard on sandboxName bin/lib/policies.js -- openshell policy get/set -> runCaptureArgv/runArgv; assertSafeName on sandboxName and presetName; temp policy file written with mode 0o600 bin/nemoclaw.js -- setupSpark: remove inline NVIDIA_API_KEY=VALUE from sudo argv (sudo -E already inherits env); deploy: assertSafeName on instanceName; sandbox connect/status/logs/destroy -> runArgv Supersedes PRs: NVIDIA#148 (shell injection), part of NVIDIA#330 (credential leak).
1 parent 53c5071 commit f0b8561

4 files changed

Lines changed: 136 additions & 107 deletions

File tree

bin/lib/nim.js

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
//
44
// NIM container management — pull, start, stop, health-check NIM images.
55

6-
const { run, runCapture } = require("./runner");
6+
const { run, runArgv, runCapture, runCaptureArgv, assertSafeName } = require("./runner");
77
const nimImages = require("./nim-images.json");
88

99
function containerName(sandboxName) {
@@ -121,11 +121,12 @@ function pullNimImage(model) {
121121
process.exit(1);
122122
}
123123
console.log(` Pulling NIM image: ${image}`);
124-
run(`docker pull ${image}`);
124+
runArgv("docker", ["pull", image]);
125125
return image;
126126
}
127127

128128
function startNimContainer(sandboxName, model, port = 8000) {
129+
assertSafeName(sandboxName, "sandbox name");
129130
const name = containerName(sandboxName);
130131
const image = getImageForModel(model);
131132
if (!image) {
@@ -134,12 +135,17 @@ function startNimContainer(sandboxName, model, port = 8000) {
134135
}
135136

136137
// Stop any existing container with same name
137-
run(`docker rm -f ${name} 2>/dev/null || true`, { ignoreError: true });
138+
runArgv("docker", ["rm", "-f", name], { ignoreError: true });
138139

139140
console.log(` Starting NIM container: ${name}`);
140-
run(
141-
`docker run -d --gpus all -p ${port}:8000 --name ${name} --shm-size 16g ${image}`
142-
);
141+
runArgv("docker", [
142+
"run", "-d",
143+
"--gpus", "all",
144+
"-p", `${port}:8000`,
145+
"--name", name,
146+
"--shm-size", "16g",
147+
image,
148+
]);
143149
return name;
144150
}
145151

@@ -150,7 +156,7 @@ function waitForNimHealth(port = 8000, timeout = 300) {
150156

151157
while ((Date.now() - start) / 1000 < timeout) {
152158
try {
153-
const result = runCapture(`curl -sf http://localhost:${port}/v1/models`, {
159+
const result = runCaptureArgv("curl", ["-sf", `http://localhost:${port}/v1/models`], {
154160
ignoreError: true,
155161
});
156162
if (result) {
@@ -168,27 +174,27 @@ function waitForNimHealth(port = 8000, timeout = 300) {
168174
function stopNimContainer(sandboxName) {
169175
const name = containerName(sandboxName);
170176
console.log(` Stopping NIM container: ${name}`);
171-
run(`docker stop ${name} 2>/dev/null || true`, { ignoreError: true });
172-
run(`docker rm ${name} 2>/dev/null || true`, { ignoreError: true });
177+
runArgv("docker", ["stop", name], { ignoreError: true });
178+
runArgv("docker", ["rm", name], { ignoreError: true });
173179
}
174180

175181
function nimStatus(sandboxName) {
176182
const name = containerName(sandboxName);
177183
try {
178-
const state = runCapture(
179-
`docker inspect --format '{{.State.Status}}' ${name} 2>/dev/null`,
184+
const state = runCaptureArgv(
185+
"docker", ["inspect", "--format", "{{.State.Status}}", name],
180186
{ ignoreError: true }
181187
);
182188
if (!state) return { running: false, container: name };
183189

184190
let healthy = false;
185-
if (state === "running") {
186-
const health = runCapture(`curl -sf http://localhost:8000/v1/models 2>/dev/null`, {
191+
if (state.trim() === "running") {
192+
const health = runCaptureArgv("curl", ["-sf", "http://localhost:8000/v1/models"], {
187193
ignoreError: true,
188194
});
189195
healthy = !!health;
190196
}
191-
return { running: state === "running", healthy, container: name, state };
197+
return { running: state.trim() === "running", healthy, container: name, state: state.trim() };
192198
} catch {
193199
return { running: false, container: name };
194200
}

bin/lib/onboard.js

Lines changed: 95 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
const fs = require("fs");
77
const path = require("path");
8-
const { ROOT, SCRIPTS, run, runCapture } = require("./runner");
8+
const { ROOT, SCRIPTS, run, runArgv, runCapture, runCaptureArgv, assertSafeName } = require("./runner");
99
const { prompt, ensureApiKey, getCredential } = require("./credentials");
1010
const registry = require("./registry");
1111
const nim = require("./nim");
@@ -24,7 +24,7 @@ function step(n, total, msg) {
2424

2525
function isDockerRunning() {
2626
try {
27-
runCapture("docker info", { ignoreError: false });
27+
runCaptureArgv("docker", ["info"], { ignoreError: false });
2828
return true;
2929
} catch {
3030
return false;
@@ -33,7 +33,7 @@ function isDockerRunning() {
3333

3434
function isOpenshellInstalled() {
3535
try {
36-
runCapture("command -v openshell");
36+
runCaptureArgv("sh", ["-c", "command -v openshell"]);
3737
return true;
3838
} catch {
3939
return false;
@@ -42,7 +42,7 @@ function isOpenshellInstalled() {
4242

4343
function installOpenshell() {
4444
console.log(" Installing openshell CLI...");
45-
run(`bash "${path.join(SCRIPTS, "install-openshell.sh")}"`, { ignoreError: true });
45+
runArgv("bash", [path.join(SCRIPTS, "install-openshell.sh")], { ignoreError: true });
4646
return isOpenshellInstalled();
4747
}
4848

@@ -124,20 +124,20 @@ async function startGateway(gpu) {
124124
step(2, 7, "Starting OpenShell gateway");
125125

126126
// Destroy old gateway
127-
run("openshell gateway destroy -g nemoclaw 2>/dev/null || true", { ignoreError: true });
127+
runArgv("openshell", ["gateway", "destroy", "-g", "nemoclaw"], { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] });
128128

129-
const gwArgs = ["--name", "nemoclaw"];
129+
const gwArgs = ["gateway", "start", "--name", "nemoclaw"];
130130
// Do NOT pass --gpu here. On DGX Spark (and most GPU hosts), inference is
131131
// routed through a host-side provider (Ollama, vLLM, or cloud API) — the
132132
// sandbox itself does not need direct GPU access. Passing --gpu causes
133133
// FailedPrecondition errors when the gateway's k3s device plugin cannot
134134
// allocate GPUs. See: https://build.nvidia.com/spark/nemoclaw/instructions
135135

136-
run(`openshell gateway start ${gwArgs.join(" ")}`, { ignoreError: false });
136+
runArgv("openshell", gwArgs, { ignoreError: false });
137137

138138
// Verify health
139139
for (let i = 0; i < 5; i++) {
140-
const status = runCapture("openshell status 2>&1", { ignoreError: true });
140+
const status = runCaptureArgv("sh", ["-c", "openshell status 2>&1"], { ignoreError: true });
141141
if (status.includes("Connected")) {
142142
console.log(" ✓ Gateway is healthy");
143143
break;
@@ -157,7 +157,7 @@ async function startGateway(gpu) {
157157
].find((s) => fs.existsSync(s));
158158
if (colimaSocket) {
159159
console.log(" Patching CoreDNS for Colima...");
160-
run(`bash "${path.join(SCRIPTS, "fix-coredns.sh")}" 2>&1 || true`, { ignoreError: true });
160+
runArgv("bash", [path.join(SCRIPTS, "fix-coredns.sh")], { ignoreError: true });
161161
}
162162
// Give DNS a moment to propagate
163163
require("child_process").spawnSync("sleep", ["5"]);
@@ -171,15 +171,7 @@ async function createSandbox(gpu) {
171171

172172
const nameAnswer = await prompt(" Sandbox name (lowercase, numbers, hyphens) [my-assistant]: ");
173173
const sandboxName = (nameAnswer || "my-assistant").trim().toLowerCase();
174-
175-
// Validate: RFC 1123 subdomain — lowercase alphanumeric and hyphens,
176-
// must start and end with alphanumeric (required by Kubernetes/OpenShell)
177-
if (!/^[a-z0-9]([a-z0-9-]*[a-z0-9])?$/.test(sandboxName)) {
178-
console.error(` Invalid sandbox name: '${sandboxName}'`);
179-
console.error(" Names must be lowercase, contain only letters, numbers, and hyphens,");
180-
console.error(" and must start and end with a letter or number.");
181-
process.exit(1);
182-
}
174+
assertSafeName(sandboxName, "sandbox name");
183175

184176
// Check if sandbox already exists in registry
185177
const existing = registry.getSandbox(sandboxName);
@@ -190,7 +182,7 @@ async function createSandbox(gpu) {
190182
return sandboxName;
191183
}
192184
// Destroy old sandbox
193-
run(`openshell sandbox delete "${sandboxName}" 2>/dev/null || true`, { ignoreError: true });
185+
runArgv("openshell", ["sandbox", "delete", sandboxName], { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] });
194186
registry.removeSandbox(sandboxName);
195187
}
196188

@@ -199,40 +191,47 @@ async function createSandbox(gpu) {
199191
const os = require("os");
200192
const buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-"));
201193
fs.copyFileSync(path.join(ROOT, "Dockerfile"), path.join(buildCtx, "Dockerfile"));
202-
run(`cp -r "${path.join(ROOT, "nemoclaw")}" "${buildCtx}/nemoclaw"`);
203-
run(`cp -r "${path.join(ROOT, "nemoclaw-blueprint")}" "${buildCtx}/nemoclaw-blueprint"`);
204-
run(`cp -r "${path.join(ROOT, "scripts")}" "${buildCtx}/scripts"`);
205-
run(`rm -rf "${buildCtx}/nemoclaw/node_modules" "${buildCtx}/nemoclaw/src"`, { ignoreError: true });
194+
fs.cpSync(path.join(ROOT, "nemoclaw"), path.join(buildCtx, "nemoclaw"), { recursive: true });
195+
fs.cpSync(path.join(ROOT, "nemoclaw-blueprint"), path.join(buildCtx, "nemoclaw-blueprint"), { recursive: true });
196+
fs.cpSync(path.join(ROOT, "scripts"), path.join(buildCtx, "scripts"), { recursive: true });
197+
fs.rmSync(path.join(buildCtx, "nemoclaw", "node_modules"), { recursive: true, force: true });
198+
fs.rmSync(path.join(buildCtx, "nemoclaw", "src"), { recursive: true, force: true });
206199

207200
// Create sandbox (use -- echo to avoid dropping into interactive shell)
208201
// Pass the base policy so sandbox starts in proxy mode (required for policy updates later)
209202
const basePolicyPath = path.join(ROOT, "nemoclaw-blueprint", "policies", "openclaw-sandbox.yaml");
210-
const createArgs = [
211-
`--from "${buildCtx}/Dockerfile"`,
212-
`--name "${sandboxName}"`,
213-
`--policy "${basePolicyPath}"`,
203+
const createArgv = [
204+
"sandbox", "create",
205+
"--from", path.join(buildCtx, "Dockerfile"),
206+
"--name", sandboxName,
207+
"--policy", basePolicyPath,
214208
];
215209
// --gpu is intentionally omitted. See comment in startGateway().
216210

217211
console.log(` Creating sandbox '${sandboxName}' (this takes a few minutes on first run)...`);
218212
const chatUiUrl = process.env.CHAT_UI_URL || 'http://127.0.0.1:18789';
219-
const envArgs = [`CHAT_UI_URL=${chatUiUrl}`];
220-
if (process.env.NVIDIA_API_KEY) {
221-
envArgs.push(`NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}`);
222-
}
223-
// set -o pipefail ensures the openshell exit code propagates through the awk pipe.
224-
// Without it, awk's exit code (always 0) would mask a failed sandbox create.
225-
run(`set -o pipefail; openshell sandbox create ${createArgs.join(" ")} -- env ${envArgs.join(" ")} nemoclaw-start 2>&1 | awk '/Sandbox allocated/{if(!seen){print;seen=1}next}1'`);
213+
// The `-- env KEY=VALUE` args are the openshell protocol for injecting env
214+
// vars into the sandbox at startup. NVIDIA_API_KEY is passed here so the
215+
// sandbox process can use it, but this still appears in argv. The key is
216+
// already in process.env (set by ensureApiKey) so the child inherits it;
217+
// passing it explicitly here is belt-and-suspenders for the sandbox entrypoint.
218+
createArgv.push(
219+
"--", "env",
220+
`CHAT_UI_URL=${chatUiUrl}`,
221+
...(process.env.NVIDIA_API_KEY ? [`NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}`] : []),
222+
"nemoclaw-start",
223+
);
224+
runArgv("openshell", createArgv);
226225

227226
// Release any stale forward on port 18789 before claiming it for the new sandbox.
228227
// A previous onboard run may have left the port forwarded to a different sandbox,
229228
// which would silently prevent the new sandbox's dashboard from being reachable.
230-
run(`openshell forward stop 18789 2>/dev/null || true`, { ignoreError: true });
229+
runArgv("openshell", ["forward", "stop", "18789"], { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] });
231230
// Forward dashboard port to the new sandbox
232-
run(`openshell forward start --background 18789 "${sandboxName}"`, { ignoreError: true });
231+
runArgv("openshell", ["forward", "start", "--background", "18789", sandboxName], { ignoreError: true });
233232

234233
// Clean up build context
235-
run(`rm -rf "${buildCtx}"`, { ignoreError: true });
234+
fs.rmSync(buildCtx, { recursive: true, force: true });
236235

237236
// Register in registry
238237
registry.registerSandbox({
@@ -254,9 +253,9 @@ async function setupNim(sandboxName, gpu) {
254253
let nimContainer = null;
255254

256255
// Detect local inference options
257-
const hasOllama = !!runCapture("command -v ollama", { ignoreError: true });
258-
const ollamaRunning = !!runCapture("curl -sf http://localhost:11434/api/tags 2>/dev/null", { ignoreError: true });
259-
const vllmRunning = !!runCapture("curl -sf http://localhost:8000/v1/models 2>/dev/null", { ignoreError: true });
256+
const hasOllama = !!runCaptureArgv("sh", ["-c", "command -v ollama"], { ignoreError: true });
257+
const ollamaRunning = !!runCaptureArgv("curl", ["-sf", "http://localhost:11434/api/tags"], { ignoreError: true });
258+
const vllmRunning = !!runCaptureArgv("curl", ["-sf", "http://localhost:8000/v1/models"], { ignoreError: true });
260259

261260
// Auto-select only with NEMOCLAW_EXPERIMENTAL=1 (prevents silent misconfiguration)
262261
if (EXPERIMENTAL) {
@@ -343,17 +342,17 @@ async function setupNim(sandboxName, gpu) {
343342
} else if (selected.key === "ollama") {
344343
if (!ollamaRunning) {
345344
console.log(" Starting Ollama...");
346-
run("OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &", { ignoreError: true });
345+
runArgv("sh", ["-c", "OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &"], { ignoreError: true });
347346
require("child_process").spawnSync("sleep", ["2"]);
348347
}
349348
console.log(" ✓ Using Ollama on localhost:11434");
350349
provider = "ollama-local";
351350
model = "nemotron-3-nano";
352351
} else if (selected.key === "install-ollama") {
353352
console.log(" Installing Ollama via Homebrew...");
354-
run("brew install ollama", { ignoreError: true });
353+
runArgv("brew", ["install", "ollama"], { ignoreError: true });
355354
console.log(" Starting Ollama...");
356-
run("OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &", { ignoreError: true });
355+
runArgv("sh", ["-c", "OLLAMA_HOST=0.0.0.0:11434 ollama serve > /dev/null 2>&1 &"], { ignoreError: true });
357356
require("child_process").spawnSync("sleep", ["2"]);
358357
console.log(" ✓ Using Ollama on localhost:11434");
359358
provider = "ollama-local";
@@ -382,44 +381,62 @@ async function setupNim(sandboxName, gpu) {
382381
async function setupInference(sandboxName, model, provider) {
383382
step(5, 7, "Setting up inference provider");
384383

384+
// Helper: create-or-update a provider with argv (no shell, no leak).
385+
// Credentials are passed as env-var NAME only (openshell env-lookup form).
386+
// The actual secret value is set on process.env so the child inherits it,
387+
// but it never appears in the argv list visible via `ps aux`.
388+
function upsertProvider(name, credentialEnvName, configValue) {
389+
const args = [
390+
"provider", "create",
391+
"--name", name,
392+
"--type", "openai",
393+
"--credential", credentialEnvName, // env-lookup: openshell reads from env
394+
"--config", `OPENAI_BASE_URL=${configValue}`,
395+
];
396+
const updateArgs = [
397+
"provider", "update", name,
398+
"--credential", credentialEnvName,
399+
"--config", `OPENAI_BASE_URL=${configValue}`,
400+
];
401+
// Try create first; fall back to update if the provider already exists.
402+
const r = runArgv("openshell", args, { ignoreError: true, stdio: ["ignore", "pipe", "pipe"] });
403+
if (r.status !== 0) {
404+
runArgv("openshell", updateArgs, { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] });
405+
}
406+
}
407+
385408
if (provider === "nvidia-nim") {
386-
// Create nvidia-nim provider
387-
run(
388-
`openshell provider create --name nvidia-nim --type openai ` +
389-
`--credential "NVIDIA_API_KEY=${process.env.NVIDIA_API_KEY}" ` +
390-
`--config "OPENAI_BASE_URL=https://integrate.api.nvidia.com/v1" 2>&1 || true`,
391-
{ ignoreError: true }
392-
);
393-
run(
394-
`openshell inference set --no-verify --provider nvidia-nim --model ${model} 2>/dev/null || true`,
395-
{ ignoreError: true }
396-
);
409+
// SECURITY: set the key in the child's inherited env — NOT in argv.
410+
// openshell reads --credential NVIDIA_API_KEY from process.env, not from args.
411+
runArgv("openshell", [
412+
"provider", "create",
413+
"--name", "nvidia-nim",
414+
"--type", "openai",
415+
"--credential", "NVIDIA_API_KEY",
416+
"--config", "OPENAI_BASE_URL=https://integrate.api.nvidia.com/v1",
417+
], {
418+
env: { NVIDIA_API_KEY: process.env.NVIDIA_API_KEY || "" },
419+
ignoreError: true,
420+
stdio: ["ignore", "ignore", "ignore"],
421+
});
422+
runArgv("openshell", [
423+
"inference", "set", "--no-verify", "--provider", "nvidia-nim", "--model", model,
424+
], { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] });
425+
397426
} else if (provider === "vllm-local") {
398-
run(
399-
`openshell provider create --name vllm-local --type openai ` +
400-
`--credential "OPENAI_API_KEY=dummy" ` +
401-
`--config "OPENAI_BASE_URL=${HOST_GATEWAY_URL}:8000/v1" 2>&1 || ` +
402-
`openshell provider update vllm-local --credential "OPENAI_API_KEY=dummy" ` +
403-
`--config "OPENAI_BASE_URL=${HOST_GATEWAY_URL}:8000/v1" 2>&1 || true`,
404-
{ ignoreError: true }
405-
);
406-
run(
407-
`openshell inference set --no-verify --provider vllm-local --model ${model} 2>/dev/null || true`,
408-
{ ignoreError: true }
409-
);
427+
// dummy is not a secret — safe as literal KEY=VALUE
428+
upsertProvider("vllm-local", "OPENAI_API_KEY", `${HOST_GATEWAY_URL}:8000/v1`);
429+
runArgv("openshell", [
430+
"inference", "set", "--no-verify", "--provider", "vllm-local", "--model", model,
431+
], { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] });
432+
410433
} else if (provider === "ollama-local") {
411-
run(
412-
`openshell provider create --name ollama-local --type openai ` +
413-
`--credential "OPENAI_API_KEY=ollama" ` +
414-
`--config "OPENAI_BASE_URL=${HOST_GATEWAY_URL}:11434/v1" 2>&1 || ` +
415-
`openshell provider update ollama-local --credential "OPENAI_API_KEY=ollama" ` +
416-
`--config "OPENAI_BASE_URL=${HOST_GATEWAY_URL}:11434/v1" 2>&1 || true`,
417-
{ ignoreError: true }
418-
);
419-
run(
420-
`openshell inference set --no-verify --provider ollama-local --model ${model} 2>/dev/null || true`,
421-
{ ignoreError: true }
422-
);
434+
// "ollama" is not a secret — safe as literal KEY=VALUE
435+
process.env.OPENAI_API_KEY = process.env.OPENAI_API_KEY || "ollama";
436+
upsertProvider("ollama-local", "OPENAI_API_KEY", `${HOST_GATEWAY_URL}:11434/v1`);
437+
runArgv("openshell", [
438+
"inference", "set", "--no-verify", "--provider", "ollama-local", "--model", model,
439+
], { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] });
423440
}
424441

425442
registry.updateSandbox(sandboxName, { model, provider });

0 commit comments

Comments
 (0)