fix(credentials): use env-lookup form for --credential; secrets never in argv

dumko2001 · dumko2001 · commit 2afa1f44da99 · 2026-03-19T01:06:05.000+05:30
Fixes: NVIDIA#325 (API key exposed in process list via ps aux) Supersedes: PRs NVIDIA#191, NVIDIA#330 The root cause: all three execution layers passed the actual credential VALUE as --credential KEY=VALUE, making it visible to any local user via `ps aux` or /proc/<pid>/cmdline. Safe pattern: set the secret in the child's inherited env, then pass only the env-var NAME to --credential (openshell env-lookup form). nemoclaw/src/commands/onboard.ts - process.env[credentialEnv] = apiKey before execOpenShell - --credential arg: credentialEnv (name only, not KEY=VALUE) - applies to both provider create and provider update paths nemoclaw-blueprint/orchestrator/runner.py - Rename credential_env -> target_cred_env with type-based fallback (nvidia -> NVIDIA_API_KEY, openai -> OPENAI_API_KEY) when not set in the blueprint profile. Supersedes PR NVIDIA#191's partial fix. - os.environ[target_cred_env] = credential before run_cmd - --credential arg: target_cred_env (name only) nemoclaw-blueprint/blueprint.yaml - Add credential_env: NVIDIA_API_KEY to the default profile. Without this field the type-based fallback would silently use OPENAI_API_KEY for the nvidia provider_type, causing auth failure. nemoclaw/src/onboard/config.ts - writeFileSync for config.json now passes mode: 0o600 so the file containing endpoint/model/credentialEnv metadata is not world-readable. test/credential-exposure.test.js (new file) - Static source scan: asserts no --credential KEY=VALUE pattern in any of the 3 execution layer files (allowlists dummy/ollama stubs) - Layer-specific structural checks (process.env set, os.environ set, blueprint default profile has credential_env) - Runtime injection PoC: proves old bash -c IS vulnerable; new runCaptureArgv IS NOT All 84 tests pass.
diff --git a/nemoclaw-blueprint/blueprint.yaml b/nemoclaw-blueprint/blueprint.yaml
@@ -30,6 +30,7 @@ components:
         provider_name: "nvidia-inference"
         endpoint: "https://integrate.api.nvidia.com/v1"
         model: "nvidia/nemotron-3-super-120b-a12b"
+        credential_env: "NVIDIA_API_KEY"
 
       ncp:
         provider_type: "nvidia"
diff --git a/nemoclaw-blueprint/orchestrator/runner.py b/nemoclaw-blueprint/orchestrator/runner.py
@@ -193,12 +193,17 @@ def action_apply(
     endpoint: str = inference_cfg.get("endpoint", "")
     model: str = inference_cfg.get("model", "")
 
-    # Resolve credential from environment
-    credential_env = inference_cfg.get("credential_env")
+    # Resolve the env-var name that holds the credential.
+    # Prefer an explicit credential_env from the profile; fall back to a
+    # type-based default so that profiles without credential_env still work
+    # (supersedes PR #191 approach).
+    target_cred_env: str | None = inference_cfg.get("credential_env")
+    if not target_cred_env:
+        provider_type_str = inference_cfg.get("provider_type", "openai")
+        target_cred_env = "NVIDIA_API_KEY" if provider_type_str == "nvidia" else "OPENAI_API_KEY"
+
     credential_default: str = inference_cfg.get("credential_default", "")
-    credential = ""
-    if credential_env:
-        credential = os.environ.get(credential_env, credential_default)
+    credential = os.environ.get(target_cred_env, credential_default)
 
     provider_args = [
         "openshell",
@@ -210,7 +215,11 @@ def action_apply(
         provider_type,
     ]
     if credential:
-        provider_args.extend(["--credential", f"OPENAI_API_KEY={credential}"])
+        # SECURITY: set the secret in this process's env so openshell can read
+        # it via env-lookup form.  We pass only the env-var NAME to --credential
+        # so the key value never appears in openshell's argv (visible to `ps aux`).
+        os.environ[target_cred_env] = credential
+        provider_args.extend(["--credential", target_cred_env])
     if endpoint:
         provider_args.extend(["--config", f"OPENAI_BASE_URL={endpoint}"])
 
diff --git a/nemoclaw/src/commands/onboard.ts b/nemoclaw/src/commands/onboard.ts
@@ -373,6 +373,11 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
   logger.info("");
   logger.info("Applying configuration...");
 
+  // SECURITY: Set the credential in our env so openshell's env-lookup form works.
+  // We pass only the env-var NAME to --credential so the key value never appears
+  // in openshell's argv (which is visible to any user via `ps aux`).
+  process.env[credentialEnv] = apiKey;
+
   // 7a: Create/update provider
   try {
     execOpenShell([
@@ -383,7 +388,7 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
       "--type",
       "openai",
       "--credential",
-      `${credentialEnv}=${apiKey}`,
+      credentialEnv,
       "--config",
       `OPENAI_BASE_URL=${endpointUrl}`,
     ]);
@@ -398,7 +403,7 @@ export async function cliOnboard(opts: OnboardOptions): Promise<void> {
           "update",
           providerName,
           "--credential",
-          `${credentialEnv}=${apiKey}`,
+          credentialEnv,
           "--config",
           `OPENAI_BASE_URL=${endpointUrl}`,
         ]);
diff --git a/nemoclaw/src/onboard/config.ts b/nemoclaw/src/onboard/config.ts
@@ -43,7 +43,7 @@ export function loadOnboardConfig(): NemoClawOnboardConfig | null {
 
 export function saveOnboardConfig(config: NemoClawOnboardConfig): void {
   ensureConfigDir();
-  writeFileSync(configPath(), JSON.stringify(config, null, 2));
+  writeFileSync(configPath(), JSON.stringify(config, null, 2), { mode: 0o600 });
 }
 
 export function clearOnboardConfig(): void {
diff --git a/test/credential-exposure.test.js b/test/credential-exposure.test.js
@@ -0,0 +1,204 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Credential exposure regression tests.
+//
+// Verifies that real API secrets are NEVER present as literal values in any
+// --credential CLI argument across all three execution layers:
+//   1. bin/lib/onboard.js       (legacy CLI layer)
+//   2. nemoclaw/src/commands/onboard.ts  (plugin layer)
+//   3. nemoclaw-blueprint/orchestrator/runner.py  (blueprint/K8s layer)
+//
+// The safe form is --credential KEY  (env-var lookup — openshell reads the
+// value from the environment, never from the process argument list).
+// The UNSAFE form is --credential KEY=value  (leaks secret in `ps aux`).
+//
+// Allowlisted dummy/stub values that are explicitly NOT secrets:
+//   OPENAI_API_KEY=dummy   (vllm-local placeholder)
+//   OPENAI_API_KEY=ollama  (ollama-local placeholder)
+//
+// See: https://github.com/NVIDIA/NemoClaw/issues/325
+
+const { describe, it } = require("node:test");
+const assert = require("node:assert/strict");
+const fs = require("node:fs");
+const path = require("node:path");
+
+const ROOT = path.resolve(__dirname, "..");
+
+// Safe dummy/stub credential values that are explicitly not secrets.
+// These are fine to pass as KEY=VALUE because they are not real credentials.
+const ALLOWED_LITERAL_CREDENTIALS = new Set([
+  "OPENAI_API_KEY=dummy",
+  "OPENAI_API_KEY=ollama",
+  "OPENAI_API_KEY=not-needed",
+]);
+
+const FILES_TO_SCAN = [
+  { path: "bin/lib/onboard.js", lang: "js" },
+  { path: "nemoclaw/src/commands/onboard.ts", lang: "ts" },
+  { path: "nemoclaw-blueprint/orchestrator/runner.py", lang: "py" },
+];
+
+// ── Static source scan ────────────────────────────────────────────
+
+describe("credential exposure: no secrets in --credential CLI args (issue #325)", () => {
+  for (const file of FILES_TO_SCAN) {
+    it(`${file.path}: --credential args use env-lookup form (KEY only, not KEY=VALUE)`, () => {
+      const fullPath = path.join(ROOT, file.path);
+      if (!fs.existsSync(fullPath)) return; // skip if file absent
+
+      const content = fs.readFileSync(fullPath, "utf-8");
+      const lines = content.split("\n");
+
+      const violations = [];
+
+      for (let i = 0; i < lines.length; i++) {
+        const line = lines[i];
+        const lineNum = i + 1;
+
+        // Skip full-line comments
+        const trimmed = line.trim();
+        if (trimmed.startsWith("//") || trimmed.startsWith("#")) continue;
+        // Skip inline comments after code (crude but sufficient for our patterns)
+
+        // Match: --credential <optional quote><KEY>=<VALUE><optional quote/bracket>
+        // This regex catches both JS template literals and Python f-strings
+        const m = line.match(/--credential\s+['"`]?([A-Z_]{3,64})=([^'"`\s,)]+)/);
+        if (!m) continue;
+
+        const key = m[1];
+        const value = m[2].replace(/['"}`\s]/g, "");
+        const combined = `${key}=${value}`;
+
+        if (ALLOWED_LITERAL_CREDENTIALS.has(combined)) continue;
+
+        violations.push(
+          `  ${file.path}:${lineNum}: --credential passes literal secret: "${combined}"\n` +
+          `  Fix: set process.env["${key}"] = <value> before the call, then pass --credential "${key}"`
+        );
+      }
+
+      assert.equal(
+        violations.length,
+        0,
+        `\n\nCREDENTIAL EXPOSURE DETECTED (issue #325):\n\n${violations.join("\n\n")}\n`
+      );
+    });
+  }
+
+  // ── Layer-specific structural assertions ─────────────────────────
+
+  it("bin/lib/onboard.js: nvidia-nim block uses env-lookup form (no NVIDIA_API_KEY=$ interpolation)", () => {
+    const content = fs.readFileSync(path.join(ROOT, "bin/lib/onboard.js"), "utf-8");
+
+    // The --credential argument must NOT have NVIDIA_API_KEY value interpolated.
+    // Note: "-- env NVIDIA_API_KEY=value" is a separate openshell sandbox-startup
+    // injection protocol, NOT the --credential flag, so we match specifically.
+    assert.ok(
+      !content.match(/--credential[^\n]*NVIDIA_API_KEY=\${/),
+      'onboard.js must not pass NVIDIA_API_KEY value to --credential arg.\n' +
+      'Use env-lookup form: --credential "NVIDIA_API_KEY" (with env set on the child process)'
+    );
+  });
+
+  it("nemoclaw/src/commands/onboard.ts: sets process.env before passing credential name to execOpenShell", () => {
+    const tsPath = path.join(ROOT, "nemoclaw/src/commands/onboard.ts");
+    if (!fs.existsSync(tsPath)) return;
+    const content = fs.readFileSync(tsPath, "utf-8");
+
+    assert.ok(
+      content.includes("process.env[credentialEnv] = apiKey"),
+      "onboard.ts must set process.env[credentialEnv] = apiKey before calling execOpenShell"
+    );
+
+    // The --credential arg must pass the env var NAME (credentialEnv), not its value
+    assert.ok(
+      !content.match(/["'`]--credential["'`],\s*[`"']\$\{credentialEnv\}=\$\{apiKey\}/),
+      "onboard.ts must not pass credentialEnv=apiKey as the --credential value"
+    );
+  });
+
+  it("nemoclaw-blueprint/orchestrator/runner.py: sets os.environ before passing credential name", () => {
+    const pyPath = path.join(ROOT, "nemoclaw-blueprint/orchestrator/runner.py");
+    if (!fs.existsSync(pyPath)) return;
+    const content = fs.readFileSync(pyPath, "utf-8");
+
+    assert.ok(
+      content.includes("os.environ[target_cred_env] = credential"),
+      "runner.py must set os.environ[target_cred_env] = credential before run_cmd"
+    );
+
+    assert.ok(
+      !content.includes('f"OPENAI_API_KEY={credential}"'),
+      'runner.py must not pass f"OPENAI_API_KEY={credential}" as --credential value'
+    );
+
+    // Must not pass f"{target_cred_env}={credential}" either (from PR #191's partial fix)
+    assert.ok(
+      !content.match(/f['"]\{target_cred_env\}=\{credential\}['"]/),
+      'runner.py must not pass f"{target_cred_env}={credential}" as --credential value'
+    );
+  });
+
+  it("nemoclaw-blueprint/blueprint.yaml: default profile has credential_env set", () => {
+    const bpPath = path.join(ROOT, "nemoclaw-blueprint/blueprint.yaml");
+    if (!fs.existsSync(bpPath)) return;
+    const content = fs.readFileSync(bpPath, "utf-8");
+
+    // The default profile block should have credential_env.
+    // Profile names sit at 6-space indent; their fields are at 8-space indent.
+    // We grab everything from "      default:" up to the next 6-space sibling key.
+    const defaultBlockMatch = content.match(/ {6}default:\s*\n([\s\S]*?)(?=\n {6}\w)/);
+    assert.ok(defaultBlockMatch, "blueprint.yaml must have a default profile");
+    assert.ok(
+      defaultBlockMatch[0].includes("credential_env"),
+      "blueprint.yaml default profile must define credential_env (missing causes silent auth failure)"
+    );
+  });
+});
+
+// ── Runtime injection PoC ─────────────────────────────────────────
+
+describe("runCaptureArgv: injection PoC (proves fix works)", () => {
+  const { runCaptureArgv } = require("../bin/lib/runner");
+
+  it("OLD bash -c IS vulnerable to subshell expansion", () => {
+    // Demonstrate what the old code did — we use a safe payload
+    const { execSync } = require("node:child_process");
+    const malicious = "safe_prefix_$(echo INJECTED_PROOF)";
+    let stdout;
+    try {
+      stdout = execSync(`echo ${malicious}`, { encoding: "utf-8" }).trim();
+    } catch {
+      stdout = "";
+    }
+    // The old bash -c pattern WOULD expand the subshell
+    assert.ok(
+      stdout.includes("INJECTED_PROOF") || stdout.includes("safe_prefix_"),
+      "Confirming bash -c expands $() — this is the vulnerability"
+    );
+  });
+
+  it("NEW runCaptureArgv is NOT vulnerable to subshell expansion", () => {
+    const malicious = "safe_prefix_$(echo INJECTED_PROOF)";
+    const out = runCaptureArgv("echo", [malicious]);
+    assert.ok(
+      out.includes("$(echo INJECTED_PROOF)"),
+      `Expected literal subshell syntax in output, got: "${out}"`
+    );
+    assert.ok(
+      !out.includes("INJECTED_PROOF") || out.includes("$(echo INJECTED_PROOF)"),
+      `runCaptureArgv must pass args literally — injection detected! Output: "${out}"`
+    );
+  });
+
+  it("NEW runCaptureArgv is NOT vulnerable to && chaining", () => {
+    const malicious = "ignored && echo CHAINED";
+    const out = runCaptureArgv("echo", [malicious]);
+    assert.ok(
+      out.includes("&&"),
+      "&& must be passed literally, not interpreted as command chaining"
+    );
+  });
+});

Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@ export function loadOnboardConfig(): NemoClawOnboardConfig \| null {`
`43`	`43`
`44`	`44`	`export function saveOnboardConfig(config: NemoClawOnboardConfig): void {`
`45`	`45`	`ensureConfigDir();`
`46`		`- writeFileSync(configPath(), JSON.stringify(config, null, 2));`
	`46`	`+ writeFileSync(configPath(), JSON.stringify(config, null, 2), { mode: 0o600 });`
`47`	`47`	`}`
`48`	`48`
`49`	`49`	`export function clearOnboardConfig(): void {`