Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -2159,6 +2159,12 @@ async function setupNim(gpu) {
if (!preferredInferenceApi) {
continue selectionLoop;
}
// NIM uses vLLM internally — same tool-call-parser limitation
// applies to /v1/responses. Force chat completions.
if (preferredInferenceApi !== "openai-completions") {
console.log(" ℹ Using chat completions API (tool-call-parser requires /v1/chat/completions)");
}
preferredInferenceApi = "openai-completions";
}
}
break;
Expand Down Expand Up @@ -2275,6 +2281,13 @@ async function setupNim(gpu) {
if (!preferredInferenceApi) {
continue selectionLoop;
}
// Force chat completions — vLLM's /v1/responses endpoint does not
// run the --tool-call-parser, so tool calls arrive as raw text.
// See: https://github.com/NVIDIA/NemoClaw/issues/976
if (preferredInferenceApi !== "openai-completions") {
console.log(" ℹ Using chat completions API (tool-call-parser requires /v1/chat/completions)");
}
preferredInferenceApi = "openai-completions";
break;
}
}
Expand Down
99 changes: 99 additions & 0 deletions test/onboard-selection.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1304,4 +1304,103 @@ const { setupNim } = require(${onboardPath});
assert.ok(payload.lines.some((line) => line.includes("Please choose a provider/model again")));
assert.equal(payload.messages.filter((message) => /Choose \[/.test(message)).length, 2);
});

// Regression test for the vLLM inference-API override: even when the
// /v1/responses probe succeeds (HTTP 200), onboarding must still force
// preferredInferenceApi to "openai-completions", because vLLM's
// /v1/responses path does not run --tool-call-parser and tool calls
// would arrive as raw text.
it("forces openai-completions for vLLM even when probe detects openai-responses", () => {
const repoRoot = path.join(import.meta.dirname, "..");
// Sandbox: fresh HOME, and a bin dir prepended to PATH so the fake
// `curl` below shadows the real one for the spawned child process.
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-vllm-override-"));
const fakeBin = path.join(tmpDir, "bin");
const scriptPath = path.join(tmpDir, "vllm-override-check.js");
// JSON.stringify turns each absolute path into a quoted string literal
// that can be interpolated directly into the generated child script.
const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
const credentialsPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "credentials.js"));
const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "runner.js"));

fs.mkdirSync(fakeBin, { recursive: true });
// Fake curl: /v1/responses returns 200 (so probe detects openai-responses),
// /v1/models returns a vLLM model list
// (It mimics `curl -o <file> <url>`: writes the body to the -o target and
// prints the HTTP status code on stdout.)
fs.writeFileSync(
path.join(fakeBin, "curl"),
`#!/usr/bin/env bash
body=''
status="200"
outfile=""
url=""
while [ "$#" -gt 0 ]; do
case "$1" in
-o) outfile="$2"; shift 2 ;;
*) url="$1"; shift ;;
esac
done
if echo "$url" | grep -q '/v1/models'; then
body='{"data":[{"id":"meta-llama/Llama-3.3-70B-Instruct"}]}'
elif echo "$url" | grep -q '/v1/responses'; then
body='{"id":"resp_123","output":[{"type":"message","content":[{"type":"output_text","text":"ok"}]}]}'
elif echo "$url" | grep -q '/v1/chat/completions'; then
body='{"id":"chatcmpl-123","choices":[{"message":{"content":"ok"}}]}'
fi
printf '%s' "$body" > "$outfile"
printf '%s' "$status"
`,
{ mode: 0o755 },
);

// vLLM is option 7 (build, openai, custom, anthropic, anthropicCompatible, gemini, vllm)
// The child script stubs prompt/ensureApiKey/runCapture before requiring
// onboard.js, runs setupNim, and prints { result, messages, lines } as JSON.
const script = String.raw`
const credentials = require(${credentialsPath});
const runner = require(${runnerPath});

const answers = ["7"];
const messages = [];

credentials.prompt = async (message) => {
messages.push(message);
return answers.shift() || "";
};
credentials.ensureApiKey = async () => {};
runner.runCapture = (command) => {
if (command.includes("command -v ollama")) return "";
if (command.includes("localhost:11434")) return "";
if (command.includes("localhost:8000/v1/models")) return JSON.stringify({ data: [{ id: "meta-llama/Llama-3.3-70B-Instruct" }] });
return "";
};

const { setupNim } = require(${onboardPath});

(async () => {
const originalLog = console.log;
const lines = [];
console.log = (...args) => lines.push(args.join(" "));
try {
const result = await setupNim(null);
originalLog(JSON.stringify({ result, messages, lines }));
} finally {
console.log = originalLog;
}
})().catch((error) => {
console.error(error);
process.exit(1);
});
`;
fs.writeFileSync(scriptPath, script);

// Run the child under node with the sandboxed HOME/PATH.
// NOTE(review): NEMOCLAW_EXPERIMENTAL=1 presumably gates the vLLM
// provider option in the onboarding menu — confirm against onboard.js.
const result = spawnSync(process.execPath, [scriptPath], {
cwd: repoRoot,
encoding: "utf-8",
env: {
...process.env,
HOME: tmpDir,
PATH: `${fakeBin}:${process.env.PATH || ""}`,
NEMOCLAW_EXPERIMENTAL: "1",
},
});

// The child must exit 0 and emit exactly one JSON payload on stdout.
assert.equal(result.status, 0, result.stderr);
const payload = JSON.parse(result.stdout.trim());
assert.equal(payload.result.provider, "vllm-local");
assert.equal(payload.result.model, "meta-llama/Llama-3.3-70B-Instruct");
// Key assertion: even though probe detected openai-responses, the override
// forces openai-completions so tool-call-parser works correctly.
assert.equal(payload.result.preferredInferenceApi, "openai-completions");
assert.ok(payload.lines.some((line) => line.includes("Using existing vLLM")));
// The override must also surface the explanatory notice to the user.
assert.ok(payload.lines.some((line) => line.includes("tool-call-parser requires")));
});
});
Loading