diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index fa5150a980..4d2290ea80 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -2180,6 +2180,12 @@ async function setupNim(gpu) {
           if (!preferredInferenceApi) {
             continue selectionLoop;
           }
+          // NIM serves models through vLLM, whose /v1/responses path skips the
+          // tool-call parser. Always use chat completions; log when overriding.
+          if (preferredInferenceApi !== "openai-completions") {
+            console.log(" ℹ Using chat completions API (tool-call-parser requires /v1/chat/completions)");
+          }
+          preferredInferenceApi = "openai-completions";
         }
       }
       break;
@@ -2296,6 +2302,13 @@ async function setupNim(gpu) {
       if (!preferredInferenceApi) {
         continue selectionLoop;
       }
+      // Force chat completions: vLLM's /v1/responses endpoint does not run the
+      // --tool-call-parser, so tool calls would arrive as raw text instead.
+      // See: https://github.com/NVIDIA/NemoClaw/issues/976
+      if (preferredInferenceApi !== "openai-completions") {
+        console.log(" ℹ Using chat completions API (tool-call-parser requires /v1/chat/completions)");
+      }
+      preferredInferenceApi = "openai-completions";
       break;
     }
   }
diff --git a/test/onboard-selection.test.js b/test/onboard-selection.test.js
index 6cf6e436af..929c7ea2a5 100644
--- a/test/onboard-selection.test.js
+++ b/test/onboard-selection.test.js
@@ -1303,4 +1303,211 @@ const { setupNim } = require(${onboardPath});
     assert.ok(payload.lines.some((line) => line.includes("Please choose a provider/model again")));
     assert.equal(payload.messages.filter((message) => /Choose \[/.test(message)).length, 2);
   });
+
+  it("forces openai-completions for vLLM even when probe detects openai-responses", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-vllm-override-"));
+    const fakeBin = path.join(tmpDir, "bin");
+    const scriptPath = path.join(tmpDir, "vllm-override-check.js");
+    const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
+    const credentialsPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "credentials.js"));
+    const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "runner.js"));
+
+    fs.mkdirSync(fakeBin, { recursive: true });
+    // Fake curl placed first on PATH: always reports HTTP 200 and writes a canned body
+    // picked by URL, so the /v1/responses probe succeeds and /v1/models lists one vLLM model.
+    fs.writeFileSync(
+      path.join(fakeBin, "curl"),
+      `#!/usr/bin/env bash
+body=''
+status="200"
+outfile=""
+url=""
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    -o) outfile="$2"; shift 2 ;;
+    *) url="$1"; shift ;;
+  esac
+done
+if echo "$url" | grep -q '/v1/models'; then
+  body='{"data":[{"id":"meta-llama/Llama-3.3-70B-Instruct"}]}'
+elif echo "$url" | grep -q '/v1/responses'; then
+  body='{"id":"resp_123","output":[{"type":"message","content":[{"type":"output_text","text":"ok"}]}]}'
+elif echo "$url" | grep -q '/v1/chat/completions'; then
+  body='{"id":"chatcmpl-123","choices":[{"message":{"content":"ok"}}]}'
+fi
+printf '%s' "$body" > "$outfile"
+printf '%s' "$status"
+`,
+      { mode: 0o755 },
+    );
+
+    // Answer "7" picks vLLM (menu order: build, openai, custom, anthropic, anthropicCompatible, gemini, vllm)
+    const script = String.raw`
+const credentials = require(${credentialsPath});
+const runner = require(${runnerPath});
+
+const answers = ["7"];
+const messages = [];
+
+credentials.prompt = async (message) => {
+  messages.push(message);
+  return answers.shift() || "";
+};
+credentials.ensureApiKey = async () => {};
+runner.runCapture = (command) => {
+  if (command.includes("command -v ollama")) return "";
+  if (command.includes("localhost:11434")) return "";
+  if (command.includes("localhost:8000/v1/models")) return JSON.stringify({ data: [{ id: "meta-llama/Llama-3.3-70B-Instruct" }] });
+  return "";
+};
+
+const { setupNim } = require(${onboardPath});
+
+(async () => {
+  const originalLog = console.log;
+  const lines = [];
+  console.log = (...args) => lines.push(args.join(" "));
+  try {
+    const result = await setupNim(null);
+    originalLog(JSON.stringify({ result, messages, lines }));
+  } finally {
+    console.log = originalLog;
+  }
+})().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
+`;
+    fs.writeFileSync(scriptPath, script);
+
+    const result = spawnSync(process.execPath, [scriptPath], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        HOME: tmpDir,
+        PATH: `${fakeBin}:${process.env.PATH || ""}`,
+        NEMOCLAW_EXPERIMENTAL: "1",
+      },
+    });
+
+    assert.equal(result.status, 0, result.stderr);
+    const payload = JSON.parse(result.stdout.trim());
+    assert.equal(payload.result.provider, "vllm-local");
+    assert.equal(payload.result.model, "meta-llama/Llama-3.3-70B-Instruct");
+    // Key assertion: the fake curl makes the /v1/responses probe succeed, yet the
+    // override must still report openai-completions so the tool-call parser is applied.
+    assert.equal(payload.result.preferredInferenceApi, "openai-completions");
+    assert.ok(payload.lines.some((line) => line.includes("Using existing vLLM")));
+    assert.ok(payload.lines.some((line) => line.includes("tool-call-parser requires")));
+  });
+
+  it("forces openai-completions for NIM-local even when probe detects openai-responses", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-nim-override-"));
+    const fakeBin = path.join(tmpDir, "bin");
+    const scriptPath = path.join(tmpDir, "nim-override-check.js");
+    const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
+    const credentialsPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "credentials.js"));
+    const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "runner.js"));
+    const nimPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "nim.js"));
+
+    fs.mkdirSync(fakeBin, { recursive: true });
+    // Fake curl placed first on PATH: always reports HTTP 200, so the /v1/responses probe succeeds
+    fs.writeFileSync(
+      path.join(fakeBin, "curl"),
+      `#!/usr/bin/env bash
+body=''
+status="200"
+outfile=""
+url=""
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    -o) outfile="$2"; shift 2 ;;
+    *) url="$1"; shift ;;
+  esac
+done
+if echo "$url" | grep -q '/v1/models'; then
+  body='{"data":[{"id":"nvidia/nemotron-3-nano"}]}'
+elif echo "$url" | grep -q '/v1/responses'; then
+  body='{"id":"resp_123","output":[{"type":"message","content":[{"type":"output_text","text":"ok"}]}]}'
+elif echo "$url" | grep -q '/v1/chat/completions'; then
+  body='{"id":"chatcmpl-123","choices":[{"message":{"content":"ok"}}]}'
+fi
+printf '%s' "$body" > "$outfile"
+printf '%s' "$status"
+`,
+      { mode: 0o755 },
+    );
+
+    // Answer "7" picks NIM-local (menu order: build, openai, custom, anthropic, anthropicCompatible, gemini, nim-local)
+    // The Ollama and vLLM detection commands are stubbed to return nothing, leaving NIM-local as the only experimental option
+    const script = String.raw`
+const credentials = require(${credentialsPath});
+const runner = require(${runnerPath});
+
+// Mock nim module before onboard.js requires it
+const nimMod = require(${nimPath});
+nimMod.listModels = () => [{ name: "nvidia/nemotron-3-nano", image: "fake", minGpuMemoryMB: 8000 }];
+nimMod.pullNimImage = () => {};
+nimMod.containerName = () => "nemoclaw-nim-test";
+nimMod.startNimContainerByName = () => "container-123";
+nimMod.waitForNimHealth = () => true;
+
+// Select option 7 (nim-local), then model 1
+const answers = ["7", "1"];
+const messages = [];
+
+credentials.prompt = async (message) => {
+  messages.push(message);
+  return answers.shift() || "";
+};
+credentials.ensureApiKey = async () => {};
+runner.runCapture = (command) => {
+  if (command.includes("command -v ollama")) return "";
+  if (command.includes("localhost:11434")) return "";
+  if (command.includes("localhost:8000/v1/models")) return "";
+  return "";
+};
+
+const { setupNim } = require(${onboardPath});
+
+(async () => {
+  const originalLog = console.log;
+  const lines = [];
+  console.log = (...args) => lines.push(args.join(" "));
+  try {
+    // Pass a GPU object with nimCapable: true
+    const result = await setupNim({ type: "nvidia", totalMemoryMB: 16000, nimCapable: true });
+    originalLog(JSON.stringify({ result, messages, lines }));
+  } finally {
+    console.log = originalLog;
+  }
+})().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
+`;
+    fs.writeFileSync(scriptPath, script);
+
+    const result = spawnSync(process.execPath, [scriptPath], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        HOME: tmpDir,
+        PATH: `${fakeBin}:${process.env.PATH || ""}`,
+        NEMOCLAW_EXPERIMENTAL: "1",
+      },
+    });
+
+    assert.equal(result.status, 0, result.stderr);
+    const payload = JSON.parse(result.stdout.trim());
+    assert.equal(payload.result.provider, "vllm-local");
+    assert.equal(payload.result.model, "nvidia/nemotron-3-nano");
+    // Key assertion: NIM uses vLLM internally, so the same override must apply here.
+    assert.equal(payload.result.preferredInferenceApi, "openai-completions");
+    assert.ok(payload.lines.some((line) => line.includes("tool-call-parser requires")));
+  });
 });