Skip to content

Commit 0c728f4

Browse files
kjw3temrjan
authored and committed
feat: expand provider onboarding and validation (NVIDIA#648)
* feat: expand provider onboarding and validation
* test: raise config coverage after provider rebase
* test: finalize provider branch CI fixes
* fix: harden onboarding prompt and probe handling
* fix: restore credentials execsync and trim exports
* fix: prefer ollama tags api for installed models
1 parent ec164ac commit 0c728f4

32 files changed

Lines changed: 3951 additions & 561 deletions

.dockerignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
node_modules
22
/dist
3-
!nemoclaw/dist
43
.git
54
*.pyc
65
__pycache__

Dockerfile

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,12 @@ RUN chmod +x /usr/local/bin/nemoclaw-start
8080
# Build args for config that varies per deployment.
8181
# nemoclaw onboard passes these at image build time.
8282
ARG NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b
83+
ARG NEMOCLAW_PROVIDER_KEY=nvidia
84+
ARG NEMOCLAW_PRIMARY_MODEL_REF=nvidia/nemotron-3-super-120b-a12b
8385
ARG CHAT_UI_URL=http://127.0.0.1:18789
86+
ARG NEMOCLAW_INFERENCE_BASE_URL=https://inference.local/v1
87+
ARG NEMOCLAW_INFERENCE_API=openai-completions
88+
ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=
8489
# Unique per build to ensure each image gets a fresh auth token.
8590
# Pass --build-arg NEMOCLAW_BUILD_ID=$(date +%s) to bust the cache.
8691
ARG NEMOCLAW_BUILD_ID=default
@@ -89,7 +94,12 @@ ARG NEMOCLAW_BUILD_ID=default
8994
# via os.environ, never via string interpolation into Python source code.
9095
# Direct ARG interpolation into python3 -c is a code injection vector (C-2).
9196
ENV NEMOCLAW_MODEL=${NEMOCLAW_MODEL} \
92-
CHAT_UI_URL=${CHAT_UI_URL}
97+
NEMOCLAW_PROVIDER_KEY=${NEMOCLAW_PROVIDER_KEY} \
98+
NEMOCLAW_PRIMARY_MODEL_REF=${NEMOCLAW_PRIMARY_MODEL_REF} \
99+
CHAT_UI_URL=${CHAT_UI_URL} \
100+
NEMOCLAW_INFERENCE_BASE_URL=${NEMOCLAW_INFERENCE_BASE_URL} \
101+
NEMOCLAW_INFERENCE_API=${NEMOCLAW_INFERENCE_API} \
102+
NEMOCLAW_INFERENCE_COMPAT_B64=${NEMOCLAW_INFERENCE_COMPAT_B64}
93103

94104
WORKDIR /sandbox
95105
USER sandbox
@@ -100,30 +110,30 @@ USER sandbox
100110
# Build args (NEMOCLAW_MODEL, CHAT_UI_URL) customize per deployment.
101111
# Auth token is generated per build so each image has a unique token.
102112
RUN python3 -c "\
103-
import json, os, secrets; \
113+
import base64, json, os, secrets; \
104114
from urllib.parse import urlparse; \
105115
model = os.environ['NEMOCLAW_MODEL']; \
106116
chat_ui_url = os.environ['CHAT_UI_URL']; \
117+
provider_key = os.environ['NEMOCLAW_PROVIDER_KEY']; \
118+
primary_model_ref = os.environ['NEMOCLAW_PRIMARY_MODEL_REF']; \
119+
inference_base_url = os.environ['NEMOCLAW_INFERENCE_BASE_URL']; \
120+
inference_api = os.environ['NEMOCLAW_INFERENCE_API']; \
121+
inference_compat = json.loads(base64.b64decode(os.environ['NEMOCLAW_INFERENCE_COMPAT_B64']).decode('utf-8')); \
107122
parsed = urlparse(chat_ui_url); \
108123
chat_origin = f'{parsed.scheme}://{parsed.netloc}' if parsed.scheme and parsed.netloc else 'http://127.0.0.1:18789'; \
109124
origins = ['http://127.0.0.1:18789']; \
110125
origins = list(dict.fromkeys(origins + [chat_origin])); \
126+
providers = { \
127+
provider_key: { \
128+
'baseUrl': inference_base_url, \
129+
'apiKey': 'unused', \
130+
'api': inference_api, \
131+
'models': [{**({'compat': inference_compat} if inference_compat else {}), 'id': model, 'name': primary_model_ref, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
132+
} \
133+
}; \
111134
config = { \
112-
'agents': {'defaults': {'model': {'primary': f'inference/{model}'}}}, \
113-
'models': {'mode': 'merge', 'providers': { \
114-
'nvidia': { \
115-
'baseUrl': 'https://inference.local/v1', \
116-
'apiKey': 'openshell-managed', \
117-
'api': 'openai-completions', \
118-
'models': [{'id': model.split('/')[-1], 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
119-
}, \
120-
'inference': { \
121-
'baseUrl': 'https://inference.local/v1', \
122-
'apiKey': 'unused', \
123-
'api': 'openai-completions', \
124-
'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
125-
} \
126-
}}, \
135+
'agents': {'defaults': {'model': {'primary': primary_model_ref}}}, \
136+
'models': {'mode': 'merge', 'providers': providers}, \
127137
'channels': {'defaults': {'configWrites': False}}, \
128138
'gateway': { \
129139
'mode': 'local', \

README.md

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ When the install completes, a summary confirms the running environment:
8686
```text
8787
──────────────────────────────────────────────────
8888
Sandbox my-assistant (Landlock + seccomp + netns)
89-
Model nvidia/nemotron-3-super-120b-a12b (NVIDIA Endpoint API)
89+
Model nvidia/nemotron-3-super-120b-a12b (NVIDIA Endpoints)
9090
──────────────────────────────────────────────────
9191
Run: nemoclaw my-assistant connect
9292
Status: nemoclaw my-assistant status
@@ -162,14 +162,14 @@ curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/refs/heads/main/uni
162162

163163
## How It Works
164164

165-
NemoClaw installs the NVIDIA OpenShell runtime and Nemotron models, then uses a versioned blueprint to create a sandboxed environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy.
165+
NemoClaw installs the NVIDIA OpenShell runtime, then creates a sandboxed OpenClaw environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy.
166166

167167
| Component | Role |
168168
|------------------|-------------------------------------------------------------------------------------------|
169169
| **Plugin** | TypeScript CLI commands for launch, connect, status, and logs. |
170170
| **Blueprint** | Versioned Python artifact that orchestrates sandbox creation, policy, and inference setup. |
171171
| **Sandbox** | Isolated OpenShell container running OpenClaw with policy-enforced egress and filesystem. |
172-
| **Inference** | NVIDIA Endpoint model calls, routed through the OpenShell gateway, transparent to the agent. |
172+
| **Inference** | Provider-routed model calls, routed through the OpenShell gateway, transparent to the agent. |
173173

174174
The blueprint lifecycle follows four stages: resolve the artifact, verify its digest, plan the resources, and apply through the OpenShell CLI.
175175

@@ -179,15 +179,28 @@ When something goes wrong, errors may originate from either NemoClaw or the Open
179179

180180
## Inference
181181

182-
Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the NVIDIA Endpoint provider.
182+
Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the provider you selected during onboarding.
183183

184-
| Provider | Model | Use Case |
185-
|--------------|--------------------------------------|-------------------------------------------------|
186-
| NVIDIA Endpoint | `nvidia/nemotron-3-super-120b-a12b` | Production. Requires an NVIDIA API key. |
184+
Supported non-experimental onboarding paths:
187185

188-
Get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.
186+
| Provider | Notes |
187+
|---|---|
188+
| NVIDIA Endpoints | Curated hosted models on `integrate.api.nvidia.com`. |
189+
| OpenAI | Curated GPT models plus `Other...` for manual model entry. |
190+
| Other OpenAI-compatible endpoint | For proxies and compatible gateways. |
191+
| Anthropic | Curated Claude models plus `Other...` for manual model entry. |
192+
| Other Anthropic-compatible endpoint | For Claude proxies and compatible gateways. |
193+
| Google Gemini | Google's OpenAI-compatible endpoint. |
189194

190-
Local inference options such as Ollama and vLLM are still experimental. On macOS, they also depend on OpenShell host-routing support in addition to the local service itself being reachable on the host.
195+
During onboarding, NemoClaw validates the selected provider and model before it creates the sandbox:
196+
197+
- OpenAI-compatible providers: tries `/responses` first, then `/chat/completions`
198+
- Anthropic-compatible providers: tries `/v1/messages`
199+
- If validation fails, the wizard prompts you to fix the selection before continuing
200+
201+
Credentials stay on the host in `~/.nemoclaw/credentials.json`. The sandbox only sees the routed `inference.local` endpoint, not your raw provider key.
202+
203+
Local Ollama is supported in the standard onboarding flow. Local vLLM remains experimental, and local host-routed inference on macOS still depends on OpenShell host-routing support in addition to the local service itself being reachable on the host.
191204

192205
---
193206

@@ -252,7 +265,7 @@ Refer to the documentation for more information on NemoClaw.
252265
- [Overview](https://docs.nvidia.com/nemoclaw/latest/about/overview.html): Learn what NemoClaw does and how it fits together.
253266
- [How It Works](https://docs.nvidia.com/nemoclaw/latest/about/how-it-works.html): Learn about the plugin, blueprint, and sandbox lifecycle.
254267
- [Architecture](https://docs.nvidia.com/nemoclaw/latest/reference/architecture.html): Learn about the plugin structure, blueprint lifecycle, and sandbox environment.
255-
- [Inference Profiles](https://docs.nvidia.com/nemoclaw/latest/reference/inference-profiles.html): Learn about the NVIDIA Endpoint inference configuration.
268+
- [Inference Profiles](https://docs.nvidia.com/nemoclaw/latest/reference/inference-profiles.html): Learn how NemoClaw configures routed inference providers.
256269
- [Network Policies](https://docs.nvidia.com/nemoclaw/latest/reference/network-policies.html): Learn about egress control and policy customization.
257270
- [CLI Commands](https://docs.nvidia.com/nemoclaw/latest/reference/commands.html): Learn about the full command reference.
258271
- [Troubleshooting](https://docs.nvidia.com/nemoclaw/latest/reference/troubleshooting.html): Troubleshoot common issues and resolution steps.

bin/lib/credentials.js

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,98 @@ function getCredential(key) {
3131
return creds[key] || null;
3232
}
3333

34-
function prompt(question) {
35-
return new Promise((resolve) => {
34+
function promptSecret(question) {
35+
return new Promise((resolve, reject) => {
36+
const input = process.stdin;
37+
const output = process.stderr;
38+
let answer = "";
39+
let rawModeEnabled = false;
40+
let finished = false;
41+
42+
function cleanup() {
43+
input.removeListener("data", onData);
44+
if (rawModeEnabled && typeof input.setRawMode === "function") {
45+
input.setRawMode(false);
46+
}
47+
if (typeof input.pause === "function") {
48+
input.pause();
49+
}
50+
}
51+
52+
function finish(fn, value) {
53+
if (finished) return;
54+
finished = true;
55+
cleanup();
56+
output.write("\n");
57+
fn(value);
58+
}
59+
60+
function onData(chunk) {
61+
const text = chunk.toString("utf8");
62+
for (let i = 0; i < text.length; i += 1) {
63+
const ch = text[i];
64+
65+
if (ch === "\u0003") {
66+
finish(reject, Object.assign(new Error("Prompt interrupted"), { code: "SIGINT" }));
67+
return;
68+
}
69+
70+
if (ch === "\r" || ch === "\n") {
71+
finish(resolve, answer.trim());
72+
return;
73+
}
74+
75+
if (ch === "\u0008" || ch === "\u007f") {
76+
answer = answer.slice(0, -1);
77+
continue;
78+
}
79+
80+
if (ch === "\u001b") {
81+
// Ignore terminal escape/control sequences such as Delete, arrows,
82+
// Home/End, etc. while leaving the buffered secret untouched.
83+
const rest = text.slice(i);
84+
const match = rest.match(/^\u001b(?:\[[0-9;?]*[~A-Za-z]|\][^\u0007]*\u0007|.)/);
85+
if (match) {
86+
i += match[0].length - 1;
87+
}
88+
continue;
89+
}
90+
91+
if (ch >= " ") {
92+
answer += ch;
93+
}
94+
}
95+
}
96+
97+
output.write(question);
98+
input.setEncoding("utf8");
99+
if (typeof input.resume === "function") {
100+
input.resume();
101+
}
102+
if (typeof input.setRawMode === "function") {
103+
input.setRawMode(true);
104+
rawModeEnabled = true;
105+
}
106+
input.on("data", onData);
107+
});
108+
}
109+
110+
function prompt(question, opts = {}) {
111+
return new Promise((resolve, reject) => {
112+
const silent = opts.secret === true && process.stdin.isTTY && process.stderr.isTTY;
113+
if (silent) {
114+
promptSecret(question)
115+
.then(resolve)
116+
.catch((err) => {
117+
if (err && err.code === "SIGINT") {
118+
reject(err);
119+
process.kill(process.pid, "SIGINT");
120+
return;
121+
}
122+
reject(err);
123+
});
124+
return;
125+
}
36126
const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
37127
rl.question(question, (answer) => {
38128
rl.close();
@@ -67,7 +157,7 @@ async function ensureApiKey() {
67157
console.log(" └─────────────────────────────────────────────────────────────────┘");
68158
console.log("");
69159

70-
key = await prompt(" NVIDIA API Key: ");
160+
key = await prompt(" NVIDIA API Key: ", { secret: true });
71161

72162
if (!key || !key.startsWith("nvapi-")) {
73163
console.error(" Invalid key. Must start with nvapi-");
@@ -114,7 +204,7 @@ async function ensureGithubToken() {
114204
console.log(" └──────────────────────────────────────────────────┘");
115205
console.log("");
116206

117-
token = await prompt(" GitHub Token: ");
207+
token = await prompt(" GitHub Token: ", { secret: true });
118208

119209
if (!token) {
120210
console.error(" Token required for deploy (repo is private).");

bin/lib/inference-config.js

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ const { DEFAULT_OLLAMA_MODEL } = require("./local-inference");
1818

1919
function getProviderSelectionConfig(provider, model) {
2020
switch (provider) {
21+
case "nvidia-prod":
2122
case "nvidia-nim":
2223
return {
2324
endpointType: "custom",
@@ -27,7 +28,62 @@ function getProviderSelectionConfig(provider, model) {
2728
profile: DEFAULT_ROUTE_PROFILE,
2829
credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
2930
provider,
30-
providerLabel: "NVIDIA Endpoint API",
31+
providerLabel: "NVIDIA Endpoints",
32+
};
33+
case "openai-api":
34+
return {
35+
endpointType: "custom",
36+
endpointUrl: INFERENCE_ROUTE_URL,
37+
ncpPartner: null,
38+
model: model || "gpt-5.4",
39+
profile: DEFAULT_ROUTE_PROFILE,
40+
credentialEnv: "OPENAI_API_KEY",
41+
provider,
42+
providerLabel: "OpenAI",
43+
};
44+
case "anthropic-prod":
45+
return {
46+
endpointType: "custom",
47+
endpointUrl: INFERENCE_ROUTE_URL,
48+
ncpPartner: null,
49+
model: model || "claude-sonnet-4-6",
50+
profile: DEFAULT_ROUTE_PROFILE,
51+
credentialEnv: "ANTHROPIC_API_KEY",
52+
provider,
53+
providerLabel: "Anthropic",
54+
};
55+
case "compatible-anthropic-endpoint":
56+
return {
57+
endpointType: "custom",
58+
endpointUrl: INFERENCE_ROUTE_URL,
59+
ncpPartner: null,
60+
model: model || "custom-anthropic-model",
61+
profile: DEFAULT_ROUTE_PROFILE,
62+
credentialEnv: "COMPATIBLE_ANTHROPIC_API_KEY",
63+
provider,
64+
providerLabel: "Other Anthropic-compatible endpoint",
65+
};
66+
case "gemini-api":
67+
return {
68+
endpointType: "custom",
69+
endpointUrl: INFERENCE_ROUTE_URL,
70+
ncpPartner: null,
71+
model: model || "gemini-2.5-flash",
72+
profile: DEFAULT_ROUTE_PROFILE,
73+
credentialEnv: "GEMINI_API_KEY",
74+
provider,
75+
providerLabel: "Google Gemini",
76+
};
77+
case "compatible-endpoint":
78+
return {
79+
endpointType: "custom",
80+
endpointUrl: INFERENCE_ROUTE_URL,
81+
ncpPartner: null,
82+
model: model || "custom-model",
83+
profile: DEFAULT_ROUTE_PROFILE,
84+
credentialEnv: "COMPATIBLE_API_KEY",
85+
provider,
86+
providerLabel: "Other OpenAI-compatible endpoint",
3187
};
3288
case "vllm-local":
3389
return {

0 commit comments

Comments (0)