From 8b4a86609d41f05dfbbcf862e95847c534edb48c Mon Sep 17 00:00:00 2001 From: "Fred K. Schott" <622227+FredKSchott@users.noreply.github.com> Date: Thu, 14 May 2026 23:59:30 -0700 Subject: [PATCH 1/3] feat(runtime): let sandbox connectors author their built-in tool list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an optional `tools()` method to `SandboxFactory`. When a connector implements it, the framework uses its return value verbatim as the model-facing built-in tool list (replacing the default six: read/write/edit/bash/grep/glob) and appends `task` on top. Connectors without `tools()` fall through to the existing default path — no behaviour change for any connector that ships today. This unblocks the cf-shell sandbox adoption, where the natural surface is a codemode `code` tool over `state.*` (not bash). Generalizes for future connectors with native primitives that make the defaults wasteful or wrong. Plumbed via a new `SessionToolFactory` carried alongside `SessionEnv` from `resolveSessionEnv` → `Harness` → `Session`. The custom-tool validator now derives its reserved-names set from the resolved built-in list (rather than the static `BUILTIN_TOOL_NAMES`) so a user on a cf-shell-style connector can still register a custom `bash` without a spurious collision, while connector-supplied names remain protected. Builtins are now built once per prompt/skill/task and the validator reads from that result — connector factories are invoked exactly once per call, not twice. Verified end-to-end against examples/hello-world (hello + with-tools agents): bash/read/edit/task/custom-tool paths all behave identically to main. check:types clean in packages/runtime and packages/cli. 
--- packages/runtime/src/agent.ts | 14 +++- packages/runtime/src/client.ts | 29 ++++++-- packages/runtime/src/harness.ts | 8 ++ packages/runtime/src/index.ts | 2 + packages/runtime/src/session.ts | 128 +++++++++++++++++++++++++++----- packages/runtime/src/types.ts | 44 ++++++++++- 6 files changed, 197 insertions(+), 28 deletions(-) diff --git a/packages/runtime/src/agent.ts b/packages/runtime/src/agent.ts index aa87b17e..4fc9811a 100644 --- a/packages/runtime/src/agent.ts +++ b/packages/runtime/src/agent.ts @@ -8,6 +8,13 @@ const MAX_GREP_MATCHES = 100; const MAX_GREP_LINE_LENGTH = 500; const MAX_GLOB_RESULTS = 1000; +/** + * Names of the framework's default built-in tools (the six produced by + * `createTools` plus the framework-owned `task`). Used by the validator when + * a session uses the default tool path. Sessions whose sandbox connector + * implements `tools()` derive their reserved set from the connector's actual + * tool names plus `task` instead — these are not universally reserved. + */ export const BUILTIN_TOOL_NAMES = new Set([ 'read', 'write', @@ -289,7 +296,12 @@ const TaskParams = Type.Object({ ), }); -function createTaskTool( +/** + * Build the canonical `task` tool. The framework appends this on top of + * whatever the connector's `tools()` returns; standalone callers should reach + * for {@link createTools} instead. 
+ */ +export function createTaskTool( runTask: ( params: TaskToolParams, signal?: AbortSignal, diff --git a/packages/runtime/src/client.ts b/packages/runtime/src/client.ts index 5ed38baf..892e96c6 100644 --- a/packages/runtime/src/client.ts +++ b/packages/runtime/src/client.ts @@ -14,6 +14,7 @@ import type { SandboxFactory, SessionEnv, SessionStore, + SessionToolFactory, } from './types.ts'; export interface FlueContextConfig { @@ -128,7 +129,12 @@ export function createFlueContext(config: FlueContextConfig): FlueContextInterna try { assertRoleExists(config.agentConfig.roles, options.role); const sandbox = options.sandbox; - const baseEnv = await resolveSessionEnv(config.id, sandbox, config, options.cwd); + const { env: baseEnv, toolFactory } = await resolveSessionEnv( + config.id, + sandbox, + config, + options.cwd, + ); // Resolve `init({ cwd })` against the sandbox's own cwd so that // relative paths target the sandbox/session filesystem, not the // agent process cwd or `/`. Mirrors the same pattern used for @@ -163,6 +169,7 @@ export function createFlueContext(config: FlueContextConfig): FlueContextInterna emitEvent(event); }, options.tools, + toolFactory, ); } catch (error) { initializedHarnessNames.delete(name); @@ -220,15 +227,22 @@ function isSandboxFactory(value: unknown): value is SandboxFactory { ); } -/** Resolve sandbox option to SessionEnv: default → BashFactory → platform hook → SandboxFactory. */ +/** + * Resolve sandbox option to a `SessionEnv` plus an optional connector-supplied + * `toolFactory`. Resolution order: default → BashFactory → platform hook → + * SandboxFactory. Only `SandboxFactory` connectors may contribute a + * `toolFactory`; the default empty sandbox, bash factories, and the platform + * hook return `toolFactory: undefined`, which makes the framework fall back to + * its default six built-in tools. 
+ */ async function resolveSessionEnv( id: string, sandbox: AgentInit['sandbox'], config: FlueContextConfig, cwd: string | undefined, -): Promise { +): Promise<{ env: SessionEnv; toolFactory?: SessionToolFactory }> { if (sandbox === undefined || sandbox === false) { - return config.createDefaultEnv(); + return { env: await config.createDefaultEnv() }; } // JS-caller / `any`-input fallback for the removed `'empty'` and // `'local'` magic strings. TS callers get compile errors from the @@ -249,7 +263,7 @@ async function resolveSessionEnv( ); } if (isBashFactory(sandbox)) { - return bashFactoryToSessionEnv(sandbox); + return { env: await bashFactoryToSessionEnv(sandbox) }; } if (isBashLike(sandbox)) { throw new Error( @@ -259,10 +273,11 @@ async function resolveSessionEnv( } if (config.resolveSandbox) { const resolved = await config.resolveSandbox(sandbox); - if (resolved) return resolved; + if (resolved) return { env: resolved }; } if (isSandboxFactory(sandbox)) { - return sandbox.createSessionEnv({ id, cwd }); + const env = await sandbox.createSessionEnv({ id, cwd }); + return { env, toolFactory: sandbox.tools }; } throw new Error('[flue] Invalid sandbox option passed to init().'); } diff --git a/packages/runtime/src/harness.ts b/packages/runtime/src/harness.ts index 1a38e6e4..c7dd769b 100644 --- a/packages/runtime/src/harness.ts +++ b/packages/runtime/src/harness.ts @@ -15,6 +15,7 @@ import type { SessionEnv, SessionOptions, SessionStore, + SessionToolFactory, ShellOptions, ShellResult, ToolDef, @@ -43,6 +44,11 @@ export class Harness implements FlueHarness { private store: SessionStore, private eventCallback?: FlueEventCallback, private agentTools: ToolDef[] = [], + // Optional connector-supplied factory for the model-facing built-in + // tool list. When unset, sessions use the framework default six + // (read, write, edit, bash, grep, glob). The framework always + // appends `task` on top of whatever the connector returns. 
+ private toolFactory?: SessionToolFactory, ) { this.fs = createFlueFs(env); } @@ -109,6 +115,7 @@ export class Harness implements FlueHarness { existingData: data, onAgentEvent: this.decorateEventCallback(this.eventCallback), agentTools: this.agentTools, + toolFactory: this.toolFactory, sessionRole: options?.role, taskDepth: 0, createTaskSession: (taskOptions) => this.createTaskSession(taskOptions), @@ -174,6 +181,7 @@ export class Harness implements FlueHarness { existingData: data, onAgentEvent: eventCallback, agentTools: this.agentTools, + toolFactory: this.toolFactory, sessionRole: options.role, taskDepth: options.depth, createTaskSession: (childOptions) => this.createTaskSession(childOptions), diff --git a/packages/runtime/src/index.ts b/packages/runtime/src/index.ts index f766263d..2038294f 100644 --- a/packages/runtime/src/index.ts +++ b/packages/runtime/src/index.ts @@ -12,6 +12,8 @@ export type { SessionEnv, FileStat, SandboxFactory, + SessionToolFactory, + SessionToolFactoryOptions, BashFactory, BashLike, SessionOptions, diff --git a/packages/runtime/src/session.ts b/packages/runtime/src/session.ts index dd9fa780..95bceb73 100644 --- a/packages/runtime/src/session.ts +++ b/packages/runtime/src/session.ts @@ -12,7 +12,7 @@ import type { import type * as v from 'valibot'; import { abortErrorFor, createCallHandle } from './abort.ts'; import { - BUILTIN_TOOL_NAMES, + createTaskTool, createTools, formatBashResult, type TaskToolParams, @@ -67,6 +67,7 @@ import type { SessionEntry, SessionEnv, SessionStore, + SessionToolFactory, ShellOptions, ShellResult, SkillOptions, @@ -101,6 +102,14 @@ interface SessionInitOptions { existingData: SessionData | null; onAgentEvent?: FlueEventCallback; agentTools?: ToolDef[]; + /** + * Optional connector-supplied factory for the model-facing built-in tool + * list. When set, `createBuiltinTools` uses its return value verbatim + * (plus the framework-appended `task` tool) instead of the default six. 
+ * Connectors that omit `tools()` fall through to the default path, so + * adding this is a no-op for them. + */ + toolFactory?: SessionToolFactory; sessionRole?: string; taskDepth?: number; createTaskSession?: CreateTaskSession; @@ -408,6 +417,7 @@ export class Session implements FlueSession { private compactionAbortController: AbortController | undefined; private eventCallback: FlueEventCallback | undefined; private agentTools: ToolDef[]; + private toolFactory: SessionToolFactory | undefined; private deleted = false; private activeOperation: OperationKind | undefined; private activeOperationId: string | undefined; @@ -427,6 +437,7 @@ export class Session implements FlueSession { this.fs = createFlueFs(options.env); this.store = options.store; this.agentTools = options.agentTools ?? []; + this.toolFactory = options.toolFactory; this.sessionRole = options.sessionRole; this.taskDepth = options.taskDepth ?? 0; this.createTaskSession = options.createTaskSession; @@ -442,9 +453,16 @@ export class Session implements FlueSession { assertRoleExists(this.config.roles, this.config.role); assertRoleExists(this.config.roles, this.sessionRole); + // Build the built-in tools first so the custom-tool validator can + // reject collisions against the connector's actual tool names rather + // than re-invoking the connector's `tools()` factory just to read + // them. The connector factory is allowed to do real work (e.g. + // cf-shell calling into codemode to build state.* providers); calling + // it once per Session-open / per prompt is the budget. 
+ const builtinTools = this.createBuiltinTools(this.env, []); const tools = [ - ...this.createBuiltinTools(this.env, []), - ...this.createCustomTools(this.agentTools), + ...builtinTools, + ...this.createCustomTools(this.agentTools, builtinTools), ]; const previousMessages = this.history.buildContext(); @@ -841,8 +859,11 @@ export class Session implements FlueSession { // ─── Custom Tools ─────────────────────────────────────────────────────── - private createCustomTools(tools: ToolDef[]): AgentTool[] { - this.validateCustomToolNames(tools); + private createCustomTools( + tools: ToolDef[], + builtinTools: AgentTool[], + ): AgentTool[] { + this.validateCustomToolNames(tools, builtinTools); return tools.map( (toolDef): AgentTool => ({ @@ -862,13 +883,31 @@ export class Session implements FlueSession { ); } - private validateCustomToolNames(tools: ToolDef[]): void { + /** + * Reject user-supplied tools that collide with the framework-reserved + * `task` name, with any of the already-built built-in tool names, or + * with each other. + * + * The reserved-names set is derived from the resolved `builtinTools` list + * — not a static constant — so connectors that author their own tool list + * (cf-shell etc.) reserve only the names they actually expose, while the + * default tool path keeps reserving all six framework defaults. + */ + private validateCustomToolNames( + tools: ToolDef[], + builtinTools: AgentTool[], + ): void { + const reserved = new Set(builtinTools.map((t) => t.name)); + // `task` is always appended downstream; reserved even if the connector + // chose not to include it (which it shouldn't — `validateConnectorTools` + // also rejects connector-supplied `task`). + reserved.add('task'); const names = new Set(); for (const toolDef of tools) { - if (BUILTIN_TOOL_NAMES.has(toolDef.name)) { + if (reserved.has(toolDef.name)) { throw new Error( `[flue] Custom tool "${toolDef.name}" conflicts with a built-in tool. 
` + - `Built-in tools: ${[...BUILTIN_TOOL_NAMES].join(', ')}`, + `Built-in tools: ${[...reserved].join(', ')}`, ); } if (names.has(toolDef.name)) { @@ -880,6 +919,15 @@ export class Session implements FlueSession { } } + /** + * Build the model-facing built-in tool list for this session. + * + * If the sandbox connector implements `tools()`, its return value replaces + * the framework default (read/write/edit/bash/grep/glob); we still append + * the framework `task` tool on top. Otherwise we fall back to the + * canonical `createTools(env, options)` path, which returns the six + * defaults plus `task` when supported. + */ private createBuiltinTools( env: SessionEnv, tools: ToolDef[], @@ -887,18 +935,54 @@ export class Session implements FlueSession { model?: string, thinkingLevel?: ThinkingLevel, ): AgentTool[] { + const runTask = (params: TaskToolParams, signal?: AbortSignal) => + this.runTaskForTool(params, tools, role, model, thinkingLevel, signal); + + if (this.toolFactory) { + const connectorTools = this.toolFactory(env, { roles: this.config.roles }); + // Validate the connector's contribution before handing it to + // pi-agent-core: duplicate names within the connector list, or a + // collision with the framework `task` tool, would otherwise + // surface as an opaque pi-agent-core error mid-session. + this.validateConnectorTools(connectorTools); + return [...connectorTools, createTaskTool(runTask, this.config.roles)]; + } + return createTools(env, { roles: this.config.roles, - task: (params, signal) => - this.runTaskForTool(params, tools, role, model, thinkingLevel, signal), + task: runTask, }); } + /** + * Validate a connector-supplied built-in tool list. Connector tools must + * be uniquely-named and must not claim the framework-reserved `task` + * name. Mirrors the `validateCustomToolNames` shape so connector authors + * get the same error story as user-supplied tools. 
+ */ + private validateConnectorTools(tools: AgentTool[]): void { + const names = new Set(); + for (const tool of tools) { + if (tool.name === 'task') { + throw new Error( + '[flue] Sandbox connector tools() returned a tool named "task", which is ' + + 'framework-reserved. The framework appends `task` automatically; remove it from the connector.', + ); + } + if (names.has(tool.name)) { + throw new Error( + `[flue] Sandbox connector tools() returned duplicate tool name "${tool.name}". ` + + 'Connector tool names must be unique.', + ); + } + names.add(tool.name); + } + } + private async withScopedRuntime( options: RuntimeScopeOptions, fn: (ctx: { resolvedModel: Model }) => Promise, ): Promise { - const customTools = this.createCustomTools([...this.agentTools, ...options.tools]); const previousTools = this.harness.state.tools; const previousModel = this.harness.state.model; const previousSystemPrompt = this.harness.state.systemPrompt; @@ -911,14 +995,22 @@ export class Session implements FlueSession { options.thinkingLevel, options.role, ); + // Build builtins first so the custom-tool validator can read the + // reserved-names set directly from the resulting list — avoids + // invoking the connector's `tools()` factory twice per call. + const builtinTools = this.createBuiltinTools( + this.env, + options.tools, + options.role, + options.model, + options.thinkingLevel, + ); + const customTools = this.createCustomTools( + [...this.agentTools, ...options.tools], + builtinTools, + ); this.harness.state.tools = [ - ...this.createBuiltinTools( - this.env, - options.tools, - options.role, - options.model, - options.thinkingLevel, - ), + ...builtinTools, ...customTools, ...(options.extraTools ?? 
[]), ]; diff --git a/packages/runtime/src/types.ts b/packages/runtime/src/types.ts index fba632d3..65df75ff 100644 --- a/packages/runtime/src/types.ts +++ b/packages/runtime/src/types.ts @@ -1,4 +1,4 @@ -import type { AgentMessage, ThinkingLevel } from '@earendil-works/pi-agent-core'; +import type { AgentMessage, AgentTool, ThinkingLevel } from '@earendil-works/pi-agent-core'; import type { ImageContent, Model, TSchema } from '@earendil-works/pi-ai'; import type * as v from 'valibot'; @@ -684,9 +684,49 @@ export interface ShellResult { // ─── Sandbox ──────────────────────────────────────────────────────────────── -/** Wraps external sandboxes (Daytona, CF Containers, etc.) into Flue's SessionEnv. */ +/** + * Inputs given to a connector's `tools()` method when the framework asks it + * to author the model-facing tool list. Intentionally minimal — the + * framework still owns the `task` tool and appends it on top of whatever the + * connector returns, so `tools()` does not receive a `task` runner. + */ +export interface SessionToolFactoryOptions { + /** + * Roles available on the agent. Forwarded for connectors that wish to + * surface role-aware behaviour in their tool descriptions. Most + * connectors can ignore this. + */ + roles: Record; +} + +/** + * Optional connector-supplied factory for the model-facing tool list. When a + * `SandboxFactory` implements `tools()`, the framework uses its return value + * verbatim as the built-in tools for sessions built from that sandbox — no + * merging with the default six, no inheritance. The framework still appends + * the `task` tool unconditionally; connectors must not include it. + */ +export type SessionToolFactory = ( + env: SessionEnv, + options: SessionToolFactoryOptions, +) => AgentTool[]; + +/** + * Wraps external sandboxes (Daytona, CF Containers, etc.) into Flue's SessionEnv. + * + * Optionally contributes its own model-facing tool list via `tools()`. 
When + * present, the connector's tools replace the framework default (read, write, + * edit, bash, grep, glob); the framework still appends `task` on top. When + * absent, the framework uses its default six tools — every existing connector + * keeps working unchanged. + * + * Use cases for owning the tool list: a connector with native primitives that + * make the defaults wasteful or wrong (e.g. cf-shell, which uses a codemode + * `code` tool instead of `bash` and routes file ops through `state.*`). + */ export interface SandboxFactory { createSessionEnv(options: { id: string; cwd?: string }): Promise; + tools?: SessionToolFactory; } /** From edeb5795b7f22708fe5c3e5aba872e580208ccaa Mon Sep 17 00:00:00 2001 From: "Fred K. Schott" <622227+FredKSchott@users.noreply.github.com> Date: Fri, 15 May 2026 00:49:35 -0700 Subject: [PATCH 2/3] feat(runtime): add Cloudflare shell sandbox --- CHANGELOG.md | 10 + README.md | 31 ++- apps/www/src/code-snippets.ts | 24 +- connectors/sandbox--cloudflare.md | 12 +- docs/cloudflare-shell.md | 172 ++++++++++++++ docs/deploy-cloudflare.md | 49 ++-- .../.flue/agents/skills-from-git.ts | 70 ++++++ .../cloudflare/.flue/agents/skills-from-r2.ts | 75 ++++++ examples/cloudflare/README.md | 105 +++++++++ examples/cloudflare/package.json | 5 +- examples/cloudflare/seed-r2.sh | 59 +++++ examples/cloudflare/tsconfig.json | 6 +- examples/cloudflare/wrangler.jsonc | 20 +- .../.flue/agents/fs-surface-test.ts | 16 +- .../hello-world/.flue/agents/with-sandbox.ts | 2 +- packages/cli/README.md | 31 ++- packages/runtime/README.md | 31 ++- packages/runtime/package.json | 1 + packages/runtime/src/agent.ts | 14 +- packages/runtime/src/client.ts | 9 +- packages/runtime/src/cloudflare/hydrate.ts | 52 ++++ packages/runtime/src/cloudflare/index.ts | 8 + .../runtime/src/cloudflare/shell-sandbox.ts | 223 ++++++++++++++++++ .../runtime/src/cloudflare/virtual-sandbox.ts | 150 +++--------- packages/runtime/src/harness.ts | 4 - packages/runtime/src/session.ts | 51 
+--- packages/runtime/src/types.ts | 35 +-- packages/sdk/README.md | 31 ++- pnpm-lock.yaml | 12 + 29 files changed, 1014 insertions(+), 294 deletions(-) create mode 100644 docs/cloudflare-shell.md create mode 100644 examples/cloudflare/.flue/agents/skills-from-git.ts create mode 100644 examples/cloudflare/.flue/agents/skills-from-r2.ts create mode 100644 examples/cloudflare/README.md create mode 100755 examples/cloudflare/seed-r2.sh create mode 100644 packages/runtime/src/cloudflare/hydrate.ts create mode 100644 packages/runtime/src/cloudflare/shell-sandbox.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 0731cda0..a4e69cee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## Unreleased + +### New Features + +- **Cloudflare shell sandbox.** Added `getShellSandbox({ workspace, loader })`, `getDefaultWorkspace()`, and `hydrateFromBucket()` from `@flue/runtime/cloudflare`. The new sandbox wires `@cloudflare/shell` Workspaces into Flue through a codemode `code` tool backed by a Worker Loader binding. Agents use `state.*` inside the `code` tool instead of bash/read/write/grep/glob. Use `@cloudflare/shell` directly for primitives like `Workspace`, `WorkspaceFileSystem`, and `createGit`. + +### Breaking Changes + +- **`getVirtualSandbox()` now throws with a migration message.** The previous API described R2 as if it were mounted directly as the harness filesystem, but `@cloudflare/shell` Workspaces are SQLite-indexed filesystems with optional R2 blob spillover; raw bucket keys uploaded outside Workspace were invisible. Migrate bucket-backed agents to `getShellSandbox({ workspace, loader })` plus `hydrateFromBucket(workspace, env.BUCKET)` before `init()`. If you used zero-arg `getVirtualSandbox()`, remove it and omit `sandbox` from `init()` to use Flue's default in-memory sandbox. 
+ ## 0.6.2 - 2026-05-14 ### Fixes & Other Changes diff --git a/README.md b/README.md index 2bf6c20d..305f5d21 100644 --- a/README.md +++ b/README.md @@ -56,29 +56,40 @@ export default async function ({ init, payload }: FlueContext) { ### Support Agent -A support agent can also run in a virtual sandbox, but we now add a file-system using an R2 bucket. The knowledge base is stored in R2 and mounted directly into the harness filesystem — the agent searches it with its built-in tools (grep, glob, read). Skills are also defined in the bucket that help the agent perform its task. +A support agent can also run on Cloudflare without a container by using a cf-shell Workspace. The Workspace is a durable SQLite-indexed filesystem; R2 is an optional hydration source (and large-file spillover), not a live bucket mount. Copy the R2 objects you want into the Workspace before calling `init()`, then the agent operates on that structured filesystem through the `code` tool and `state.*` API. Because this agent is deployed to Cloudflare, message history and session state are automatically persisted for you. So you (or your customer) can revisit this support session days, weeks, or years later and pick up exactly where you left off. ```ts // .flue/agents/support.ts -import { getVirtualSandbox } from '@flue/runtime/cloudflare'; import type { FlueContext } from '@flue/runtime'; +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; import * as v from 'valibot'; export const triggers = { webhook: true }; export default async function ({ init, payload, env }: FlueContext) { - // Mount the R2 knowledge base bucket as the harness filesystem. - // The agent can grep, glob, and read articles with bash, but - // without needing to spin up an entire container sandbox. 
- const sandbox = await getVirtualSandbox(env.KNOWLEDGE_BASE); - const harness = await init({ sandbox, model: 'openrouter/moonshotai/kimi-k2.6' }); + const workspace = getDefaultWorkspace(); + + // Hydrate once per agent instance. R2 is a source, not a live mount. + if (!(await workspace.exists('/.hydrated'))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile('/.hydrated', new Date().toISOString()); + } + + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'openrouter/moonshotai/kimi-k2.6', + }); const session = await harness.session(); return await session.prompt( - `You are a support agent. Search the knowledge base for articles - relevant to this request, then write a helpful response. + `You are a support agent. Use the code tool to search the hydrated + workspace for articles relevant to this request, then write a helpful response. Customer: ${payload.message}`, { @@ -89,6 +100,8 @@ export default async function ({ init, payload, env }: FlueContext) { } ``` +This requires a `worker_loaders` binding (`{ "worker_loaders": [{ "binding": "LOADER" }] }`) in your `wrangler.jsonc`. If you need true bucket-keys-as-filesystem-paths semantics or Linux shell commands, use `@cloudflare/sandbox` Containers with `mountBucket` instead. See [Cloudflare Shell Sandbox](https://github.com/withastro/flue/blob/main/docs/cloudflare-shell.md) for the full migration and trade-offs. + ### Issue Triage (CI) A triage agent that runs in CI whenever an issue is opened on GitHub. The `local()` sandbox gives the agent direct access to the host filesystem and shell — perfect for CI runners, where `gh`, `git`, and `npm` are already on `$PATH` and the runner itself is your isolation boundary. 
diff --git a/apps/www/src/code-snippets.ts b/apps/www/src/code-snippets.ts index 6d2ea2bf..9ee20946 100644 --- a/apps/www/src/code-snippets.ts +++ b/apps/www/src/code-snippets.ts @@ -28,20 +28,28 @@ export const HERO = `export default async function ({ init, payload, env }) { }); }`; -export const SUPPORT_AGENT = `import { getVirtualSandbox } from '@flue/runtime/cloudflare'; -import type { FlueContext } from '@flue/runtime'; +export const SUPPORT_AGENT = `import type { FlueContext } from '@flue/runtime'; +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; // POST /agents/support/:id export const triggers = { webhook: true }; // Built for: Cloudflare Workers, R2 export default async function ({ init, payload, env }: FlueContext) { - // Mount your R2 bucket (declared as a binding in wrangler.jsonc) as - // the agent's filesystem at /workspace, backed by Durable Object - // SQLite + R2 under the hood. The agent searches it with bash — - // grep, glob, read — without spinning up a container. - const sandbox = await getVirtualSandbox(env.KNOWLEDGE_BASE_BUCKET); - const harness = await init({ sandbox, model: 'openrouter/moonshotai/kimi-k2.6' }); + const workspace = getDefaultWorkspace(); + if (!(await workspace.exists('/.hydrated'))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE_BUCKET); + await workspace.writeFile('/.hydrated', new Date().toISOString()); + } + + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'openrouter/moonshotai/kimi-k2.6', + }); const session = await harness.session(); // Prompt! 
The agent harness includes your workspace AGENTS.md, // skills, and roles (aka subagents) to complete your task as diff --git a/connectors/sandbox--cloudflare.md b/connectors/sandbox--cloudflare.md index 448852d8..640d1907 100644 --- a/connectors/sandbox--cloudflare.md +++ b/connectors/sandbox--cloudflare.md @@ -190,10 +190,11 @@ That document walks through the migration end-to-end: - Hello-world agent on Cloudflare (`flue dev --target cloudflare`). - Adding `wrangler.jsonc`, `.env`, and `--target cloudflare` to scripts. -- Optionally adding R2-backed storage (`getVirtualSandbox(env.BUCKET)`) - if the user only needs a searchable file store and not a full Linux - container — this is often the right answer and is much cheaper than - containers. +- Optionally adding a cf-shell Workspace sandbox + (`getShellSandbox({ workspace, loader })` plus explicit R2/git + hydration) if the user only needs a searchable file store and not a + full Linux container — this is often the right answer and is much + cheaper than containers. - Adding the Cloudflare Sandbox container at the end (which is the same recipe as Path A above). @@ -218,7 +219,8 @@ without first confirming the basics work on `--target cloudflare`. version different from the `@cloudflare/sandbox` npm package version the user actually installed. They have to match. - The published Flue surface for Cloudflare-specific helpers is - `@flue/runtime/cloudflare` (e.g. `getVirtualSandbox`). The + `@flue/runtime/cloudflare` (e.g. `getShellSandbox`, + `getDefaultWorkspace`, `hydrateFromBucket`). The `@cloudflare/sandbox` package is a separate Cloudflare-published dependency the user installs themselves. Don't import from `@flue/runtime/internal`. 
diff --git a/docs/cloudflare-shell.md b/docs/cloudflare-shell.md new file mode 100644 index 00000000..d26aeadb --- /dev/null +++ b/docs/cloudflare-shell.md @@ -0,0 +1,172 @@ +# Cloudflare Shell Sandbox + +Flue's Cloudflare shell sandbox is built on [`@cloudflare/shell`](https://www.npmjs.com/package/@cloudflare/shell): a durable, SQLite-indexed `Workspace` plus a codemode `code` tool that runs JavaScript in an isolated Worker through a `worker_loaders` binding. + +The common R2 hydration flow only imports Flue helpers. Install `@cloudflare/shell` directly when you want to construct custom Workspaces or use git helpers like `WorkspaceFileSystem` and `createGit`. + +This replaces the old `getVirtualSandbox(env.BUCKET)` API. That API described R2 as if it were mounted directly as the agent filesystem. That was not accurate: `Workspace` stores directory/file metadata in Durable Object SQLite and only uses R2 as blob spillover for large files written through the Workspace API. Raw R2 keys uploaded with `wrangler r2 object put` are not visible until you explicitly hydrate them into the Workspace. 
+ +## Basic Pattern + +```ts +import type { FlueContext } from '@flue/runtime'; +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; + +export const triggers = { webhook: true }; + +export default async function ({ init, env, payload }: FlueContext) { + const workspace = getDefaultWorkspace(); + + if (!(await workspace.exists('/.hydrated'))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile('/.hydrated', new Date().toISOString()); + } + + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'anthropic/claude-sonnet-4-6', + }); + const session = await harness.session(); + + return session.prompt(`Answer this using the hydrated workspace: ${payload.message}`); +} +``` + +Add the Worker Loader and R2 bindings to `wrangler.jsonc`: + +```jsonc +{ + "compatibility_flags": ["nodejs_compat"], + "worker_loaders": [{ "binding": "LOADER" }], + "r2_buckets": [{ "binding": "KNOWLEDGE_BASE", "bucket_name": "my-knowledge-base" }] +} +``` + +Worker Loader is currently in beta. If local `wrangler dev` does not simulate `worker_loaders`, use `wrangler dev --remote` or deploy to a preview environment. + +## What The Agent Sees + +The cf-shell sandbox does not expose `bash`, `grep`, `glob`, `read`, `write`, or `edit`. It exposes: + +- `code` — JavaScript execution in an isolated Worker. +- `task` — Flue's framework-owned child-agent tool. + +Inside the `code` tool, the model can call `state.*` methods provided by `@cloudflare/shell`, for example: + +```js +async () => { + const files = await state.readdir('/'); + const article = await state.readFile('/articles/reset-password.md'); + return { files, excerpt: article.slice(0, 500) }; +} +``` + +Programmatic file access still works through `session.fs` and `harness.fs`, backed by the same Workspace. Paths are Workspace paths such as `/foo.md`; there is no `/workspace` mount prefix. 
+ +`session.shell()` and `harness.shell()` throw because cf-shell has no shell. If you need Linux commands, use `@cloudflare/sandbox` Containers instead. + +## Default Workspace + +`getDefaultWorkspace()` constructs `new Workspace({ sql: getCloudflareContext().storage.sql })` for the current agent Durable Object. + +Call it inside an agent invocation, not at module top level. Calling it twice in the same agent instance returns two handles to the same default-namespace backing store. If you need isolated workspaces inside one Durable Object, install `@cloudflare/shell` and construct them yourself with explicit namespaces: + +```ts +import { Workspace } from '@cloudflare/shell'; + +const workspace = new Workspace({ + sql: ctx.storage.sql, + namespace: 'subagent-a', + r2: env.WORKSPACE_FILES, +}); +``` + +## R2 Hydration + +`hydrateFromBucket(workspace, bucket, options?)` eagerly copies matching R2 objects into the Workspace: + +```ts +await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE, { prefix: 'articles/' }); +``` + +With `prefix: 'articles/'`, a bucket key `articles/reset-password.md` becomes `/reset-password.md` in the Workspace. + +Hydration is intentionally not idempotent. Use a sentinel key you own: + +```ts +const sentinel = '/.hydrated-kb-v1'; +if (!(await workspace.exists(sentinel))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile(sentinel, new Date().toISOString()); +} +``` + +If hydration fails partway through, earlier writes remain. Re-run after fixing the error, or wipe the Durable Object storage if you need a clean rebuild. + +Large files written into a Workspace may be spilled back to R2 under Workspace's own key scheme. That can duplicate large source objects once; it is correct, but not a raw bucket mount. 
+ +## Git Hydration + +For git, install `@cloudflare/shell` and use its primitives directly: + +```ts +import { WorkspaceFileSystem } from '@cloudflare/shell'; +import { createGit } from '@cloudflare/shell/git'; +import { getDefaultWorkspace } from '@flue/runtime/cloudflare'; + +const workspace = getDefaultWorkspace(); +if (!(await workspace.exists('/.hydrated'))) { + const git = createGit(new WorkspaceFileSystem(workspace)); + await git.clone({ + url: 'https://github.com/FredKSchott/vinext-starter', + dir: '/repo', + depth: 1, + singleBranch: true, + }); + await workspace.writeFile('/.hydrated', new Date().toISOString()); +} +``` + +Flue does not wrap git hydration because `createGit(...).clone(...)` is already the natural API. + +## Migrating From getVirtualSandbox + +Old: + +```ts +import { getVirtualSandbox } from '@flue/runtime/cloudflare'; + +const sandbox = await getVirtualSandbox(env.KNOWLEDGE_BASE); +const harness = await init({ sandbox, model: 'anthropic/claude-sonnet-4-6' }); +``` + +New: + +```ts +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; + +const workspace = getDefaultWorkspace(); +if (!(await workspace.exists('/.hydrated'))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile('/.hydrated', new Date().toISOString()); +} + +const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'anthropic/claude-sonnet-4-6', +}); +``` + +If you used `getVirtualSandbox()` with no bucket, remove the call entirely and omit `sandbox` from `init()`. Flue's default in-memory sandbox is already that behavior. 
+ +## When You Need Bucket-Keys-As-Paths + +If your requirement is literally "R2 bucket keys appear as filesystem paths" or you need shell commands like `grep`, `find`, or language toolchains, use [`@cloudflare/sandbox`](https://developers.cloudflare.com/containers/) with [`mountBucket`](https://developers.cloudflare.com/sandbox/guides/mount-buckets/) instead. That gives you a real Linux container and direct bucket mount semantics. cf-shell is the lightweight Workspace + codemode path, not a Linux filesystem mount. diff --git a/docs/deploy-cloudflare.md b/docs/deploy-cloudflare.md index 808700c9..bc2be6ee 100644 --- a/docs/deploy-cloudflare.md +++ b/docs/deploy-cloudflare.md @@ -173,30 +173,45 @@ EOF`); The agent can use its built-in tools — grep, glob, read — to search and read these files. This is still running on a virtual sandbox (no container), so it's fast and cheap. -## R2-backed agents +## Workspace-backed agents -Inline files work for small, static content. But for larger datasets — a knowledge base, documentation corpus, product catalog — you want persistent storage. Flue integrates with [Cloudflare R2](https://developers.cloudflare.com/r2/) to mount a bucket directly as the agent's filesystem. +Inline files work for small, static content. But for larger datasets — a knowledge base, documentation corpus, product catalog — you want persistent storage. On Cloudflare, the lightweight non-container path is [`@cloudflare/shell`](./cloudflare-shell.md): a durable SQLite-indexed `Workspace` plus a `code` tool that runs JavaScript against `state.*` in an isolated Worker. + +R2 is a good source for that workspace, but it is not a live filesystem mount. Hydrate the R2 objects you want into the Workspace before `init()`, then the agent operates on the Workspace. The basic R2 flow below uses Flue's Cloudflare helpers; install `@cloudflare/shell` directly if you want to construct custom Workspaces or hydrate from git. 
### The support agent pattern -This is one of the most powerful patterns on Cloudflare: a support agent that searches a knowledge base to answer customer questions. The knowledge base lives in R2, and Flue mounts it as the sandbox filesystem — the agent searches it with grep, glob, and read, just like a real filesystem. +This is one of the most powerful patterns on Cloudflare: a support agent that searches a knowledge base to answer customer questions. The knowledge base can be stored in R2, hydrated once into the Workspace, and then searched through the `code` tool with `state.searchFiles`, `state.glob`, `state.readFile`, and related APIs. `.flue/agents/support.ts`: ```typescript -import { getVirtualSandbox } from '@flue/runtime/cloudflare'; import type { FlueContext } from '@flue/runtime'; +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; export const triggers = { webhook: true }; export default async function ({ init, payload, env }: FlueContext) { - const sandbox = await getVirtualSandbox(env.KNOWLEDGE_BASE); - const harness = await init({ sandbox, model: 'openrouter/moonshotai/kimi-k2.6' }); + const workspace = getDefaultWorkspace(); + + if (!(await workspace.exists('/.hydrated'))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile('/.hydrated', new Date().toISOString()); + } + + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'openrouter/moonshotai/kimi-k2.6', + }); const session = await harness.session(); return await session.prompt( - `You are a support agent. Search the knowledge base for articles - relevant to this request, then write a helpful response. + `You are a support agent. Use the code tool to search the hydrated + workspace for articles relevant to this request, then write a helpful response. 
Customer: ${payload.message}`, { @@ -206,15 +221,16 @@ export default async function ({ init, payload, env }: FlueContext) { } ``` -### Adding the R2 binding +### Adding the bindings -Add the R2 bucket to your project's `wrangler.jsonc` (at the root of your project, alongside `package.json`): +Add a Worker Loader binding and the R2 bucket to your project's `wrangler.jsonc` (at the root of your project, alongside `package.json`): ```jsonc { "name": "my-support-agent", "compatibility_date": "2026-04-01", "compatibility_flags": ["nodejs_compat"], + "worker_loaders": [{ "binding": "LOADER" }], "r2_buckets": [ { "binding": "KNOWLEDGE_BASE", @@ -224,6 +240,8 @@ Add the R2 bucket to your project's `wrangler.jsonc` (at the root of your projec } ``` +Worker Loader is currently in beta. If `wrangler dev` local mode does not simulate `worker_loaders`, use `wrangler dev --remote` or deploy to a preview environment. + When you run `flue build --target cloudflare`, Flue merges its own Durable Object bindings into this file and writes the composed config to `dist/wrangler.jsonc`. `wrangler deploy` picks that up automatically via a redirect at `.wrangler/deploy/config.json` — so you can keep editing only your root `wrangler.jsonc` and bindings like this R2 binding will flow through to deploy. You don't need to set `main` yourself; Flue owns the bundle entrypoint. Upload your knowledge base to R2 using Wrangler: @@ -242,10 +260,13 @@ done ### Why this works well - **No container** — Still running on a virtual sandbox. Fast startup, low cost. -- **Persistent data** — The knowledge base lives in R2 and persists across requests. -- **Agent-native search** — The agent uses grep and glob to find relevant articles, just like it would in a real filesystem. +- **Persistent data** — The workspace lives in Durable Object SQLite, with optional R2 spillover for large files. +- **Explicit sources** — R2, git, or any other source can hydrate the workspace before the agent runs. 
+- **Agent-native search** — The agent uses the `code` tool and `state.*` APIs to list, read, search, and edit files. - **Session persistence** — Because this deploys to Cloudflare Workers with Durable Objects, message history and session state are automatically persisted. A customer can revisit a support session days later and pick up where they left off. +If you specifically need bucket keys to appear as filesystem paths, use `@cloudflare/sandbox` Containers with [`mountBucket`](https://developers.cloudflare.com/sandbox/guides/mount-buckets/) instead. That is the right tool for Linux shell commands and live bucket-mount semantics. + ## Connecting a remote sandbox The examples above all run on virtual sandboxes — no container needed. But for agents that need a full Linux environment — git, Node.js, a browser, system packages — you want a remote sandbox. @@ -375,7 +396,7 @@ This is built in when you deploy with `--target cloudflare`. No extra configurat `AGENTS.md` and skills are optional workspace-context files that the agent reads from its sandbox at `init()` time. They live at conventional paths inside whatever sandbox the agent is using — Flue looks for `/AGENTS.md` and `/.agents/skills/<name>/SKILL.md`. Whatever's there gets loaded; whatever isn't, doesn't. Most agents don't need either to do useful work. -If you want to use them, put them in your sandbox. How you do that depends on which sandbox you're using: upload to R2 for an R2-backed virtual sandbox, `COPY` them in for a container, or write them in via `session.shell()` on a sandbox you control. +If you want to use them, put them in your sandbox. How you do that depends on which sandbox you're using: hydrate them into a cf-shell Workspace from R2 or git before `init()`, `COPY` them in for a container, or write them in via `session.shell()` on a sandbox that supports shell execution. **Skills** are reusable agent tasks defined as markdown files in `.agents/skills/`. 
They give the agent a focused instruction set for a specific job: @@ -437,7 +458,7 @@ Here's the progression of sandbox types available on Cloudflare, from simplest t 1. **Empty virtual sandbox** — `init({ model: 'anthropic/claude-sonnet-4-6' })`. Fast, cheap, stateless. Good for prompt-and-response agents. 2. **Virtual sandbox with shell setup** — Use `session.shell()` to write files and configure the workspace. Still fast and cheap, good for agents that need small amounts of static context. -3. **R2-backed virtual sandbox** — `getVirtualSandbox(env.BUCKET)`. Persistent storage, searchable filesystem. Ideal for knowledge bases, support agents, data processing. +3. **cf-shell Workspace sandbox** — `getShellSandbox({ workspace, loader })`. Durable SQLite-indexed Workspace, hydrated from R2/git/etc., searched and edited through the `code` tool and `state.*`. Ideal for Cloudflare-native knowledge bases and support agents without Linux dependencies. 4. **Container sandbox** — Full Linux environment via `@cloudflare/sandbox`. For coding agents, complex dev environments, and anything that needs real system tools. Start simple. Move up when you need to. diff --git a/examples/cloudflare/.flue/agents/skills-from-git.ts b/examples/cloudflare/.flue/agents/skills-from-git.ts new file mode 100644 index 00000000..6e109452 --- /dev/null +++ b/examples/cloudflare/.flue/agents/skills-from-git.ts @@ -0,0 +1,70 @@ +/** + * Demonstrates hydrating a cf-shell Workspace from a git repository + * instead of an R2 bucket. Same pattern, different source. + * + * What this shows: + * 1. Construct a default Workspace for the agent instance. + * 2. On first run, clone a public repo into the workspace via + * isomorphic-git (via `createGit`). Subsequent runs short-circuit + * on the `/.hydrated` sentinel. + * 3. Wire the Workspace into a cf-shell sandbox; pass it to `init()`. + * 4. 
Ask the agent to list the cloned files via the `code` tool, which + * operates against the workspace's `state.*` API. + * + * Local development: see the same caveat in skills-from-r2.ts. + * `wrangler dev --remote` is the supported local path until Worker + * Loader is supported in local-mode wrangler dev. + */ +import type { FlueContext } from '@flue/runtime'; +import { WorkspaceFileSystem } from '@cloudflare/shell'; +import { createGit } from '@cloudflare/shell/git'; +import { + getDefaultWorkspace, + getShellSandbox, +} from '@flue/runtime/cloudflare'; + +export const triggers = { webhook: true }; + +interface Env { + LOADER: WorkerLoader; +} + +const HYDRATION_SENTINEL = '/.hydrated'; +const TARGET_REPO = 'https://github.com/FredKSchott/vinext-starter'; +const CLONE_DIR = '/repo'; + +export default async function ({ init, env }: FlueContext) { + const workspace = getDefaultWorkspace(); + + // Clone once per agent instance. createGit() operates on any cf-shell + // FileSystem; we adapt the Workspace via WorkspaceFileSystem. + if (!(await workspace.exists(HYDRATION_SENTINEL))) { + const git = createGit(new WorkspaceFileSystem(workspace)); + await git.clone({ + url: TARGET_REPO, + dir: CLONE_DIR, + singleBranch: true, + depth: 1, + }); + await workspace.writeFile(HYDRATION_SENTINEL, new Date().toISOString()); + } + + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'cloudflare/@cf/moonshotai/kimi-k2.6', + cwd: CLONE_DIR, + }); + const session = await harness.session(); + + // Ask the agent to introspect the cloned repo via the code tool. + // The model will write something like: + // async () => state.readdir("/repo") + // and we return whatever it discovers. + const { text } = await session.prompt( + `Use the code tool to list every top-level file and directory inside ${CLONE_DIR}, ` + + 'then briefly describe what this project is based on what you see. 
' + + 'Do not respond until you have actually inspected the directory.', + ); + + return { repo: TARGET_REPO, summary: text }; +} diff --git a/examples/cloudflare/.flue/agents/skills-from-r2.ts b/examples/cloudflare/.flue/agents/skills-from-r2.ts new file mode 100644 index 00000000..ad6a4853 --- /dev/null +++ b/examples/cloudflare/.flue/agents/skills-from-r2.ts @@ -0,0 +1,75 @@ +/** + * Demonstrates the `getShellSandbox` + `hydrateFromBucket` flow that + * replaces the old (buggy) `getVirtualSandbox(env.BUCKET)` pattern. + * + * What this shows: + * 1. Construct a default Workspace for the agent instance. + * 2. On first run, copy the bucket's contents into the Workspace (so + * `/.agents/skills/spam-filter/SKILL.md` lives at that path on the + * workspace filesystem). Subsequent runs short-circuit on the + * `/.hydrated` sentinel. + * 3. Wire the Workspace into a cf-shell sandbox; pass it to `init()`. + * 4. Call a skill the agent discovered from the hydrated filesystem. + * + * Bucket layout: any object whose key under the bucket starts with + * `.agents/skills/<name>/SKILL.md` becomes a registered skill. We keep + * `prefix: ''` (the default) so the bucket's full key tree is mirrored + * verbatim into the workspace. + * + * Local development: Worker Loader is in beta and `wrangler dev`'s + * local mode doesn't yet simulate the `worker_loaders` binding. To run + * this example, use either: + * - `wrangler dev --remote` (runs against Cloudflare's edge; requires + * Worker Loader access on your account), or + * - `wrangler deploy` to a preview environment. + * See the example's README.md for the full setup, the seed-r2.sh helper, + * and the migration / fallback options if you don't have Loader access. 
+ */ +import type { FlueContext } from '@flue/runtime'; +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; +import * as v from 'valibot'; + +export const triggers = { webhook: true }; + +interface Env { + KNOWLEDGE_BASE: R2Bucket; + LOADER: WorkerLoader; +} + +const HYDRATION_SENTINEL = '/.hydrated'; + +export default async function ({ init, env }: FlueContext) { + const workspace = getDefaultWorkspace(); + + // Hydrate once per agent instance. Bump the sentinel key (e.g. + // `/.hydrated-v2`) to force re-hydration after you change the bucket + // contents — Workspace owns mutations after hydration, so R2 changes + // won't propagate back on their own. + if (!(await workspace.exists(HYDRATION_SENTINEL))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile(HYDRATION_SENTINEL, new Date().toISOString()); + } + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'cloudflare/@cf/moonshotai/kimi-k2.6', + }); + const session = await harness.session(); + + // The skill body lives at `.agents/skills/spam-filter/SKILL.md` in + // the workspace (hydrated from the same path in the bucket). Flue + // discovers it via the standard skills lookup during `init()`. + const result = await session.skill('spam-filter', { + args: { message: 'CONGRATS! You have won a free iPhone. Click here: http://bit.ly/xyz' }, + result: v.object({ + spam: v.boolean(), + confidence: v.picklist(['low', 'medium', 'high']), + reasoning: v.string(), + }), + }); + + return result.data; +} diff --git a/examples/cloudflare/README.md b/examples/cloudflare/README.md new file mode 100644 index 00000000..0cab54d9 --- /dev/null +++ b/examples/cloudflare/README.md @@ -0,0 +1,105 @@ +# Cloudflare example agents + +This directory exercises Flue's Cloudflare-specific surfaces. 
The agents +here are intentionally minimal — each one demonstrates a single capability +end-to-end so it's easy to copy the pattern into a real app. + +## Agents + +| Agent | Demonstrates | +| ------------------------------ | ---------------------------------------------------------------------------------- | +| `with-cloudflare-binding.ts` | Routing model traffic through the Workers AI binding (no API keys). | +| `skills-from-r2.ts` | Hydrating a cf-shell `Workspace` from an R2 bucket and using a discovered skill. | +| `skills-from-git.ts` | Hydrating a cf-shell `Workspace` from a git repo via `createGit`. | + +## Setup + +Install deps: + +```bash +pnpm install +``` + +Build the runtime + cli once (the local workspace `dist/` directories are +what `flue dev`/`run` consume; a fresh checkout has stale ones — see L1 +in the cf-shell adoption plan): + +```bash +pnpm run build -F @flue/runtime -F @flue/cli +``` + +The agents in this example use the Workers AI binding, so no provider API +keys are required. If you switch them to a non-Cloudflare model, put the +matching provider key in `.env` at the project root (see +`../../docs/deploy-cloudflare.md` for the full story). + +## Worker Loader requirement (skills-from-r2, skills-from-git) + +Both hydration examples require a `worker_loaders` binding. Worker Loader +is **currently in beta** and your Cloudflare account needs access; the +binding is already declared in `wrangler.jsonc` here. + +### Local development caveat + +`wrangler dev` local mode can expose a local `worker_loaders` binding, but +Wrangler's local R2 CLI storage may not be visible to the running dev +server's R2 binding. For an end-to-end R2 hydration smoke, use remote +resources. You have two options: + +- **`wrangler dev --remote`** — runs the worker against Cloudflare's edge + using your dev bucket. Requires Worker Loader access on your account. +- **`wrangler deploy` to a preview environment** — deploy first, exercise + the agent over HTTP afterward. 
+ +If your account doesn't have Worker Loader access at all, the migration +path is `@cloudflare/sandbox` (Containers + `mountBucket`) — see +`../../docs/cloudflare-shell.md` for the comparison. + +### Seeding R2 (skills-from-r2 only) + +Before running `skills-from-r2`, put a SKILL.md into your dev R2 bucket +so the hydration step has something to copy: + +```bash +# from this directory; requires wrangler installed + authenticated +./seed-r2.sh +``` + +The script writes `.agents/skills/spam-filter/SKILL.md` into +`flue-example-knowledge-base-dev`. Pass `BUCKET=prod` to seed the prod +bucket instead. + +If you want to use different bucket names, edit `wrangler.jsonc` and the +`BUCKET_NAME` table in `seed-r2.sh` in lockstep. + +## Running + +```bash +# Build + serve (one of these, depending on Loader access for the agent you want) +pnpm exec flue dev --target cloudflare --env ../../.env +pnpm exec wrangler dev --remote # if needed for cf-shell agents + +# Trigger an agent +curl -X POST http://localhost:3583/agents/with-cloudflare-binding/test-1 \ + -H 'Content-Type: application/json' -d '{}' + +curl -X POST http://localhost:3583/agents/skills-from-r2/test-1 \ + -H 'Content-Type: application/json' -d '{}' + +curl -X POST http://localhost:3583/agents/skills-from-git/test-1 \ + -H 'Content-Type: application/json' -d '{}' +``` + +`skills-from-r2` and `skills-from-git` write a `/.hydrated` sentinel into +the Durable Object's SQLite on first run; second-run hydration is a no-op +on the sentinel check. Bump the sentinel key in source (or wipe the DO's +storage) to force re-hydration. + +## Migrating from `getVirtualSandbox` + +If you're coming from `getVirtualSandbox(env.BUCKET)`, the moral +equivalent is `getShellSandbox({ workspace, loader })` + an explicit +`hydrateFromBucket(workspace, env.BUCKET)` step before the sandbox is +created. `skills-from-r2.ts` is the canonical example of that flow. 
+See `../../docs/cloudflare-shell.md` for the full background on why this +replaced the previous API. diff --git a/examples/cloudflare/package.json b/examples/cloudflare/package.json index d7d97c3b..b5fa064d 100644 --- a/examples/cloudflare/package.json +++ b/examples/cloudflare/package.json @@ -3,12 +3,15 @@ "private": true, "type": "module", "dependencies": { + "@cloudflare/shell": "^0.3.2", "@flue/runtime": "workspace:*", "agents": "*", "hono": "^4.7.0", "valibot": "^1.0.0" }, "devDependencies": { - "@flue/cli": "workspace:*" + "@cloudflare/workers-types": "^4.20260505.1", + "@flue/cli": "workspace:*", + "wrangler": "^4.87.0" } } diff --git a/examples/cloudflare/seed-r2.sh b/examples/cloudflare/seed-r2.sh new file mode 100755 index 00000000..f9ebbc92 --- /dev/null +++ b/examples/cloudflare/seed-r2.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Seed the R2 bucket used by skills-from-r2.ts with a sample skill. +# +# Run this once before exercising the agent. The agent reads from +# `.agents/skills/spam-filter/SKILL.md` after hydration, so the bucket +# needs an object at that key. +# +# Usage: +# ./seed-r2.sh # seeds the remote dev bucket +# BUCKET=prod ./seed-r2.sh # seeds the remote prod bucket +# REMOTE=0 ./seed-r2.sh # seeds wrangler's local R2 store +# +# Requires pnpm dependencies installed and wrangler authenticated. +set -euo pipefail + +BUCKET="${BUCKET:-dev}" +REMOTE="${REMOTE:-1}" +case "$BUCKET" in + dev) BUCKET_NAME="flue-example-knowledge-base-dev" ;; + prod) BUCKET_NAME="flue-example-knowledge-base" ;; + *) echo "BUCKET must be 'dev' or 'prod' (got: $BUCKET)" >&2; exit 1 ;; +esac + +SKILL_KEY=".agents/skills/spam-filter/SKILL.md" + +# Use a tempfile so quoting stays sane. +TMP=$(mktemp) +trap 'rm -f "$TMP"' EXIT +cat > "$TMP" <<'EOF' +--- +name: spam-filter +description: Classify a message as spam or not spam, with confidence and reasoning. +--- + +You are a spam-classification skill. + +The message to classify is in `{{message}}`. 
Return a structured verdict: + +- `spam`: true if the message is unsolicited bulk, fraudulent, or + malicious; false if it's legitimate correspondence. +- `confidence`: "low" / "medium" / "high" based on how clear the + signals are. +- `reasoning`: one or two sentences explaining the decision. Cite + specific phrases or patterns from the message. + +Heuristics that strongly indicate spam: +- Urgency ("URGENT", "ACT NOW", excessive ALL CAPS) +- Free prizes, lotteries, or "you won" claims +- Suspicious shortened URLs (bit.ly, tinyurl) without context +- Requests for sensitive information (passwords, SSNs, account numbers) +EOF + +echo "Uploading $SKILL_KEY to $BUCKET_NAME..." +REMOTE_FLAG=() +if [[ "$REMOTE" != "0" ]]; then + REMOTE_FLAG=(--remote) +fi +pnpm exec wrangler r2 object put "$BUCKET_NAME/$SKILL_KEY" --file "$TMP" ${REMOTE_FLAG+"${REMOTE_FLAG[@]}"} +echo "Done." diff --git a/examples/cloudflare/tsconfig.json b/examples/cloudflare/tsconfig.json index b361441c..ba1e381c 100644 --- a/examples/cloudflare/tsconfig.json +++ b/examples/cloudflare/tsconfig.json @@ -1,3 +1,7 @@ { - "extends": "../../tsconfig.base.json" + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "types": ["@cloudflare/workers-types"] + }, + "include": [".flue/**/*.ts"] } diff --git a/examples/cloudflare/wrangler.jsonc b/examples/cloudflare/wrangler.jsonc index 6effa229..91172a1b 100644 --- a/examples/cloudflare/wrangler.jsonc +++ b/examples/cloudflare/wrangler.jsonc @@ -7,6 +7,24 @@ "compatibility_date": "2026-04-01", "compatibility_flags": ["nodejs_compat"], "ai": { - "binding": "AI" + "binding": "AI", }, + // Worker Loader binding (beta) required by the cf-shell `code` tool. + // See examples in this directory: skills-from-r2.ts, skills-from-git.ts. 
+ // If your account doesn't have Worker Loader access, comment this out + // — the agents that use it (skills-from-r2, skills-from-git) will + // throw at sandbox-construction with a clear error, but the rest of + // the example (with-cloudflare-binding) keeps working. + "worker_loaders": [{ "binding": "LOADER" }], + // R2 bucket used by skills-from-r2.ts as the hydration source. + // `preview_bucket_name` lets wrangler dev (in --remote mode) use a + // separate bucket so you don't have to seed prod. Seed the dev bucket + // with examples/cloudflare/seed-r2.sh before running. + "r2_buckets": [ + { + "binding": "KNOWLEDGE_BASE", + "bucket_name": "flue-example-knowledge-base", + "preview_bucket_name": "flue-example-knowledge-base-dev", + }, + ], } diff --git a/examples/hello-world/.flue/agents/fs-surface-test.ts b/examples/hello-world/.flue/agents/fs-surface-test.ts index 326d4fac..35a2cb3e 100644 --- a/examples/hello-world/.flue/agents/fs-surface-test.ts +++ b/examples/hello-world/.flue/agents/fs-surface-test.ts @@ -27,20 +27,20 @@ export default async function ({ init }: FlueContext) { const sRead = await session.fs.readFile('/tmp/session.txt'); check('session.fs writeFile/readFile round-trip', sRead === 'session.fs content'); - // agent.fs round-trip - await agent.fs.writeFile('/tmp/agent.txt', 'agent.fs content'); - const aRead = await agent.fs.readFile('/tmp/agent.txt'); - check('agent.fs writeFile/readFile round-trip', aRead === 'agent.fs content'); + // harness.fs round-trip + await harness.fs.writeFile('/tmp/agent.txt', 'agent.fs content'); + const aRead = await harness.fs.readFile('/tmp/agent.txt'); + check('harness.fs writeFile/readFile round-trip', aRead === 'agent.fs content'); // session.fs writes are visible to session.shell await session.fs.writeFile('/tmp/visible.txt', 'staged by SDK'); const viaShell = await session.shell('cat /tmp/visible.txt'); check('session.fs visible to session.shell', viaShell.stdout.trim() === 'staged by SDK'); - // agent.fs 
writes are visible to agent.shell - await agent.fs.writeFile('/tmp/agent-visible.txt', 'staged by agent.fs'); - const aViaShell = await agent.shell('cat /tmp/agent-visible.txt'); - check('agent.fs visible to agent.shell', aViaShell.stdout.trim() === 'staged by agent.fs'); + // harness.fs writes are visible to harness.shell + await harness.fs.writeFile('/tmp/agent-visible.txt', 'staged by harness.fs'); + const aViaShell = await harness.shell('cat /tmp/agent-visible.txt'); + check('harness.fs visible to harness.shell', aViaShell.stdout.trim() === 'staged by harness.fs'); // mkdir / readdir / exists / rm await session.fs.mkdir('/tmp/scratch', { recursive: true }); diff --git a/examples/hello-world/.flue/agents/with-sandbox.ts b/examples/hello-world/.flue/agents/with-sandbox.ts index 87c16fc3..01a44b53 100644 --- a/examples/hello-world/.flue/agents/with-sandbox.ts +++ b/examples/hello-world/.flue/agents/with-sandbox.ts @@ -10,7 +10,7 @@ export default async function ({ init }: FlueContext) { const sandbox = await client.create(); const harness = await init({ - sandbox: daytona(sandbox, { cleanup: true }), + sandbox: daytona(sandbox), model: 'anthropic/claude-sonnet-4-6', }); const session = await harness.session(); diff --git a/packages/cli/README.md b/packages/cli/README.md index 2bf6c20d..305f5d21 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -56,29 +56,40 @@ export default async function ({ init, payload }: FlueContext) { ### Support Agent -A support agent can also run in a virtual sandbox, but we now add a file-system using an R2 bucket. The knowledge base is stored in R2 and mounted directly into the harness filesystem — the agent searches it with its built-in tools (grep, glob, read). Skills are also defined in the bucket that help the agent perform its task. +A support agent can also run on Cloudflare without a container by using a cf-shell Workspace. 
The Workspace is a durable SQLite-indexed filesystem; R2 is an optional hydration source (and large-file spillover), not a live bucket mount. Copy the R2 objects you want into the Workspace before calling `init()`, then the agent operates on that structured filesystem through the `code` tool and `state.*` API. Because this agent is deployed to Cloudflare, message history and session state are automatically persisted for you. So you (or your customer) can revisit this support session days, weeks, or years later and pick up exactly where you left off. ```ts // .flue/agents/support.ts -import { getVirtualSandbox } from '@flue/runtime/cloudflare'; import type { FlueContext } from '@flue/runtime'; +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; import * as v from 'valibot'; export const triggers = { webhook: true }; export default async function ({ init, payload, env }: FlueContext) { - // Mount the R2 knowledge base bucket as the harness filesystem. - // The agent can grep, glob, and read articles with bash, but - // without needing to spin up an entire container sandbox. - const sandbox = await getVirtualSandbox(env.KNOWLEDGE_BASE); - const harness = await init({ sandbox, model: 'openrouter/moonshotai/kimi-k2.6' }); + const workspace = getDefaultWorkspace(); + + // Hydrate once per agent instance. R2 is a source, not a live mount. + if (!(await workspace.exists('/.hydrated'))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile('/.hydrated', new Date().toISOString()); + } + + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'openrouter/moonshotai/kimi-k2.6', + }); const session = await harness.session(); return await session.prompt( - `You are a support agent. Search the knowledge base for articles - relevant to this request, then write a helpful response. + `You are a support agent. 
Use the code tool to search the hydrated + workspace for articles relevant to this request, then write a helpful response. Customer: ${payload.message}`, { @@ -89,6 +100,8 @@ export default async function ({ init, payload, env }: FlueContext) { } ``` +This requires a `worker_loaders` binding (`{ "worker_loaders": [{ "binding": "LOADER" }] }`) in your `wrangler.jsonc`. If you need true bucket-keys-as-filesystem-paths semantics or Linux shell commands, use `@cloudflare/sandbox` Containers with `mountBucket` instead. See [Cloudflare Shell Sandbox](https://github.com/withastro/flue/blob/main/docs/cloudflare-shell.md) for the full migration and trade-offs. + ### Issue Triage (CI) A triage agent that runs in CI whenever an issue is opened on GitHub. The `local()` sandbox gives the agent direct access to the host filesystem and shell — perfect for CI runners, where `gh`, `git`, and `npm` are already on `$PATH` and the runner itself is your isolation boundary. diff --git a/packages/runtime/README.md b/packages/runtime/README.md index 2bf6c20d..305f5d21 100644 --- a/packages/runtime/README.md +++ b/packages/runtime/README.md @@ -56,29 +56,40 @@ export default async function ({ init, payload }: FlueContext) { ### Support Agent -A support agent can also run in a virtual sandbox, but we now add a file-system using an R2 bucket. The knowledge base is stored in R2 and mounted directly into the harness filesystem — the agent searches it with its built-in tools (grep, glob, read). Skills are also defined in the bucket that help the agent perform its task. +A support agent can also run on Cloudflare without a container by using a cf-shell Workspace. The Workspace is a durable SQLite-indexed filesystem; R2 is an optional hydration source (and large-file spillover), not a live bucket mount. Copy the R2 objects you want into the Workspace before calling `init()`, then the agent operates on that structured filesystem through the `code` tool and `state.*` API. 
Because this agent is deployed to Cloudflare, message history and session state are automatically persisted for you. So you (or your customer) can revisit this support session days, weeks, or years later and pick up exactly where you left off. ```ts // .flue/agents/support.ts -import { getVirtualSandbox } from '@flue/runtime/cloudflare'; import type { FlueContext } from '@flue/runtime'; +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; import * as v from 'valibot'; export const triggers = { webhook: true }; export default async function ({ init, payload, env }: FlueContext) { - // Mount the R2 knowledge base bucket as the harness filesystem. - // The agent can grep, glob, and read articles with bash, but - // without needing to spin up an entire container sandbox. - const sandbox = await getVirtualSandbox(env.KNOWLEDGE_BASE); - const harness = await init({ sandbox, model: 'openrouter/moonshotai/kimi-k2.6' }); + const workspace = getDefaultWorkspace(); + + // Hydrate once per agent instance. R2 is a source, not a live mount. + if (!(await workspace.exists('/.hydrated'))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile('/.hydrated', new Date().toISOString()); + } + + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'openrouter/moonshotai/kimi-k2.6', + }); const session = await harness.session(); return await session.prompt( - `You are a support agent. Search the knowledge base for articles - relevant to this request, then write a helpful response. + `You are a support agent. Use the code tool to search the hydrated + workspace for articles relevant to this request, then write a helpful response. 
Customer: ${payload.message}`, { @@ -89,6 +100,8 @@ export default async function ({ init, payload, env }: FlueContext) { } ``` +This requires a `worker_loaders` binding (`{ "worker_loaders": [{ "binding": "LOADER" }] }`) in your `wrangler.jsonc`. If you need true bucket-keys-as-filesystem-paths semantics or Linux shell commands, use `@cloudflare/sandbox` Containers with `mountBucket` instead. See [Cloudflare Shell Sandbox](https://github.com/withastro/flue/blob/main/docs/cloudflare-shell.md) for the full migration and trade-offs. + ### Issue Triage (CI) A triage agent that runs in CI whenever an issue is opened on GitHub. The `local()` sandbox gives the agent direct access to the host filesystem and shell — perfect for CI runners, where `gh`, `git`, and `npm` are already on `$PATH` and the runner itself is your isolation boundary. diff --git a/packages/runtime/package.json b/packages/runtime/package.json index 6c1b984c..58e211c6 100644 --- a/packages/runtime/package.json +++ b/packages/runtime/package.json @@ -49,6 +49,7 @@ "prepublishOnly": "cp ../../README.md ." }, "dependencies": { + "@cloudflare/codemode": "^0.3.4", "@cloudflare/shell": "^0.3.2", "@hono/node-server": "^1.14.0", "@hono/standard-validator": "^0.2.0", diff --git a/packages/runtime/src/agent.ts b/packages/runtime/src/agent.ts index 4fc9811a..dc44de05 100644 --- a/packages/runtime/src/agent.ts +++ b/packages/runtime/src/agent.ts @@ -8,13 +8,7 @@ const MAX_GREP_MATCHES = 100; const MAX_GREP_LINE_LENGTH = 500; const MAX_GLOB_RESULTS = 1000; -/** - * Names of the framework's default built-in tools (the six produced by - * `createTools` plus the framework-owned `task`). Used by the validator when - * a session uses the default tool path. Sessions whose sandbox connector - * implements `tools()` derive their reserved set from the connector's actual - * tool names plus `task` instead — these are not universally reserved. - */ +/** Names reserved by the framework's default tool set. 
*/ export const BUILTIN_TOOL_NAMES = new Set([ 'read', 'write', @@ -296,11 +290,7 @@ const TaskParams = Type.Object({ ), }); -/** - * Build the canonical `task` tool. The framework appends this on top of - * whatever the connector's `tools()` returns; standalone callers should reach - * for {@link createTools} instead. - */ +/** Build Flue's framework-owned `task` tool. */ export function createTaskTool( runTask: ( params: TaskToolParams, diff --git a/packages/runtime/src/client.ts b/packages/runtime/src/client.ts index 892e96c6..7cf83cac 100644 --- a/packages/runtime/src/client.ts +++ b/packages/runtime/src/client.ts @@ -227,14 +227,7 @@ function isSandboxFactory(value: unknown): value is SandboxFactory { ); } -/** - * Resolve sandbox option to a `SessionEnv` plus an optional connector-supplied - * `toolFactory`. Resolution order: default → BashFactory → platform hook → - * SandboxFactory. Only `SandboxFactory` connectors may contribute a - * `toolFactory`; the default empty sandbox, bash factories, and the platform - * hook return `toolFactory: undefined`, which makes the framework fall back to - * its default six built-in tools. - */ +/** Resolve sandbox option to its session environment and optional tool factory. */ async function resolveSessionEnv( id: string, sandbox: AgentInit['sandbox'], diff --git a/packages/runtime/src/cloudflare/hydrate.ts b/packages/runtime/src/cloudflare/hydrate.ts new file mode 100644 index 00000000..8e5ba564 --- /dev/null +++ b/packages/runtime/src/cloudflare/hydrate.ts @@ -0,0 +1,52 @@ +import type { Workspace } from '@cloudflare/shell'; + +export interface HydrateFromBucketOptions { + /** Only copy keys under this prefix. The prefix is stripped from Workspace paths. */ + prefix?: string; +} + +/** + * Copy matching R2 objects into a Workspace. + * + * Hydration is eager, paginated, not idempotent, and does not roll back + * partial writes. Callers should gate it with their own sentinel file. 
+ */ +export async function hydrateFromBucket( + workspace: Workspace, + bucket: R2Bucket, + options?: HydrateFromBucketOptions, +): Promise<void> { + const prefix = options?.prefix; + let cursor: string | undefined; + + while (true) { + const listing = await bucket.list({ prefix, cursor }); + for (const obj of listing.objects) { + const relativeKey = stripPrefix(obj.key, prefix); + if (relativeKey === '' || relativeKey.endsWith('/')) continue; + + const body = await bucket.get(obj.key); + if (!body) continue; + + await workspace.writeFileBytes( + absolutize(relativeKey), + new Uint8Array(await body.arrayBuffer()), + ); + } + + if (!listing.truncated) break; + if (!listing.cursor) { + throw new Error('[flue] R2 listing was truncated but did not include a cursor.'); + } + cursor = listing.cursor; + } +} + +function stripPrefix(key: string, prefix: string | undefined): string { + if (!prefix) return key; + return key.startsWith(prefix) ? key.slice(prefix.length) : key; +} + +function absolutize(key: string): string { + return key.startsWith('/') ? 
key : '/' + key; +} diff --git a/packages/runtime/src/cloudflare/index.ts b/packages/runtime/src/cloudflare/index.ts index e513e241..416acb18 100644 --- a/packages/runtime/src/cloudflare/index.ts +++ b/packages/runtime/src/cloudflare/index.ts @@ -1,6 +1,14 @@ export { getVirtualSandbox } from './virtual-sandbox.ts'; export type { VirtualSandboxOptions } from './virtual-sandbox.ts'; +export { + getShellSandbox, + getDefaultWorkspace, +} from './shell-sandbox.ts'; +export type { GetShellSandboxOptions } from './shell-sandbox.ts'; + +export { hydrateFromBucket } from './hydrate.ts'; + export { cfSandboxToSessionEnv } from './cf-sandbox.ts'; export { store } from './session-store.ts'; diff --git a/packages/runtime/src/cloudflare/shell-sandbox.ts b/packages/runtime/src/cloudflare/shell-sandbox.ts new file mode 100644 index 00000000..f196fba7 --- /dev/null +++ b/packages/runtime/src/cloudflare/shell-sandbox.ts @@ -0,0 +1,223 @@ +/** Cloudflare Workspace sandbox backed by @cloudflare/shell. */ +import { + STATE_TYPES, + Workspace, + WorkspaceFileSystem, + type FsStat as CfFsStat, +} from '@cloudflare/shell'; +import { stateTools } from '@cloudflare/shell/workers'; +import type { DynamicWorkerExecutorOptions } from '@cloudflare/codemode'; +import type { AgentTool, AgentToolResult } from '@earendil-works/pi-agent-core'; +import { type Static, Type } from '@earendil-works/pi-ai'; +import { normalizePath } from '../session.ts'; +import type { + FileStat, + SandboxFactory, + SessionEnv, + SessionToolFactory, + ShellResult, +} from '../types.ts'; +import { getCloudflareContext } from './context.ts'; + +export interface GetShellSandboxOptions { + workspace: Workspace; + loader: WorkerLoader; + /** Forwarded to codemode's DynamicWorkerExecutor. */ + executor?: Pick; +} + +/** + * Create a Workspace-backed sandbox with the codemode `code` tool. + * Requires a Worker Loader binding; cf-shell sandboxes do not support `exec()`. 
+ */ +export function getShellSandbox(options: GetShellSandboxOptions): SandboxFactory { + if (!options || !options.workspace) { + throw new Error( + '[flue] getShellSandbox requires a workspace. Pass `getDefaultWorkspace()` for the common case, ' + + 'or construct your own with `new Workspace({ sql: ctx.storage.sql, ... })`.', + ); + } + if (!options.loader) { + throw new Error( + '[flue] getShellSandbox requires a WorkerLoader binding. Add this to your wrangler.jsonc:\n' + + ' { "worker_loaders": [{ "binding": "LOADER" }] }\n' + + 'Then pass `loader: env.LOADER` to getShellSandbox(). Worker Loader is currently in beta — ' + + 'see https://developers.cloudflare.com/workers/runtime-apis/bindings/worker-loader/.', + ); + } + + const { workspace, loader, executor: executorOptions } = options; + const fs = new WorkspaceFileSystem(workspace); + const toolFactory: SessionToolFactory = () => [ + createCodeTool(workspace, loader, executorOptions), + ]; + + return { + async createSessionEnv() { + return createWorkspaceSessionEnv(workspace, fs, '/'); + }, + tools: toolFactory, + }; +} + +function createWorkspaceSessionEnv( + workspace: Workspace, + fs: WorkspaceFileSystem, + cwd: string, +): SessionEnv { + const normalizedCwd = normalizePath(cwd); + const resolvePath = (p: string): string => { + if (p.startsWith('/')) return normalizePath(p); + if (normalizedCwd === '/') return normalizePath('/' + p); + return normalizePath(normalizedCwd + '/' + p); + }; + + const exec = (): Promise => { + throw new Error(EXEC_NOT_SUPPORTED_MESSAGE); + }; + + return { + exec, + async readFile(path: string): Promise { + return fs.readFile(resolvePath(path)); + }, + async readFileBuffer(path: string): Promise { + return fs.readFileBytes(resolvePath(path)); + }, + async writeFile(path: string, content: string | Uint8Array): Promise { + const resolved = resolvePath(path); + if (typeof content === 'string') { + await workspace.writeFile(resolved, content); + } else { + await 
workspace.writeFileBytes(resolved, content); + } + }, + async stat(path: string): Promise { + return adaptStat(await fs.stat(resolvePath(path))); + }, + async readdir(path: string): Promise { + return fs.readdir(resolvePath(path)); + }, + async exists(path: string): Promise { + return fs.exists(resolvePath(path)); + }, + async mkdir(path: string, opts?: { recursive?: boolean }): Promise { + await fs.mkdir(resolvePath(path), opts); + }, + async rm(path: string, opts?: { recursive?: boolean; force?: boolean }): Promise { + await fs.rm(resolvePath(path), opts); + }, + cwd: normalizedCwd, + resolvePath, + }; +} + +const EXEC_NOT_SUPPORTED_MESSAGE = + '[flue] The cf-shell sandbox does not support exec(). cf-shell agents operate on a structured ' + + 'workspace via the `code` tool (JavaScript run in an isolated Worker via codemode) and the ' + + '`state.*` API exposed inside that sandbox; there is no shell. If you need bash/grep/find, use ' + + '`@cloudflare/sandbox` (Containers + mountBucket) instead. See docs/cloudflare-shell.md.'; + +function adaptStat(s: CfFsStat): FileStat { + return { + isFile: s.type === 'file', + isDirectory: s.type === 'directory', + isSymbolicLink: s.type === 'symlink', + size: s.size, + mtime: s.mtime, + }; +} + +const CodeParams = Type.Object({ + code: Type.String({ + description: + 'A single async arrow function with the signature `async () => { ... return result; }`. ' + + 'Inside the body, call `state.*` to operate on the workspace (see the type declarations ' + + 'below). The function executes in an isolated Worker — no network, no DOM, no imports. 
' + + 'Return whatever JSON-serializable value you want back; it is returned as the tool result.', + }), +}); + +function createCodeTool( + workspace: Workspace, + loader: WorkerLoader, + executorOptions: GetShellSandboxOptions['executor'], +): AgentTool { + return { + name: 'code', + label: 'Run Code', + description: buildCodeToolDescription(), + parameters: CodeParams, + async execute( + _toolCallId: string, + params: Static, + ): Promise> { + const { DynamicWorkerExecutor, resolveProvider } = await import('@cloudflare/codemode'); + const executor = new DynamicWorkerExecutor({ + loader, + ...executorOptions, + }); + const stateProvider = resolveProvider(stateTools(workspace)); + const { result, error, logs } = await executor.execute(params.code, [stateProvider]); + + if (error) { + const logsTail = logs && logs.length ? `\n\nlogs:\n${logs.join('\n')}` : ''; + throw new Error(`code tool failed: ${error}${logsTail}`); + } + + const resultText = formatResult(result); + const logsText = logs && logs.length ? `\n\n--- logs ---\n${logs.join('\n')}` : ''; + + return { + content: [{ type: 'text', text: resultText + logsText }], + details: logs && logs.length ? { logs } : {}, + }; + }, + }; +} + +function formatResult(result: unknown): string { + if (result === undefined) return '(no result)'; + if (typeof result === 'string') return result; + try { + return JSON.stringify(result, null, 2); + } catch { + return String(result); + } +} + +function buildCodeToolDescription(): string { + return [ + 'Run a snippet of JavaScript inside an isolated Worker against a durable', + 'workspace filesystem. The snippet must be a single async arrow function:', + '', + ' async () => {', + ' const text = await state.readFile("/notes.md");', + ' await state.writeFile("/notes.md", text.toUpperCase());', + ' return { bytes: text.length };', + ' }', + '', + 'Rules:', + '- Write JavaScript, not TypeScript — no type annotations.', + '- Do not use `import` statements. 
Everything you need is on `state`.', + '- Always `return` the value you want back.', + '- For multi-file refactors, prefer `state.planEdits()` + `state.applyEditPlan()` over many writes.', + '- For tree-wide search/replace, use `state.replaceInFiles()` (transactional by default).', + '- Network access (`fetch`, `connect`) is disabled. Do not attempt outbound HTTP.', + '', + 'The `state` API (TypeScript declaration; the runtime is JavaScript):', + '', + '```typescript', + STATE_TYPES, + '```', + ].join('\n'); +} + +/** + * Construct the default Workspace for the current Cloudflare agent instance. + * Repeated calls return handles to the same default Workspace namespace. + */ +export function getDefaultWorkspace(): Workspace { + const { storage } = getCloudflareContext(); + return new Workspace({ sql: storage.sql }); +} diff --git a/packages/runtime/src/cloudflare/virtual-sandbox.ts b/packages/runtime/src/cloudflare/virtual-sandbox.ts index 38170b23..b684c96c 100644 --- a/packages/runtime/src/cloudflare/virtual-sandbox.ts +++ b/packages/runtime/src/cloudflare/virtual-sandbox.ts @@ -1,125 +1,43 @@ -/** - * In-process just-bash sandbox for Cloudflare Workers (no container). - * Without args: empty in-memory. With R2 bucket: persistent files via DO SQLite + R2. - */ -import { - Workspace, - WorkspaceFileSystem, - type FileSystem as CfFileSystem, - type FsStat as CfFsStat, -} from '@cloudflare/shell'; -import { getCloudflareContext } from './context.ts'; +/** Deprecated compatibility stub for the removed virtual Cloudflare sandbox API. */ export interface VirtualSandboxOptions { - /** R2 key prefix for session isolation. */ prefix?: string; } -function adaptStat(cfStat: CfFsStat) { - return { - isFile: cfStat.type === 'file', - isDirectory: cfStat.type === 'directory', - isSymbolicLink: cfStat.type === 'symlink', - mode: cfStat.mode ?? (cfStat.type === 'directory' ? 
0o755 : 0o644), - size: cfStat.size, - mtime: cfStat.mtime, - }; -} - -function adaptToJustBash(cfFs: CfFileSystem): any { - return { - readFile: (path: string, _opts?: any) => cfFs.readFile(path), - readFileBuffer: (path: string) => cfFs.readFileBytes(path), - - async writeFile(path: string, content: string | Uint8Array, _opts?: any) { - if (typeof content === 'string') { - await cfFs.writeFile(path, content); - } else { - await cfFs.writeFileBytes(path, content); - } - }, - - appendFile: (path: string, content: string, _opts?: any) => cfFs.appendFile(path, content), - exists: (path: string) => cfFs.exists(path), - - async stat(path: string) { - return adaptStat(await cfFs.stat(path)); - }, - - async lstat(path: string) { - return adaptStat(await cfFs.lstat(path)); - }, - - mkdir: (path: string, opts?: any) => cfFs.mkdir(path, opts), - readdir: (path: string) => cfFs.readdir(path), - - async readdirWithFileTypes(path: string) { - const entries = await cfFs.readdirWithFileTypes(path); - return entries.map((e: any) => ({ - name: e.name, - isFile: e.type === 'file', - isDirectory: e.type === 'directory', - isSymbolicLink: e.type === 'symlink', - })); - }, - - rm: (path: string, opts?: any) => cfFs.rm(path, opts), - cp: (src: string, dest: string, opts?: any) => cfFs.cp(src, dest, opts), - mv: (src: string, dest: string) => cfFs.mv(src, dest), - resolvePath: (base: string, path: string) => cfFs.resolvePath(base, path), - getAllPaths: () => [], - async chmod(_path: string, _mode: number) {}, - symlink: (target: string, linkPath: string) => cfFs.symlink(target, linkPath), +const MIGRATION_DOC = 'docs/cloudflare-shell.md'; - async link(existingPath: string, newPath: string) { - const content = await cfFs.readFileBytes(existingPath); - await cfFs.writeFileBytes(newPath, content); - }, - - readlink: (path: string) => cfFs.readlink(path), - realpath: (path: string) => cfFs.realpath(path), - async utimes(_path: string, _atime: number, _mtime: number) {}, - }; -} - -export 
async function getVirtualSandbox(): Promise; -export async function getVirtualSandbox( - bucket: unknown, - options?: VirtualSandboxOptions, -): Promise; -export async function getVirtualSandbox( - bucket?: unknown, - options?: VirtualSandboxOptions, -): Promise { +export function getVirtualSandbox(): never; +export function getVirtualSandbox(bucket: unknown, options?: VirtualSandboxOptions): never; +export function getVirtualSandbox(bucket?: unknown, _options?: VirtualSandboxOptions): never { if (bucket === undefined) { - const { Bash, InMemoryFs } = await import(/* @vite-ignore */ 'just-bash' as string); - const fs = new InMemoryFs(); - return () => new Bash({ - fs, - network: { dangerouslyAllowFullInternetAccess: true }, - }); + throw new Error( + '[flue] getVirtualSandbox() has been removed. Flue\'s default in-memory sandbox is already ' + + 'what you wanted — omit the `sandbox` option from init() (or pass `false`) and you get it. ' + + `See ${MIGRATION_DOC} for the full migration story.`, + ); } - - const { storage } = getCloudflareContext(); - const prefix = options?.prefix ?? 'default'; - - const ws = new Workspace({ - sql: storage.sql, - r2: bucket as any, - name: () => prefix, - }); - - const cfFs: CfFileSystem = new WorkspaceFileSystem(ws); - const r2Adapter = adaptToJustBash(cfFs); - - const { Bash, MountableFs, InMemoryFs } = await import(/* @vite-ignore */ 'just-bash' as string); - - const fs = new MountableFs({ base: new InMemoryFs() }); - fs.mount('/workspace', r2Adapter); - - return () => new Bash({ - fs, - cwd: '/workspace', - network: { dangerouslyAllowFullInternetAccess: true }, - }); + throw new Error( + '[flue] getVirtualSandbox(bucket) has been removed. 
Its "mount the R2 bucket as the agent ' + + 'filesystem" framing was never accurate — @cloudflare/shell\'s Workspace is a SQLite-indexed ' + + 'filesystem, not an R2 mount, and bucket keys uploaded externally were invisible to it.\n\n' + + 'Migrate to getShellSandbox() + hydrateFromBucket(), which explicitly copies the bucket\'s ' + + 'objects into a durable Workspace before the agent runs:\n\n' + + ' import {\n' + + ' getShellSandbox,\n' + + ' getDefaultWorkspace,\n' + + ' hydrateFromBucket,\n' + + ' } from \'@flue/runtime/cloudflare\';\n\n' + + ' const workspace = getDefaultWorkspace();\n' + + ' if (!(await workspace.exists(\'/.hydrated\'))) {\n' + + ' await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE);\n' + + ' await workspace.writeFile(\'/.hydrated\', new Date().toISOString());\n' + + ' }\n' + + ' const harness = await init({\n' + + ' sandbox: getShellSandbox({ workspace, loader: env.LOADER }),\n' + + ' model: \'anthropic/claude-sonnet-4-6\',\n' + + ' });\n\n' + + 'Requires a `worker_loaders` binding in wrangler.jsonc; see ' + + `${MIGRATION_DOC} for the binding setup and the @cloudflare/sandbox + mountBucket alternative ` + + 'if your account doesn\'t have Worker Loader access.', + ); } diff --git a/packages/runtime/src/harness.ts b/packages/runtime/src/harness.ts index c7dd769b..cbd79d43 100644 --- a/packages/runtime/src/harness.ts +++ b/packages/runtime/src/harness.ts @@ -44,10 +44,6 @@ export class Harness implements FlueHarness { private store: SessionStore, private eventCallback?: FlueEventCallback, private agentTools: ToolDef[] = [], - // Optional connector-supplied factory for the model-facing built-in - // tool list. When unset, sessions use the framework default six - // (read, write, edit, bash, grep, glob). The framework always - // appends `task` on top of whatever the connector returns. 
private toolFactory?: SessionToolFactory, ) { this.fs = createFlueFs(env); diff --git a/packages/runtime/src/session.ts b/packages/runtime/src/session.ts index 95bceb73..4b949590 100644 --- a/packages/runtime/src/session.ts +++ b/packages/runtime/src/session.ts @@ -102,13 +102,6 @@ interface SessionInitOptions { existingData: SessionData | null; onAgentEvent?: FlueEventCallback; agentTools?: ToolDef[]; - /** - * Optional connector-supplied factory for the model-facing built-in tool - * list. When set, `createBuiltinTools` uses its return value verbatim - * (plus the framework-appended `task` tool) instead of the default six. - * Connectors that omit `tools()` fall through to the default path, so - * adding this is a no-op for them. - */ toolFactory?: SessionToolFactory; sessionRole?: string; taskDepth?: number; @@ -453,12 +446,6 @@ export class Session implements FlueSession { assertRoleExists(this.config.roles, this.config.role); assertRoleExists(this.config.roles, this.sessionRole); - // Build the built-in tools first so the custom-tool validator can - // reject collisions against the connector's actual tool names rather - // than re-invoking the connector's `tools()` factory just to read - // them. The connector factory is allowed to do real work (e.g. - // cf-shell calling into codemode to build state.* providers); calling - // it once per Session-open / per prompt is the budget. const builtinTools = this.createBuiltinTools(this.env, []); const tools = [ ...builtinTools, @@ -883,24 +870,12 @@ export class Session implements FlueSession { ); } - /** - * Reject user-supplied tools that collide with the framework-reserved - * `task` name, with any of the already-built built-in tool names, or - * with each other. - * - * The reserved-names set is derived from the resolved `builtinTools` list - * — not a static constant — so connectors that author their own tool list - * (cf-shell etc.) 
reserve only the names they actually expose, while the - * default tool path keeps reserving all six framework defaults. - */ + /** Reject custom tools that collide with active built-ins or each other. */ private validateCustomToolNames( tools: ToolDef[], builtinTools: AgentTool[], ): void { const reserved = new Set(builtinTools.map((t) => t.name)); - // `task` is always appended downstream; reserved even if the connector - // chose not to include it (which it shouldn't — `validateConnectorTools` - // also rejects connector-supplied `task`). reserved.add('task'); const names = new Set(); for (const toolDef of tools) { @@ -919,15 +894,7 @@ export class Session implements FlueSession { } } - /** - * Build the model-facing built-in tool list for this session. - * - * If the sandbox connector implements `tools()`, its return value replaces - * the framework default (read/write/edit/bash/grep/glob); we still append - * the framework `task` tool on top. Otherwise we fall back to the - * canonical `createTools(env, options)` path, which returns the six - * defaults plus `task` when supported. - */ + /** Build built-in tools from the connector or the framework defaults. */ private createBuiltinTools( env: SessionEnv, tools: ToolDef[], @@ -940,10 +907,6 @@ export class Session implements FlueSession { if (this.toolFactory) { const connectorTools = this.toolFactory(env, { roles: this.config.roles }); - // Validate the connector's contribution before handing it to - // pi-agent-core: duplicate names within the connector list, or a - // collision with the framework `task` tool, would otherwise - // surface as an opaque pi-agent-core error mid-session. this.validateConnectorTools(connectorTools); return [...connectorTools, createTaskTool(runTask, this.config.roles)]; } @@ -954,12 +917,7 @@ export class Session implements FlueSession { }); } - /** - * Validate a connector-supplied built-in tool list. 
Connector tools must - * be uniquely-named and must not claim the framework-reserved `task` - * name. Mirrors the `validateCustomToolNames` shape so connector authors - * get the same error story as user-supplied tools. - */ + /** Validate connector tool names before handing them to the agent loop. */ private validateConnectorTools(tools: AgentTool[]): void { const names = new Set(); for (const tool of tools) { @@ -995,9 +953,6 @@ export class Session implements FlueSession { options.thinkingLevel, options.role, ); - // Build builtins first so the custom-tool validator can read the - // reserved-names set directly from the resulting list — avoids - // invoking the connector's `tools()` factory twice per call. const builtinTools = this.createBuiltinTools( this.env, options.tools, diff --git a/packages/runtime/src/types.ts b/packages/runtime/src/types.ts index 65df75ff..be662b5f 100644 --- a/packages/runtime/src/types.ts +++ b/packages/runtime/src/types.ts @@ -684,48 +684,21 @@ export interface ShellResult { // ─── Sandbox ──────────────────────────────────────────────────────────────── -/** - * Inputs given to a connector's `tools()` method when the framework asks it - * to author the model-facing tool list. Intentionally minimal — the - * framework still owns the `task` tool and appends it on top of whatever the - * connector returns, so `tools()` does not receive a `task` runner. - */ export interface SessionToolFactoryOptions { - /** - * Roles available on the agent. Forwarded for connectors that wish to - * surface role-aware behaviour in their tool descriptions. Most - * connectors can ignore this. - */ + /** Roles available on the agent. */ roles: Record; } -/** - * Optional connector-supplied factory for the model-facing tool list. When a - * `SandboxFactory` implements `tools()`, the framework uses its return value - * verbatim as the built-in tools for sessions built from that sandbox — no - * merging with the default six, no inheritance. 
The framework still appends - * the `task` tool unconditionally; connectors must not include it. - */ +/** Connector-supplied model-facing tools. Flue appends `task` separately. */ export type SessionToolFactory = ( env: SessionEnv, options: SessionToolFactoryOptions, ) => AgentTool[]; -/** - * Wraps external sandboxes (Daytona, CF Containers, etc.) into Flue's SessionEnv. - * - * Optionally contributes its own model-facing tool list via `tools()`. When - * present, the connector's tools replace the framework default (read, write, - * edit, bash, grep, glob); the framework still appends `task` on top. When - * absent, the framework uses its default six tools — every existing connector - * keeps working unchanged. - * - * Use cases for owning the tool list: a connector with native primitives that - * make the defaults wasteful or wrong (e.g. cf-shell, which uses a codemode - * `code` tool instead of `bash` and routes file ops through `state.*`). - */ +/** Wraps external sandboxes (Daytona, CF Containers, etc.) into Flue's SessionEnv. */ export interface SandboxFactory { createSessionEnv(options: { id: string; cwd?: string }): Promise; + /** Replaces the framework default tool list for this sandbox. */ tools?: SessionToolFactory; } diff --git a/packages/sdk/README.md b/packages/sdk/README.md index 2bf6c20d..305f5d21 100644 --- a/packages/sdk/README.md +++ b/packages/sdk/README.md @@ -56,29 +56,40 @@ export default async function ({ init, payload }: FlueContext) { ### Support Agent -A support agent can also run in a virtual sandbox, but we now add a file-system using an R2 bucket. The knowledge base is stored in R2 and mounted directly into the harness filesystem — the agent searches it with its built-in tools (grep, glob, read). Skills are also defined in the bucket that help the agent perform its task. +A support agent can also run on Cloudflare without a container by using a cf-shell Workspace. 
The Workspace is a durable SQLite-indexed filesystem; R2 is an optional hydration source (and large-file spillover), not a live bucket mount. Copy the R2 objects you want into the Workspace before calling `init()`, then the agent operates on that structured filesystem through the `code` tool and `state.*` API. Because this agent is deployed to Cloudflare, message history and session state are automatically persisted for you. So you (or your customer) can revisit this support session days, weeks, or years later and pick up exactly where you left off. ```ts // .flue/agents/support.ts -import { getVirtualSandbox } from '@flue/runtime/cloudflare'; import type { FlueContext } from '@flue/runtime'; +import { + getDefaultWorkspace, + getShellSandbox, + hydrateFromBucket, +} from '@flue/runtime/cloudflare'; import * as v from 'valibot'; export const triggers = { webhook: true }; export default async function ({ init, payload, env }: FlueContext) { - // Mount the R2 knowledge base bucket as the harness filesystem. - // The agent can grep, glob, and read articles with bash, but - // without needing to spin up an entire container sandbox. - const sandbox = await getVirtualSandbox(env.KNOWLEDGE_BASE); - const harness = await init({ sandbox, model: 'openrouter/moonshotai/kimi-k2.6' }); + const workspace = getDefaultWorkspace(); + + // Hydrate once per agent instance. R2 is a source, not a live mount. + if (!(await workspace.exists('/.hydrated'))) { + await hydrateFromBucket(workspace, env.KNOWLEDGE_BASE); + await workspace.writeFile('/.hydrated', new Date().toISOString()); + } + + const harness = await init({ + sandbox: getShellSandbox({ workspace, loader: env.LOADER }), + model: 'openrouter/moonshotai/kimi-k2.6', + }); const session = await harness.session(); return await session.prompt( - `You are a support agent. Search the knowledge base for articles - relevant to this request, then write a helpful response. + `You are a support agent. 
Use the code tool to search the hydrated + workspace for articles relevant to this request, then write a helpful response. Customer: ${payload.message}`, { @@ -89,6 +100,8 @@ export default async function ({ init, payload, env }: FlueContext) { } ``` +This requires a `worker_loaders` binding (`{ "worker_loaders": [{ "binding": "LOADER" }] }`) in your `wrangler.jsonc`. If you need true bucket-keys-as-filesystem-paths semantics or Linux shell commands, use `@cloudflare/sandbox` Containers with `mountBucket` instead. See [Cloudflare Shell Sandbox](https://github.com/withastro/flue/blob/main/docs/cloudflare-shell.md) for the full migration and trade-offs. + ### Issue Triage (CI) A triage agent that runs in CI whenever an issue is opened on GitHub. The `local()` sandbox gives the agent direct access to the host filesystem and shell — perfect for CI runners, where `gh`, `git`, and `npm` are already on `$PATH` and the runner itself is your isolation boundary. diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f0607174..61441568 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -86,6 +86,9 @@ importers: examples/cloudflare: dependencies: + '@cloudflare/shell': + specifier: ^0.3.2 + version: 0.3.6(@modelcontextprotocol/sdk@1.29.0(@cfworker/json-schema@4.1.1)(zod@4.4.3))(ai@6.0.175(zod@4.4.3))(zod@4.4.3) '@flue/runtime': specifier: workspace:* version: link:../../packages/runtime @@ -99,9 +102,15 @@ importers: specifier: ^1.0.0 version: 1.3.1(typescript@5.9.3) devDependencies: + '@cloudflare/workers-types': + specifier: ^4.20260505.1 + version: 4.20260505.1 '@flue/cli': specifier: workspace:* version: link:../../packages/cli + wrangler: + specifier: ^4.87.0 + version: 4.87.0(@cloudflare/workers-types@4.20260505.1) examples/hello-world: dependencies: @@ -179,6 +188,9 @@ importers: packages/runtime: dependencies: + '@cloudflare/codemode': + specifier: ^0.3.4 + version: 
0.3.4(@modelcontextprotocol/sdk@1.29.0(@cfworker/json-schema@4.1.1)(zod@4.4.3))(ai@6.0.175(zod@4.4.3))(zod@4.4.3) '@cloudflare/shell': specifier: ^0.3.2 version: 0.3.6(@modelcontextprotocol/sdk@1.29.0(@cfworker/json-schema@4.1.1)(zod@4.4.3))(ai@6.0.175(zod@4.4.3))(zod@4.4.3) From b9b508751bcb870df241bc64c2f164ef07b91b7a Mon Sep 17 00:00:00 2001 From: "Fred K. Schott" <622227+FredKSchott@users.noreply.github.com> Date: Fri, 15 May 2026 13:51:10 -0700 Subject: [PATCH 3/3] fix(runtime): refine Cloudflare shell sandbox wiring --- docs/cloudflare-shell.md | 12 ++++++- .../runtime/src/cloudflare/shell-sandbox.ts | 36 ++++++++++--------- packages/sdk/src/deprecated.ts | 3 ++ 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/docs/cloudflare-shell.md b/docs/cloudflare-shell.md index d26aeadb..861781ab 100644 --- a/docs/cloudflare-shell.md +++ b/docs/cloudflare-shell.md @@ -65,7 +65,17 @@ async () => { } ``` -Programmatic file access still works through `session.fs` and `harness.fs`, backed by the same Workspace. Paths are Workspace paths such as `/foo.md`; there is no `/workspace` mount prefix. +## Using `session.fs` from your own code + +Programmatic file access still works through `session.fs` and `harness.fs`, backed by the same Workspace as the agent's `code` tool: + +```ts +await harness.fs.writeFile('/notes.md', 'staged before the session starts'); +const session = await harness.session(); +const notes = await session.fs.readFile('/notes.md'); +``` + +Use these filesystem APIs for setup, post-processing, or any other host-side file access. Paths are Workspace paths such as `/foo.md`; there is no `/workspace` mount prefix. `session.shell()` and `harness.shell()` throw because cf-shell has no shell. If you need Linux commands, use `@cloudflare/sandbox` Containers instead. 
diff --git a/packages/runtime/src/cloudflare/shell-sandbox.ts b/packages/runtime/src/cloudflare/shell-sandbox.ts index f196fba7..cdc4c77c 100644 --- a/packages/runtime/src/cloudflare/shell-sandbox.ts +++ b/packages/runtime/src/cloudflare/shell-sandbox.ts @@ -6,7 +6,12 @@ import { type FsStat as CfFsStat, } from '@cloudflare/shell'; import { stateTools } from '@cloudflare/shell/workers'; -import type { DynamicWorkerExecutorOptions } from '@cloudflare/codemode'; +import { + DynamicWorkerExecutor, + resolveProvider, + type DynamicWorkerExecutorOptions, + type ResolvedProvider, +} from '@cloudflare/codemode'; import type { AgentTool, AgentToolResult } from '@earendil-works/pi-agent-core'; import { type Static, Type } from '@earendil-works/pi-ai'; import { normalizePath } from '../session.ts'; @@ -48,9 +53,12 @@ export function getShellSandbox(options: GetShellSandboxOptions): SandboxFactory const { workspace, loader, executor: executorOptions } = options; const fs = new WorkspaceFileSystem(workspace); - const toolFactory: SessionToolFactory = () => [ - createCodeTool(workspace, loader, executorOptions), - ]; + const executor = new DynamicWorkerExecutor({ + loader, + ...executorOptions, + }); + const stateProvider = resolveProvider(stateTools(workspace)); + const toolFactory: SessionToolFactory = () => [createCodeTool(executor, stateProvider)]; return { async createSessionEnv() { @@ -113,10 +121,11 @@ function createWorkspaceSessionEnv( } const EXEC_NOT_SUPPORTED_MESSAGE = - '[flue] The cf-shell sandbox does not support exec(). cf-shell agents operate on a structured ' + - 'workspace via the `code` tool (JavaScript run in an isolated Worker via codemode) and the ' + - '`state.*` API exposed inside that sandbox; there is no shell. If you need bash/grep/find, use ' + - '`@cloudflare/sandbox` (Containers + mountBucket) instead. See docs/cloudflare-shell.md.'; + '[flue] The cf-shell sandbox does not support exec(). 
The agent\'s `code` tool runs JavaScript ' + + 'in an isolated Worker against the workspace; from your own code, use `session.fs` / `harness.fs` ' + + '(readFile, writeFile, stat, readdir, etc.) — they route through the same Workspace. If you ' + + 'specifically need bash/grep/find or a real Linux environment, use `@cloudflare/sandbox` ' + + '(Containers + mountBucket) instead.'; function adaptStat(s: CfFsStat): FileStat { return { @@ -139,9 +148,8 @@ const CodeParams = Type.Object({ }); function createCodeTool( - workspace: Workspace, - loader: WorkerLoader, - executorOptions: GetShellSandboxOptions['executor'], + executor: DynamicWorkerExecutor, + stateProvider: ResolvedProvider, ): AgentTool { return { name: 'code', @@ -152,12 +160,6 @@ function createCodeTool( _toolCallId: string, params: Static, ): Promise> { - const { DynamicWorkerExecutor, resolveProvider } = await import('@cloudflare/codemode'); - const executor = new DynamicWorkerExecutor({ - loader, - ...executorOptions, - }); - const stateProvider = resolveProvider(stateTools(workspace)); const { result, error, logs } = await executor.execute(params.code, [stateProvider]); if (error) { diff --git a/packages/sdk/src/deprecated.ts b/packages/sdk/src/deprecated.ts index 1eb7ab95..ae66a18a 100644 --- a/packages/sdk/src/deprecated.ts +++ b/packages/sdk/src/deprecated.ts @@ -64,7 +64,10 @@ export { __noop as cfSandboxToSessionEnv, __noop as getCloudflareAIBindingApiProvider, __noop as getCloudflareContext, + __noop as getDefaultWorkspace, + __noop as getShellSandbox, __noop as getVirtualSandbox, + __noop as hydrateFromBucket, __noop as runWithCloudflareContext, __noop as store, // ./config