Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 24 additions & 17 deletions examples/feature-examples/compatibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,44 @@ SDK Version: 0.4.3

| Use Case | ACP | Claude |
|----------|-----|--------|
| agent-via-blueprint | pass | pass |
| agent-via-blueprint | fail | pass |
| elicitation-acp | xfail | N/A |
| elicitation-claude | N/A | pass |
| single-prompt | pass | pass |
| mcp-server | pass | pass |
| single-prompt | fail | pass |

## ACP Agent × Feature

| Use Case | opencode | codex-acp | qwen | gemini-cli |
|----------|------------|------------|------------|------------|
| agent-via-blueprint | pass | pass | skip | skip |
| agent-via-blueprint | pass | fail | skip | skip |
| elicitation-acp | xfail | xfail | xfail | xfail |
| single-prompt | pass | pass | pass | pass |
| mcp-server | pass | pass | pass | skip |
| single-prompt | fail | pass | pass | skip |

---

## Run Details

| Agent | Use Case | Status | Duration | Notes |
|-------|----------|--------|----------|-------|
| opencode | agent-via-blueprint | pass | 1.8s | |
| opencode | elicitation-acp | xfail | 9.9s | [xfail: ACP protocol has not added full elicitation support yet] Agent did not trigger session_elicitation |
| opencode | single-prompt | pass | 2.1s | |
| codex-acp | agent-via-blueprint | pass | 2.4s | |
| codex-acp | elicitation-acp | xfail | 10.0s | [xfail: codex-acp does not advertise or send session/elicitation (uses permission requests instead)] Agent did not trigger session_elicitation |
| codex-acp | single-prompt | pass | 1.3s | |
| opencode | agent-via-blueprint | pass | 5.4s | |
| opencode | elicitation-acp | xfail | 19.5s | [xfail: ACP protocol has not added full elicitation support yet] Agent did not trigger session_elicitation |
| opencode | mcp-server | pass | 11.6s | |
| opencode | single-prompt | fail | 0.0s | Long poll timed out after 180000ms. Last result: undefined |
| codex-acp | agent-via-blueprint | fail | 0.0s | Long poll timed out after 180000ms. Last result: undefined |
| codex-acp | elicitation-acp | xfail | 0.0s | [xfail: codex-acp does not advertise or send session/elicitation (uses permission requests instead)] Long poll timed out after 180000ms. Last result: undefined |
| codex-acp | mcp-server | pass | 5.3s | |
| codex-acp | single-prompt | pass | 2.0s | |
| qwen | agent-via-blueprint | skip | 0.0s | No blueprint override defined for qwen — add an entry to BLUEPRINT_OVERRIDES to test this agent via blueprint |
| qwen | elicitation-acp | xfail | 11.7s | [xfail: qwen does not advertise or send session/elicitation] Agent did not trigger session_elicitation |
| qwen | single-prompt | pass | 2.2s | |
| qwen | elicitation-acp | xfail | 12.2s | [xfail: qwen does not advertise or send session/elicitation] Agent did not trigger session_elicitation |
| qwen | mcp-server | pass | 6.7s | |
| qwen | single-prompt | pass | 2.6s | |
| gemini-cli | agent-via-blueprint | skip | 0.0s | No blueprint override defined for gemini-cli — add an entry to BLUEPRINT_OVERRIDES to test this agent via blueprint |
| gemini-cli | elicitation-acp | xfail | 12.9s | [xfail: gemini-cli does not advertise or send session/elicitation] Agent did not trigger session_elicitation |
| gemini-cli | single-prompt | pass | 4.0s | |
| claude-code | agent-via-blueprint | pass | 3.8s | |
| claude-code | elicitation-claude | pass | 16.0s | |
| claude-code | single-prompt | pass | 1.6s | |
| gemini-cli | elicitation-acp | xfail | 0.5s | [xfail: gemini-cli does not advertise or send session/elicitation] [-32000] You have exhausted your daily quota on this model. {"event_type":"turn.failed"} |
| gemini-cli | mcp-server | skip | 0.0s | Cannot verify on this account: Gemini API quota exhausted (verified working with sufficient quota) |
| gemini-cli | single-prompt | skip | 0.0s | Cannot verify on this account: Gemini API quota exhausted (verified working with sufficient quota) |
| claude-code | agent-via-blueprint | pass | 1.4s | |
| claude-code | elicitation-claude | pass | 21.2s | |
| claude-code | mcp-server | pass | 7.4s | |
| claude-code | single-prompt | pass | 1.4s | |
2 changes: 1 addition & 1 deletion examples/feature-examples/src/agents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ export const AGENTS: AgentConfig[] = [
brokerMount: {
protocol: "acp",
agentBinary: "gemini",
launchArgs: ["--experimental-acp", "--yolo"],
launchArgs: ["--acp", "--yolo", "--skip-trust"],
},
secrets: { GEMINI_API_KEY: "GEMINI_API_KEY" },
},
Expand Down
14 changes: 14 additions & 0 deletions examples/feature-examples/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,20 @@ async function runOne(
const expectedFailReason = useCase.expectedFailures?.[agent.name];
const getDurationMs = () => (agentStartMs === null ? 0 : Date.now() - agentStartMs);

// Pre-setup skip: avoids provisioning a devbox we know we cannot use
// (e.g. an account with an exhausted API quota for this agent).
const preSkipReason = useCase.skipForAgents?.[agent.name];
if (preSkipReason) {
return {
agent: agent.name,
useCase: useCase.name,
protocol: agent.protocol,
status: "skip",
reason: preSkipReason,
durationMs: 0,
};
}

try {
const { ctx: setupCtx } = await setup(agent, useCase);
ctx = setupCtx;
Expand Down
60 changes: 41 additions & 19 deletions examples/feature-examples/src/scaffold.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import { RunloopSDK, type Secret } from "@runloop/api-client";
import { ACPAxonConnection, PROTOCOL_VERSION } from "@runloop/remote-agents-sdk/acp";
import { ClaudeAxonConnection } from "@runloop/remote-agents-sdk/claude";
import type { AgentConfig, AgentConfigOverride, BrokerMount, UseCase, RunContext } from "./types.js";
import type {
AgentConfig,
AgentConfigOverride,
BrokerMount,
ExtraMount,
UseCase,
RunContext,
} from "./types.js";
import { SkipError } from "./types.js";
import { withTimeout } from "./validator.js";

Expand All @@ -10,7 +17,13 @@ interface SetupResult {
sdk: RunloopSDK;
}

const DEFAULT_WORKING_DIRECTORY = "/home/user";
/**
* Default home directory for the devbox user. Use cases that need to drop
* config under `~` (e.g. gemini-cli's `~/.gemini/settings.json`) should
* import this rather than hardcode the path.
*/
export const DEFAULT_USER_HOME = "/home/user";
// Fallback `cwd` for the agent session when the broker mount does not specify a working directory.
const DEFAULT_WORKING_DIRECTORY = DEFAULT_USER_HOME;
// Upper bound (ms) handed to `withTimeout` for a single setup step, e.g. the ACP `newSession` call.
const SETUP_STEP_TIMEOUT_MS = 30_000;
// NOTE(review): presumably bounds best-effort cleanup after a failed setup; usage is outside this view — confirm.
const SETUP_ERROR_CLEANUP_TIMEOUT_MS = 10_000;
const DEVBOX_PROVISION_TIMEOUT_MS = 180_000; // 3 minutes for cold start with agent mounts
Expand Down Expand Up @@ -71,8 +84,10 @@ export async function setup(agent: AgentConfig, useCase: UseCase): Promise<Setup
devboxSecretsMap[devboxEnv] = secret.name;
}

// Build the devbox mounts array from the merged config.
const mounts = buildDevboxMounts(axon.id, mergedAgent);
// Build the devbox mounts array from the merged config, plus any extra
// per-agent mounts the use-case requested (e.g. gemini-cli settings.json).
const extraMounts = useCase.extraMountsByAgent?.[agent.name] ?? [];
const mounts = buildDevboxMounts(axon.id, mergedAgent, extraMounts);

log("Creating devbox...");
const devbox = await sdk.devbox.create(
Expand Down Expand Up @@ -138,7 +153,7 @@ export async function setup(agent: AgentConfig, useCase: UseCase): Promise<Setup
const session = await withTimeout(
conn.newSession({
cwd: mergedAgent.brokerMount.workingDirectory ?? DEFAULT_WORKING_DIRECTORY,
mcpServers: [],
mcpServers: useCase.acpMcpServers ?? [],
}),
SETUP_STEP_TIMEOUT_MS,
"ACP newSession",
Expand Down Expand Up @@ -238,14 +253,19 @@ function validateConfig(agent: AgentConfig): void {
}

/**
* Build the devbox mounts array from the agent config.
* Build the devbox mounts array from the agent config plus any use-case
* supplied extra mounts.
*
* - **catalog** install: adds an `agent_mount` (to install from catalog) + `broker_mount`.
* - **agent-mount** install: adds an `agent_mount` (to install from catalog) + `broker_mount`.
* - **blueprint** install: only a `broker_mount` (agent is pre-baked).
*
* Extra mounts (e.g. inline `file_mount` for agent config) are appended last,
* so the standard mounts always come first in the returned array.
*/
function buildDevboxMounts(
axonId: string,
agent: AgentConfig,
extraMounts: ExtraMount[] = [],
): Array<
| { type: "agent_mount"; agent_id: null; agent_name: string }
| {
Expand All @@ -256,20 +276,22 @@ function buildDevboxMounts(
launch_args?: string[];
working_directory?: string;
}
| ExtraMount
> {
const brokerMount = buildBrokerMount(axonId, agent.brokerMount);

if (agent.install.kind === "agent-mount") {
const agentMount = {
type: "agent_mount" as const,
agent_id: null,
agent_name: agent.install.agentName,
};
return [agentMount, brokerMount];
}

// Blueprint install: agent is already in the image.
return [brokerMount];
const base =
agent.install.kind === "agent-mount"
? [
{
type: "agent_mount" as const,
agent_id: null,
agent_name: agent.install.agentName,
},
brokerMount,
]
: [brokerMount];

return [...base, ...extraMounts];
}

/**
Expand Down
60 changes: 59 additions & 1 deletion examples/feature-examples/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,33 @@
import type { Runloop } from "@runloop/api-client";
import type { ACPAxonConnection } from "@runloop/remote-agents-sdk/acp";
import type { ClaudeAxonConnection } from "@runloop/remote-agents-sdk/claude";
import type { Client, Agent } from "@agentclientprotocol/sdk";
import type { Client, Agent, McpServer } from "@agentclientprotocol/sdk";

/**
* Inline `file_mount` shape from the Runloop API. The file is in place when
* the devbox boots, before the broker spawns the agent process — use it for
* agent config that must exist on startup (e.g. gemini-cli's
* `~/.gemini/settings.json`).
*
* Note: use a `file_mount` ONLY for tiny configuration files; use an
* `object_mount` for larger files.
*
* Derived from `@runloop/api-client`'s `Mount` union so the shape can't drift
* from the upstream API.
*/
export type FileMount = Extract<Runloop.Mount, { type: "file_mount" }>;

/**
* `object_mount` shape from the Runloop API for pre-uploaded storage objects.
* Derived from `@runloop/api-client`'s `Mount` union so the shape can't drift
* from the upstream API.
*/
export type ObjectMount = Extract<Runloop.Mount, { type: "object_mount" }>;

/**
* Extra devbox mounts a use case can request per-agent. Restricted to
* supplemental-config mount kinds; `agent_mount` and `broker_mount` are
* managed by scaffold.ts and must not be duplicated here.
*/
export type ExtraMount = FileMount | ObjectMount;

/**
* How the agent gets installed on the devbox.
Expand Down Expand Up @@ -122,13 +149,44 @@ export interface UseCase {
*/
clientCapabilities?: Record<string, unknown>;

/**
* MCP servers attached to the ACP `newSession()` call. Ignored for Claude
* paths (Claude reads MCP config from `--mcp-config` launch args instead).
*/
acpMcpServers?: McpServer[];

/**
* Extra devbox mounts to add at provision time, keyed by agent name. Use
* this for agent-specific config that must be on disk *before* the broker
* spawns the agent process — e.g. gemini-cli's `~/.gemini/settings.json`,
* since gemini-cli reads MCP config from settings.json at startup and does
* not honour ACP `newSession.mcpServers`.
*
* Mounts here are appended to the standard `agent_mount` / `broker_mount`
* pair built by scaffold.ts. Inline `file_mount` is the simplest option;
* `object_mount` is available for pre-uploaded storage objects.
*/
extraMountsByAgent?: Record<string, ExtraMount[]>;

/**
* Per-agent expected failures (with reason), keyed by agent name.
* Results will show as "xfail" instead of "fail" and won't cause exit code 1.
* Use this for protocol/feature-level limitations the agent genuinely
* doesn't implement (e.g. ACP elicitation not advertised).
* E.g., `{ opencode: "Elicitation not yet supported" }`.
*/
expectedFailures?: Record<string, string>;

/**
* Per-agent skip reasons, keyed by agent name. Skipped *before* setup so no
* devbox is provisioned. Use this for environmental limitations that prevent
* verifying the use case on the current machine/account (e.g. an exhausted
* API quota on a shared key) — distinct from `expectedFailures`, which
* documents a protocol/agent that genuinely lacks the feature.
* E.g., `{ "gemini-cli": "Cannot verify on this account: API quota exhausted" }`.
*/
skipForAgents?: Record<string, string>;

/**
* The test body. Receives a fully initialized RunContext.
* Throw to indicate failure. Return cleanly to indicate pass.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export default {
name: "elicitation-claude",
description: "Handle agent-initiated user input via Claude conversational flow",
protocols: ["claude"],
timeoutMs: 20_000,
timeoutMs: 30_000,

async run(ctx) {
if (!ctx.claude) {
Expand Down
2 changes: 2 additions & 0 deletions examples/feature-examples/src/use-cases/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ import type { UseCase } from "../types.js";
import agentViaBlueprint from "./agent-via-blueprint.js";
import elicitationAcp from "./elicitation-acp.js";
import elicitationClaude from "./elicitation-claude.js";
import mcpServer from "./mcp-server.js";
import singlePrompt from "./single-prompt.js";

/**
* Every use case exported by this module, listed in the same order as the
* imports above. A new use case must be both imported and added here.
*/
export const USE_CASES: UseCase[] = [
agentViaBlueprint,
elicitationAcp,
elicitationClaude,
mcpServer,
singlePrompt,
];
Loading
Loading