runloopai · james-rl · May 6, 2026 · May 6, 2026 · May 6, 2026 · May 7, 2026
diff --git a/examples/feature-examples/compatibility.md b/examples/feature-examples/compatibility.md
@@ -6,37 +6,44 @@ SDK Version: 0.4.3
 
 | Use Case | ACP | Claude |
 |----------|-----|--------|
-| agent-via-blueprint | pass | pass |
+| agent-via-blueprint | fail | pass |
 | elicitation-acp | xfail | N/A |
 | elicitation-claude | N/A | pass |
-| single-prompt | pass | pass |
+| mcp-server | pass | pass |
+| single-prompt | fail | pass |
 
 ## ACP Agent × Feature
 
 | Use Case | opencode | codex-acp | qwen | gemini-cli |
 |----------|------------|------------|------------|------------|
-| agent-via-blueprint | pass | pass | skip | skip |
+| agent-via-blueprint | pass | fail | skip | skip |
 | elicitation-acp | xfail | xfail | xfail | xfail |
-| single-prompt | pass | pass | pass | pass |
+| mcp-server | pass | pass | pass | skip |
+| single-prompt | fail | pass | pass | skip |
 
 ---
 
 ## Run Details
 
 | Agent | Use Case | Status | Duration | Notes |
 |-------|----------|--------|----------|-------|
-| opencode | agent-via-blueprint | pass | 1.8s |  |
-| opencode | elicitation-acp | xfail | 9.9s | [xfail: ACP protocol has not added full elicitation support yet] Agent did not trigger session_elicitation |
-| opencode | single-prompt | pass | 2.1s |  |
-| codex-acp | agent-via-blueprint | pass | 2.4s |  |
-| codex-acp | elicitation-acp | xfail | 10.0s | [xfail: codex-acp does not advertise or send session/elicitation (uses permission requests instead)] Agent did not trigger session_elicitation |
-| codex-acp | single-prompt | pass | 1.3s |  |
+| opencode | agent-via-blueprint | pass | 5.4s |  |
+| opencode | elicitation-acp | xfail | 19.5s | [xfail: ACP protocol has not added full elicitation support yet] Agent did not trigger session_elicitation |
+| opencode | mcp-server | pass | 11.6s |  |
+| opencode | single-prompt | fail | 0.0s | Long poll timed out after 180000ms. Last result: undefined |
+| codex-acp | agent-via-blueprint | fail | 0.0s | Long poll timed out after 180000ms. Last result: undefined |
+| codex-acp | elicitation-acp | xfail | 0.0s | [xfail: codex-acp does not advertise or send session/elicitation (uses permission requests instead)] Long poll timed out after 180000ms. Last result: undefined |
+| codex-acp | mcp-server | pass | 5.3s |  |
+| codex-acp | single-prompt | pass | 2.0s |  |
 | qwen | agent-via-blueprint | skip | 0.0s | No blueprint override defined for qwen — add an entry to BLUEPRINT_OVERRIDES to test this agent via blueprint |
-| qwen | elicitation-acp | xfail | 11.7s | [xfail: qwen does not advertise or send session/elicitation] Agent did not trigger session_elicitation |
-| qwen | single-prompt | pass | 2.2s |  |
+| qwen | elicitation-acp | xfail | 12.2s | [xfail: qwen does not advertise or send session/elicitation] Agent did not trigger session_elicitation |
+| qwen | mcp-server | pass | 6.7s |  |
+| qwen | single-prompt | pass | 2.6s |  |
 | gemini-cli | agent-via-blueprint | skip | 0.0s | No blueprint override defined for gemini-cli — add an entry to BLUEPRINT_OVERRIDES to test this agent via blueprint |
-| gemini-cli | elicitation-acp | xfail | 12.9s | [xfail: gemini-cli does not advertise or send session/elicitation] Agent did not trigger session_elicitation |
-| gemini-cli | single-prompt | pass | 4.0s |  |
-| claude-code | agent-via-blueprint | pass | 3.8s |  |
-| claude-code | elicitation-claude | pass | 16.0s |  |
-| claude-code | single-prompt | pass | 1.6s |  |
+| gemini-cli | elicitation-acp | xfail | 0.5s | [xfail: gemini-cli does not advertise or send session/elicitation] [-32000] You have exhausted your daily quota on this model. {"event_type":"turn.failed"} |
+| gemini-cli | mcp-server | skip | 0.0s | Cannot verify on this account: Gemini API quota exhausted (verified working with sufficient quota) |
+| gemini-cli | single-prompt | skip | 0.0s | Cannot verify on this account: Gemini API quota exhausted (verified working with sufficient quota) |
+| claude-code | agent-via-blueprint | pass | 1.4s |  |
+| claude-code | elicitation-claude | pass | 21.2s |  |
+| claude-code | mcp-server | pass | 7.4s |  |
+| claude-code | single-prompt | pass | 1.4s |  |
diff --git a/examples/feature-examples/src/agents.ts b/examples/feature-examples/src/agents.ts
@@ -67,7 +67,7 @@ export const AGENTS: AgentConfig[] = [
     brokerMount: {
       protocol: "acp",
       agentBinary: "gemini",
-      launchArgs: ["--experimental-acp", "--yolo"],
+      launchArgs: ["--acp", "--yolo", "--skip-trust"],
     },
     secrets: { GEMINI_API_KEY: "GEMINI_API_KEY" },
   },

diff --git a/examples/feature-examples/src/main.ts b/examples/feature-examples/src/main.ts
@@ -84,6 +84,20 @@ async function runOne(
   const expectedFailReason = useCase.expectedFailures?.[agent.name];
   const getDurationMs = () => (agentStartMs === null ? 0 : Date.now() - agentStartMs);
 
+  // Pre-setup skip: avoids provisioning a devbox we know we cannot use
+  // (e.g. an account with an exhausted API quota for this agent).
+  const preSkipReason = useCase.skipForAgents?.[agent.name];
+  if (preSkipReason) {
+    return {
+      agent: agent.name,
+      useCase: useCase.name,
+      protocol: agent.protocol,
+      status: "skip",
+      reason: preSkipReason,
+      durationMs: 0,
+    };
+  }
+
   try {
     const { ctx: setupCtx } = await setup(agent, useCase);
     ctx = setupCtx;

diff --git a/examples/feature-examples/src/scaffold.ts b/examples/feature-examples/src/scaffold.ts
@@ -1,7 +1,14 @@
 import { RunloopSDK, type Secret } from "@runloop/api-client";
 import { ACPAxonConnection, PROTOCOL_VERSION } from "@runloop/remote-agents-sdk/acp";
 import { ClaudeAxonConnection } from "@runloop/remote-agents-sdk/claude";
-import type { AgentConfig, AgentConfigOverride, BrokerMount, UseCase, RunContext } from "./types.js";
+import type {
+  AgentConfig,
+  AgentConfigOverride,
+  BrokerMount,
+  ExtraMount,
+  UseCase,
+  RunContext,
+} from "./types.js";
 import { SkipError } from "./types.js";
 import { withTimeout } from "./validator.js";
 
@@ -10,7 +17,13 @@ interface SetupResult {
   sdk: RunloopSDK;
 }
 
-const DEFAULT_WORKING_DIRECTORY = "/home/user";
+/**
+ * Default home directory for the devbox user. Use cases that need to drop
+ * config under `~` (e.g. gemini-cli's `~/.gemini/settings.json`) should
+ * import this rather than hardcode the path.
+ */
+export const DEFAULT_USER_HOME = "/home/user";
+const DEFAULT_WORKING_DIRECTORY = DEFAULT_USER_HOME;
 const SETUP_STEP_TIMEOUT_MS = 30_000;
 const SETUP_ERROR_CLEANUP_TIMEOUT_MS = 10_000;
 const DEVBOX_PROVISION_TIMEOUT_MS = 180_000; // 3 minutes for cold start with agent mounts
@@ -71,8 +84,10 @@ export async function setup(agent: AgentConfig, useCase: UseCase): Promise<Setup
     devboxSecretsMap[devboxEnv] = secret.name;
   }
 
-  // Build the devbox mounts array from the merged config.
-  const mounts = buildDevboxMounts(axon.id, mergedAgent);
+  // Build the devbox mounts array from the merged config, plus any extra
+  // per-agent mounts the use-case requested (e.g. gemini-cli settings.json).
+  const extraMounts = useCase.extraMountsByAgent?.[agent.name] ?? [];
+  const mounts = buildDevboxMounts(axon.id, mergedAgent, extraMounts);
 
   log("Creating devbox...");
   const devbox = await sdk.devbox.create(
@@ -138,7 +153,7 @@ export async function setup(agent: AgentConfig, useCase: UseCase): Promise<Setup
       const session = await withTimeout(
         conn.newSession({
           cwd: mergedAgent.brokerMount.workingDirectory ?? DEFAULT_WORKING_DIRECTORY,
-          mcpServers: [],
+          mcpServers: useCase.acpMcpServers ?? [],
         }),
         SETUP_STEP_TIMEOUT_MS,
         "ACP newSession",
@@ -238,14 +253,19 @@ function validateConfig(agent: AgentConfig): void {
 }
 
 /**
- * Build the devbox mounts array from the agent config.
+ * Build the devbox mounts array from the agent config plus any use-case
+ * supplied extra mounts.
  *
- * - **catalog** install: adds an `agent_mount` (to install from catalog) + `broker_mount`.
+ * - **agent-mount** install: adds an `agent_mount` (to install from catalog) + `broker_mount`.
  * - **blueprint** install: only a `broker_mount` (agent is pre-baked).
+ *
+ * Extra mounts (e.g. inline `file_mount` for agent config) are appended after
+ * the standard mounts, which are always present and always come first.
  */
 function buildDevboxMounts(
   axonId: string,
   agent: AgentConfig,
+  extraMounts: ExtraMount[] = [],
 ): Array<
   | { type: "agent_mount"; agent_id: null; agent_name: string }
   | {
@@ -256,20 +276,22 @@ function buildDevboxMounts(
       launch_args?: string[];
       working_directory?: string;
     }
+  | ExtraMount
 > {
   const brokerMount = buildBrokerMount(axonId, agent.brokerMount);
-
-  if (agent.install.kind === "agent-mount") {
-    const agentMount = {
-      type: "agent_mount" as const,
-      agent_id: null,
-      agent_name: agent.install.agentName,
-    };
-    return [agentMount, brokerMount];
-  }
-
-  // Blueprint install: agent is already in the image.
-  return [brokerMount];
+  const base =
+    agent.install.kind === "agent-mount"
+      ? [
+          {
+            type: "agent_mount" as const,
+            agent_id: null,
+            agent_name: agent.install.agentName,
+          },
+          brokerMount,
+        ]
+      : [brokerMount];
+
+  return [...base, ...extraMounts];
 }
 
 /**

diff --git a/examples/feature-examples/src/types.ts b/examples/feature-examples/src/types.ts
@@ -1,6 +1,33 @@
+import type { Runloop } from "@runloop/api-client";
 import type { ACPAxonConnection } from "@runloop/remote-agents-sdk/acp";
 import type { ClaudeAxonConnection } from "@runloop/remote-agents-sdk/claude";
-import type { Client, Agent } from "@agentclientprotocol/sdk";
+import type { Client, Agent, McpServer } from "@agentclientprotocol/sdk";
+
+/**
+ * Inline `file_mount` shape from the Runloop API. The file is in place when
+ * the devbox boots, before the broker spawns the agent process — use it for
+ * agent config that must exist on startup (e.g. gemini-cli's
+ * `~/.gemini/settings.json`).
+ * Note: use `file_mount` ONLY for tiny configuration files; use `object_mount` for larger files.
+ *
+ * Derived from `@runloop/api-client`'s `Mount` union so the shape can't drift
+ * from the upstream API.
+ */
+export type FileMount = Extract<Runloop.Mount, { type: "file_mount" }>;
+
+/**
+ * `object_mount` shape from the Runloop API for pre-uploaded storage objects.
+ * Derived from `@runloop/api-client`'s `Mount` union so the shape can't drift
+ * from the upstream API.
+ */
+export type ObjectMount = Extract<Runloop.Mount, { type: "object_mount" }>;
+
+/**
+ * Extra devbox mounts a use case can request per-agent. Restricted to
+ * supplemental-config mount kinds; `agent_mount` and `broker_mount` are
+ * managed by scaffold.ts and must not be duplicated here.
+ */
+export type ExtraMount = FileMount | ObjectMount;
 
 /**
  * How the agent gets installed on the devbox.
@@ -122,13 +149,44 @@ export interface UseCase {
    */
   clientCapabilities?: Record<string, unknown>;
 
+  /**
+   * MCP servers attached to the ACP `newSession()` call. Ignored for Claude
+   * paths (Claude reads MCP config from `--mcp-config` launch args instead).
+   */
+  acpMcpServers?: McpServer[];
+
+  /**
+   * Extra devbox mounts to add at provision time, keyed by agent name. Use
+   * this for agent-specific config that must be on disk *before* the broker
+   * spawns the agent process — e.g. gemini-cli's `~/.gemini/settings.json`,
+   * since gemini-cli reads MCP config from settings.json at startup and does
+   * not honour ACP `newSession.mcpServers`.
+   *
+   * Mounts here are appended to the standard `agent_mount` / `broker_mount`
+   * pair built by scaffold.ts. Inline `file_mount` is the simplest option;
+   * `object_mount` is available for pre-uploaded storage objects.
+   */
+  extraMountsByAgent?: Record<string, ExtraMount[]>;
+
   /**
    * Per-agent expected failures (with reason), keyed by agent name.
    * Results will show as "xfail" instead of "fail" and won't cause exit code 1.
+   * Use this for protocol/feature-level limitations the agent genuinely
+   * doesn't implement (e.g. ACP elicitation not advertised).
    * E.g., `{ opencode: "Elicitation not yet supported" }`.
    */
   expectedFailures?: Record<string, string>;
 
+  /**
+   * Per-agent skip reasons, keyed by agent name. An agent with a non-empty
+   * reason is skipped *before* setup, so no devbox is provisioned. Use this for
+   * environmental limitations that prevent verifying the use case on the current
+   * machine/account (e.g. an exhausted API quota on a shared key) — distinct from
+   * `expectedFailures`, which documents a protocol/agent that genuinely lacks the feature.
+   * E.g., `{ "gemini-cli": "Cannot verify on this account: API quota exhausted" }`.
+   */
+  skipForAgents?: Record<string, string>;
+
   /**
    * The test body. Receives a fully initialized RunContext.
    * Throw to indicate failure. Return cleanly to indicate pass.

diff --git a/examples/feature-examples/src/use-cases/elicitation-claude.ts b/examples/feature-examples/src/use-cases/elicitation-claude.ts
@@ -7,7 +7,7 @@ export default {
   name: "elicitation-claude",
   description: "Handle agent-initiated user input via Claude conversational flow",
   protocols: ["claude"],
-  timeoutMs: 20_000,
+  timeoutMs: 30_000,
 
   async run(ctx) {
     if (!ctx.claude) {

diff --git a/examples/feature-examples/src/use-cases/index.ts b/examples/feature-examples/src/use-cases/index.ts
@@ -2,11 +2,13 @@ import type { UseCase } from "../types.js";
 import agentViaBlueprint from "./agent-via-blueprint.js";
 import elicitationAcp from "./elicitation-acp.js";
 import elicitationClaude from "./elicitation-claude.js";
+import mcpServer from "./mcp-server.js";
 import singlePrompt from "./single-prompt.js";
 
 export const USE_CASES: UseCase[] = [
   agentViaBlueprint,
   elicitationAcp,
   elicitationClaude,
+  mcpServer,
   singlePrompt,
 ];