NoeFabris · ndycode · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026 · Mar 3, 2026
diff --git a/README.md b/README.md
@@ -120,8 +120,8 @@ opencode run "Hello" --model=google/antigravity-claude-opus-4-6-thinking --varia
 | `antigravity-gemini-3-pro` | low, high | Gemini 3 Pro with thinking |
 | `antigravity-gemini-3.1-pro` | low, high | Gemini 3.1 Pro with thinking (rollout-dependent) |
 | `antigravity-gemini-3-flash` | minimal, low, medium, high | Gemini 3 Flash with thinking |
-| `antigravity-claude-sonnet-4-6` | — | Claude Sonnet 4.6 |
-| `antigravity-claude-opus-4-6-thinking` | low, max | Claude Opus 4.6 with extended thinking |
+| `antigravity-claude-sonnet-4-6` | — | Claude Sonnet 4.6 (200k base context) |
+| `antigravity-claude-opus-4-6-thinking` | low, max | Claude Opus 4.6 with extended thinking (200k base context) |
 
 **Gemini CLI quota** (separate from Antigravity; used when `cli_first` is true or as fallback):
 
@@ -140,6 +140,7 @@ opencode run "Hello" --model=google/antigravity-claude-opus-4-6-thinking --varia
 > - When a Gemini quota pool is exhausted, the plugin automatically falls back to the other pool.
 > - Claude and image models always use Antigravity.
 > Model names are automatically transformed for the target API (e.g., `antigravity-gemini-3-flash` → `gemini-3-flash-preview` for CLI).
+> - Claude models on Antigravity have a 200k context limit by default. An experimental, opt-in long-context beta header can be enabled in `antigravity.json` (`claude_long_context_beta`); if the provider rejects it, requests automatically fall back to the stable 200k limit.
 
 **Using variants:**
 ```bash
@@ -190,12 +191,12 @@ Add this to your `~/.config/opencode/opencode.json`:
           }
         },
         "antigravity-claude-sonnet-4-6": {
-          "name": "Claude Sonnet 4.6 (Antigravity)",
+          "name": "Claude Sonnet 4.6 (Antigravity, 200k base)",
           "limit": { "context": 200000, "output": 64000 },
           "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] }
         },
         "antigravity-claude-opus-4-6-thinking": {
-          "name": "Claude Opus 4.6 Thinking (Antigravity)",
+          "name": "Claude Opus 4.6 Thinking (Antigravity, 200k base)",
           "limit": { "context": 200000, "output": 64000 },
           "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] },
           "variants": {

diff --git a/assets/antigravity.schema.json b/assets/antigravity.schema.json
@@ -120,6 +120,17 @@
       "type": "boolean",
       "description": "Enable Claude prompt auto-caching by adding top-level cache_control when absent."
     },
+    "claude_long_context_beta": {
+      "default": false,
+      "type": "boolean",
+      "description": "Enable experimental Claude long-context beta header injection for Claude 4.6 models. If rejected, requests auto-fallback to stable 200k behavior."
+    },
+    "claude_long_context_beta_header": {
+      "default": "context-1m-2025-08-07",
+      "type": "string",
+      "minLength": 1,
+      "description": "Claude long-context beta header value. Override if provider beta token changes."
+    },
     "proactive_token_refresh": {
       "default": true,
       "type": "boolean",

diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
@@ -35,6 +35,8 @@ Settings that affect how the model thinks and responds.
 | Option | Default | Description |
 |--------|---------|-------------|
 | `keep_thinking` | `false` | Preserve Claude's thinking blocks across turns. **Warning:** enabling may degrade model stability. |
+| `claude_long_context_beta` | `false` | Experimental: send the Claude 4.6 long-context beta header (requires provider entitlement) |
+| `claude_long_context_beta_header` | `"context-1m-2025-08-07"` | Beta header value used when `claude_long_context_beta` is enabled |
 | `session_recovery` | `true` | Auto-recover from tool_result_missing errors |
 | `auto_resume` | `false` | Auto-send resume prompt after recovery |
 | `resume_text` | `"continue"` | Text to send when auto-resuming |
@@ -51,6 +53,24 @@ When `false` (default), thinking is stripped:
 - **Pros:** More stable model behavior, smaller context
 - **Cons:** Model may be less coherent, forgets previous reasoning
 
+### About `claude_long_context_beta`
+
+Claude models on the Antigravity path remain **200k context by default**.
+
+When `claude_long_context_beta` is enabled:
+- Claude 4.6 requests include the configured `anthropic-beta` token (`claude_long_context_beta_header`)
+- If the provider rejects that beta header, the plugin retries once automatically without it
+- The retried request uses the stable 200k path, and the rejection reason is logged
+
+Example:
+
+```json
+{
+  "claude_long_context_beta": true,
+  "claude_long_context_beta_header": "context-1m-2025-08-07"
+}
+```
+
 ---
 
 ## Account Rotation
@@ -168,6 +188,7 @@ These settings are `false` by default:
 | Setting | Default | What it does |
 |---------|---------|--------------|
 | `keep_thinking` | `false` | Preserve Claude thinking (may degrade stability) |
+| `claude_long_context_beta` | `false` | Opt in to an experimental attempt to use the Claude 1M-context beta header |
 | `auto_resume` | `false` | Auto-continue after recovery |
 
 ---

diff --git a/docs/MODEL-VARIANTS.md b/docs/MODEL-VARIANTS.md
@@ -104,12 +104,15 @@ Claude models use token-based thinking budgets:
 | `low` | 8192 | Light thinking |
 | `max` | 32768 | Maximum thinking |
 
+> **Context Limit Note:** Claude models on Antigravity are configured with a **200k base context**.
+> If you enable `claude_long_context_beta` in `antigravity.json`, the plugin attempts an experimental long-context beta header and automatically falls back to 200k if the provider rejects it.
+
 ### Claude Example
 
 ```json
 {
   "antigravity-claude-opus-4-6-thinking": {
-    "name": "Claude Opus 4.6 Thinking (Antigravity)",
+    "name": "Claude Opus 4.6 Thinking (Antigravity, 200k base)",
     "limit": { "context": 200000, "output": 64000 },
     "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] },
     "variants": {

diff --git a/script/build-schema.ts b/script/build-schema.ts
@@ -41,6 +41,10 @@ const optionDescriptions: Record<string, string> = {
     "Enable tool hallucination prevention for Claude models. Injects parameter signatures and strict usage rules.",
   claude_prompt_auto_caching:
     "Enable Claude prompt auto-caching by adding top-level cache_control when absent.",
+  claude_long_context_beta:
+    "Enable experimental Claude long-context beta header injection for Claude 4.6 models. If rejected, requests auto-fallback to stable 200k behavior.",
+  claude_long_context_beta_header:
+    "Claude long-context beta header value. Override if provider beta token changes.",
   proactive_token_refresh:
     "Enable proactive background token refresh before expiry, ensuring requests never block.",
   proactive_refresh_buffer_seconds:

diff --git a/script/test-models.ts b/script/test-models.ts
@@ -9,7 +9,7 @@ interface ModelTest {
 const MODELS: ModelTest[] = [
   // Gemini CLI (direct Google API)
   { model: "google/gemini-3-flash-preview", category: "gemini-cli" },
-  { model: "google/gemini-3-pro-preview", category: "gemini-cli" },
+  { model: "google/gemini-3.1-pro-preview", category: "gemini-cli" },
   { model: "google/gemini-2.5-pro", category: "gemini-cli" },
   { model: "google/gemini-2.5-flash", category: "gemini-cli" },
 
@@ -27,17 +27,76 @@ const MODELS: ModelTest[] = [
 
 const TEST_PROMPT = "Reply with exactly one word: WORKING";
 const DEFAULT_TIMEOUT_MS = 120_000;
+const MAX_ERROR_SNIPPET_CHARS = 400;
 
 interface TestResult {
   success: boolean;
   error?: string;
   duration: number;
 }
 
+function parseTimeoutMs(value: string, flag: string): number {
+  const parsed = Number.parseInt(value, 10);
+  if (!Number.isFinite(parsed) || parsed <= 0) {
+    throw new Error(`Invalid ${flag} value "${value}". Expected a positive integer.`);
+  }
+  return parsed;
+}
+
+function collectRepeatedArgValues(args: string[], flag: string): string[] {
+  const values: string[] = [];
+  for (let index = 0; index < args.length; index++) {
+    if (args[index] === flag) {
+      const next = args[index + 1];
+      if (next === undefined) {
+        throw new Error(`Missing value for ${flag}`);
+      }
+      values.push(next);
+    }
+  }
+  return values;
+}
+
+function parseModelTimeoutOverrides(specs: string[]): Map<string, number> {
+  const overrides = new Map<string, number>();
+  for (const spec of specs) {
+    const separator = spec.lastIndexOf("=");
+    if (separator <= 0 || separator === spec.length - 1) {
+      throw new Error(`Invalid --timeout-model value "${spec}". Expected "<model>=<ms>".`);
+    }
+    const model = spec.slice(0, separator).trim();
+    const timeoutRaw = spec.slice(separator + 1).trim();
+    const timeoutMs = parseTimeoutMs(timeoutRaw, "--timeout-model");
+    overrides.set(model, timeoutMs);
+  }
+  return overrides;
+}
+
+function summarizeDiagnostic(text: string): string {
+  const normalized = text.replace(/\s+/g, " ").trim();
+  if (!normalized) {
+    return "<empty>";
+  }
+  if (normalized.length <= MAX_ERROR_SNIPPET_CHARS) {
+    return normalized;
+  }
+  return `${normalized.slice(0, MAX_ERROR_SNIPPET_CHARS)}...`;
+}
+
+function resolveTimeoutForModel(model: string, defaultTimeout: number, modelTimeoutOverrides: Map<string, number>): number {
+  for (const [pattern, timeout] of modelTimeoutOverrides) {
+    if (model === pattern || model.endsWith(pattern)) {
+      return timeout;
+    }
+  }
+  return defaultTimeout;
+}
+
 async function testModel(model: string, timeoutMs: number): Promise<TestResult> {
   const start = Date.now();
 
   return new Promise((resolve) => {
+    let settled = false;
     const proc = spawn("opencode", ["run", TEST_PROMPT, "--model", model], {
       stdio: ["ignore", "pipe", "pipe"],
     });
@@ -46,7 +105,11 @@ async function testModel(model: string, timeoutMs: number): Promise<TestResult>
     let stderr = "";
     const timer = setTimeout(() => {
       proc.kill("SIGKILL");
-      resolve({ success: false, error: `Timeout after ${timeoutMs}ms`, duration: Date.now() - start });
+      const diagnostic = summarizeDiagnostic(stderr || stdout);
+      if (!settled) {
+        settled = true;
+        resolve({ success: false, error: `Timeout after ${timeoutMs}ms: ${diagnostic}`, duration: Date.now() - start });
+      }
     }, timeoutMs);
 
     proc.stdout?.on("data", (data) => { stdout += data.toString(); });
@@ -55,33 +118,51 @@ async function testModel(model: string, timeoutMs: number): Promise<TestResult>
     proc.on("close", (code) => {
       clearTimeout(timer);
       const duration = Date.now() - start;
+      if (settled) {
+        return;
+      }
+      settled = true;
 
       if (code !== 0) {
-        resolve({ success: false, error: `Exit ${code}: ${stderr || stdout}`.slice(0, 200), duration });
+        const diagnostic = summarizeDiagnostic(stderr || stdout);
+        resolve({ success: false, error: `Exit ${code}: ${diagnostic}`, duration });
       } else {
         resolve({ success: true, duration });
       }
     });
 
     proc.on("error", (err) => {
       clearTimeout(timer);
+      if (settled) {
+        return;
+      }
+      settled = true;
       resolve({ success: false, error: err.message, duration: Date.now() - start });
     });
   });
 }
 
-function parseArgs(): { filterModel: string | null; filterCategory: string | null; dryRun: boolean; help: boolean; timeout: number } {
+function parseArgs(): {
+  filterModel: string | null;
+  filterCategory: string | null;
+  dryRun: boolean;
+  help: boolean;
+  timeout: number;
+  modelTimeoutOverrides: Map<string, number>;
+} {
   const args = process.argv.slice(2);
   const modelIdx = args.indexOf("--model");
   const catIdx = args.indexOf("--category");
   const timeoutIdx = args.indexOf("--timeout");
+  const modelTimeoutOverrideSpecs = collectRepeatedArgValues(args, "--timeout-model");
 
   return {
     filterModel: modelIdx !== -1 ? args[modelIdx + 1] ?? null : null,
     filterCategory: catIdx !== -1 ? args[catIdx + 1] ?? null : null,
     dryRun: args.includes("--dry-run"),
     help: args.includes("--help") || args.includes("-h"),
-    timeout: timeoutIdx !== -1 ? parseInt(args[timeoutIdx + 1] || "120000", 10) : DEFAULT_TIMEOUT_MS,
+    timeout: timeoutIdx !== -1 ? parseTimeoutMs(args[timeoutIdx + 1] || "120000", "--timeout") : DEFAULT_TIMEOUT_MS,
+    modelTimeoutOverrides: parseModelTimeoutOverrides(modelTimeoutOverrideSpecs),
   };
 }
 
@@ -96,18 +177,21 @@ Options:
   --model <model>      Test specific model
   --category <cat>     Test by category (gemini-cli, antigravity-gemini, antigravity-claude)
   --timeout <ms>       Timeout per model (default: 120000)
+  --timeout-model <spec>
+                       Per-model timeout override. Repeatable. Format: "<model>=<ms>"
   --dry-run            List models without testing
   --help, -h           Show this help
 
 Examples:
   npx tsx script/test-models.ts --dry-run
   npx tsx script/test-models.ts --model google/gemini-3-flash-preview
   npx tsx script/test-models.ts --category antigravity-claude
+  npx tsx script/test-models.ts --timeout-model google/gemini-3.1-pro-preview=240000
 `);
 }
 
 async function main(): Promise<void> {
-  const { filterModel, filterCategory, dryRun, help, timeout } = parseArgs();
+  const { filterModel, filterCategory, dryRun, help, timeout, modelTimeoutOverrides } = parseArgs();
 
   if (help) {
     printHelp();
@@ -138,15 +222,17 @@ async function main(): Promise<void> {
   const failures: { model: string; error: string }[] = [];
 
   for (const t of tests) {
+    const timeoutForModel = resolveTimeoutForModel(t.model, timeout, modelTimeoutOverrides);
     process.stdout.write(`Testing ${t.model.padEnd(50)} ... `);
-    const result = await testModel(t.model, timeout);
+    const result = await testModel(t.model, timeoutForModel);
 
     if (result.success) {
       console.log(`✅ (${(result.duration / 1000).toFixed(1)}s)`);
       passed++;
     } else {
       console.log(`❌ FAIL`);
       console.log(`   ${result.error}`);
+      console.log(`   timeout=${timeoutForModel}ms`);
       failures.push({ model: t.model, error: result.error || "Unknown" });
       failed++;
     }