diff --git a/README.md b/README.md index e87552ea..cefb0bc6 100644 --- a/README.md +++ b/README.md @@ -120,8 +120,8 @@ opencode run "Hello" --model=google/antigravity-claude-opus-4-6-thinking --varia | `antigravity-gemini-3-pro` | low, high | Gemini 3 Pro with thinking | | `antigravity-gemini-3.1-pro` | low, high | Gemini 3.1 Pro with thinking (rollout-dependent) | | `antigravity-gemini-3-flash` | minimal, low, medium, high | Gemini 3 Flash with thinking | -| `antigravity-claude-sonnet-4-6` | — | Claude Sonnet 4.6 | -| `antigravity-claude-opus-4-6-thinking` | low, max | Claude Opus 4.6 with extended thinking | +| `antigravity-claude-sonnet-4-6` | — | Claude Sonnet 4.6 (200k base context) | +| `antigravity-claude-opus-4-6-thinking` | low, max | Claude Opus 4.6 with extended thinking (200k base context) | **Gemini CLI quota** (separate from Antigravity; used when `cli_first` is true or as fallback): @@ -140,6 +140,7 @@ opencode run "Hello" --model=google/antigravity-claude-opus-4-6-thinking --varia > - When a Gemini quota pool is exhausted, the plugin automatically falls back to the other pool. > - Claude and image models always use Antigravity. > Model names are automatically transformed for the target API (e.g., `antigravity-gemini-3-flash` → `gemini-3-flash-preview` for CLI). +> - Claude Antigravity limits are 200k by default. Experimental opt-in long-context beta header attempts are available via `antigravity.json` (`claude_long_context_beta`) and automatically fall back to stable 200k if rejected. **Using variants:** ```bash @@ -190,12 +191,12 @@ Add this to your `~/.config/opencode/opencode.json`: } }, "antigravity-claude-sonnet-4-6": { - "name": "Claude Sonnet 4.6 (Antigravity)", + "name": "Claude Sonnet 4.6 (Antigravity, 200k base)", "limit": { "context": 200000, "output": 64000 }, "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] } }, "antigravity-claude-opus-4-6-thinking": { - "name": "Claude Opus 4.6 Thinking (Antigravity)", + "name": "Claude Opus 4.6 Thinking (Antigravity, 200k base)", "limit": { "context": 200000, "output": 64000 }, "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] }, "variants": { diff --git a/assets/antigravity.schema.json b/assets/antigravity.schema.json index aea2c09c..14365ec9 100644 --- a/assets/antigravity.schema.json +++ b/assets/antigravity.schema.json @@ -120,6 +120,17 @@ "type": "boolean", "description": "Enable Claude prompt auto-caching by adding top-level cache_control when absent." }, + "claude_long_context_beta": { + "default": false, + "type": "boolean", + "description": "Enable experimental Claude long-context beta header injection for Claude 4.6 models. If rejected, requests auto-fallback to stable 200k behavior." + }, + "claude_long_context_beta_header": { + "default": "context-1m-2025-08-07", + "type": "string", + "minLength": 1, + "description": "Claude long-context beta header value. Override if provider beta token changes." + }, "proactive_token_refresh": { "default": true, "type": "boolean", diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index e2ae44bd..465866a8 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -35,6 +35,8 @@ Settings that affect how the model thinks and responds. | Option | Default | Description | |--------|---------|-------------| | `keep_thinking` | `false` | Preserve Claude's thinking blocks across turns. **Warning:** enabling may degrade model stability. | +| `claude_long_context_beta` | `false` | Experimental: attempt Claude 4.6 long-context beta header (provider entitlement required) | +| `claude_long_context_beta_header` | `"context-1m-2025-08-07"` | Beta header value used when `claude_long_context_beta` is enabled | | `session_recovery` | `true` | Auto-recover from tool_result_missing errors | | `auto_resume` | `false` | Auto-send resume prompt after recovery | | `resume_text` | `"continue"` | Text to send when auto-resuming | @@ -51,6 +53,24 @@ When `false` (default), thinking is stripped: - **Pros:** More stable model behavior, smaller context - **Cons:** Model may be less coherent, forgets previous reasoning +### About `claude_long_context_beta` + +Claude models on the Antigravity path remain **200k context by default**. + +When `claude_long_context_beta` is enabled: +- Claude 4.6 requests include the configured `anthropic-beta` token (`claude_long_context_beta_header`) +- If the provider rejects that beta header, the plugin retries once automatically without it +- The request falls back to the stable 200k path and logs the rejection reason + +Example: + +```json +{ + "claude_long_context_beta": true, + "claude_long_context_beta_header": "context-1m-2025-08-07" +} +``` + --- ## Account Rotation @@ -168,6 +188,7 @@ These settings are `false` by default: | Setting | Default | What it does | |---------|---------|--------------| | `keep_thinking` | `false` | Preserve Claude thinking (may degrade stability) | +| `claude_long_context_beta` | `false` | Opt-in experimental Claude 1M beta header attempt | | `auto_resume` | `false` | Auto-continue after recovery | --- diff --git a/docs/MODEL-VARIANTS.md b/docs/MODEL-VARIANTS.md index 6f3c2d61..2c1365a3 100644 --- a/docs/MODEL-VARIANTS.md +++ b/docs/MODEL-VARIANTS.md @@ -104,12 +104,15 @@ Claude models use token-based thinking budgets: | `low` | 8192 | Light thinking | | `max` | 32768 | Maximum thinking | +> **Context Limit Note:** Claude models on Antigravity are configured with a **200k base context**. +> If you enable `claude_long_context_beta` in `antigravity.json`, the plugin can attempt an experimental long-context beta header and automatically falls back to 200k if rejected. + ### Claude Example ```json { "antigravity-claude-opus-4-6-thinking": { - "name": "Claude Opus 4.6 Thinking (Antigravity)", + "name": "Claude Opus 4.6 Thinking (Antigravity, 200k base)", "limit": { "context": 200000, "output": 64000 }, "modalities": { "input": ["text", "image", "pdf"], "output": ["text"] }, "variants": { diff --git a/script/build-schema.ts b/script/build-schema.ts index 61d10d20..152cb038 100644 --- a/script/build-schema.ts +++ b/script/build-schema.ts @@ -41,6 +41,10 @@ const optionDescriptions: Record = { "Enable tool hallucination prevention for Claude models. Injects parameter signatures and strict usage rules.", claude_prompt_auto_caching: "Enable Claude prompt auto-caching by adding top-level cache_control when absent.", + claude_long_context_beta: + "Enable experimental Claude long-context beta header injection for Claude 4.6 models. If rejected, requests auto-fallback to stable 200k behavior.", + claude_long_context_beta_header: + "Claude long-context beta header value. Override if provider beta token changes.", proactive_token_refresh: "Enable proactive background token refresh before expiry, ensuring requests never block.", proactive_refresh_buffer_seconds: diff --git a/script/test-models.ts b/script/test-models.ts index 5ce9b87c..62db9e2f 100644 --- a/script/test-models.ts +++ b/script/test-models.ts @@ -4,13 +4,14 @@ import { spawn } from "child_process"; interface ModelTest { model: string; category: "gemini-cli" | "antigravity-gemini" | "antigravity-claude"; + optional?: boolean; } const MODELS: ModelTest[] = [ // Gemini CLI (direct Google API) { model: "google/gemini-3-flash-preview", category: "gemini-cli" }, - { model: "google/gemini-3-pro-preview", category: "gemini-cli" }, - { model: "google/gemini-2.5-pro", category: "gemini-cli" }, + { model: "google/gemini-3.1-pro-preview", category: "gemini-cli" }, + { model: "google/gemini-2.5-pro", category: "gemini-cli", optional: true }, { model: "google/gemini-2.5-flash", category: "gemini-cli" }, // Antigravity Gemini @@ -27,6 +28,7 @@ const MODELS: ModelTest[] = [ const TEST_PROMPT = "Reply with exactly one word: WORKING"; const DEFAULT_TIMEOUT_MS = 120_000; +const MAX_ERROR_SNIPPET_CHARS = 400; interface TestResult { success: boolean; @@ -34,10 +36,73 @@ interface TestResult { duration: number; } +function parseTimeoutMs(value: string, flag: string): number { + const parsed = Number.parseInt(value, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + throw new Error(`Invalid ${flag} value "${value}". Expected a positive integer.`); + } + return parsed; +} + +function collectRepeatedArgValues(args: string[], flag: string): string[] { + const values: string[] = []; + for (let index = 0; index < args.length; index++) { + if (args[index] === flag) { + const next = args[index + 1]; + if (next === undefined) { + throw new Error(`Missing value for ${flag}`); + } + values.push(next); + } + } + return values; +} + +function parseModelTimeoutOverrides(specs: string[]): Map { + const overrides = new Map(); + for (const spec of specs) { + const separator = spec.lastIndexOf("="); + if (separator <= 0 || separator === spec.length - 1) { + throw new Error(`Invalid --timeout-model value "${spec}". Expected "=".`); + } + const model = spec.slice(0, separator).trim(); + const timeoutRaw = spec.slice(separator + 1).trim(); + const timeoutMs = parseTimeoutMs(timeoutRaw, "--timeout-model"); + overrides.set(model, timeoutMs); + } + return overrides; +} + +function summarizeDiagnostic(text: string): string { + const normalized = text.replace(/\s+/g, " ").trim(); + if (!normalized) { + return ""; + } + if (normalized.length <= MAX_ERROR_SNIPPET_CHARS) { + return normalized; + } + return `${normalized.slice(0, MAX_ERROR_SNIPPET_CHARS)}...`; +} + +function resolveTimeoutForModel(model: string, defaultTimeout: number, modelTimeoutOverrides: Map): number { + const exact = modelTimeoutOverrides.get(model); + if (exact !== undefined) { + return exact; + } + + for (const [pattern, timeout] of modelTimeoutOverrides) { + if (model.endsWith(pattern)) { + return timeout; + } + } + return defaultTimeout; +} + async function testModel(model: string, timeoutMs: number): Promise { const start = Date.now(); return new Promise((resolve) => { + let settled = false; const proc = spawn("opencode", ["run", TEST_PROMPT, "--model", model], { stdio: ["ignore", "pipe", "pipe"], }); @@ -46,7 +111,11 @@ async function testModel(model: string, timeoutMs: number): Promise let stderr = ""; const timer = setTimeout(() => { proc.kill("SIGKILL"); - resolve({ success: false, error: `Timeout after ${timeoutMs}ms`, duration: Date.now() - start }); + const diagnostic = summarizeDiagnostic(stderr || stdout); + if (!settled) { + settled = true; + resolve({ success: false, error: `Timeout after ${timeoutMs}ms: ${diagnostic}`, duration: Date.now() - start }); + } }, timeoutMs); proc.stdout?.on("data", (data) => { stdout += data.toString(); }); @@ -55,9 +124,14 @@ async function testModel(model: string, timeoutMs: number): Promise proc.on("close", (code) => { clearTimeout(timer); const duration = Date.now() - start; + if (settled) { + return; + } + settled = true; if (code !== 0) { - resolve({ success: false, error: `Exit ${code}: ${stderr || stdout}`.slice(0, 200), duration }); + const diagnostic = summarizeDiagnostic(stderr || stdout); + resolve({ success: false, error: `Exit ${code}: ${diagnostic}`, duration }); } else { resolve({ success: true, duration }); } @@ -65,23 +139,36 @@ async function testModel(model: string, timeoutMs: number): Promise proc.on("error", (err) => { clearTimeout(timer); + if (settled) { + return; + } + settled = true; resolve({ success: false, error: err.message, duration: Date.now() - start }); }); }); } -function parseArgs(): { filterModel: string | null; filterCategory: string | null; dryRun: boolean; help: boolean; timeout: number } { +function parseArgs(): { + filterModel: string | null; + filterCategory: string | null; + dryRun: boolean; + help: boolean; + timeout: number; + modelTimeoutOverrides: Map; +} { const args = process.argv.slice(2); const modelIdx = args.indexOf("--model"); const catIdx = args.indexOf("--category"); const timeoutIdx = args.indexOf("--timeout"); + const modelTimeoutOverrideSpecs = collectRepeatedArgValues(args, "--timeout-model"); return { filterModel: modelIdx !== -1 ? args[modelIdx + 1] ?? null : null, filterCategory: catIdx !== -1 ? args[catIdx + 1] ?? null : null, dryRun: args.includes("--dry-run"), help: args.includes("--help") || args.includes("-h"), - timeout: timeoutIdx !== -1 ? parseInt(args[timeoutIdx + 1] || "120000", 10) : DEFAULT_TIMEOUT_MS, + timeout: timeoutIdx !== -1 ? parseTimeoutMs(args[timeoutIdx + 1] || "120000", "--timeout") : DEFAULT_TIMEOUT_MS, + modelTimeoutOverrides: parseModelTimeoutOverrides(modelTimeoutOverrideSpecs), }; } @@ -96,6 +183,8 @@ Options: --model Test specific model --category Test by category (gemini-cli, antigravity-gemini, antigravity-claude) --timeout Timeout per model (default: 120000) + --timeout-model + Per-model timeout override. Repeatable. Format: "=" --dry-run List models without testing --help, -h Show this help @@ -103,11 +192,12 @@ Examples: npx tsx script/test-models.ts --dry-run npx tsx script/test-models.ts --model google/gemini-3-flash-preview npx tsx script/test-models.ts --category antigravity-claude + npx tsx script/test-models.ts --timeout-model google/gemini-3.1-pro-preview=240000 `); } async function main(): Promise { - const { filterModel, filterCategory, dryRun, help, timeout } = parseArgs(); + const { filterModel, filterCategory, dryRun, help, timeout, modelTimeoutOverrides } = parseArgs(); if (help) { printHelp(); @@ -127,7 +217,8 @@ async function main(): Promise { if (dryRun) { for (const t of tests) { - console.log(` ${t.model.padEnd(50)} [${t.category}]`); + const optionalSuffix = t.optional ? " (optional)" : ""; + console.log(` ${t.model.padEnd(50)} [${t.category}]${optionalSuffix}`); } console.log(`\n${tests.length} models would be tested.\n`); return; @@ -135,33 +226,54 @@ async function main(): Promise { let passed = 0; let failed = 0; - const failures: { model: string; error: string }[] = []; + let optionalFailed = 0; + const requiredFailures: { model: string; error: string }[] = []; + const optionalFailures: { model: string; error: string }[] = []; for (const t of tests) { + const timeoutForModel = resolveTimeoutForModel(t.model, timeout, modelTimeoutOverrides); process.stdout.write(`Testing ${t.model.padEnd(50)} ... `); - const result = await testModel(t.model, timeout); + const result = await testModel(t.model, timeoutForModel); if (result.success) { console.log(`✅ (${(result.duration / 1000).toFixed(1)}s)`); passed++; } else { - console.log(`❌ FAIL`); + if (t.optional) { + console.log(`⚠️ OPTIONAL FAIL`); + } else { + console.log(`❌ FAIL`); + } console.log(` ${result.error}`); - failures.push({ model: t.model, error: result.error || "Unknown" }); - failed++; + console.log(` timeout=${timeoutForModel}ms`); + const failure = { model: t.model, error: result.error || "Unknown" }; + if (t.optional) { + optionalFailures.push(failure); + optionalFailed++; + } else { + requiredFailures.push(failure); + failed++; + } } } console.log(`\n${"=".repeat(50)}`); - console.log(`Summary: ${passed} passed, ${failed} failed\n`); + console.log(`Summary: ${passed} passed, ${failed} failed, ${optionalFailed} optional failed\n`); - if (failures.length > 0) { - console.log("Failed models:"); - for (const f of failures) { + if (requiredFailures.length > 0) { + console.log("Failed required models:"); + for (const f of requiredFailures) { console.log(` - ${f.model}`); } process.exit(1); } + + if (optionalFailures.length > 0) { + console.log("Failed optional models:"); + for (const f of optionalFailures) { + console.log(` - ${f.model}`); + } + } } main().catch(console.error); diff --git a/script/test-regression.ts b/script/test-regression.ts index 519e604d..a034f6e8 100644 --- a/script/test-regression.ts +++ b/script/test-regression.ts @@ -18,6 +18,16 @@ interface MultiTurnTest { interface TurnConfig { prompt: string; model?: string; + timeout?: number; +} + +interface RunTurnResult { + output: string; + stderr: string; + code: number; + sessionId: string | null; + timedOut: boolean; + duration: number; } interface TestResult { @@ -39,6 +49,12 @@ interface ConcurrentTest { timeout: number; } +interface TimeoutOverrides { + defaultTimeout: number | null; + testTimeoutOverrides: Map; + turnTimeoutOverrides: Map; +} + const ERROR_PATTERNS = [ "thinking block order", "Expected thinking or redacted_thinking", @@ -127,7 +143,7 @@ const SANITY_TESTS: MultiTurnTest[] = [ suite: "sanity", turns: ["Reply with exactly: OK", "Repeat the word 'test' 50000 times"], errorPatterns: ["FATAL", "unhandled", "Cannot read properties"], - timeout: 60000, + timeout: 180000, }, ]; @@ -278,6 +294,126 @@ const CONCURRENT_TESTS: ConcurrentTest[] = [ ]; const ALL_TESTS = [...SANITY_TESTS, ...HEAVY_TESTS]; +const MAX_DIAGNOSTIC_CHARS = 500; + +function parseTimeoutMs(value: string, flag: string): number { + const parsed = Number.parseInt(value, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + throw new Error(`Invalid ${flag} value "${value}". Expected a positive integer.`); + } + return parsed; +} + +function collectRepeatedArgValues(args: string[], flag: string): string[] { + const values: string[] = []; + for (let index = 0; index < args.length; index++) { + if (args[index] === flag) { + const value = args[index + 1]; + if (value === undefined) { + throw new Error(`Missing value for ${flag}`); + } + values.push(value); + } + } + return values; +} + +function parseNamedTimeoutOverrides(specs: string[], flag: string): Map { + const overrides = new Map(); + for (const spec of specs) { + const separator = spec.lastIndexOf("="); + if (separator <= 0 || separator === spec.length - 1) { + throw new Error(`Invalid ${flag} value "${spec}". Expected "=".`); + } + const name = spec.slice(0, separator).trim(); + const timeoutRaw = spec.slice(separator + 1).trim(); + const timeoutMs = parseTimeoutMs(timeoutRaw, flag); + overrides.set(name, timeoutMs); + } + return overrides; +} + +function parseTurnTimeoutOverrides(specs: string[]): Map { + const overrides = new Map(); + for (const spec of specs) { + const separator = spec.lastIndexOf("="); + if (separator <= 0 || separator === spec.length - 1) { + throw new Error(`Invalid --timeout-turn value "${spec}". Expected ":=".`); + } + const key = spec.slice(0, separator).trim(); + const timeoutRaw = spec.slice(separator + 1).trim(); + const keyParts = key.split(":"); + if (keyParts.length !== 2) { + throw new Error(`Invalid --timeout-turn value "${spec}". Expected ":=".`); + } + const testName = keyParts[0]?.trim(); + const turnIndexRaw = keyParts[1]?.trim(); + if (!testName || !turnIndexRaw) { + throw new Error(`Invalid --timeout-turn value "${spec}". Expected ":=".`); + } + const turnIndex = Number.parseInt(turnIndexRaw, 10); + if (!Number.isFinite(turnIndex) || turnIndex <= 0) { + throw new Error(`Invalid turn index in --timeout-turn value "${spec}". Expected a positive integer.`); + } + const timeoutMs = parseTimeoutMs(timeoutRaw, "--timeout-turn"); + overrides.set(`${testName}:${turnIndex}`, timeoutMs); + } + return overrides; +} + +function summarizeDiagnostic(text: string): string { + const normalized = text.replace(/\s+/g, " ").trim(); + if (!normalized) { + return ""; + } + if (normalized.length <= MAX_DIAGNOSTIC_CHARS) { + return normalized; + } + return `${normalized.slice(0, MAX_DIAGNOSTIC_CHARS)}...`; +} + +function resolveBaseTimeout(name: string, defaultTimeout: number, timeoutOverrides: TimeoutOverrides): number { + let timeout = defaultTimeout; + if (timeoutOverrides.defaultTimeout !== null) { + timeout = timeoutOverrides.defaultTimeout; + } + const testOverride = timeoutOverrides.testTimeoutOverrides.get(name); + if (testOverride !== undefined) { + timeout = testOverride; + } + return timeout; +} + +function resolveTurnTimeout( + test: MultiTurnTest, + turn: string | TurnConfig, + turnIndex: number, + timeoutOverrides: TimeoutOverrides +): number { + let timeout = resolveBaseTimeout(test.name, test.timeout, timeoutOverrides); + if (typeof turn !== "string" && turn.timeout !== undefined) { + timeout = turn.timeout; + } + const turnOverride = timeoutOverrides.turnTimeoutOverrides.get(`${test.name}:${turnIndex + 1}`); + if (turnOverride !== undefined) { + timeout = turnOverride; + } + return timeout; +} + +function applyTimeoutOverridesToTests(tests: MultiTurnTest[], timeoutOverrides: TimeoutOverrides): MultiTurnTest[] { + return tests.map((test) => ({ + ...test, + timeout: resolveBaseTimeout(test.name, test.timeout, timeoutOverrides), + })); +} + +function applyTimeoutOverridesToConcurrentTests(tests: ConcurrentTest[], timeoutOverrides: TimeoutOverrides): ConcurrentTest[] { + return tests.map((test) => ({ + ...test, + timeout: resolveBaseTimeout(test.name, test.timeout, timeoutOverrides), + })); +} async function runTurn( prompt: string, @@ -285,8 +421,9 @@ async function runTurn( sessionId: string | null, sessionTitle: string, timeout: number -): Promise<{ output: string; stderr: string; code: number; sessionId: string | null }> { +): Promise { return new Promise((resolve) => { + const start = Date.now(); const args = sessionId ? ["run", prompt, "--session", sessionId, "--model", model] : ["run", prompt, "--model", model, "--title", sessionTitle]; @@ -298,6 +435,7 @@ async function runTurn( let stdout = ""; let stderr = ""; + let timedOut = false; proc.stdout?.on("data", (data) => { stdout += data.toString(); @@ -308,7 +446,11 @@ async function runTurn( }); const timeoutId = setTimeout(() => { + timedOut = true; proc.kill("SIGTERM"); + setTimeout(() => { + proc.kill("SIGKILL"); + }, 2000).unref(); }, timeout); proc.on("close", (code) => { @@ -328,6 +470,8 @@ async function runTurn( stderr: stderr, code: code ?? 1, sessionId: extractedSessionId, + timedOut, + duration: Date.now() - start, }); }); @@ -338,6 +482,8 @@ async function runTurn( stderr: err.message, code: 1, sessionId: null, + timedOut: false, + duration: Date.now() - start, }); }); }); @@ -382,8 +528,9 @@ async function runConcurrentTest(test: ConcurrentTest): Promise { } for (const result of results) { + const combinedOutput = `${result.stderr}\n${result.output}`.toLowerCase(); for (const pattern of test.errorPatterns) { - if (result.stderr.toLowerCase().includes(pattern.toLowerCase())) { + if (combinedOutput.includes(pattern.toLowerCase())) { for (const sid of sessionIds) { await deleteSession(sid); } @@ -397,19 +544,20 @@ async function runConcurrentTest(test: ConcurrentTest): Promise { } } - const failedResults = results.filter((r) => r.code !== 0); + const failedResults = results.filter((r) => r.code !== 0 || r.timedOut); const failedCount = failedResults.length; if (failedCount > test.concurrentRequests / 2) { for (const sid of sessionIds) { await deleteSession(sid); } const firstFailure = failedResults[0]; + const timedOutCount = failedResults.filter((result) => result.timedOut).length; const failureDetails = firstFailure - ? `\n First failure stderr: ${firstFailure.stderr.slice(0, 500)}` + ? `\n First failure: ${summarizeDiagnostic(firstFailure.stderr || firstFailure.output)}` : ""; return { success: false, - error: `${failedCount}/${test.concurrentRequests} requests failed${failureDetails}`, + error: `${failedCount}/${test.concurrentRequests} requests failed (${timedOutCount} timed out)${failureDetails}`, duration: Date.now() - start, turnsCompleted: test.concurrentRequests - failedCount, }; @@ -426,7 +574,7 @@ async function runConcurrentTest(test: ConcurrentTest): Promise { }; } -async function runMultiTurnTest(test: MultiTurnTest): Promise { +async function runMultiTurnTest(test: MultiTurnTest, timeoutOverrides: TimeoutOverrides): Promise { const start = Date.now(); let sessionId: string | null = null; let turnsCompleted = 0; @@ -435,7 +583,7 @@ async function runMultiTurnTest(test: MultiTurnTest): Promise { const turn = test.turns[index]!; const prompt = typeof turn === "string" ? turn : turn.prompt; const model = typeof turn === "string" ? test.model : (turn.model ?? test.model); - const turnStart = Date.now(); + const turnTimeout = resolveTurnTimeout(test, turn, index, timeoutOverrides); process.stdout.write(`\r Progress: ${index + 1}/${test.turns.length} turns...`); @@ -444,11 +592,12 @@ async function runMultiTurnTest(test: MultiTurnTest): Promise { model, sessionId ?? null, `regression-${test.name}`, - test.timeout + turnTimeout ); + const combinedOutput = `${result.stderr}\n${result.output}`.toLowerCase(); for (const pattern of test.errorPatterns) { - if (result.stderr.toLowerCase().includes(pattern.toLowerCase())) { + if (combinedOutput.includes(pattern.toLowerCase())) { process.stdout.write("\r" + " ".repeat(50) + "\r"); return { success: false, @@ -460,18 +609,27 @@ async function runMultiTurnTest(test: MultiTurnTest): Promise { } } - if (result.code !== 0 && result.code !== null) { - const isTimeout = Date.now() - turnStart >= test.timeout - 1000; - if (isTimeout) { - process.stdout.write("\r" + " ".repeat(50) + "\r"); - return { - success: false, - error: `Turn ${index + 1}: Timeout after ${test.timeout}ms`, - duration: Date.now() - start, - turnsCompleted, - sessionId: sessionId ?? undefined, - }; - } + if (result.timedOut) { + process.stdout.write("\r" + " ".repeat(50) + "\r"); + const timeoutDiagnostic = summarizeDiagnostic(result.stderr || result.output); + return { + success: false, + error: `Turn ${index + 1}: Timeout after ${turnTimeout}ms (${timeoutDiagnostic})`, + duration: Date.now() - start, + turnsCompleted, + sessionId: sessionId ?? undefined, + }; + } + + if (result.code !== 0) { + process.stdout.write("\r" + " ".repeat(50) + "\r"); + return { + success: false, + error: `Turn ${index + 1}: Exit ${result.code} (${result.duration}ms) ${summarizeDiagnostic(result.stderr || result.output)}`, + duration: Date.now() - start, + turnsCompleted, + sessionId: sessionId ?? undefined, + }; } sessionId = result.sessionId; @@ -493,6 +651,7 @@ function parseArgs(): { suite: TestSuite; dryRun: boolean; help: boolean; + timeoutOverrides: TimeoutOverrides; } { const args = process.argv.slice(2); const getArg = (flag: string): string | null => { @@ -504,12 +663,21 @@ function parseArgs(): { if (args.includes("--sanity")) suite = "sanity"; if (args.includes("--heavy")) suite = "heavy"; + const timeoutArg = getArg("--timeout"); + const testTimeoutSpecs = collectRepeatedArgValues(args, "--timeout-test"); + const turnTimeoutSpecs = collectRepeatedArgValues(args, "--timeout-turn"); + return { filterName: getArg("--test") ?? getArg("--name"), filterCategory: getArg("--category") as Category | null, suite, dryRun: args.includes("--dry-run"), help: args.includes("--help") || args.includes("-h"), + timeoutOverrides: { + defaultTimeout: timeoutArg !== null ? parseTimeoutMs(timeoutArg, "--timeout") : null, + testTimeoutOverrides: parseNamedTimeoutOverrides(testTimeoutSpecs, "--timeout-test"), + turnTimeoutOverrides: parseTurnTimeoutOverrides(turnTimeoutSpecs), + }, }; } @@ -542,6 +710,9 @@ Options: --heavy Run heavy tests only (stress) --test Run specific test by name --category Run tests by category + --timeout Global timeout override for all tests and turns + --timeout-test Per-test timeout override. Repeatable. Format: "=" + --timeout-turn Per-turn timeout override. Repeatable. Format: ":=" --dry-run List tests without running --help, -h Show this help @@ -549,11 +720,13 @@ Examples: npx tsx script/test-regression.ts --sanity npx tsx script/test-regression.ts --heavy npx tsx script/test-regression.ts --test stress-20-turn-recovery + npx tsx script/test-regression.ts --timeout-test thinking-modification-continue=180000 + npx tsx script/test-regression.ts --timeout-turn thinking-modification-continue:3=240000 `); } async function main(): Promise { - const { filterName, filterCategory, suite, dryRun, help } = parseArgs(); + const { filterName, filterCategory, suite, dryRun, help, timeoutOverrides } = parseArgs(); if (help) { showHelp(); @@ -578,6 +751,7 @@ async function main(): Promise { if (filterCategory && filterCategory !== "concurrency") { tests = tests.filter((t) => t.category === filterCategory); } + tests = applyTimeoutOverridesToTests(tests, timeoutOverrides); const runConcurrentOnly = filterCategory === "concurrency"; if (runConcurrentOnly) { @@ -612,7 +786,7 @@ async function main(): Promise { console.log(` Model: ${test.model}`); console.log(` Turns: ${test.turns.length}`); - const result = await runMultiTurnTest(test); + const result = await runMultiTurnTest(test, timeoutOverrides); results.push({ test, result }); if (result.success) { @@ -630,7 +804,7 @@ async function main(): Promise { } if (suite === "heavy" || suite === "all" || runConcurrentOnly || filterName) { - let concurrentTests = CONCURRENT_TESTS; + let concurrentTests = applyTimeoutOverridesToConcurrentTests(CONCURRENT_TESTS, timeoutOverrides); if (filterName) { concurrentTests = concurrentTests.filter((t) => t.name === filterName); } diff --git a/src/plugin.ts b/src/plugin.ts index 17b1c1a3..c6b3bbdf 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -30,6 +30,7 @@ import { import { buildThinkingWarmupBody, isGenerativeLanguageRequest, + isUnsupportedClaudeLongContextBetaError, prepareAntigravityRequest, transformAntigravityResponse, } from "./plugin/request"; @@ -50,6 +51,7 @@ import { checkAccountsQuota } from "./plugin/quota"; import { initDiskSignatureCache } from "./plugin/cache"; import { createProactiveRefreshQueue, type ProactiveRefreshQueue } from "./plugin/refresh-queue"; import { initLogger, createLogger } from "./plugin/logger"; +import { scrubTextForLog } from "./plugin/logging-utils"; import { initHealthTracker, getHealthTracker, initTokenTracker, getTokenTracker } from "./plugin/rotation"; import { initAntigravityVersion } from "./plugin/version"; import { executeSearch } from "./plugin/search"; @@ -67,6 +69,7 @@ const MAX_OAUTH_ACCOUNTS = 10; const MAX_WARMUP_SESSIONS = 1000; const MAX_WARMUP_RETRIES = 2; const CAPACITY_BACKOFF_TIERS_MS = [5000, 10000, 20000, 30000, 60000]; +const CLAUDE_LONG_CONTEXT_REJECTION_REASON_MAX_CHARS = 240; function getCapacityBackoffDelay(consecutiveFailures: number): number { const index = Math.min(consecutiveFailures, CAPACITY_BACKOFF_TIERS_MS.length - 1); @@ -86,6 +89,8 @@ const log = createLogger("plugin"); const rateLimitToastCooldowns = new Map(); const RATE_LIMIT_TOAST_COOLDOWN_MS = 5000; const MAX_TOAST_COOLDOWN_ENTRIES = 100; +const CLAUDE_LONG_CONTEXT_FALLBACK_TOAST_SESSIONS = new Set(); +const MAX_CLAUDE_LONG_CONTEXT_FALLBACK_TOAST_SESSIONS = 500; // Track if "all accounts blocked" toasts were shown to prevent spam in while loop let softQuotaToastShown = false; @@ -117,6 +122,25 @@ function shouldShowRateLimitToast(message: string): boolean { return true; } +function shouldShowClaudeLongContextFallbackToast(sessionKey: string): boolean { + if (CLAUDE_LONG_CONTEXT_FALLBACK_TOAST_SESSIONS.has(sessionKey)) { + return false; + } + + CLAUDE_LONG_CONTEXT_FALLBACK_TOAST_SESSIONS.add(sessionKey); + if ( + CLAUDE_LONG_CONTEXT_FALLBACK_TOAST_SESSIONS.size > + MAX_CLAUDE_LONG_CONTEXT_FALLBACK_TOAST_SESSIONS + ) { + const first = CLAUDE_LONG_CONTEXT_FALLBACK_TOAST_SESSIONS.values().next().value; + if (first !== undefined) { + CLAUDE_LONG_CONTEXT_FALLBACK_TOAST_SESSIONS.delete(first); + } + } + + return true; +} + function resetAllAccountsBlockedToasts(): void { softQuotaToastShown = false; rateLimitToastShown = false; @@ -1537,6 +1561,9 @@ export const createAntigravityPlugin = (providerId: string) => async ( let lastFailure: FailureContext | null = null; let lastError: Error | null = null; + // Intentional request scope: once context-1m beta is rejected, keep it disabled + // across account rotation for this request to guarantee one stable fallback path. + let disableClaudeLongContextBetaForRetry = false; const abortSignal = init?.signal ?? undefined; // Helper to check if request was aborted @@ -2021,6 +2048,9 @@ export const createAntigravityPlugin = (providerId: string) => async ( { claudeToolHardening: config.claude_tool_hardening, claudePromptAutoCaching: config.claude_prompt_auto_caching, + claudeLongContextBetaEnabled: config.claude_long_context_beta, + claudeLongContextBetaHeader: config.claude_long_context_beta_header, + disableClaudeLongContextBetaForRetry, fingerprint: account.fingerprint, }, ); @@ -2330,6 +2360,59 @@ export const createAntigravityPlugin = (providerId: string) => async ( } } + const canBeClaudeLongContextRejection = + response.status === 400 + || response.status === 403 + || response.status === 422; + + if ( + prepared.claudeLongContextBetaApplied + && !disableClaudeLongContextBetaForRetry + && canBeClaudeLongContextRejection + ) { + const errorBodyText = await response.clone().text().catch(() => ""); + if ( + isUnsupportedClaudeLongContextBetaError( + response.status, + errorBodyText, + prepared.claudeLongContextBetaHeader, + ) + ) { + disableClaudeLongContextBetaForRetry = true; + if (tokenConsumed) { + getTokenTracker().refund(account.index); + tokenConsumed = false; + } + + const sessionKey = prepared.sessionId + ?? `${account.index}:${prepared.effectiveModel ?? "claude"}`; + + if (shouldShowClaudeLongContextFallbackToast(sessionKey)) { + await showToast( + "Claude long-context beta rejected by provider. Falling back to stable 200k path.", + "warning", + ); + } + + const reasonPreview = scrubTextForLog( + errorBodyText, + CLAUDE_LONG_CONTEXT_REJECTION_REASON_MAX_CHARS, + ); + pushDebug( + `claude-long-context-beta rejected status=${response.status} header=${prepared.claudeLongContextBetaHeader ?? "unknown"} reason=${reasonPreview}`, + ); + log.debug("claude-long-context-beta-rejected", { + status: response.status, + model: prepared.effectiveModel, + header: prepared.claudeLongContextBetaHeader, + reasonPreview, + }); + + i -= 1; + continue; + } + } + const shouldRetryEndpoint = ( response.status === 403 || response.status === 404 || diff --git a/src/plugin/config/models.ts b/src/plugin/config/models.ts index 641d2e14..afabbbe3 100644 --- a/src/plugin/config/models.ts +++ b/src/plugin/config/models.ts @@ -68,12 +68,12 @@ export const OPENCODE_MODEL_DEFINITIONS: OpencodeModelDefinitions = { }, }, "antigravity-claude-sonnet-4-6": { - name: "Claude Sonnet 4.6 (Antigravity)", + name: "Claude Sonnet 4.6 (Antigravity, 200k base)", limit: { context: 200000, output: 64000 }, modalities: DEFAULT_MODALITIES, }, "antigravity-claude-opus-4-6-thinking": { - name: "Claude Opus 4.6 Thinking (Antigravity)", + name: "Claude Opus 4.6 Thinking (Antigravity, 200k base)", limit: { context: 200000, output: 64000 }, modalities: DEFAULT_MODALITIES, variants: { diff --git a/src/plugin/config/schema.test.ts b/src/plugin/config/schema.test.ts index fd86bf88..2bde81c7 100644 --- a/src/plugin/config/schema.test.ts +++ b/src/plugin/config/schema.test.ts @@ -11,7 +11,12 @@ describe("cli_first config", () => { it("documents cli_first in the JSON schema", () => { const schemaPath = new URL("../../../assets/antigravity.schema.json", import.meta.url); const schema = JSON.parse(readFileSync(schemaPath, "utf8")) as { - properties?: Record; + properties?: Record; }; const cliFirst = schema.properties?.cli_first; @@ -46,3 +51,53 @@ describe("claude_prompt_auto_caching config", () => { expect(claudePromptAutoCaching?.description?.length ?? 0).toBeGreaterThan(0); }); }); + +describe("claude_long_context_beta config", () => { + it("includes claude_long_context_beta defaults in DEFAULT_CONFIG", () => { + expect(DEFAULT_CONFIG).toHaveProperty("claude_long_context_beta", false); + expect(DEFAULT_CONFIG).toHaveProperty("claude_long_context_beta_header", "context-1m-2025-08-07"); + }); + + it("documents claude_long_context_beta in the JSON schema", () => { + const schemaPath = new URL("../../../assets/antigravity.schema.json", import.meta.url); + const schema = JSON.parse(readFileSync(schemaPath, "utf8")) as { + properties?: Record; + }; + + const claudeLongContextBeta = schema.properties?.claude_long_context_beta; + expect(claudeLongContextBeta).toBeDefined(); + expect(claudeLongContextBeta).toMatchObject({ + type: "boolean", + default: false, + }); + expect(typeof claudeLongContextBeta?.description).toBe("string"); + expect(claudeLongContextBeta?.description?.length ?? 0).toBeGreaterThan(0); + }); + + it("documents claude_long_context_beta_header in the JSON schema", () => { + const schemaPath = new URL("../../../assets/antigravity.schema.json", import.meta.url); + const schema = JSON.parse(readFileSync(schemaPath, "utf8")) as { + properties?: Record; + }; + + const claudeLongContextBetaHeader = schema.properties?.claude_long_context_beta_header; + expect(claudeLongContextBetaHeader).toBeDefined(); + expect(claudeLongContextBetaHeader).toMatchObject({ + type: "string", + default: "context-1m-2025-08-07", + minLength: 1, + }); + expect(typeof claudeLongContextBetaHeader?.description).toBe("string"); + expect(claudeLongContextBetaHeader?.description?.length ?? 0).toBeGreaterThan(0); + }); +}); diff --git a/src/plugin/config/schema.ts b/src/plugin/config/schema.ts index c29e7e38..ef010e80 100644 --- a/src/plugin/config/schema.ts +++ b/src/plugin/config/schema.ts @@ -223,6 +223,26 @@ export const AntigravityConfigSchema = z.object({ * @default false */ claude_prompt_auto_caching: z.boolean().default(false), + + /** + * Enable experimental Claude long-context beta header injection. + * + * When enabled, Claude 4.6 models attempt a provider beta header for 1M context. + * If the provider rejects the beta header, the request automatically falls back + * to the stable 200k path. + * + * @default false + */ + claude_long_context_beta: z.boolean().default(false), + + /** + * Claude long-context beta header value. + * + * Override this if provider beta token changes in the future. + * + * @default "context-1m-2025-08-07" + */ + claude_long_context_beta_header: z.string().trim().min(1).default("context-1m-2025-08-07"), // ========================================================================= // Proactive Token Refresh (ported from LLM-API-Key-Proxy) @@ -460,6 +480,8 @@ export const DEFAULT_CONFIG: AntigravityConfig = { tool_id_recovery: true, claude_tool_hardening: true, claude_prompt_auto_caching: false, + claude_long_context_beta: false, + claude_long_context_beta_header: "context-1m-2025-08-07", proactive_token_refresh: true, proactive_refresh_buffer_seconds: 1800, proactive_refresh_check_interval_seconds: 300, diff --git a/src/plugin/logging-utils.test.ts b/src/plugin/logging-utils.test.ts index 1d32ebe9..2b313a9c 100644 --- a/src/plugin/logging-utils.test.ts +++ b/src/plugin/logging-utils.test.ts @@ -5,6 +5,7 @@ import { formatAccountLabel, formatBodyPreviewForLog, formatErrorForLog, + scrubTextForLog, truncateTextForLog, writeConsoleLog, } from "./logging-utils" @@ -67,6 +68,80 @@ describe("format helpers", () => { expect(formatBodyPreviewForLog(new URLSearchParams({ q: "value" }), 100)).toBe("q=value") expect(formatBodyPreviewForLog(new Uint8Array([1, 2]), 100)).toBe("[Uint8Array payload omitted]") }) + + it("scrubs sensitive values from error previews", () => { + const raw = "token=abc123 email=user@example.com authorization: Bearer abc123 card=4242 4242 4242 4242" + const scrubbed = scrubTextForLog(raw, 500) + + expect(scrubbed).toContain("token=[redacted]") + expect(scrubbed).toContain("email=[redacted-email]") + expect(scrubbed).toContain("authorization: [redacted]") + expect(scrubbed).toContain("card=[redacted-card]") + expect(scrubbed).not.toContain("user@example.com") + expect(scrubbed).not.toContain("Bearer") + expect(scrubbed).not.toContain("abc123") + }) + + it("scrubs quoted credential keys in JSON-like payloads", () => { + const raw = '{"authorization":"Bearer abc123","token":"abc123","api_key":"k-123"}' + const scrubbed = scrubTextForLog(raw, 500) + + expect(scrubbed).toContain('"authorization":"[redacted]"') + expect(scrubbed).toContain('"token":"[redacted]"') + expect(scrubbed).toContain('"api_key":"[redacted]"') + expect(scrubbed).not.toContain("abc123") + expect(scrubbed).not.toContain("k-123") + }) + + it("scrubs multi-word credential values without leaking trailing words", () => { + const raw = '{"secret":"multi word value","email":"user@example.com"}' + const scrubbed = scrubTextForLog(raw, 500) + + expect(scrubbed).toContain('"secret":"[redacted]"') + expect(scrubbed).not.toContain("multi word value") + expect(scrubbed).toContain('"email":"[redacted-email]"') + }) + + it("scrubs standalone base64-like tokens with trailing padding", () => { + const token = "QWxhZGRpbjpvcGVuIHNlc2FtZQ+/=QWxhZGRpbjpvcGVuIHNlc2FtZQ+/=" + const raw = `debug=${token}` + const scrubbed = scrubTextForLog(raw, 500) + + expect(scrubbed).toContain("[redacted-token]") + expect(scrubbed).not.toContain(token) + }) + + it("scrubs long token values without redacting long key names", () => { + const key = "K".repeat(44) + const token = "T".repeat(44) + const raw = `${key}=${token}` + const scrubbed = scrubTextForLog(raw, 500) + + expect(scrubbed).toContain(`${key}=[redacted-token]`) + expect(scrubbed).not.toContain(token) + }) + + it("does not scrub punctuation-separated digits as credit cards", () => { + const raw = "id=4!2!4!2!4!2!4!2!4!2!4!2!4!2!4!2" + const scrubbed = scrubTextForLog(raw, 500) + + expect(scrubbed).toContain(raw) + expect(scrubbed).not.toContain("[redacted-card]") + }) + + it("does not scrub plain long numeric identifiers as credit cards", () => { + const raw = "trace_id=1748000000000000000" + const scrubbed = scrubTextForLog(raw, 500) + + expect(scrubbed).toContain(raw) + expect(scrubbed).not.toContain("[redacted-card]") + }) + + it("normalizes and truncates scrubbed text", () => { + const raw = " a b c " + expect(scrubTextForLog(raw, 5)).toBe("a b c") + expect(scrubTextForLog("x".repeat(20), 5)).toBe("xxxxx... (truncated 15 chars)") + }) }) describe("writeConsoleLog", () => { diff --git a/src/plugin/logging-utils.ts b/src/plugin/logging-utils.ts index c91a73c0..e97245a7 100644 --- a/src/plugin/logging-utils.ts +++ b/src/plugin/logging-utils.ts @@ -76,6 +76,29 @@ export function truncateTextForLog(text: string, maxChars: number): string { return `${text.slice(0, maxChars)}... (truncated ${text.length - maxChars} chars)` } +export function scrubTextForLog(text: string, maxChars: number): string { + const normalized = text.replace(/\s+/g, " ").trim() + if (!normalized) { + return "" + } + + const scrubbed = normalized + .replace(/\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi, "[redacted-email]") + .replace( + /((?:["']?(?:authorization|api[_-]?key|token|secret|password)["']?)\s*[:=]\s*["']?)(?:(?:bearer|basic)\s+)?.+?(?=(?:["'\r\n,;]|$|\s+[A-Za-z_][A-Za-z0-9_-]*\s*[:=]))/gi, + "$1[redacted]", + ) + .replace(/\b[a-f0-9]{32,}\b/gi, "[redacted-hex]") + .replace( + /\b(?:\d{4}[- ]\d{4}[- ]\d{4}[- ]\d{4}|\d{4}[- ]\d{6}[- ]\d{5}|\d{4}[- ]\d{3}[- ]\d{3}[- ]\d{3})\b/g, + "[redacted-card]", + ) + .replace(/(?<==)[A-Za-z0-9+/_-][A-Za-z0-9+/_=-]{39,}(?![A-Za-z0-9+/_=-])/g, "[redacted-token]") + .replace(/(? { }); }); + describe("isUnsupportedClaudeLongContextBetaError", () => { + it("returns true for unsupported anthropic-beta long-context errors", () => { + const body = JSON.stringify({ + error: { + message: "INVALID_ARGUMENT: unsupported anthropic-beta header context-1m-2025-08-07", + }, + }); + + expect(isUnsupportedClaudeLongContextBetaError(400, body)).toBe(true); + }); + + it("returns true for 403 unsupported anthropic-beta long-context errors", () => { + const body = JSON.stringify({ + error: { + message: "INVALID_ARGUMENT: unsupported anthropic-beta header context-1m-2025-08-07", + }, + }); + + expect(isUnsupportedClaudeLongContextBetaError(403, body)).toBe(true); + }); + + it("returns true for 422 unsupported anthropic-beta long-context errors", () => { + const body = JSON.stringify({ + error: { + message: "INVALID_ARGUMENT: unsupported anthropic-beta header context-1m-2025-08-07", + }, + }); + + expect(isUnsupportedClaudeLongContextBetaError(422, body)).toBe(true); + }); + + it("returns false when message only mentions context-1m beta without rejection signal", () => { + const body = JSON.stringify({ + error: { + message: "Request included anthropic-beta context-1m-2025-08-07 during beta rollout", + }, + }); + + expect(isUnsupportedClaudeLongContextBetaError(400, body)).toBe(false); + }); + + it("returns false for anthropic-beta errors with generic context wording", () => { + const body = JSON.stringify({ + error: { + message: "INVALID_ARGUMENT: anthropic-beta header count exceeds limit. Request context was valid.", + }, + }); + + expect(isUnsupportedClaudeLongContextBetaError(400, body)).toBe(false); + }); + + it("returns false for anthropic-beta errors with generic context wording when expectedHeader is provided", () => { + const body = JSON.stringify({ + error: { + message: "INVALID_ARGUMENT: anthropic-beta header count exceeds limit. Request context was valid.", + }, + }); + + expect( + isUnsupportedClaudeLongContextBetaError(400, body, "context-1m-2025-08-07"), + ).toBe(false); + }); + + it("returns false for quota or rate-limit messages even when context-1m is mentioned", () => { + const body = JSON.stringify({ + error: { + message: "RESOURCE_EXHAUSTED: rate limit quota exceeded for anthropic-beta context-1m-2025-08-07", + }, + }); + + expect(isUnsupportedClaudeLongContextBetaError(403, body)).toBe(false); + }); + + it("returns false for unrelated context length errors", () => { + const body = JSON.stringify({ + error: { + message: "Prompt is too long for this model", + }, + }); + + expect(isUnsupportedClaudeLongContextBetaError(400, body)).toBe(false); + }); + + it("returns false for non-4xx statuses", () => { + const body = JSON.stringify({ + error: { + message: "unsupported anthropic-beta header context-1m-2025-08-07", + }, + }); + + expect(isUnsupportedClaudeLongContextBetaError(500, body)).toBe(false); + }); + + it("returns false for generic anthropic-beta rejection that does not mention context-1m", () => { + const body = JSON.stringify({ + error: { + message: "INVALID_ARGUMENT: unsupported anthropic-beta header", + }, + }); + + expect( + isUnsupportedClaudeLongContextBetaError(400, body, "context-1m-2025-08-07"), + ).toBe(false); + }); + + it("returns false for interleaved-thinking beta rejection when expected header is context-1m", () => { + const body = JSON.stringify({ + error: { + message: "INVALID_ARGUMENT: unsupported anthropic-beta header interleaved-thinking-2025-05-14", + }, + }); + + expect( + isUnsupportedClaudeLongContextBetaError(400, body, "context-1m-2025-08-07"), + ).toBe(false); + }); + + it("returns false when unsupported anthropic-beta rejection only references unrelated beta header", () => { + const body = JSON.stringify({ + error: { + message: "UNKNOWN: unsupported anthropic-beta header foo-beta-2025-01-01", + }, + }); + + expect( + isUnsupportedClaudeLongContextBetaError(400, body, "context-1m-2025-08-07"), + ).toBe(false); + }); + }); + describe("buildSignatureSessionKey", () => { it("builds key from sessionId, model, project, and conversation", () => { const key = buildSignatureSessionKey("session-1", "claude-3", "conv-456", "proj-123"); @@ -656,6 +787,155 @@ it("removes x-api-key header", () => { expect(parsed.requestId).toBeUndefined(); }); + describe("Claude long-context beta header", () => { + it("adds long-context beta header for Claude Sonnet 4.6 when enabled", () => { + const result = prepareAntigravityRequest( + "https://generativelanguage.googleapis.com/v1beta/models/claude-sonnet-4-6:generateContent", + { method: "POST", body: JSON.stringify({ contents: [] }) }, + mockAccessToken, + mockProjectId, + undefined, + "antigravity", + false, + { + claudeLongContextBetaEnabled: true, + claudeLongContextBetaHeader: "context-1m-2025-08-07", + }, + ); + + const headers = result.init.headers as Headers; + const anthropicBeta = headers.get("anthropic-beta"); + expect(anthropicBeta).toContain("context-1m-2025-08-07"); + expect(result.claudeLongContextBetaApplied).toBe(true); + expect(result.claudeLongContextBetaHeader).toBe("context-1m-2025-08-07"); + }); + + it("adds long-context beta header for versioned Claude Sonnet 4.6 model when enabled", () => { + const result = prepareAntigravityRequest( + "https://generativelanguage.googleapis.com/v1beta/models/claude-sonnet-4-6-20250514:generateContent", + { method: "POST", body: JSON.stringify({ contents: [] }) }, + mockAccessToken, + mockProjectId, + undefined, + "antigravity", + false, + { + claudeLongContextBetaEnabled: true, + claudeLongContextBetaHeader: "context-1m-2025-08-07", + }, + ); + + const headers = result.init.headers as Headers; + const anthropicBeta = headers.get("anthropic-beta"); + expect(anthropicBeta).toContain("context-1m-2025-08-07"); + expect(result.claudeLongContextBetaApplied).toBe(true); + }); + + it("treats versioned Claude Sonnet 4.6 as non-thinking and strips thinkingConfig", () => { + const result = prepareAntigravityRequest( + "https://generativelanguage.googleapis.com/v1beta/models/claude-sonnet-4-6-20250514:generateContent", + { + method: "POST", + body: JSON.stringify({ + contents: [], + generationConfig: { + thinkingConfig: { + includeThoughts: true, + thinkingBudget: 8192, + }, + }, + }), + }, + mockAccessToken, + mockProjectId, + ); + + const parsed = JSON.parse(result.init.body as string) as { + request?: { + generationConfig?: { + thinkingConfig?: unknown + } + } + }; + + expect(parsed.request?.generationConfig?.thinkingConfig).toBeUndefined(); + }); + + it("does not add long-context beta header for non-Claude models", () => { + const result = prepareAntigravityRequest( + "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash:generateContent", + { method: "POST", body: JSON.stringify({ contents: [] }) }, + mockAccessToken, + mockProjectId, + undefined, + "antigravity", + false, + { + claudeLongContextBetaEnabled: true, + claudeLongContextBetaHeader: "context-1m-2025-08-07", + }, + ); + + const headers = result.init.headers as Headers; + const anthropicBeta = headers.get("anthropic-beta"); + expect(anthropicBeta ?? "").not.toContain("context-1m-2025-08-07"); + expect(result.claudeLongContextBetaApplied).toBe(false); + }); + + it("deduplicates anthropic-beta values when header already exists", () => { + const result = prepareAntigravityRequest( + "https://generativelanguage.googleapis.com/v1beta/models/claude-opus-4-6-thinking:generateContent", + { + method: "POST", + body: JSON.stringify({ contents: [] }), + headers: { + "anthropic-beta": "context-1m-2025-08-07,interleaved-thinking-2025-05-14,foo-beta", + }, + }, + mockAccessToken, + mockProjectId, + undefined, + "antigravity", + false, + { + claudeLongContextBetaEnabled: true, + claudeLongContextBetaHeader: "context-1m-2025-08-07", + }, + ); + + const headers = result.init.headers as Headers; + const anthropicBeta = headers.get("anthropic-beta"); + expect(anthropicBeta).toBeTruthy(); + const parts = anthropicBeta!.split(",").map((part) => part.trim()); + expect(parts.filter((part) => part === "context-1m-2025-08-07")).toHaveLength(1); + expect(parts.filter((part) => part === "interleaved-thinking-2025-05-14")).toHaveLength(1); + expect(parts).toContain("foo-beta"); + }); + + it("disables long-context beta header on retry while keeping interleaved thinking", () => { + const result = prepareAntigravityRequest( + "https://generativelanguage.googleapis.com/v1beta/models/claude-opus-4-6-thinking:generateContent", + { method: "POST", body: JSON.stringify({ contents: [] }) }, + mockAccessToken, + mockProjectId, + undefined, + "antigravity", + false, + { + claudeLongContextBetaEnabled: true, + claudeLongContextBetaHeader: "context-1m-2025-08-07", + disableClaudeLongContextBetaForRetry: true, + }, + ); + + const headers = result.init.headers as Headers; + const anthropicBeta = headers.get("anthropic-beta"); + expect(anthropicBeta ?? "").not.toContain("context-1m-2025-08-07"); + expect(anthropicBeta ?? "").toContain("interleaved-thinking-2025-05-14"); + expect(result.claudeLongContextBetaApplied).toBe(false); + }); + }); + it("identifies Claude models correctly", () => { const result = prepareAntigravityRequest( "https://generativelanguage.googleapis.com/v1beta/models/claude-sonnet-4-20250514:generateContent", diff --git a/src/plugin/request.ts b/src/plugin/request.ts index dc989738..39b33386 100644 --- a/src/plugin/request.ts +++ b/src/plugin/request.ts @@ -721,6 +721,35 @@ function generateSyntheticProjectId(): string { const STREAM_ACTION = "streamGenerateContent"; +function appendAnthropicBetaHeader(headers: Headers, value: string): void { + const trimmedValue = value.trim(); + if (!trimmedValue) { + return; + } + + const existing = headers.get("anthropic-beta"); + if (!existing) { + headers.set("anthropic-beta", trimmedValue); + return; + } + + const tokens = existing + .split(",") + .map((token) => token.trim()) + .filter((token) => token.length > 0); + + if (!tokens.includes(trimmedValue)) { + tokens.push(trimmedValue); + } + + headers.set("anthropic-beta", tokens.join(",")); +} + +function isClaudeModelEligibleForLongContextBeta(model: string): boolean { + const lower = model.toLowerCase(); + return lower.startsWith("claude-sonnet-4-6") || lower.startsWith("claude-opus-4-6-thinking"); +} + /** * Detects requests headed to the Google Generative Language API so we can intercept them. */ @@ -728,6 +757,75 @@ export function isGenerativeLanguageRequest(input: RequestInfo): input is string return typeof input === "string" && input.includes("generativelanguage.googleapis.com"); } +export function isUnsupportedClaudeLongContextBetaError( + status: number, + bodyText: string | undefined, + expectedHeader?: string, +): boolean { + if (status !== 400 && status !== 403 && status !== 422) { + return false; + } + + if (!bodyText || typeof bodyText !== "string") { + return false; + } + + const lower = bodyText.toLowerCase(); + const normalizedExpectedHeader = expectedHeader?.trim().toLowerCase() ?? ""; + const mentionsExpectedHeader = normalizedExpectedHeader.length > 0 && lower.includes(normalizedExpectedHeader); + + const mentionsLongContextToken = lower.includes("context-1m"); + const mentionsAnthropicBeta = lower.includes("anthropic-beta") || lower.includes("anthropic beta"); + const mentionsInterleavedThinking = lower.includes("interleaved-thinking"); + const mentionsUnsupportedKeyword = + lower.includes("unsupported") + || lower.includes("not supported") + || lower.includes("unknown") + || lower.includes("unrecognized"); + const mentionsInvalidKeyword = lower.includes("invalid"); + const mentionsHeaderValueIssue = + lower.includes("invalid header") + || lower.includes("header value") + || lower.includes("malformed"); + const mentionsQuotaOrRateLimit = + lower.includes("quota") + || lower.includes("rate limit") + || lower.includes("resource_exhausted") + || lower.includes("too many requests"); + const hasHeaderRejectionSignal = + mentionsUnsupportedKeyword + || mentionsHeaderValueIssue + || ( + mentionsInvalidKeyword + && mentionsAnthropicBeta + && (lower.includes("header") || mentionsLongContextToken) + ); + + if (mentionsQuotaOrRateLimit || !hasHeaderRejectionSignal) { + return false; + } + + if (mentionsExpectedHeader) { + return true; + } + + if (mentionsLongContextToken && mentionsAnthropicBeta) { + return true; + } + + if ( + normalizedExpectedHeader.startsWith("context-1m") + && mentionsAnthropicBeta + && mentionsLongContextToken + && !mentionsInterleavedThinking + && (mentionsUnsupportedKeyword || mentionsHeaderValueIssue) + ) { + return true; + } + + return false; +} + /** * Options for request preparation. */ @@ -736,6 +834,12 @@ export interface PrepareRequestOptions { claudeToolHardening?: boolean; /** Enable top-level Claude prompt auto-caching (`cache_control`). Default: false */ claudePromptAutoCaching?: boolean; + /** Enable experimental Claude 1M long-context beta header injection for Claude 4.6 models. */ + claudeLongContextBetaEnabled?: boolean; + /** Header value for Claude long-context beta capability. */ + claudeLongContextBetaHeader?: string; + /** Disable Claude long-context beta header for one retry attempt. */ + disableClaudeLongContextBetaForRetry?: boolean; /** Google Search configuration (global default) */ googleSearch?: GoogleSearchConfig; /** Per-account fingerprint for rate limit mitigation. Falls back to session fingerprint if not provided. */ @@ -764,6 +868,8 @@ export function prepareAntigravityRequest( toolDebugSummary?: string; toolDebugPayload?: string; needsSignedThinkingWarmup?: boolean; + claudeLongContextBetaApplied?: boolean; + claudeLongContextBetaHeader?: string; headerStyle: HeaderStyle; thinkingRecoveryMessage?: string; } { @@ -775,6 +881,8 @@ export function prepareAntigravityRequest( let toolDebugPayload: string | undefined; let sessionId: string | undefined; let needsSignedThinkingWarmup = false; + let claudeLongContextBetaApplied = false; + let appliedClaudeLongContextBetaHeader: string | undefined; let thinkingRecoveryMessage: string | undefined; if (!isGenerativeLanguageRequest(input)) { @@ -818,6 +926,9 @@ export function prepareAntigravityRequest( const isClaudeThinking = isClaudeThinkingModel(resolved.actualModel); const keepThinkingEnabled = getKeepThinking(); const enableClaudePromptAutoCaching = options?.claudePromptAutoCaching ?? false; + const claudeLongContextBetaEnabled = options?.claudeLongContextBetaEnabled ?? false; + const claudeLongContextBetaHeader = options?.claudeLongContextBetaHeader?.trim() ?? ""; + const disableClaudeLongContextBetaForRetry = options?.disableClaudeLongContextBetaForRetry ?? false; // Tier-based thinking configuration from model resolver (can be overridden by variant config) let tierThinkingBudget = resolved.thinkingBudget; @@ -965,7 +1076,10 @@ export function prepareAntigravityRequest( // Claude Sonnet 4.6 is non-thinking only. // Ignore any client-provided thinkingConfig for this model. const lowerEffective = effectiveModel.toLowerCase(); - const isClaudeSonnetNonThinking = lowerEffective === "claude-sonnet-4-6"; + const isClaudeSonnetNonThinking = lowerEffective.startsWith("claude-sonnet-4-6"); + if (isClaudeSonnetNonThinking && rawGenerationConfig) { + delete rawGenerationConfig.thinkingConfig; + } const effectiveUserThinkingConfig = (isClaudeSonnetNonThinking || isImageModel) ? undefined : userThinkingConfig; // For image models, add imageConfig instead of thinkingConfig @@ -1521,19 +1635,23 @@ export function prepareAntigravityRequest( headers.set("Accept", "text/event-stream"); } + if ( + isClaude + && claudeLongContextBetaEnabled + && !disableClaudeLongContextBetaForRetry + && claudeLongContextBetaHeader.length > 0 + && isClaudeModelEligibleForLongContextBeta(effectiveModel) + ) { + appendAnthropicBetaHeader(headers, claudeLongContextBetaHeader); + claudeLongContextBetaApplied = true; + appliedClaudeLongContextBetaHeader = claudeLongContextBetaHeader; + } + // Add interleaved thinking header for Claude thinking models // This enables real-time streaming of thinking tokens if (isClaudeThinking) { - const existing = headers.get("anthropic-beta"); const interleavedHeader = "interleaved-thinking-2025-05-14"; - - if (existing) { - if (!existing.includes(interleavedHeader)) { - headers.set("anthropic-beta", `${existing},${interleavedHeader}`); - } - } else { - headers.set("anthropic-beta", interleavedHeader); - } + appendAnthropicBetaHeader(headers, interleavedHeader); } if (headerStyle === "antigravity") { @@ -1570,6 +1688,8 @@ export function prepareAntigravityRequest( toolDebugSummary: toolDebugSummaries.slice(0, 20).join(" | "), toolDebugPayload, needsSignedThinkingWarmup, + claudeLongContextBetaApplied, + claudeLongContextBetaHeader: appliedClaudeLongContextBetaHeader, headerStyle, thinkingRecoveryMessage, };