From 4256c6088acc35d0fd90836213549b910bf83f3e Mon Sep 17 00:00:00 2001 From: anand-92 <28579740+anand-92@users.noreply.github.com> Date: Tue, 30 Jun 2026 14:24:45 -0400 Subject: [PATCH] Drop Sonnet 4.6, add Sonnet 5 Replace the Claude-provider Sonnet 4.6 model with Sonnet 5 (claude-sonnet-5), exposing the full reasoning level set (low/medium/high/xhigh/max, default xhigh) and a 128000 output ceiling. Sonnet 5 supports output_config.effort:max natively, so remove the Sonnet 4.6-only max-thinking workaround in ThinkingProxy. --- AGENTS.md | 2 +- src/Sources/DroidProxyModelCatalog.swift | 16 ++-- src/Sources/ThinkingProxy.swift | 85 ------------------- .../DroidProxyModelCatalogTests.swift | 15 ++-- .../ThinkingProxySonnetMaxThinkingTests.swift | 60 ------------- 5 files changed, 13 insertions(+), 165 deletions(-) delete mode 100644 src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift diff --git a/AGENTS.md b/AGENTS.md index 2d8c57b..d10cf5f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -70,7 +70,7 @@ What it still does today: What it no longer does (removed in the Droid-CLI-thinking refactor): -- No Claude adaptive thinking injection (Opus 4.8 / Sonnet 4.6 — `thinking` + `output_config`) +- No Claude adaptive thinking injection (Opus 4.8 / Sonnet 5 — `thinking` + `output_config`) - No classic `thinking.budget_tokens` injection - No Codex `reasoning.effort` injection - No Gemini `generationConfig.thinkingConfig` injection diff --git a/src/Sources/DroidProxyModelCatalog.swift b/src/Sources/DroidProxyModelCatalog.swift index 7f6e3e4..18c9c77 100644 --- a/src/Sources/DroidProxyModelCatalog.swift +++ b/src/Sources/DroidProxyModelCatalog.swift @@ -92,10 +92,6 @@ enum DroidProxyModelCatalog { private static let max = DroidProxyThinkingLevel(value: "max", displayName: "Max") private static let claudeAdvancedLevels = [low, medium, high, xhigh, max] - // Sonnet 4.6 exposes max in Droid's selector. Its adaptive thinking rejects - // output_config.effort:max upstream, so ThinkingProxy auto-converts a max - // request to classic extended thinking; lower efforts pass through adaptive. - private static let claudeSonnetLevels = [low, medium, high, max] private static let codexLevels = [low, medium, high, xhigh] private static func antigravityModel( @@ -147,16 +143,16 @@ enum DroidProxyModelCatalog { defaultLevelValue: "xhigh" ), DroidProxyModelDefinition( - baseModel: "claude-sonnet-4-6", - idSlug: "sonnet-4-6", - displayName: "Sonnet 4.6", - maxOutputTokens: 64000, + baseModel: "claude-sonnet-5", + idSlug: "sonnet-5", + displayName: "Sonnet 5", + maxOutputTokens: 128000, provider: "anthropic", providerKey: "claude", baseURL: "http://localhost:8317", kind: .claudeAdaptive, - levels: claudeSonnetLevels, - defaultLevelValue: "high" + levels: claudeAdvancedLevels, + defaultLevelValue: "xhigh" ), DroidProxyModelDefinition( diff --git a/src/Sources/ThinkingProxy.swift b/src/Sources/ThinkingProxy.swift index 438f4f2..ad2f2f5 100644 --- a/src/Sources/ThinkingProxy.swift +++ b/src/Sources/ThinkingProxy.swift @@ -11,9 +11,6 @@ import Network request enables thinking, so Claude emits visible thinking blocks. - Injects `service_tier: "priority"` for OpenAI Responses API requests on the user-enabled GPT 5.x fast-mode models (these toggles are independent of reasoning effort). - - Converts Sonnet 4.6 to classic extended thinking (`thinking:{type:enabled,budget_tokens}`) - when Droid selects the `max` reasoning effort, since Sonnet's adaptive thinking rejects - `output_config.effort:max` upstream. Other effort levels pass through as adaptive. - Rewrites Gemini `/v1/responses` to `/v1/chat/completions` since the backend does not support Gemini via the Responses API endpoint. @@ -318,10 +315,6 @@ class ThinkingProxy { if method == "POST" && !bodyString.isEmpty { ThinkingProxy.fileLog("INCOMING REQUEST: \(method) \(rewrittenPath)") - if let result = applySonnetMaxThinking(jsonString: modifiedBody, fields: requestFields) { - modifiedBody = result - requestFields = inspectRequestJSONFields(in: modifiedBody) - } if let result = rewriteAntigravityModelAlias(jsonString: modifiedBody, fields: requestFields) { modifiedBody = result requestFields = inspectRequestJSONFields(in: modifiedBody) @@ -449,84 +442,6 @@ class ThinkingProxy { "cursor-composer-2.5": "composer-2.5" ] - // Sonnet 4.6 max-thinking. Sonnet's adaptive thinking rejects - // `output_config.effort:max` upstream (HTTP 400 "level max not supported"), - // so when Droid selects the `max` effort we convert the request to classic - // extended thinking (`thinking:{type:enabled,budget_tokens}`), which the - // backend accepts. Lower efforts pass through as adaptive untouched. - // budget_tokens must be strictly less than max_tokens, so we also pin - // max_tokens to the model's output ceiling. - private static let sonnetMaxThinkingModel = "claude-sonnet-4-6" - private static let sonnetMaxThinkingMaxTokens = 64000 - private static let sonnetMaxThinkingBudgetTokens = 63999 - - /// Test entry point for the Sonnet 4.6 max-thinking transform. - static func applySonnetMaxThinking(in jsonString: String) -> String { - let proxy = ThinkingProxy() - let fields = proxy.inspectRequestJSONFields(in: jsonString) - return proxy.applySonnetMaxThinking(jsonString: jsonString, fields: fields) ?? jsonString - } - - /// When a Sonnet 4.6 request asks for `output_config.effort == "max"`, rewrite - /// its `thinking` field to classic extended thinking and pin `max_tokens` so - /// the budget fits. Returns nil when the request is not Sonnet 4.6 or is not - /// requesting max effort, so the caller can skip re-inspection and lower - /// efforts forward unchanged. Edits are surgical (in-place value replacement / - /// single insertion) to preserve JSON key ordering, which Anthropic's prompt - /// cache is sensitive to. - private func applySonnetMaxThinking(jsonString: String, fields: RequestJSONFields?) -> String? { - guard fields?.model == Self.sonnetMaxThinkingModel, - sonnetRequestsMaxEffort(in: jsonString) else { - return nil - } - - let thinkingValue = "{\"type\":\"enabled\",\"budget_tokens\":\(Self.sonnetMaxThinkingBudgetTokens)}" - var result = jsonString - - if let thinkingLocation = fields?.thinkingLocation { - result.replaceSubrange(thinkingLocation.valueRange, with: thinkingValue) - } else if let modelLocation = fields?.modelLocation { - result.insert(contentsOf: ",\"thinking\":\(thinkingValue)", at: modelLocation.pairRange.upperBound) - } else { - return nil - } - - // The edit above shifted indices, so re-scan to locate max_tokens/model. - result = pinSonnetMaxThinkingMaxTokens(in: result) - - ThinkingProxy.fileLog("SONNET MAX THINKING: effort=max -> classic extended thinking budget_tokens=\(Self.sonnetMaxThinkingBudgetTokens) max_tokens=\(Self.sonnetMaxThinkingMaxTokens)") - return result - } - - /// True when the request's `output_config.effort` is `"max"`. Scoped to the - /// Sonnet 4.6 path so non-Sonnet requests never pay for the extra - /// `output_config` scan (which would otherwise defeat the routing scan's - /// early-exit before the large `messages` array). - private func sonnetRequestsMaxEffort(in jsonString: String) -> Bool { - guard let outputConfig = findTopLevelFieldLocations(in: jsonString, keys: ["output_config"])?["output_config"], - let effort = objectStringField(in: jsonString, objectRange: outputConfig.valueRange, key: "effort")?.value else { - return false - } - return effort == "max" - } - - /// Pins `max_tokens` to the Sonnet output ceiling so it stays strictly above - /// the thinking budget. Replaces the value in place if present, otherwise - /// injects it right after `model`. - private func pinSonnetMaxThinkingMaxTokens(in jsonString: String) -> String { - guard let locations = findTopLevelFieldLocations(in: jsonString, keys: ["max_tokens", "model"]) else { - return jsonString - } - - var result = jsonString - if let maxTokensLocation = locations["max_tokens"] { - result.replaceSubrange(maxTokensLocation.valueRange, with: "\(Self.sonnetMaxThinkingMaxTokens)") - } else if let modelLocation = locations["model"] { - result.insert(contentsOf: ",\"max_tokens\":\(Self.sonnetMaxThinkingMaxTokens)", at: modelLocation.pairRange.upperBound) - } - return result - } - private func rewriteAntigravityModelAlias(jsonString: String, fields: RequestJSONFields?) -> String? { guard let model = fields?.model, let modelLocation = fields?.modelLocation, diff --git a/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift b/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift index 87121f3..78cc964 100644 --- a/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift +++ b/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift @@ -13,17 +13,14 @@ final class DroidProxyModelCatalogTests: XCTestCase { XCTAssertEqual(fable["maxOutputTokens"] as? Int, 128000) } - func testSonnet46UsesNativeModelIDAndExposesMax() throws { - let sonnet = try XCTUnwrap(settingsEntry(id: "custom:droidproxy:sonnet-4-6")) + func testSonnet5UsesNativeModelIDAndExposesFullLevels() throws { + let sonnet = try XCTUnwrap(settingsEntry(id: "custom:droidproxy:sonnet-5")) - // Sonnet 4.6 ships its native Anthropic model id (no proxy alias) and - // exposes max in Droid's selector. ThinkingProxy auto-converts a max - // request to classic extended thinking since adaptive rejects effort:max. - XCTAssertEqual(sonnet["model"] as? String, "claude-sonnet-4-6") + XCTAssertEqual(sonnet["model"] as? String, "claude-sonnet-5") XCTAssertEqual(sonnet["enableThinking"] as? Bool, true) - XCTAssertEqual(sonnet["reasoningEffort"] as? String, "high") - XCTAssertEqual(sonnet["defaultReasoningEffort"] as? String, "high") - XCTAssertEqual(sonnet["supportedReasoningEfforts"] as? [String], ["low", "medium", "high", "max"]) + XCTAssertEqual(sonnet["reasoningEffort"] as? String, "xhigh") + XCTAssertEqual(sonnet["defaultReasoningEffort"] as? String, "xhigh") + XCTAssertEqual(sonnet["supportedReasoningEfforts"] as? [String], ["low", "medium", "high", "xhigh", "max"]) } private func settingsEntry(id: String) -> [String: Any]? { diff --git a/src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift b/src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift deleted file mode 100644 index dce2ff8..0000000 --- a/src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift +++ /dev/null @@ -1,60 +0,0 @@ -import XCTest -@testable import CLIProxyMenuBar - -final class ThinkingProxySonnetMaxThinkingTests: XCTestCase { - func testConvertsSonnetAdaptiveThinkingToClassicWhenEffortIsMax() { - let request = """ - {"model":"claude-sonnet-4-6","max_tokens":8192,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"},"messages":[{"role":"user","content":"hi"}]} - """ - - let rewritten = ThinkingProxy.applySonnetMaxThinking(in: request) - - // thinking becomes classic extended thinking and max_tokens is pinned so - // the budget fits; key ordering and other fields are preserved. - XCTAssertEqual( - rewritten, - """ - {"model":"claude-sonnet-4-6","max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":63999},"output_config":{"effort":"max"},"messages":[{"role":"user","content":"hi"}]} - """ - ) - } - - func testInjectsThinkingAndMaxTokensWhenAbsent() { - let request = """ - {"model":"claude-sonnet-4-6","output_config":{"effort":"max"},"messages":[{"role":"user","content":"hi"}]} - """ - - let rewritten = ThinkingProxy.applySonnetMaxThinking(in: request) - - XCTAssertEqual( - rewritten, - """ - {"model":"claude-sonnet-4-6","max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":63999},"output_config":{"effort":"max"},"messages":[{"role":"user","content":"hi"}]} - """ - ) - } - - func testLeavesSonnetUnchangedForNonMaxEffort() { - let request = """ - {"model":"claude-sonnet-4-6","thinking":{"type":"adaptive"},"output_config":{"effort":"high"}} - """ - - XCTAssertEqual(ThinkingProxy.applySonnetMaxThinking(in: request), request) - } - - func testLeavesSonnetUnchangedWhenNoOutputConfig() { - let request = """ - {"model":"claude-sonnet-4-6","thinking":{"type":"adaptive"}} - """ - - XCTAssertEqual(ThinkingProxy.applySonnetMaxThinking(in: request), request) - } - - func testLeavesNonSonnetModelUnchangedEvenAtMaxEffort() { - let request = """ - {"model":"claude-opus-4-8","thinking":{"type":"adaptive"},"output_config":{"effort":"max"}} - """ - - XCTAssertEqual(ThinkingProxy.applySonnetMaxThinking(in: request), request) - } -}