nikships · nikships · Jun 30, 2026 · Jun 30, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -70,7 +70,7 @@ What it still does today:
 
 What it no longer does (removed in the Droid-CLI-thinking refactor):
 
-- No Claude adaptive thinking injection (Opus 4.8 / Sonnet 4.6 — `thinking` + `output_config`)
+- No Claude adaptive thinking injection (Opus 4.8 / Sonnet 5 — `thinking` + `output_config`)
 - No classic `thinking.budget_tokens` injection
 - No Codex `reasoning.effort` injection
 - No Gemini `generationConfig.thinkingConfig` injection

diff --git a/src/Sources/DroidProxyModelCatalog.swift b/src/Sources/DroidProxyModelCatalog.swift
@@ -92,10 +92,6 @@ enum DroidProxyModelCatalog {
     private static let max = DroidProxyThinkingLevel(value: "max", displayName: "Max")
 
     private static let claudeAdvancedLevels = [low, medium, high, xhigh, max]
-    // Sonnet 4.6 exposes max in Droid's selector. Its adaptive thinking rejects
-    // output_config.effort:max upstream, so ThinkingProxy auto-converts a max
-    // request to classic extended thinking; lower efforts pass through adaptive.
-    private static let claudeSonnetLevels = [low, medium, high, max]
     private static let codexLevels = [low, medium, high, xhigh]
 
     private static func antigravityModel(
@@ -147,16 +143,16 @@ enum DroidProxyModelCatalog {
                 defaultLevelValue: "xhigh"
             ),
             DroidProxyModelDefinition(
-                baseModel: "claude-sonnet-4-6",
-                idSlug: "sonnet-4-6",
-                displayName: "Sonnet 4.6",
-                maxOutputTokens: 64000,
+                baseModel: "claude-sonnet-5",
+                idSlug: "sonnet-5",
+                displayName: "Sonnet 5",
+                maxOutputTokens: 128000,
                 provider: "anthropic",
                 providerKey: "claude",
                 baseURL: "http://localhost:8317",
                 kind: .claudeAdaptive,
-                levels: claudeSonnetLevels,
-                defaultLevelValue: "high"
+                levels: claudeAdvancedLevels,
+                defaultLevelValue: "xhigh"
             ),
 
             DroidProxyModelDefinition(

diff --git a/src/Sources/ThinkingProxy.swift b/src/Sources/ThinkingProxy.swift
@@ -11,9 +11,6 @@ import Network
    request enables thinking, so Claude emits visible thinking blocks.
  - Injects `service_tier: "priority"` for OpenAI Responses API requests on the user-enabled
    GPT 5.x fast-mode models (these toggles are independent of reasoning effort).
- - Converts Sonnet 4.6 to classic extended thinking (`thinking:{type:enabled,budget_tokens}`)
-   when Droid selects the `max` reasoning effort, since Sonnet's adaptive thinking rejects
-   `output_config.effort:max` upstream. Other effort levels pass through as adaptive.
  - Rewrites Gemini `/v1/responses` to `/v1/chat/completions` since the backend does not
    support Gemini via the Responses API endpoint.
 
@@ -318,10 +315,6 @@ class ThinkingProxy {
 
         if method == "POST" && !bodyString.isEmpty {
             ThinkingProxy.fileLog("INCOMING REQUEST: \(method) \(rewrittenPath)")
-            if let result = applySonnetMaxThinking(jsonString: modifiedBody, fields: requestFields) {
-                modifiedBody = result
-                requestFields = inspectRequestJSONFields(in: modifiedBody)
-            }
             if let result = rewriteAntigravityModelAlias(jsonString: modifiedBody, fields: requestFields) {
                 modifiedBody = result
                 requestFields = inspectRequestJSONFields(in: modifiedBody)
@@ -449,84 +442,6 @@ class ThinkingProxy {
         "cursor-composer-2.5": "composer-2.5"
     ]
 
-    // Sonnet 4.6 max-thinking. Sonnet's adaptive thinking rejects
-    // `output_config.effort:max` upstream (HTTP 400 "level max not supported"),
-    // so when Droid selects the `max` effort we convert the request to classic
-    // extended thinking (`thinking:{type:enabled,budget_tokens}`), which the
-    // backend accepts. Lower efforts pass through as adaptive untouched.
-    // budget_tokens must be strictly less than max_tokens, so we also pin
-    // max_tokens to the model's output ceiling.
-    private static let sonnetMaxThinkingModel = "claude-sonnet-4-6"
-    private static let sonnetMaxThinkingMaxTokens = 64000
-    private static let sonnetMaxThinkingBudgetTokens = 63999
-
-    /// Test entry point for the Sonnet 4.6 max-thinking transform.
-    static func applySonnetMaxThinking(in jsonString: String) -> String {
-        let proxy = ThinkingProxy()
-        let fields = proxy.inspectRequestJSONFields(in: jsonString)
-        return proxy.applySonnetMaxThinking(jsonString: jsonString, fields: fields) ?? jsonString
-    }
-
-    /// When a Sonnet 4.6 request asks for `output_config.effort == "max"`, rewrite
-    /// its `thinking` field to classic extended thinking and pin `max_tokens` so
-    /// the budget fits. Returns nil when the request is not Sonnet 4.6 or is not
-    /// requesting max effort, so the caller can skip re-inspection and lower
-    /// efforts forward unchanged. Edits are surgical (in-place value replacement /
-    /// single insertion) to preserve JSON key ordering, which Anthropic's prompt
-    /// cache is sensitive to.
-    private func applySonnetMaxThinking(jsonString: String, fields: RequestJSONFields?) -> String? {
-        guard fields?.model == Self.sonnetMaxThinkingModel,
-              sonnetRequestsMaxEffort(in: jsonString) else {
-            return nil
-        }
-
-        let thinkingValue = "{\"type\":\"enabled\",\"budget_tokens\":\(Self.sonnetMaxThinkingBudgetTokens)}"
-        var result = jsonString
-
-        if let thinkingLocation = fields?.thinkingLocation {
-            result.replaceSubrange(thinkingLocation.valueRange, with: thinkingValue)
-        } else if let modelLocation = fields?.modelLocation {
-            result.insert(contentsOf: ",\"thinking\":\(thinkingValue)", at: modelLocation.pairRange.upperBound)
-        } else {
-            return nil
-        }
-
-        // The edit above shifted indices, so re-scan to locate max_tokens/model.
-        result = pinSonnetMaxThinkingMaxTokens(in: result)
-
-        ThinkingProxy.fileLog("SONNET MAX THINKING: effort=max -> classic extended thinking budget_tokens=\(Self.sonnetMaxThinkingBudgetTokens) max_tokens=\(Self.sonnetMaxThinkingMaxTokens)")
-        return result
-    }
-
-    /// True when the request's `output_config.effort` is `"max"`. Scoped to the
-    /// Sonnet 4.6 path so non-Sonnet requests never pay for the extra
-    /// `output_config` scan (which would otherwise defeat the routing scan's
-    /// early-exit before the large `messages` array).
-    private func sonnetRequestsMaxEffort(in jsonString: String) -> Bool {
-        guard let outputConfig = findTopLevelFieldLocations(in: jsonString, keys: ["output_config"])?["output_config"],
-              let effort = objectStringField(in: jsonString, objectRange: outputConfig.valueRange, key: "effort")?.value else {
-            return false
-        }
-        return effort == "max"
-    }
-
-    /// Pins `max_tokens` to the Sonnet output ceiling so it stays strictly above
-    /// the thinking budget. Replaces the value in place if present, otherwise
-    /// injects it right after `model`.
-    private func pinSonnetMaxThinkingMaxTokens(in jsonString: String) -> String {
-        guard let locations = findTopLevelFieldLocations(in: jsonString, keys: ["max_tokens", "model"]) else {
-            return jsonString
-        }
-
-        var result = jsonString
-        if let maxTokensLocation = locations["max_tokens"] {
-            result.replaceSubrange(maxTokensLocation.valueRange, with: "\(Self.sonnetMaxThinkingMaxTokens)")
-        } else if let modelLocation = locations["model"] {
-            result.insert(contentsOf: ",\"max_tokens\":\(Self.sonnetMaxThinkingMaxTokens)", at: modelLocation.pairRange.upperBound)
-        }
-        return result
-    }
-
     private func rewriteAntigravityModelAlias(jsonString: String, fields: RequestJSONFields?) -> String? {
         guard let model = fields?.model,
               let modelLocation = fields?.modelLocation,

diff --git a/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift b/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift
@@ -13,17 +13,14 @@ final class DroidProxyModelCatalogTests: XCTestCase {
         XCTAssertEqual(fable["maxOutputTokens"] as? Int, 128000)
     }
 
-    func testSonnet46UsesNativeModelIDAndExposesMax() throws {
-        let sonnet = try XCTUnwrap(settingsEntry(id: "custom:droidproxy:sonnet-4-6"))
+    func testSonnet5UsesNativeModelIDAndExposesFullLevels() throws {
+        let sonnet = try XCTUnwrap(settingsEntry(id: "custom:droidproxy:sonnet-5"))
 
-        // Sonnet 4.6 ships its native Anthropic model id (no proxy alias) and
-        // exposes max in Droid's selector. ThinkingProxy auto-converts a max
-        // request to classic extended thinking since adaptive rejects effort:max.
-        XCTAssertEqual(sonnet["model"] as? String, "claude-sonnet-4-6")
+        XCTAssertEqual(sonnet["model"] as? String, "claude-sonnet-5")
         XCTAssertEqual(sonnet["enableThinking"] as? Bool, true)
-        XCTAssertEqual(sonnet["reasoningEffort"] as? String, "high")
-        XCTAssertEqual(sonnet["defaultReasoningEffort"] as? String, "high")
-        XCTAssertEqual(sonnet["supportedReasoningEfforts"] as? [String], ["low", "medium", "high", "max"])
+        XCTAssertEqual(sonnet["reasoningEffort"] as? String, "xhigh")
+        XCTAssertEqual(sonnet["defaultReasoningEffort"] as? String, "xhigh")
+        XCTAssertEqual(sonnet["supportedReasoningEfforts"] as? [String], ["low", "medium", "high", "xhigh", "max"])
     }
 
     private func settingsEntry(id: String) -> [String: Any]? {

diff --git a/src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift b/src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift