From 4256c6088acc35d0fd90836213549b910bf83f3e Mon Sep 17 00:00:00 2001
From: anand-92 <28579740+anand-92@users.noreply.github.com>
Date: Tue, 30 Jun 2026 14:24:45 -0400
Subject: [PATCH] Drop Sonnet 4.6, add Sonnet 5

Replace the Claude-provider Sonnet 4.6 model with Sonnet 5 (claude-sonnet-5),
exposing the full reasoning level set (low/medium/high/xhigh/max, default xhigh)
and a 128000-token output ceiling. Sonnet 5 supports output_config.effort:max
natively, so remove the Sonnet 4.6-only max-thinking workaround from
ThinkingProxy, along with its tests (ThinkingProxySonnetMaxThinkingTests).
---
 AGENTS.md                                     |  2 +-
 src/Sources/DroidProxyModelCatalog.swift      | 16 ++--
 src/Sources/ThinkingProxy.swift               | 85 -------------------
 .../DroidProxyModelCatalogTests.swift         | 15 ++--
 .../ThinkingProxySonnetMaxThinkingTests.swift | 60 -------------
 5 files changed, 13 insertions(+), 165 deletions(-)
 delete mode 100644 src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift

diff --git a/AGENTS.md b/AGENTS.md
index 2d8c57b..d10cf5f 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -70,7 +70,7 @@ What it still does today:
 
 What it no longer does (removed in the Droid-CLI-thinking refactor):
 
-- No Claude adaptive thinking injection (Opus 4.8 / Sonnet 4.6 — `thinking` + `output_config`)
+- No Claude adaptive thinking injection (Opus 4.8 / Sonnet 5 — `thinking` + `output_config`)
 - No classic `thinking.budget_tokens` injection
 - No Codex `reasoning.effort` injection
 - No Gemini `generationConfig.thinkingConfig` injection
diff --git a/src/Sources/DroidProxyModelCatalog.swift b/src/Sources/DroidProxyModelCatalog.swift
index 7f6e3e4..18c9c77 100644
--- a/src/Sources/DroidProxyModelCatalog.swift
+++ b/src/Sources/DroidProxyModelCatalog.swift
@@ -92,10 +92,6 @@ enum DroidProxyModelCatalog {
     private static let max = DroidProxyThinkingLevel(value: "max", displayName: "Max")
 
     private static let claudeAdvancedLevels = [low, medium, high, xhigh, max]
-    // Sonnet 4.6 exposes max in Droid's selector. Its adaptive thinking rejects
-    // output_config.effort:max upstream, so ThinkingProxy auto-converts a max
-    // request to classic extended thinking; lower efforts pass through adaptive.
-    private static let claudeSonnetLevels = [low, medium, high, max]
     private static let codexLevels = [low, medium, high, xhigh]
 
     private static func antigravityModel(
@@ -147,16 +143,16 @@ enum DroidProxyModelCatalog {
                 defaultLevelValue: "xhigh"
             ),
             DroidProxyModelDefinition(
-                baseModel: "claude-sonnet-4-6",
-                idSlug: "sonnet-4-6",
-                displayName: "Sonnet 4.6",
-                maxOutputTokens: 64000,
+                baseModel: "claude-sonnet-5",
+                idSlug: "sonnet-5",
+                displayName: "Sonnet 5",
+                maxOutputTokens: 128000,
                 provider: "anthropic",
                 providerKey: "claude",
                 baseURL: "http://localhost:8317",
                 kind: .claudeAdaptive,
-                levels: claudeSonnetLevels,
-                defaultLevelValue: "high"
+                levels: claudeAdvancedLevels,
+                defaultLevelValue: "xhigh"
             ),
 
             DroidProxyModelDefinition(
diff --git a/src/Sources/ThinkingProxy.swift b/src/Sources/ThinkingProxy.swift
index 438f4f2..ad2f2f5 100644
--- a/src/Sources/ThinkingProxy.swift
+++ b/src/Sources/ThinkingProxy.swift
@@ -11,9 +11,6 @@ import Network
    request enables thinking, so Claude emits visible thinking blocks.
  - Injects `service_tier: "priority"` for OpenAI Responses API requests on the user-enabled
    GPT 5.x fast-mode models (these toggles are independent of reasoning effort).
- - Converts Sonnet 4.6 to classic extended thinking (`thinking:{type:enabled,budget_tokens}`)
-   when Droid selects the `max` reasoning effort, since Sonnet's adaptive thinking rejects
-   `output_config.effort:max` upstream. Other effort levels pass through as adaptive.
  - Rewrites Gemini `/v1/responses` to `/v1/chat/completions` since the backend does not
    support Gemini via the Responses API endpoint.
 
@@ -318,10 +315,6 @@ class ThinkingProxy {
 
         if method == "POST" && !bodyString.isEmpty {
             ThinkingProxy.fileLog("INCOMING REQUEST: \(method) \(rewrittenPath)")
-            if let result = applySonnetMaxThinking(jsonString: modifiedBody, fields: requestFields) {
-                modifiedBody = result
-                requestFields = inspectRequestJSONFields(in: modifiedBody)
-            }
             if let result = rewriteAntigravityModelAlias(jsonString: modifiedBody, fields: requestFields) {
                 modifiedBody = result
                 requestFields = inspectRequestJSONFields(in: modifiedBody)
@@ -449,84 +442,6 @@ class ThinkingProxy {
         "cursor-composer-2.5": "composer-2.5"
     ]
 
-    // Sonnet 4.6 max-thinking. Sonnet's adaptive thinking rejects
-    // `output_config.effort:max` upstream (HTTP 400 "level max not supported"),
-    // so when Droid selects the `max` effort we convert the request to classic
-    // extended thinking (`thinking:{type:enabled,budget_tokens}`), which the
-    // backend accepts. Lower efforts pass through as adaptive untouched.
-    // budget_tokens must be strictly less than max_tokens, so we also pin
-    // max_tokens to the model's output ceiling.
-    private static let sonnetMaxThinkingModel = "claude-sonnet-4-6"
-    private static let sonnetMaxThinkingMaxTokens = 64000
-    private static let sonnetMaxThinkingBudgetTokens = 63999
-
-    /// Test entry point for the Sonnet 4.6 max-thinking transform.
-    static func applySonnetMaxThinking(in jsonString: String) -> String {
-        let proxy = ThinkingProxy()
-        let fields = proxy.inspectRequestJSONFields(in: jsonString)
-        return proxy.applySonnetMaxThinking(jsonString: jsonString, fields: fields) ?? jsonString
-    }
-
-    /// When a Sonnet 4.6 request asks for `output_config.effort == "max"`, rewrite
-    /// its `thinking` field to classic extended thinking and pin `max_tokens` so
-    /// the budget fits. Returns nil when the request is not Sonnet 4.6 or is not
-    /// requesting max effort, so the caller can skip re-inspection and lower
-    /// efforts forward unchanged. Edits are surgical (in-place value replacement /
-    /// single insertion) to preserve JSON key ordering, which Anthropic's prompt
-    /// cache is sensitive to.
-    private func applySonnetMaxThinking(jsonString: String, fields: RequestJSONFields?) -> String? {
-        guard fields?.model == Self.sonnetMaxThinkingModel,
-              sonnetRequestsMaxEffort(in: jsonString) else {
-            return nil
-        }
-
-        let thinkingValue = "{\"type\":\"enabled\",\"budget_tokens\":\(Self.sonnetMaxThinkingBudgetTokens)}"
-        var result = jsonString
-
-        if let thinkingLocation = fields?.thinkingLocation {
-            result.replaceSubrange(thinkingLocation.valueRange, with: thinkingValue)
-        } else if let modelLocation = fields?.modelLocation {
-            result.insert(contentsOf: ",\"thinking\":\(thinkingValue)", at: modelLocation.pairRange.upperBound)
-        } else {
-            return nil
-        }
-
-        // The edit above shifted indices, so re-scan to locate max_tokens/model.
-        result = pinSonnetMaxThinkingMaxTokens(in: result)
-
-        ThinkingProxy.fileLog("SONNET MAX THINKING: effort=max -> classic extended thinking budget_tokens=\(Self.sonnetMaxThinkingBudgetTokens) max_tokens=\(Self.sonnetMaxThinkingMaxTokens)")
-        return result
-    }
-
-    /// True when the request's `output_config.effort` is `"max"`. Scoped to the
-    /// Sonnet 4.6 path so non-Sonnet requests never pay for the extra
-    /// `output_config` scan (which would otherwise defeat the routing scan's
-    /// early-exit before the large `messages` array).
-    private func sonnetRequestsMaxEffort(in jsonString: String) -> Bool {
-        guard let outputConfig = findTopLevelFieldLocations(in: jsonString, keys: ["output_config"])?["output_config"],
-              let effort = objectStringField(in: jsonString, objectRange: outputConfig.valueRange, key: "effort")?.value else {
-            return false
-        }
-        return effort == "max"
-    }
-
-    /// Pins `max_tokens` to the Sonnet output ceiling so it stays strictly above
-    /// the thinking budget. Replaces the value in place if present, otherwise
-    /// injects it right after `model`.
-    private func pinSonnetMaxThinkingMaxTokens(in jsonString: String) -> String {
-        guard let locations = findTopLevelFieldLocations(in: jsonString, keys: ["max_tokens", "model"]) else {
-            return jsonString
-        }
-
-        var result = jsonString
-        if let maxTokensLocation = locations["max_tokens"] {
-            result.replaceSubrange(maxTokensLocation.valueRange, with: "\(Self.sonnetMaxThinkingMaxTokens)")
-        } else if let modelLocation = locations["model"] {
-            result.insert(contentsOf: ",\"max_tokens\":\(Self.sonnetMaxThinkingMaxTokens)", at: modelLocation.pairRange.upperBound)
-        }
-        return result
-    }
-
     private func rewriteAntigravityModelAlias(jsonString: String, fields: RequestJSONFields?) -> String? {
         guard let model = fields?.model,
               let modelLocation = fields?.modelLocation,
diff --git a/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift b/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift
index 87121f3..78cc964 100644
--- a/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift
+++ b/src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift
@@ -13,17 +13,14 @@ final class DroidProxyModelCatalogTests: XCTestCase {
         XCTAssertEqual(fable["maxOutputTokens"] as? Int, 128000)
     }
 
-    func testSonnet46UsesNativeModelIDAndExposesMax() throws {
-        let sonnet = try XCTUnwrap(settingsEntry(id: "custom:droidproxy:sonnet-4-6"))
+    func testSonnet5UsesNativeModelIDAndExposesFullLevels() throws {
+        let sonnet = try XCTUnwrap(settingsEntry(id: "custom:droidproxy:sonnet-5"))
 
-        // Sonnet 4.6 ships its native Anthropic model id (no proxy alias) and
-        // exposes max in Droid's selector. ThinkingProxy auto-converts a max
-        // request to classic extended thinking since adaptive rejects effort:max.
-        XCTAssertEqual(sonnet["model"] as? String, "claude-sonnet-4-6")
+        XCTAssertEqual(sonnet["model"] as? String, "claude-sonnet-5")
         XCTAssertEqual(sonnet["enableThinking"] as? Bool, true)
-        XCTAssertEqual(sonnet["reasoningEffort"] as? String, "high")
-        XCTAssertEqual(sonnet["defaultReasoningEffort"] as? String, "high")
-        XCTAssertEqual(sonnet["supportedReasoningEfforts"] as? [String], ["low", "medium", "high", "max"])
+        XCTAssertEqual(sonnet["reasoningEffort"] as? String, "xhigh")
+        XCTAssertEqual(sonnet["defaultReasoningEffort"] as? String, "xhigh")
+        XCTAssertEqual(sonnet["supportedReasoningEfforts"] as? [String], ["low", "medium", "high", "xhigh", "max"])
     }
 
     private func settingsEntry(id: String) -> [String: Any]? {
diff --git a/src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift b/src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift
deleted file mode 100644
index dce2ff8..0000000
--- a/src/Tests/CLIProxyMenuBarTests/ThinkingProxySonnetMaxThinkingTests.swift
+++ /dev/null
@@ -1,60 +0,0 @@
-import XCTest
-@testable import CLIProxyMenuBar
-
-final class ThinkingProxySonnetMaxThinkingTests: XCTestCase {
-    func testConvertsSonnetAdaptiveThinkingToClassicWhenEffortIsMax() {
-        let request = """
-        {"model":"claude-sonnet-4-6","max_tokens":8192,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"},"messages":[{"role":"user","content":"hi"}]}
-        """
-
-        let rewritten = ThinkingProxy.applySonnetMaxThinking(in: request)
-
-        // thinking becomes classic extended thinking and max_tokens is pinned so
-        // the budget fits; key ordering and other fields are preserved.
-        XCTAssertEqual(
-            rewritten,
-            """
-            {"model":"claude-sonnet-4-6","max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":63999},"output_config":{"effort":"max"},"messages":[{"role":"user","content":"hi"}]}
-            """
-        )
-    }
-
-    func testInjectsThinkingAndMaxTokensWhenAbsent() {
-        let request = """
-        {"model":"claude-sonnet-4-6","output_config":{"effort":"max"},"messages":[{"role":"user","content":"hi"}]}
-        """
-
-        let rewritten = ThinkingProxy.applySonnetMaxThinking(in: request)
-
-        XCTAssertEqual(
-            rewritten,
-            """
-            {"model":"claude-sonnet-4-6","max_tokens":64000,"thinking":{"type":"enabled","budget_tokens":63999},"output_config":{"effort":"max"},"messages":[{"role":"user","content":"hi"}]}
-            """
-        )
-    }
-
-    func testLeavesSonnetUnchangedForNonMaxEffort() {
-        let request = """
-        {"model":"claude-sonnet-4-6","thinking":{"type":"adaptive"},"output_config":{"effort":"high"}}
-        """
-
-        XCTAssertEqual(ThinkingProxy.applySonnetMaxThinking(in: request), request)
-    }
-
-    func testLeavesSonnetUnchangedWhenNoOutputConfig() {
-        let request = """
-        {"model":"claude-sonnet-4-6","thinking":{"type":"adaptive"}}
-        """
-
-        XCTAssertEqual(ThinkingProxy.applySonnetMaxThinking(in: request), request)
-    }
-
-    func testLeavesNonSonnetModelUnchangedEvenAtMaxEffort() {
-        let request = """
-        {"model":"claude-opus-4-8","thinking":{"type":"adaptive"},"output_config":{"effort":"max"}}
-        """
-
-        XCTAssertEqual(ThinkingProxy.applySonnetMaxThinking(in: request), request)
-    }
-}