Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ What it still does today:

What it no longer does (removed in the Droid-CLI-thinking refactor):

- No Claude adaptive thinking injection (Opus 4.8 / Sonnet 4.6 — `thinking` + `output_config`)
- No Claude adaptive thinking injection (Opus 4.8 / Sonnet 5 — `thinking` + `output_config`)
- No classic `thinking.budget_tokens` injection
- No Codex `reasoning.effort` injection
- No Gemini `generationConfig.thinkingConfig` injection
Expand Down
16 changes: 6 additions & 10 deletions src/Sources/DroidProxyModelCatalog.swift
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,6 @@ enum DroidProxyModelCatalog {
private static let max = DroidProxyThinkingLevel(value: "max", displayName: "Max")

private static let claudeAdvancedLevels = [low, medium, high, xhigh, max]
// Sonnet 4.6 exposes max in Droid's selector. Its adaptive thinking rejects
// output_config.effort:max upstream, so ThinkingProxy auto-converts a max
// request to classic extended thinking; lower efforts pass through adaptive.
private static let claudeSonnetLevels = [low, medium, high, max]
private static let codexLevels = [low, medium, high, xhigh]

private static func antigravityModel(
Expand Down Expand Up @@ -147,16 +143,16 @@ enum DroidProxyModelCatalog {
defaultLevelValue: "xhigh"
),
DroidProxyModelDefinition(
baseModel: "claude-sonnet-4-6",
idSlug: "sonnet-4-6",
displayName: "Sonnet 4.6",
maxOutputTokens: 64000,
baseModel: "claude-sonnet-5",
idSlug: "sonnet-5",
displayName: "Sonnet 5",
maxOutputTokens: 128000,
provider: "anthropic",
providerKey: "claude",
baseURL: "http://localhost:8317",
kind: .claudeAdaptive,
levels: claudeSonnetLevels,
defaultLevelValue: "high"
levels: claudeAdvancedLevels,
defaultLevelValue: "xhigh"
),

DroidProxyModelDefinition(
Expand Down
85 changes: 0 additions & 85 deletions src/Sources/ThinkingProxy.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ import Network
request enables thinking, so Claude emits visible thinking blocks.
- Injects `service_tier: "priority"` for OpenAI Responses API requests on the user-enabled
GPT 5.x fast-mode models (these toggles are independent of reasoning effort).
- Converts Sonnet 4.6 to classic extended thinking (`thinking:{type:enabled,budget_tokens}`)
when Droid selects the `max` reasoning effort, since Sonnet's adaptive thinking rejects
`output_config.effort:max` upstream. Other effort levels pass through as adaptive.
- Rewrites Gemini `/v1/responses` to `/v1/chat/completions` since the backend does not
support Gemini via the Responses API endpoint.

Expand Down Expand Up @@ -318,10 +315,6 @@ class ThinkingProxy {

if method == "POST" && !bodyString.isEmpty {
ThinkingProxy.fileLog("INCOMING REQUEST: \(method) \(rewrittenPath)")
if let result = applySonnetMaxThinking(jsonString: modifiedBody, fields: requestFields) {
modifiedBody = result
requestFields = inspectRequestJSONFields(in: modifiedBody)
}
if let result = rewriteAntigravityModelAlias(jsonString: modifiedBody, fields: requestFields) {
modifiedBody = result
requestFields = inspectRequestJSONFields(in: modifiedBody)
Expand Down Expand Up @@ -449,84 +442,6 @@ class ThinkingProxy {
"cursor-composer-2.5": "composer-2.5"
]

// Sonnet 4.6 max-thinking. Sonnet's adaptive thinking rejects
// `output_config.effort:max` upstream (HTTP 400 "level max not supported"),
// so when Droid selects the `max` effort we convert the request to classic
// extended thinking (`thinking:{type:enabled,budget_tokens}`), which the
// backend accepts. Lower efforts pass through as adaptive untouched.
// budget_tokens must be strictly less than max_tokens, so we also pin
// max_tokens to the model's output ceiling.
// Model id a request must carry for the max-thinking transform to apply.
private static let sonnetMaxThinkingModel = "claude-sonnet-4-6"
// Value `max_tokens` is pinned to on converted requests.
private static let sonnetMaxThinkingMaxTokens = 64000
// Thinking budget written into `budget_tokens`; kept one below
// sonnetMaxThinkingMaxTokens so it stays strictly less than `max_tokens`.
private static let sonnetMaxThinkingBudgetTokens = 63999

/// Test-facing wrapper around the Sonnet 4.6 max-thinking transform.
///
/// Creates a fresh `ThinkingProxy`, inspects `jsonString` for its request
/// fields, and runs the instance-level transform on it.
/// - Parameter jsonString: Raw JSON request body.
/// - Returns: The rewritten body, or `jsonString` unchanged when the
///   transform returns nil.
static func applySonnetMaxThinking(in jsonString: String) -> String {
    let instance = ThinkingProxy()
    let inspected = instance.inspectRequestJSONFields(in: jsonString)
    guard let rewritten = instance.applySonnetMaxThinking(jsonString: jsonString, fields: inspected) else {
        return jsonString
    }
    return rewritten
}

/// Converts a Sonnet 4.6 request that asks for `output_config.effort == "max"`
/// into a classic extended-thinking request.
///
/// The `thinking` value is replaced in place when the field exists, otherwise
/// a `thinking` pair is inserted directly after the `model` pair. `max_tokens`
/// is then pinned so the thinking budget fits beneath it. Edits are surgical
/// (in-place value replacement / single insertion) so JSON key ordering is
/// preserved, which Anthropic's prompt cache is sensitive to.
///
/// - Parameters:
///   - jsonString: Raw JSON request body.
///   - fields: Previously inspected request fields for `jsonString`.
/// - Returns: The rewritten body, or nil when the request is not Sonnet 4.6,
///   is not requesting max effort, or offers neither a `thinking` nor a
///   `model` location to edit. A nil result lets the caller skip re-inspection
///   and forward lower efforts unchanged.
private func applySonnetMaxThinking(jsonString: String, fields: RequestJSONFields?) -> String? {
    // Check the model first so other models never trigger the output_config scan.
    guard fields?.model == Self.sonnetMaxThinkingModel else { return nil }
    guard sonnetRequestsMaxEffort(in: jsonString) else { return nil }

    let budget = Self.sonnetMaxThinkingBudgetTokens
    let ceiling = Self.sonnetMaxThinkingMaxTokens
    let classicThinking = "{\"type\":\"enabled\",\"budget_tokens\":\(budget)}"
    var rewritten = jsonString

    if let existing = fields?.thinkingLocation {
        rewritten.replaceSubrange(existing.valueRange, with: classicThinking)
    } else {
        guard let anchor = fields?.modelLocation else { return nil }
        rewritten.insert(contentsOf: ",\"thinking\":\(classicThinking)", at: anchor.pairRange.upperBound)
    }

    // The edit above moved indices, so the pinning step re-scans the new
    // string to find max_tokens/model.
    rewritten = pinSonnetMaxThinkingMaxTokens(in: rewritten)

    ThinkingProxy.fileLog("SONNET MAX THINKING: effort=max -> classic extended thinking budget_tokens=\(budget) max_tokens=\(ceiling)")
    return rewritten
}

/// Reports whether the request's top-level `output_config.effort` equals `"max"`.
///
/// Only called on the Sonnet 4.6 path, so non-Sonnet requests never pay for
/// the extra `output_config` scan (which would otherwise defeat the routing
/// scan's early-exit before the large `messages` array).
/// - Parameter jsonString: Raw JSON request body.
/// - Returns: `true` only when `output_config` is found and its `effort`
///   string is `"max"`; `false` in every other case.
private func sonnetRequestsMaxEffort(in jsonString: String) -> Bool {
    let topLevel = findTopLevelFieldLocations(in: jsonString, keys: ["output_config"])
    guard let configLocation = topLevel?["output_config"] else {
        return false
    }
    let effortField = objectStringField(in: jsonString, objectRange: configLocation.valueRange, key: "effort")
    return effortField?.value == "max"
}
/// Forces `max_tokens` to the Sonnet output ceiling so it stays strictly above
/// the thinking budget.
///
/// An existing `max_tokens` value is overwritten in place; when the field is
/// absent, a `max_tokens` pair is inserted right after the `model` pair.
/// - Parameter jsonString: JSON request body.
/// - Returns: The adjusted body, or `jsonString` unchanged when the top-level
///   scan fails or finds neither `max_tokens` nor `model`.
private func pinSonnetMaxThinkingMaxTokens(in jsonString: String) -> String {
    let ceiling = "\(Self.sonnetMaxThinkingMaxTokens)"
    guard let found = findTopLevelFieldLocations(in: jsonString, keys: ["max_tokens", "model"]) else {
        return jsonString
    }

    var pinned = jsonString
    if let existing = found["max_tokens"] {
        pinned.replaceSubrange(existing.valueRange, with: ceiling)
        return pinned
    }
    if let anchor = found["model"] {
        pinned.insert(contentsOf: ",\"max_tokens\":" + ceiling, at: anchor.pairRange.upperBound)
    }
    return pinned
}

private func rewriteAntigravityModelAlias(jsonString: String, fields: RequestJSONFields?) -> String? {
guard let model = fields?.model,
let modelLocation = fields?.modelLocation,
Expand Down
15 changes: 6 additions & 9 deletions src/Tests/CLIProxyMenuBarTests/DroidProxyModelCatalogTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,14 @@ final class DroidProxyModelCatalogTests: XCTestCase {
XCTAssertEqual(fable["maxOutputTokens"] as? Int, 128000)
}

func testSonnet46UsesNativeModelIDAndExposesMax() throws {
let sonnet = try XCTUnwrap(settingsEntry(id: "custom:droidproxy:sonnet-4-6"))
func testSonnet5UsesNativeModelIDAndExposesFullLevels() throws {
let sonnet = try XCTUnwrap(settingsEntry(id: "custom:droidproxy:sonnet-5"))

// Sonnet 4.6 ships its native Anthropic model id (no proxy alias) and
// exposes max in Droid's selector. ThinkingProxy auto-converts a max
// request to classic extended thinking since adaptive rejects effort:max.
XCTAssertEqual(sonnet["model"] as? String, "claude-sonnet-4-6")
XCTAssertEqual(sonnet["model"] as? String, "claude-sonnet-5")
XCTAssertEqual(sonnet["enableThinking"] as? Bool, true)
XCTAssertEqual(sonnet["reasoningEffort"] as? String, "high")
XCTAssertEqual(sonnet["defaultReasoningEffort"] as? String, "high")
XCTAssertEqual(sonnet["supportedReasoningEfforts"] as? [String], ["low", "medium", "high", "max"])
XCTAssertEqual(sonnet["reasoningEffort"] as? String, "xhigh")
XCTAssertEqual(sonnet["defaultReasoningEffort"] as? String, "xhigh")
XCTAssertEqual(sonnet["supportedReasoningEfforts"] as? [String], ["low", "medium", "high", "xhigh", "max"])
}

private func settingsEntry(id: String) -> [String: Any]? {
Expand Down

This file was deleted.

Loading