NanmiCoder · moyu12-ae · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/.github/workflows/pr-quality.yml b/.github/workflows/pr-quality.yml
@@ -37,6 +37,9 @@ jobs:
         with:
           bun-version: latest
 
+      - name: Install dependencies
+        run: bun install
+
       - name: Run policy tests
         run: bun run check:policy
 

diff --git a/src/constants/system.ts b/src/constants/system.ts
@@ -47,13 +47,14 @@ export function getCLISyspromptPrefix(options?: {
 
 /**
  * Check if attribution header is enabled.
- * Enabled by default, can be disabled via env var or GrowthBook killswitch.
+ * Disabled by default to preserve prompt cache stability; only the GrowthBook flag can enable it.
+ * A falsy CLAUDE_CODE_ATTRIBUTION_HEADER env var forces it off regardless of the flag.
  */
 function isAttributionHeaderEnabled(): boolean {
   if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)) {
     return false
   }
-  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', true)
+  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', false)
 }
 
 /**

diff --git a/src/query.ts b/src/query.ts
@@ -7,8 +7,13 @@ import type { CanUseToolFn } from './hooks/useCanUseTool.js'
 import { FallbackTriggeredError } from './services/api/withRetry.js'
 import {
   calculateTokenWarningState,
+  COMPACT_PRECHECK_FOLD_RATIO,
+  estimateTurnStartUsage,
+  getEffectiveContextWindowSize,
   isAutoCompactEnabled,
+  shouldPreFold,
   type AutoCompactTrackingState,
+  type CacheMetrics,
 } from './services/compact/autoCompact.js'
 import { buildPostCompactMessages } from './services/compact/compact.js'
 /* eslint-disable @typescript-eslint/no-require-imports */
@@ -452,7 +457,47 @@ async function* queryLoop(
     )
 
     queryCheckpoint('query_autocompact_start')
-    const { compactionResult, consecutiveFailures } = await deps.autocompact(
+
+    // Turn-start pre-estimation: check whether accumulated context from the
+    // last turn has pushed us into dangerous territory BEFORE the next API
+    // call. When the 90% threshold is crossed and we haven't already folded
+    // this turn, force a pre-fold via the existing autocompact pipeline.
+    let forcePreFold = false
+    if (feature('TURN_START_PRE_ESTIMATION')) {
+      const effectiveWindow = getEffectiveContextWindowSize(
+        toolUseContext.options.mainLoopModel,
+      )
+      const { ratio, estimateTokens } = estimateTurnStartUsage(
+        messagesForQuery,
+        effectiveWindow,
+      )
+      if (
+        shouldPreFold(tracking, estimateTokens, effectiveWindow)
+      ) {
+        forcePreFold = true
+        logForDebugging(
+          `turnStartPreEstimate: context at ${(ratio * 100).toFixed(1)}% ` +
+          `(~${estimateTokens.toLocaleString()} tokens) — forcing pre-fold before API call`,
+          { level: 'warn' },
+        )
+        logEvent('tengu_turn_start_prefold_triggered', {
+          estimatedTokens: estimateTokens,
+          ratio: Math.round(ratio * 100),
+        })
+      } else if (ratio >= COMPACT_PRECHECK_FOLD_RATIO) {
+        // Above threshold but suppressed by alreadyFoldedThisTurn
+        logForDebugging(
+          `turnStartPreEstimate: context at ${(ratio * 100).toFixed(1)}% ` +
+          `but pre-fold suppressed (already folded this turn)`,
+        )
+      }
+    }
+
+    const {
+      compactionResult,
+      consecutiveFailures,
+      cacheMetrics,
+    } = await deps.autocompact(
       messagesForQuery,
       toolUseContext,
       {
@@ -465,9 +510,19 @@ async function* queryLoop(
       querySource,
       tracking,
       snipTokensFreed,
+      forcePreFold,
     )
     queryCheckpoint('query_autocompact_end')
 
+    // Debug-log the cache hit/miss figures returned by autocompact, when present.
+    if (cacheMetrics) {
+      logForDebugging(
+        `cacheMetrics.compaction: hit=${cacheMetrics.cacheHitTokens.toLocaleString()} ` +
+        `miss=${cacheMetrics.cacheMissTokens.toLocaleString()} ` +
+        `ratio=${(cacheMetrics.cacheHitRatio * 100).toFixed(1)}%`,
+      )
+    }
+
     if (compactionResult) {
       const {
         preCompactTokenCount,
@@ -491,6 +546,7 @@ async function* queryLoop(
           compactionUsage?.cache_read_input_tokens ?? 0,
         compactionCacheCreationTokens:
           compactionUsage?.cache_creation_input_tokens ?? 0,
+        compactionCacheHitRatio: cacheMetrics?.cacheHitRatio ?? 0,
         compactionTotalTokens: compactionUsage
           ? compactionUsage.input_tokens +
             (compactionUsage.cache_creation_input_tokens ?? 0) +

diff --git a/src/server/proxy/handler.ts b/src/server/proxy/handler.ts
@@ -19,6 +19,7 @@ import { openaiResponsesToAnthropic } from './transform/openaiResponsesToAnthrop
 import { openaiChatStreamToAnthropic } from './streaming/openaiChatStreamToAnthropic.js'
 import { openaiResponsesStreamToAnthropic } from './streaming/openaiResponsesStreamToAnthropic.js'
 import type { AnthropicRequest } from './transform/types.js'
+import { normalizeModelStringForAPI } from '../../utils/model/model.js'
 import { getProxyFetchOptions } from '../../utils/proxy.js'
 import { getManualNetworkProxyUrl, loadNetworkSettings } from '../services/networkSettings.js'
 
@@ -128,6 +129,9 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
   }
 
   body = ensureClaudeCodeAttribution(body)
+  // Strip [1m]/[2m] suffix before forwarding to third-party APIs —
+  // third-party providers don't understand the context-window suffix convention.
+  body.model = normalizeModelStringForAPI(body.model)
 
   const isStream = body.stream === true
   const baseUrl = config.baseUrl.replace(/\/+$/, '')

diff --git a/src/server/services/providerService.ts b/src/server/services/providerService.ts
@@ -21,6 +21,7 @@ import {
   OPENAI_OFFICIAL_PROVIDER,
   isOpenAIOfficialProviderId,
 } from './openaiOfficialProvider.js'
+import { normalizeModelStringForAPI } from '../../utils/model/model.js'
 import { hahaOpenAIOAuthService } from './hahaOpenAIOAuthService.js'
 import {
   CURRENT_PROVIDER_INDEX_SCHEMA_VERSION,
@@ -477,9 +478,10 @@ export class ProviderService {
     authStrategy: ProviderAuthStrategy,
     networkSettings: NetworkSettings,
   ): Promise<ProviderTestStepResult> {
+    const normalizedModelId = normalizeModelStringForAPI(modelId)
     const start = Date.now()
     try {
-      const { url, headers, body } = buildDirectTestRequest(base, apiKey, modelId, format, authStrategy)
+      const { url, headers, body } = buildDirectTestRequest(base, apiKey, normalizedModelId, format, authStrategy)
       const proxyOptions = getProxyFetchOptions({ proxyUrl: getManualNetworkProxyUrl(networkSettings) })
       const response = await fetch(url, {
         method: 'POST',
@@ -497,22 +499,22 @@ export class ProviderService {
         if (resBody?.error && typeof resBody.error === 'object') {
           error = ((resBody.error as Record<string, unknown>).message as string) || error
         }
-        return { success: false, latencyMs, error, modelUsed: modelId, httpStatus: response.status }
+        return { success: false, latencyMs, error, modelUsed: normalizedModelId, httpStatus: response.status }
       }
 
       // Validate response structure
       const valid = validateResponseBody(resBody, format)
       if (!valid.ok) {
-        return { success: false, latencyMs, error: valid.error, modelUsed: modelId, httpStatus: response.status }
+        return { success: false, latencyMs, error: valid.error, modelUsed: normalizedModelId, httpStatus: response.status }
       }
 
-      return { success: true, latencyMs, modelUsed: valid.model || modelId, httpStatus: response.status }
+      return { success: true, latencyMs, modelUsed: valid.model || normalizedModelId, httpStatus: response.status }
     } catch (err: unknown) {
       const latencyMs = Date.now() - start
       if (err instanceof DOMException && err.name === 'TimeoutError') {
-        return { success: false, latencyMs, error: `Request timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: modelId }
+        return { success: false, latencyMs, error: `Request timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: normalizedModelId }
       }
-      return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: modelId }
+      return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: normalizedModelId }
     }
   }
 
@@ -524,11 +526,12 @@ export class ProviderService {
     format: 'openai_chat' | 'openai_responses',
     networkSettings: NetworkSettings,
   ): Promise<ProviderTestStepResult> {
+    const normalizedModelId = normalizeModelStringForAPI(modelId)
     const start = Date.now()
     try {
       // Build an Anthropic Messages API request (same shape as what CLI sends)
       const anthropicReq: AnthropicRequest = {
-        model: modelId,
+        model: normalizedModelId,
         max_tokens: 64,
         messages: [{ role: 'user', content: 'Say "ok" and nothing else.' }],
       }
@@ -557,31 +560,31 @@ export class ProviderService {
       if (!response.ok) {
         const latencyMs = Date.now() - start
         const errText = await response.text().catch(() => '')
-        return { success: false, latencyMs, modelUsed: modelId, httpStatus: response.status,
+        return { success: false, latencyMs, modelUsed: normalizedModelId, httpStatus: response.status,
           error: `Upstream HTTP ${response.status}: ${errText.slice(0, 200)}` }
       }
 
       // Transform response back to Anthropic format
       const responseBody = await response.json()
       const anthropicRes = format === 'openai_chat'
-        ? openaiChatToAnthropic(responseBody, modelId)
-        : openaiResponsesToAnthropic(responseBody, modelId)
+        ? openaiChatToAnthropic(responseBody, normalizedModelId)
+        : openaiResponsesToAnthropic(responseBody, normalizedModelId)
 
       const latencyMs = Date.now() - start
 
       // Validate the final Anthropic response
       if (anthropicRes.type !== 'message' || !Array.isArray(anthropicRes.content)) {
-        return { success: false, latencyMs, modelUsed: modelId,
+        return { success: false, latencyMs, modelUsed: normalizedModelId,
           error: 'Proxy transform produced invalid Anthropic response' }
       }
 
-      return { success: true, latencyMs, modelUsed: anthropicRes.model || modelId, httpStatus: response.status }
+      return { success: true, latencyMs, modelUsed: anthropicRes.model || normalizedModelId, httpStatus: response.status }
     } catch (err: unknown) {
       const latencyMs = Date.now() - start
       if (err instanceof DOMException && err.name === 'TimeoutError') {
-        return { success: false, latencyMs, error: `Proxy pipeline timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: modelId }
+        return { success: false, latencyMs, error: `Proxy pipeline timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: normalizedModelId }
       }
-      return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: modelId }
+      return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: normalizedModelId }
     }
   }
 }