diff --git a/.github/workflows/pr-quality.yml b/.github/workflows/pr-quality.yml
index 28ce016a2..78d143341 100644
--- a/.github/workflows/pr-quality.yml
+++ b/.github/workflows/pr-quality.yml
@@ -37,6 +37,9 @@ jobs:
         with:
           bun-version: latest
 
+      - name: Install dependencies
+        run: bun install
+
       - name: Run policy tests
         run: bun run check:policy
 
diff --git a/src/constants/system.ts b/src/constants/system.ts
index d6d3efe13..7122c6bfb 100644
--- a/src/constants/system.ts
+++ b/src/constants/system.ts
@@ -47,13 +47,14 @@ export function getCLISyspromptPrefix(options?: {
 
 /**
  * Check if attribution header is enabled.
- * Enabled by default, can be disabled via env var or GrowthBook killswitch.
+ * Disabled by default to preserve prompt cache stability.
+ * Enabled only by the GrowthBook feature flag; a falsy CLAUDE_CODE_ATTRIBUTION_HEADER env var force-disables it.
  */
 function isAttributionHeaderEnabled(): boolean {
   if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_ATTRIBUTION_HEADER)) {
     return false
   }
-  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', true)
+  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', false)
 }
 
 /**
diff --git a/src/query.ts b/src/query.ts
index ccd17a0ff..085a00154 100644
--- a/src/query.ts
+++ b/src/query.ts
@@ -7,8 +7,13 @@ import type { CanUseToolFn } from './hooks/useCanUseTool.js'
 import { FallbackTriggeredError } from './services/api/withRetry.js'
 import {
   calculateTokenWarningState,
+  COMPACT_PRECHECK_FOLD_RATIO,
+  estimateTurnStartUsage,
+  getEffectiveContextWindowSize,
   isAutoCompactEnabled,
+  shouldPreFold,
   type AutoCompactTrackingState,
+  type CacheMetrics,
 } from './services/compact/autoCompact.js'
 import { buildPostCompactMessages } from './services/compact/compact.js'
 /* eslint-disable @typescript-eslint/no-require-imports */
@@ -452,7 +457,47 @@ async function* queryLoop(
     )
 
     queryCheckpoint('query_autocompact_start')
-    const { compactionResult, consecutiveFailures } = await deps.autocompact(
+
+    // Turn-start pre-estimation: check whether accumulated context from the
+    // last turn has pushed us into dangerous territory BEFORE the next API
+    // call. When the 90% threshold is crossed and we haven't already folded
+    // this turn, force a pre-fold via the existing autocompact pipeline.
+    let forcePreFold = false
+    if (feature('TURN_START_PRE_ESTIMATION')) {
+      const effectiveWindow = getEffectiveContextWindowSize(
+        toolUseContext.options.mainLoopModel,
+      )
+      const { ratio, estimateTokens } = estimateTurnStartUsage(
+        messagesForQuery,
+        effectiveWindow,
+      )
+      if (
+        shouldPreFold(tracking, estimateTokens, effectiveWindow)
+      ) {
+        forcePreFold = true
+        logForDebugging(
+          `turnStartPreEstimate: context at ${(ratio * 100).toFixed(1)}% ` +
+          `(~${estimateTokens.toLocaleString()} tokens) — forcing pre-fold before API call`,
+          { level: 'warn' },
+        )
+        logEvent('tengu_turn_start_prefold_triggered', {
+          estimatedTokens: estimateTokens,
+          ratio: Math.round(ratio * 100),
+        })
+      } else if (ratio >= COMPACT_PRECHECK_FOLD_RATIO) {
+        // Above threshold but suppressed by alreadyFoldedThisTurn
+        logForDebugging(
+          `turnStartPreEstimate: context at ${(ratio * 100).toFixed(1)}% ` +
+          `but pre-fold suppressed (already folded this turn)`,
+        )
+      }
+    }
+
+    const {
+      compactionResult,
+      consecutiveFailures,
+      cacheMetrics,
+    } = await deps.autocompact(
       messagesForQuery,
       toolUseContext,
       {
@@ -465,9 +510,19 @@ async function* queryLoop(
       querySource,
       tracking,
       snipTokensFreed,
+      forcePreFold,
     )
     queryCheckpoint('query_autocompact_end')
 
+    // Surface the compaction call's cache efficiency in debug logs.
+    if (cacheMetrics) {
+      logForDebugging(
+        `cacheMetrics.compaction: hit=${cacheMetrics.cacheHitTokens.toLocaleString()} ` +
+        `miss=${cacheMetrics.cacheMissTokens.toLocaleString()} ` +
+        `ratio=${(cacheMetrics.cacheHitRatio * 100).toFixed(1)}%`,
+      )
+    }
+
     if (compactionResult) {
       const {
         preCompactTokenCount,
@@ -491,6 +546,7 @@ async function* queryLoop(
           compactionUsage?.cache_read_input_tokens ?? 0,
         compactionCacheCreationTokens:
           compactionUsage?.cache_creation_input_tokens ?? 0,
+        compactionCacheHitRatio: cacheMetrics?.cacheHitRatio ?? 0,
         compactionTotalTokens: compactionUsage
           ? compactionUsage.input_tokens +
             (compactionUsage.cache_creation_input_tokens ?? 0) +
diff --git a/src/server/proxy/handler.ts b/src/server/proxy/handler.ts
index cb0fc7e48..75f6a76f6 100644
--- a/src/server/proxy/handler.ts
+++ b/src/server/proxy/handler.ts
@@ -19,6 +19,7 @@ import { openaiResponsesToAnthropic } from './transform/openaiResponsesToAnthrop
 import { openaiChatStreamToAnthropic } from './streaming/openaiChatStreamToAnthropic.js'
 import { openaiResponsesStreamToAnthropic } from './streaming/openaiResponsesStreamToAnthropic.js'
 import type { AnthropicRequest } from './transform/types.js'
+import { normalizeModelStringForAPI } from '../../utils/model/model.js'
 import { getProxyFetchOptions } from '../../utils/proxy.js'
 import { getManualNetworkProxyUrl, loadNetworkSettings } from '../services/networkSettings.js'
 
@@ -128,6 +129,9 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
   }
 
   body = ensureClaudeCodeAttribution(body)
+  // Strip [1m]/[2m] suffix before forwarding to third-party APIs —
+  // third-party providers don't understand the context-window suffix convention.
+  body.model = normalizeModelStringForAPI(body.model)
 
   const isStream = body.stream === true
   const baseUrl = config.baseUrl.replace(/\/+$/, '')
diff --git a/src/server/services/providerService.ts b/src/server/services/providerService.ts
index 940c069ce..1891294ea 100644
--- a/src/server/services/providerService.ts
+++ b/src/server/services/providerService.ts
@@ -21,6 +21,7 @@ import {
   OPENAI_OFFICIAL_PROVIDER,
   isOpenAIOfficialProviderId,
 } from './openaiOfficialProvider.js'
+import { normalizeModelStringForAPI } from '../../utils/model/model.js'
 import { hahaOpenAIOAuthService } from './hahaOpenAIOAuthService.js'
 import {
   CURRENT_PROVIDER_INDEX_SCHEMA_VERSION,
@@ -477,9 +478,10 @@ export class ProviderService {
     authStrategy: ProviderAuthStrategy,
     networkSettings: NetworkSettings,
   ): Promise<ProviderTestStepResult> {
+    const normalizedModelId = normalizeModelStringForAPI(modelId)
     const start = Date.now()
     try {
-      const { url, headers, body } = buildDirectTestRequest(base, apiKey, modelId, format, authStrategy)
+      const { url, headers, body } = buildDirectTestRequest(base, apiKey, normalizedModelId, format, authStrategy)
       const proxyOptions = getProxyFetchOptions({ proxyUrl: getManualNetworkProxyUrl(networkSettings) })
       const response = await fetch(url, {
         method: 'POST',
@@ -497,22 +499,22 @@ export class ProviderService {
         if (resBody?.error && typeof resBody.error === 'object') {
           error = ((resBody.error as Record<string, unknown>).message as string) || error
         }
-        return { success: false, latencyMs, error, modelUsed: modelId, httpStatus: response.status }
+        return { success: false, latencyMs, error, modelUsed: normalizedModelId, httpStatus: response.status }
       }
 
       // Validate response structure
       const valid = validateResponseBody(resBody, format)
       if (!valid.ok) {
-        return { success: false, latencyMs, error: valid.error, modelUsed: modelId, httpStatus: response.status }
+        return { success: false, latencyMs, error: valid.error, modelUsed: normalizedModelId, httpStatus: response.status }
       }
 
-      return { success: true, latencyMs, modelUsed: valid.model || modelId, httpStatus: response.status }
+      return { success: true, latencyMs, modelUsed: valid.model || normalizedModelId, httpStatus: response.status }
     } catch (err: unknown) {
       const latencyMs = Date.now() - start
       if (err instanceof DOMException && err.name === 'TimeoutError') {
-        return { success: false, latencyMs, error: `Request timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: modelId }
+        return { success: false, latencyMs, error: `Request timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: normalizedModelId }
       }
-      return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: modelId }
+      return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: normalizedModelId }
     }
   }
 
@@ -524,11 +526,12 @@ export class ProviderService {
     format: 'openai_chat' | 'openai_responses',
     networkSettings: NetworkSettings,
   ): Promise<ProviderTestStepResult> {
+    const normalizedModelId = normalizeModelStringForAPI(modelId)
     const start = Date.now()
     try {
       // Build an Anthropic Messages API request (same shape as what CLI sends)
       const anthropicReq: AnthropicRequest = {
-        model: modelId,
+        model: normalizedModelId,
         max_tokens: 64,
         messages: [{ role: 'user', content: 'Say "ok" and nothing else.' }],
       }
@@ -557,31 +560,31 @@ export class ProviderService {
       if (!response.ok) {
         const latencyMs = Date.now() - start
         const errText = await response.text().catch(() => '')
-        return { success: false, latencyMs, modelUsed: modelId, httpStatus: response.status,
+        return { success: false, latencyMs, modelUsed: normalizedModelId, httpStatus: response.status,
           error: `Upstream HTTP ${response.status}: ${errText.slice(0, 200)}` }
       }
 
       // Transform response back to Anthropic format
       const responseBody = await response.json()
       const anthropicRes = format === 'openai_chat'
-        ? openaiChatToAnthropic(responseBody, modelId)
-        : openaiResponsesToAnthropic(responseBody, modelId)
+        ? openaiChatToAnthropic(responseBody, normalizedModelId)
+        : openaiResponsesToAnthropic(responseBody, normalizedModelId)
 
       const latencyMs = Date.now() - start
 
       // Validate the final Anthropic response
       if (anthropicRes.type !== 'message' || !Array.isArray(anthropicRes.content)) {
-        return { success: false, latencyMs, modelUsed: modelId,
+        return { success: false, latencyMs, modelUsed: normalizedModelId,
           error: 'Proxy transform produced invalid Anthropic response' }
       }
 
-      return { success: true, latencyMs, modelUsed: anthropicRes.model || modelId, httpStatus: response.status }
+      return { success: true, latencyMs, modelUsed: anthropicRes.model || normalizedModelId, httpStatus: response.status }
     } catch (err: unknown) {
       const latencyMs = Date.now() - start
       if (err instanceof DOMException && err.name === 'TimeoutError') {
-        return { success: false, latencyMs, error: `Proxy pipeline timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: modelId }
+        return { success: false, latencyMs, error: `Proxy pipeline timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: normalizedModelId }
       }
-      return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: modelId }
+      return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: normalizedModelId }
     }
   }
 }
diff --git a/src/services/compact/autoCompact.ts b/src/services/compact/autoCompact.ts
index 8c0a786d7..aeb67d8ca 100644
--- a/src/services/compact/autoCompact.ts
+++ b/src/services/compact/autoCompact.ts
@@ -57,6 +57,10 @@ export type AutoCompactTrackingState = {
   // Used as a circuit breaker to stop retrying when the context is
   // irrecoverably over the limit (e.g., prompt_too_long).
   consecutiveFailures?: number
+  // True when compaction has already run this turn (prevents double-fold).
+  // Mirrors Reasonix's alreadyFoldedThisTurn — the post-response check
+  // should not trigger a second fold if the pre-check already folded.
+  alreadyFoldedThisTurn?: boolean
 }
 
 export const AUTOCOMPACT_BUFFER_TOKENS = 13_000
@@ -64,6 +68,56 @@ export const WARNING_THRESHOLD_BUFFER_TOKENS = 20_000
 export const ERROR_THRESHOLD_BUFFER_TOKENS = 20_000
 export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000
 
+// ---------------------------------------------------------------------------
+// Percentage-based multi-level compaction thresholds (supplement, not replace)
+//
+// The fixed-buffer threshold (effectiveWindow - 13_000) is the "final defense"
+// at ~93-98% of the window. These percentage thresholds provide earlier,
+// gentler interventions that work well across all context window sizes
+// (200K through 1M+).
+// ---------------------------------------------------------------------------
+
+/** Normal fold: compact older messages, keep 20% of context window as tail budget */
+export const COMPACT_NORMAL_FOLD_RATIO = 0.75
+export const COMPACT_NORMAL_FOLD_TAIL_RATIO = 0.20
+
+/** Aggressive fold: compact harder, keep 10% of context window as tail budget */
+export const COMPACT_AGGRESSIVE_FOLD_RATIO = 0.78
+export const COMPACT_AGGRESSIVE_FOLD_TAIL_RATIO = 0.10
+
+/** Force summary exit: stop the agent with a summary — no more room for folds */
+export const COMPACT_FORCE_SUMMARY_RATIO = 0.80
+
+/** Turn-start pre-fold: pre-check before the API call (used by needsTurnStartPreFold) */
+export const COMPACT_PRECHECK_FOLD_RATIO = 0.90
+
+/**
+ * Compaction levels ordered by severity.
+ *  - none / turn_start_prefold are soft checks
+ *  - normal_fold / aggressive_fold are actual compactions with tail budgets
+ *  - force_summary / fixed_buffer are hard exits (no more folds)
+ */
+export type CompactionLevel =
+  | 'none'
+  | 'turn_start_prefold'
+  | 'normal_fold'
+  | 'aggressive_fold'
+  | 'force_summary'
+  | 'fixed_buffer'
+
+export type CompactionLevelResult = {
+  level: CompactionLevel
+  /** Token budget for the recent tail when level is normal_fold or aggressive_fold */
+  tailBudgetTokens: number
+  effectiveWindow: number
+  fixedBufferThreshold: number
+}
+
+// Minimum fraction of context that must be in the compactable "head" portion
+// for compaction to be worthwhile. Prevents wasting a compact API call when
+// the savings are marginal (Reasonix reference: HISTORY_FOLD_MIN_SAVINGS_FRACTION).
+export const MIN_COMPACTION_SAVINGS_RATIO = 0.30
+
 // Stop trying autocompact after this many consecutive failures.
 // BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272)
 // in a single session, wasting ~250K API calls/day globally.
@@ -90,6 +144,251 @@ export function getAutoCompactThreshold(model: string): number {
   return autocompactThreshold
 }
 
+/**
+ * Gate for the multi-level percentage-based compaction feature.
+ * When disabled, the existing fixed-buffer behavior is unchanged.
+ */
+export function isPercentageCompactionEnabled(): boolean {
+  if (!isAutoCompactEnabled()) return false
+  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_multi_level_compact', true)
+}
+
+/**
+ * Determine the compaction level based on percentage thresholds AND the
+ * fixed-buffer threshold. Percentage thresholds act as earlier, gentler
+ * interventions; the fixed-buffer threshold is the "final defense."
+ *
+ * Checks in descending severity order so the most urgent level wins.
+ *
+ * @param tokenCount - current estimated token usage
+ * @param model - model name for context window lookup
+ */
+export function getCompactionLevel(
+  tokenCount: number,
+  model: string,
+): CompactionLevelResult {
+  const effectiveWindow = getEffectiveContextWindowSize(model)
+  const fixedBufferThreshold = effectiveWindow - AUTOCOMPACT_BUFFER_TOKENS
+
+  // Fixed buffer is the "final defense" — triggers closest to the window limit
+  if (tokenCount >= fixedBufferThreshold) {
+    return {
+      level: 'fixed_buffer',
+      tailBudgetTokens: Math.floor(effectiveWindow * 0.05),
+      effectiveWindow,
+      fixedBufferThreshold,
+    }
+  }
+
+  const forceSummaryThreshold = Math.floor(
+    effectiveWindow * COMPACT_FORCE_SUMMARY_RATIO,
+  )
+  if (tokenCount >= forceSummaryThreshold) {
+    return {
+      level: 'force_summary',
+      tailBudgetTokens: 0, // No tail — force exit
+      effectiveWindow,
+      fixedBufferThreshold,
+    }
+  }
+
+  const aggressiveFoldThreshold = Math.floor(
+    effectiveWindow * COMPACT_AGGRESSIVE_FOLD_RATIO,
+  )
+  if (tokenCount >= aggressiveFoldThreshold) {
+    return {
+      level: 'aggressive_fold',
+      tailBudgetTokens: Math.floor(
+        effectiveWindow * COMPACT_AGGRESSIVE_FOLD_TAIL_RATIO,
+      ),
+      effectiveWindow,
+      fixedBufferThreshold,
+    }
+  }
+
+  const normalFoldThreshold = Math.floor(
+    effectiveWindow * COMPACT_NORMAL_FOLD_RATIO,
+  )
+  if (tokenCount >= normalFoldThreshold) {
+    return {
+      level: 'normal_fold',
+      tailBudgetTokens: Math.floor(
+        effectiveWindow * COMPACT_NORMAL_FOLD_TAIL_RATIO,
+      ),
+      effectiveWindow,
+      fixedBufferThreshold,
+    }
+  }
+
+  return {
+    level: 'none',
+    tailBudgetTokens: effectiveWindow,
+    effectiveWindow,
+    fixedBufferThreshold,
+  }
+}
+
+/**
+ * Decide whether a compaction call is worth its cost.
+ *
+ * NOTE(review): this does not measure the compactable "head" of the
+ * conversation. It only compares total estimated usage against the window:
+ * the result is true when usage is at least MIN_COMPACTION_SAVINGS_RATIO of
+ * `effectiveWindow`, or when usage already meets/exceeds the window.
+ *
+ * @returns true if compaction is worthwhile, false to skip
+ */
+export function isCompactionWorthwhile(
+  estimatedTotalTokens: number,
+  effectiveWindow: number,
+): boolean {
+  // Emergency path: usage at or above the window always warrants compaction,
+  // regardless of the savings ratio.
+  if (estimatedTotalTokens >= effectiveWindow) return true
+
+  // Despite the name, this is the fraction of the window currently in use,
+  // not the share of tokens that summarization could free. A true head/tail
+  // split would need message-level information that is not passed in here.
+  const headFraction = estimatedTotalTokens / effectiveWindow
+
+  logForDebugging(
+    `compaction_savings_check: tokens=${estimatedTotalTokens} window=${effectiveWindow} ` +
+      `headFraction=${(headFraction * 100).toFixed(1)}% ` +
+      `minRequired=${(MIN_COMPACTION_SAVINGS_RATIO * 100).toFixed(0)}%`,
+  )
+
+  return headFraction >= MIN_COMPACTION_SAVINGS_RATIO
+}
+
+/**
+ * Turn-start usage estimate for the pre-fold check.
+ *
+ * Delegates to tokenCountWithEstimation(messages) and divides by
+ * `effectiveWindow`. No overhead for system prompt or tool schemas is added
+ * here; the ratio is reported as 0 when `effectiveWindow` is not positive.
+ *
+ * @returns the estimated token count for `messages` and its window ratio
+ */
+export function estimateTurnStartUsage(
+  messages: Message[],
+  effectiveWindow: number,
+): { estimateTokens: number; ratio: number } {
+  // Same estimator autoCompactIfNeeded uses for its own token counts
+  const dynamicTokens = tokenCountWithEstimation(messages)
+  // Guard the division so a non-positive window yields a ratio of 0
+  const ratio = effectiveWindow > 0 ? dynamicTokens / effectiveWindow : 0
+
+  logForDebugging(
+    `turnStartEstimate: tokens=${dynamicTokens} window=${effectiveWindow} ratio=${(ratio * 100).toFixed(1)}%`,
+  )
+
+  return { estimateTokens: dynamicTokens, ratio }
+}
+
+// Hysteresis buffer: only trigger a second pre-fold when context grows by
+// at least this ratio beyond the threshold, preventing oscillation when
+// context hovers right at the boundary.
+export const COMPACT_PRECHECK_FOLD_HYSTERESIS = 0.05
+
+/**
+ * Check whether the turn-start pre-estimation triggers a pre-fold.
+ *
+ * This is NOT redundant with shouldAutoCompact — it uses the 90% threshold
+ * (vs 75%) and is called BEFORE the API call, catching the case where the
+ * last turn's tool output pushed context way up but no assistant response
+ * carried the usage data yet.
+ *
+ * The hysteresis buffer prevents oscillating fold/no-fold when token counts
+ * hover near the threshold (Reasonix: requireTailBoundary equivalent).
+ *
+ * @returns true when a pre-fold is recommended before the next API call
+ */
+export function needsTurnStartPreFold(
+  estimateTokens: number,
+  effectiveWindow: number,
+  lastPreFoldTokens?: number,
+): boolean {
+  const threshold = Math.floor(effectiveWindow * COMPACT_PRECHECK_FOLD_RATIO)
+  if (estimateTokens < threshold) return false
+
+  // Hysteresis: if we pre-folded recently, only re-trigger when context
+  // has grown significantly beyond the threshold (avoids oscillation).
+  if (lastPreFoldTokens !== undefined && lastPreFoldTokens > 0) {
+    const hysteresisThreshold = Math.floor(
+      threshold * (1 + COMPACT_PRECHECK_FOLD_HYSTERESIS),
+    )
+    if (estimateTokens < hysteresisThreshold) return false
+  }
+
+  return true
+}
+
+// ---------------------------------------------------------------------------
+// Cache Economics — per-session cache hit/miss tracking (Reasonix SessionStats)
+// ---------------------------------------------------------------------------
+
+/** Per-turn cache metrics extracted from API usage data */
+export type CacheMetrics = {
+  /** Tokens read from prompt cache (HIT) */
+  cacheHitTokens: number
+  /** Tokens NOT read from cache — fresh input (MISS) */
+  cacheMissTokens: number
+  /** Tokens written to cache by this request */
+  cacheWriteTokens: number
+  /** Total prompt-side tokens (hit + miss + write) */
+  totalPromptTokens: number
+  /** Cache hit ratio: hit / (hit + miss). 1.0 = perfect cache, 0.0 = all miss */
+  cacheHitRatio: number
+}
+
+/**
+ * Compute cache efficiency metrics from API usage data.
+ *
+ * Anthropic reports `input_tokens` exclusive of `cache_read_input_tokens` and
+ * `cache_creation_input_tokens` (total prompt = the sum of all three), so
+ * `input_tokens` is already the uncached (miss) count and the cache fields
+ * must not be subtracted from it. For providers with different field
+ * semantics, the caller should normalize before passing.
+ *
+ * Pure function — no side effects, no state. Safe to call in any context.
+ */
+export function computeCacheMetrics(usage: {
+  input_tokens: number
+  cache_read_input_tokens?: number | null
+  cache_creation_input_tokens?: number | null
+}): CacheMetrics {
+  const cacheHitTokens = usage.cache_read_input_tokens ?? 0
+  const cacheWriteTokens = usage.cache_creation_input_tokens ?? 0
+  // Clamp defensively so a malformed negative count cannot skew the ratio.
+  const cacheMissTokens = Math.max(0, usage.input_tokens)
+  const totalPromptTokens = cacheHitTokens + cacheMissTokens + cacheWriteTokens
+  // Ratio is hit / (hit + miss); cache writes are tracked separately.
+  const readTotal = cacheHitTokens + cacheMissTokens
+  const cacheHitRatio =
+    readTotal > 0 ? cacheHitTokens / readTotal : 0
+
+  return {
+    cacheHitTokens,
+    cacheMissTokens,
+    cacheWriteTokens,
+    totalPromptTokens,
+    cacheHitRatio,
+  }
+}
+
+/**
+ * Pre-fold gate: false once `alreadyFoldedThisTurn` is set, else needsTurnStartPreFold().
+ * NOTE(review): `lastPreFoldTokens` is not passed, so its hysteresis branch never runs here.
+ */
+export function shouldPreFold(
+  tracking: AutoCompactTrackingState | undefined,
+  estimateTokens: number,
+  effectiveWindow: number,
+): boolean {
+  if (tracking?.alreadyFoldedThisTurn) return false
+  return needsTurnStartPreFold(estimateTokens, effectiveWindow)
+}
+
 export function calculateTokenWarningState(
   tokenUsage: number,
   model: string,
@@ -230,12 +529,26 @@ export async function shouldAutoCompact(
     `autocompact: tokens=${tokenCount} threshold=${threshold} effectiveWindow=${effectiveWindow}${snipTokensFreed > 0 ? ` snipFreed=${snipTokensFreed}` : ''}`,
   )
 
+  // Existing fixed-buffer check: final defense at ~93-98% of window
   const { isAboveAutoCompactThreshold } = calculateTokenWarningState(
     tokenCount,
     model,
   )
 
-  return isAboveAutoCompactThreshold
+  if (isAboveAutoCompactThreshold) return true
+
+  // New: percentage-based multi-level check — earlier, gentler intervention
+  if (isPercentageCompactionEnabled()) {
+    const level = getCompactionLevel(tokenCount, model)
+    if (level.level !== 'none' && level.level !== 'turn_start_prefold') {
+      logForDebugging(
+        `autocompact: percentage threshold triggered (level=${level.level}, ratio=${(tokenCount / effectiveWindow * 100).toFixed(1)}%)`,
+      )
+      return true
+    }
+  }
+
+  return false
 }
 
 export async function autoCompactIfNeeded(
@@ -245,10 +558,14 @@ export async function autoCompactIfNeeded(
   querySource?: QuerySource,
   tracking?: AutoCompactTrackingState,
   snipTokensFreed?: number,
+  /** Pass true from query.ts turn-start pre-estimation to run a pre-fold */
+  forcePreFold?: boolean,
 ): Promise<{
   wasCompacted: boolean
   compactionResult?: CompactionResult
   consecutiveFailures?: number
+  /** Loggable cache metrics from the compaction call (if one ran) */
+  cacheMetrics?: CacheMetrics
 }> {
   if (isEnvTruthy(process.env.DISABLE_COMPACT)) {
     return { wasCompacted: false }
@@ -272,10 +589,59 @@ export async function autoCompactIfNeeded(
     snipTokensFreed,
   )
 
-  if (!shouldCompact) {
+  // alreadyFoldedThisTurn guard: if compaction ran via pre-fold earlier this
+  // turn, the post-response check should not re-trigger. Mirrors Reasonix's
+  // decideAfterUsage: alreadyFoldedThisTurn → skip fold.
+  if (!forcePreFold && tracking?.alreadyFoldedThisTurn) {
+    logForDebugging('autocompact: skipping post-response check — already folded this turn')
+    return { wasCompacted: false }
+  }
+
+  if (!shouldCompact && !forcePreFold) {
     return { wasCompacted: false }
   }
 
+  // Route decision: forcePreFold overrides the passive check when the
+  // turn-start pre-estimation flagged us above 90%. In that case we only
+  // skip if the minimum savings check says there's nothing worth freeing.
+  if (!shouldCompact && forcePreFold) {
+    const tokenCount = tokenCountWithEstimation(messages) - (snipTokensFreed ?? 0)
+    const effectiveWindow = getEffectiveContextWindowSize(model)
+    if (!isCompactionWorthwhile(tokenCount, effectiveWindow)) {
+      logForDebugging(
+        `autocompact: skipping forced pre-fold — head portion too small (tokens=${tokenCount}, window=${effectiveWindow})`,
+      )
+      return { wasCompacted: false }
+    }
+    logForDebugging(
+      `autocompact: forcePreFold active — triggering pre-fold (tokens=${tokenCount})`,
+    )
+  }
+
+  // Minimum savings gate: skip compaction when the head portion is too small
+  // to save meaningful tokens. Prevents wasting a compact API call when the
+  // summary alone costs nearly as many tokens as it frees.
+  // Skip this check for forcePreFold — already handled above.
+  if (!forcePreFold && isPercentageCompactionEnabled()) {
+    const tokenCount = tokenCountWithEstimation(messages) - (snipTokensFreed ?? 0)
+    const effectiveWindow = getEffectiveContextWindowSize(model)
+    if (
+      !isCompactionWorthwhile(tokenCount, effectiveWindow)
+    ) {
+      logForDebugging(
+        `autocompact: skipping — head portion too small for worthwhile savings (tokens=${tokenCount}, window=${effectiveWindow})`,
+      )
+      return { wasCompacted: false }
+    }
+  }
+
+  // Compute the compaction level for use in recompactionInfo and to guide
+  // the compaction strategy (tail budget, aggressiveness).
+  const tokenCount = tokenCountWithEstimation(messages) - (snipTokensFreed ?? 0)
+  const compactionLevel = isPercentageCompactionEnabled()
+    ? getCompactionLevel(tokenCount, model).level
+    : 'fixed_buffer'
+
   const recompactionInfo: RecompactionInfo = {
     isRecompactionInChain: tracking?.compacted === true,
     turnsSincePreviousCompact: tracking?.turnCounter ?? -1,
@@ -284,6 +650,10 @@ export async function autoCompactIfNeeded(
     querySource,
   }
 
+  logForDebugging(
+    `autocompact: triggering compaction (level=${compactionLevel}, tokens=${tokenCount})`,
+  )
+
   // EXPERIMENT: Try session memory compaction first
   const sessionMemoryResult = await trySessionMemoryCompaction(
     messages,
@@ -297,12 +667,13 @@ export async function autoCompactIfNeeded(
     runPostCompactCleanup(querySource)
     // Reset cache read baseline so the post-compact drop isn't flagged as a
     // break. compactConversation does this internally; SM-compact doesn't.
-    // BQ 2026-03-01: missing this made 20% of tengu_prompt_cache_break events
-    // false positives (systemPromptChanged=true, timeSinceLastAssistantMsg=-1).
     if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
       notifyCompaction(querySource ?? 'compact', toolUseContext.agentId)
     }
     markPostCompaction()
+    // Mark alreadyFoldedThisTurn to prevent the post-response check from
+    // double-folding (Reasonix: decideAfterUsage returns 'none' when true).
+    if (tracking) tracking.alreadyFoldedThisTurn = true
     return {
       wasCompacted: true,
       compactionResult: sessionMemoryResult,
@@ -325,11 +696,25 @@ export async function autoCompactIfNeeded(
     setLastSummarizedMessageId(undefined)
     runPostCompactCleanup(querySource)
 
+    // Compute cache metrics from the compaction agent's usage for visibility
+    const compactionUsage = compactionResult.compactionUsage
+    const compactionCacheMetrics = compactionUsage
+      ? computeCacheMetrics({
+          input_tokens: compactionUsage.input_tokens,
+          cache_read_input_tokens: compactionUsage.cache_read_input_tokens,
+          cache_creation_input_tokens: compactionUsage.cache_creation_input_tokens,
+        })
+      : undefined
+
+    // Mark alreadyFoldedThisTurn to prevent the post-response check from
+    // double-folding (Reasonix: decideAfterUsage returns 'none' when true).
+    if (tracking) tracking.alreadyFoldedThisTurn = true
+
     return {
       wasCompacted: true,
       compactionResult,
-      // Reset failure count on success
       consecutiveFailures: 0,
+      cacheMetrics: compactionCacheMetrics,
     }
   } catch (error) {
     if (!hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT)) {
diff --git a/src/utils/toolResultStorage.ts b/src/utils/toolResultStorage.ts
index f4dfef326..ec119d985 100644
--- a/src/utils/toolResultStorage.ts
+++ b/src/utils/toolResultStorage.ts
@@ -14,6 +14,7 @@ import {
 } from '../constants/toolLimits.js'
 import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
 import { logEvent } from '../services/analytics/index.js'
+import { roughTokenCountEstimation } from '../services/tokenEstimation.js'
 import { sanitizeToolNameForAnalytics } from '../services/analytics/metadata.js'
 import type { Message } from '../types/message.js'
 import { logForDebugging } from './debug.js'
@@ -333,6 +334,55 @@ async function maybePersistLargeToolResult(
   return { ...toolResultBlock, content: message }
 }
 
+/**
+ * Truncate tool result content to fit within an approximate token budget.
+ *
+ * The decision to cut uses roughTokenCountEstimation; the cut itself keeps at
+ * most maxTokens * BYTES_PER_TOKEN UTF-16 code units. Token-dense text such as
+ * CJK may therefore still exceed maxTokens after the cut, and the appended
+ * marker is not counted against the budget.
+ * The cut backs up to a newline when one lies in the last 30% of the budget
+ * and never ends on the first half of a surrogate pair.
+ *
+ * @param content - The content to potentially truncate
+ * @param maxTokens - Maximum allowed tokens before truncation
+ * @returns the resulting text, whether it was cut, and its estimated tokens
+ */
+export function truncateToolResultByTokens(
+  content: string,
+  maxTokens: number,
+): { truncated: string; wasTruncated: boolean; estimatedTokens: number } {
+  const estimatedTokens = roughTokenCountEstimation(content)
+  if (estimatedTokens <= maxTokens || content.length <= maxTokens) {
+    return { truncated: content, wasTruncated: false, estimatedTokens }
+  }
+
+  // Convert the token budget into a character budget via BYTES_PER_TOKEN
+  const charBudget = maxTokens * BYTES_PER_TOKEN
+  let truncated = content.slice(0, charBudget)
+
+  // Prefer a nearby newline boundary so the cut is not mid-line or mid-word
+  const lastNewline = truncated.lastIndexOf('\n')
+  if (lastNewline > charBudget * 0.7) {
+    truncated = truncated.slice(0, lastNewline)
+  }
+
+  // Drop a trailing high surrogate so the cut never splits an astral character
+  const lastUnit = truncated.charCodeAt(truncated.length - 1)
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) truncated = truncated.slice(0, -1)
+
+  // Tell the model the output was cut and where the full text may be found
+  truncated +=
+    `\n\n[Content truncated from ~${formatFileSize(content.length)} to fit ` +
+    `within ~${maxTokens.toLocaleString()} tokens. Full output may be available in the session tool-results directory.]`
+
+  return {
+    truncated,
+    wasTruncated: true,
+    estimatedTokens: roughTokenCountEstimation(truncated),
+  }
+}
+
 /**
  * Generate a preview of content, truncating at a newline boundary when possible.
  */
diff --git a/tests/cacheOptimization.integration.test.ts b/tests/cacheOptimization.integration.test.ts
new file mode 100644
index 000000000..fa5962440
--- /dev/null
+++ b/tests/cacheOptimization.integration.test.ts
@@ -0,0 +1,423 @@
+import { expect, test } from 'bun:test'
+import {
+  COMPACT_NORMAL_FOLD_RATIO,
+  COMPACT_AGGRESSIVE_FOLD_RATIO,
+  COMPACT_FORCE_SUMMARY_RATIO,
+  COMPACT_PRECHECK_FOLD_RATIO,
+  COMPACT_NORMAL_FOLD_TAIL_RATIO,
+  COMPACT_AGGRESSIVE_FOLD_TAIL_RATIO,
+  COMPACT_PRECHECK_FOLD_HYSTERESIS,
+  MIN_COMPACTION_SAVINGS_RATIO,
+  type CompactionLevel,
+  computeCacheMetrics,
+  isCompactionWorthwhile,
+  needsTurnStartPreFold,
+  shouldPreFold,
+} from '../src/services/compact/autoCompact.js'
+import { truncateToolResultByTokens } from '../src/utils/toolResultStorage.js'
+import type { AutoCompactTrackingState } from '../src/services/compact/autoCompact.js'
+
+// ===========================================================================
+// TDD Integration Suite — cache optimization decision chain + boundary tests
+// ===========================================================================
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+const WINDOWS = {
+  SMALL: 200_000,
+  MEDIUM: 500_000,
+  LARGE: 1_000_000,
+} as const
+
+function makeTracking(overrides: Partial<AutoCompactTrackingState> = {}): AutoCompactTrackingState {
+  return {
+    compacted: false,
+    turnCounter: 1,
+    turnId: 'tdd-test-turn',
+    ...overrides,
+  }
+}
+
+function thresholdTokens(window: number, ratio: number): number {
+  return Math.floor(window * ratio)
+}
+
+// ---------------------------------------------------------------------------
+// Integration 1: Percentage threshold cross-window consistency
+//
+// The entire point of percentage thresholds is to work consistently across
+// all context window sizes. Verify that for every window size the ratios
+// produce the same percentile behavior.
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: normal fold fires at same % regardless of window size', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const tokens = thresholdTokens(w, COMPACT_NORMAL_FOLD_RATIO)
+    const ratio = tokens / w
+    expect(ratio).toBeGreaterThanOrEqual(COMPACT_NORMAL_FOLD_RATIO - 0.01)
+    expect(ratio).toBeLessThanOrEqual(COMPACT_NORMAL_FOLD_RATIO + 0.01)
+  }
+})
+
+test('INTEGRATION: aggressive fold fires at same % regardless of window size', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const tokens = thresholdTokens(w, COMPACT_AGGRESSIVE_FOLD_RATIO)
+    const ratio = tokens / w
+    expect(ratio).toBeGreaterThanOrEqual(COMPACT_AGGRESSIVE_FOLD_RATIO - 0.01)
+  }
+})
+
+test('INTEGRATION: force summary fires at same % regardless of window size', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const tokens = thresholdTokens(w, COMPACT_FORCE_SUMMARY_RATIO)
+    const ratio = tokens / w
+    expect(ratio).toBeGreaterThanOrEqual(COMPACT_FORCE_SUMMARY_RATIO - 0.01)
+  }
+})
+
+test('INTEGRATION: pre-check fires at same % regardless of window size', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const tokens = thresholdTokens(w, COMPACT_PRECHECK_FOLD_RATIO)
+    const ratio = tokens / w
+    expect(ratio).toBeGreaterThanOrEqual(COMPACT_PRECHECK_FOLD_RATIO - 0.01)
+  }
+})
+
+// ---------------------------------------------------------------------------
+// Integration 2: Tail budget ratio — aggressive < normal (cross-window)
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: aggressive tail budget is half of normal across all window sizes', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const normalTail = Math.floor(w * COMPACT_NORMAL_FOLD_TAIL_RATIO)
+    const aggressiveTail = Math.floor(w * COMPACT_AGGRESSIVE_FOLD_TAIL_RATIO)
+    expect(aggressiveTail).toBeLessThan(normalTail)
+    // Aggressive tail should be exactly half of normal
+    expect(aggressiveTail).toBe(Math.floor(normalTail / 2))
+  }
+})
+
+// ---------------------------------------------------------------------------
+// Integration 3: Decision chain — isCompactionWorthwhile gates normal vs emergency
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: isCompactionWorthwhile gates at exact boundary across windows', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const boundary = Math.floor(w * MIN_COMPACTION_SAVINGS_RATIO)
+    // At exact boundary — worthwhile
+    expect(isCompactionWorthwhile(boundary, w)).toBe(true)
+    // Just below boundary — not worthwhile
+    if (boundary > 1) {
+      expect(isCompactionWorthwhile(boundary - 1, w)).toBe(false)
+    }
+  }
+})
+
+test('INTEGRATION: isCompactionWorthwhile emergency gate consistent', () => {
+  // When tokens exceed window, always worthwhile regardless of window size
+  for (const w of Object.values(WINDOWS)) {
+    expect(isCompactionWorthwhile(w + 1, w)).toBe(true)
+    expect(isCompactionWorthwhile(w * 2, w)).toBe(true)
+  }
+})
+
+// ---------------------------------------------------------------------------
+// Integration 4: Pre-fold chain — estimate → needsPreFold → shouldPreFold
+// Simulates a full turn-start pre-estimation decision
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: pre-fold triggers at 90% regardless of window size', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const atThreshold = thresholdTokens(w, COMPACT_PRECHECK_FOLD_RATIO)
+    expect(needsTurnStartPreFold(atThreshold, w)).toBe(true)
+  }
+})
+
+test('INTEGRATION: pre-fold does NOT trigger at 89% across windows', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const belowThreshold = thresholdTokens(w, COMPACT_PRECHECK_FOLD_RATIO * 0.99)
+    expect(needsTurnStartPreFold(belowThreshold, w)).toBe(false)
+  }
+})
+
+test('INTEGRATION: hysteresis prevents oscillation at boundary', () => {
+  // Simulate context hovering right at 90% boundary
+  const w = WINDOWS.LARGE
+  const threshold = thresholdTokens(w, COMPACT_PRECHECK_FOLD_RATIO)
+  const hysteresisThreshold = thresholdTokens(
+    w,
+    COMPACT_PRECHECK_FOLD_RATIO * (1 + COMPACT_PRECHECK_FOLD_HYSTERESIS),
+  )
+
+  // First pre-fold: at threshold → true
+  const lastFoldAt = threshold + 1000
+  expect(needsTurnStartPreFold(threshold, w)).toBe(true)
+
+  // Second check: context slightly above threshold but below hysteresis
+  const afterFold = threshold + Math.floor((hysteresisThreshold - threshold) * 0.5)
+  expect(needsTurnStartPreFold(afterFold, w, lastFoldAt)).toBe(false)
+
+  // Third check: context now well above hysteresis → re-trigger
+  const aboveHysteresis = hysteresisThreshold + 100
+  expect(needsTurnStartPreFold(aboveHysteresis, w, lastFoldAt)).toBe(true)
+})
+
+// ---------------------------------------------------------------------------
+// Integration 5: alreadyFoldedThisTurn — prevents double-fold in the same turn
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: alreadyFoldedThisTurn suppresses shouldPreFold', () => {
+  const tracking = makeTracking({ alreadyFoldedThisTurn: true })
+  // 95% context — would normally pre-fold, but already did this turn
+  expect(shouldPreFold(tracking, thresholdTokens(200_000, 0.95), 200_000)).toBe(false)
+})
+
+test('INTEGRATION: shouldPreFold returns true on fresh turn (not yet folded)', () => {
+  const tracking = makeTracking({ alreadyFoldedThisTurn: false })
+  expect(shouldPreFold(tracking, thresholdTokens(200_000, 0.95), 200_000)).toBe(true)
+})
+
+test('INTEGRATION: shouldPreFold returns false below threshold even on fresh turn', () => {
+  const tracking = makeTracking({ alreadyFoldedThisTurn: false })
+  // 80% — below 90% pre-check threshold
+  expect(shouldPreFold(tracking, thresholdTokens(200_000, 0.80), 200_000)).toBe(false)
+})
+
+// ---------------------------------------------------------------------------
+// Integration 6: Compaction level ordering — verify severity hierarchy
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: compaction level severity is monotonically ordered', () => {
+  // The severity order of compaction levels, from least to most urgent:
+  //   none < normal_fold < aggressive_fold < force_summary
+  // turn_start_prefold (90%) is a pre-check at a different point in the
+  // turn lifecycle, so its ratio doesn't follow the post-response severity
+  // chain. fixed_buffer depends on the window size.
+  const postResponseLevels: CompactionLevel[] = [
+    'none',
+    'normal_fold',
+    'aggressive_fold',
+    'force_summary',
+  ]
+
+  const ratios: Record<CompactionLevel, number> = {
+    none: 0,
+    turn_start_prefold: COMPACT_PRECHECK_FOLD_RATIO,
+    normal_fold: COMPACT_NORMAL_FOLD_RATIO,
+    aggressive_fold: COMPACT_AGGRESSIVE_FOLD_RATIO,
+    force_summary: COMPACT_FORCE_SUMMARY_RATIO,
+    fixed_buffer: 0.935,
+  }
+
+  for (let i = 1; i < postResponseLevels.length; i++) {
+    const prev = postResponseLevels[i - 1]!
+    const curr = postResponseLevels[i]!
+    expect(ratios[curr]).toBeGreaterThanOrEqual(ratios[prev])
+  }
+})
+
+// ---------------------------------------------------------------------------
+// Integration 7: Cache metrics computation consistency
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: cacheHitRatio consistent across different hit/miss splits', () => {
+  const scenarios = [
+    { input: 100_000, hit: 99_000, write: 500, expectedRatio: 99_000 / (99_000 + 500) },
+    { input: 100_000, hit: 50_000, write: 0, expectedRatio: 0.5 },
+    { input: 100_000, hit: 0, write: 0, expectedRatio: 0 },
+    { input: 100_000, hit: 100_000, write: 0, expectedRatio: 1 },
+    { input: 1_000_000, hit: 750_000, write: 100_000, expectedRatio: 750_000 / (750_000 + 150_000) },
+  ]
+
+  for (const s of scenarios) {
+    const m = computeCacheMetrics({
+      input_tokens: s.input,
+      cache_read_input_tokens: s.hit,
+      cache_creation_input_tokens: s.write,
+    })
+    expect(m.cacheHitRatio).toBeCloseTo(s.expectedRatio, 4)
+    expect(m.cacheHitTokens).toBe(s.hit)
+    expect(m.cacheWriteTokens).toBe(s.write)
+    // hit + miss + write ≈ input (miss may be adjusted if hit+write > input)
+    expect(m.totalPromptTokens).toBeGreaterThanOrEqual(s.input - 10)
+  }
+})
+
+test('INTEGRATION: cacheMetrics invariant: hitRatio ∈ [0, 1]', () => {
+  // Random-ish sampling of plausible usage patterns
+  const patterns = [
+    { input: 1, hit: 0, write: 0 },
+    { input: 1, hit: 1, write: 0 },
+    { input: 999_999, hit: 1, write: 0 },
+    { input: 500_000, hit: 500_000, write: 0 },
+    { input: 500_000, hit: 0, write: 500_000 },
+  ]
+  for (const p of patterns) {
+    const m = computeCacheMetrics({
+      input_tokens: p.input,
+      cache_read_input_tokens: p.hit,
+      cache_creation_input_tokens: p.write,
+    })
+    expect(m.cacheHitRatio).toBeGreaterThanOrEqual(0)
+    expect(m.cacheHitRatio).toBeLessThanOrEqual(1)
+  }
+})
+
+// ---------------------------------------------------------------------------
+// Integration 8: Token truncation — CJK and mixed-language edge cases
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: truncation preserves CJK character integrity', () => {
+  // CJK: each character is roughly 1-3 tokens, so the char/4 estimate
+  // undercounts tokens for CJK text. Verify it still truncates gracefully
+  // without corrupting characters.
+  const content = 'これは日本語のテストです。'.repeat(1000)
+  const result = truncateToolResultByTokens(content, 100)
+  expect(result.wasTruncated).toBe(true)
+  // Truncated content should be well-formed UTF-16 (no lone surrogates)
+  expect(() => encodeURIComponent(result.truncated)).not.toThrow()
+  // Should contain the truncation marker
+  expect(result.truncated).toContain('Content truncated')
+})
+
+test('INTEGRATION: truncation with data-like content is valid', () => {
+  // When content is dense data (no natural line breaks), truncation at the
+  // exact char boundary is acceptable — the function guarantees content
+  // is a prefix of the original and that the marker is present.
+  const lines: string[] = []
+  for (let i = 0; i < 100; i++) {
+    lines.push(`Line ${i.toString().padStart(4, '0')}: ${'data '.repeat(50)}`)
+  }
+  const content = lines.join('\n')
+  const result = truncateToolResultByTokens(content, 100)
+
+  if (result.wasTruncated) {
+    // Verify truncated is shorter than original
+    expect(result.truncated.length).toBeLessThan(content.length)
+
+    // The truncated content should be a prefix of the original
+    // (before the marker is appended)
+    const markerIdx = result.truncated.lastIndexOf('[Content truncated')
+    expect(markerIdx).toBeGreaterThan(0)
+
+    const beforeMarker = result.truncated.slice(0, markerIdx)
+    // Content before the marker should be contained in the original
+    // (may be truncated mid-word, which is acceptable)
+    expect(content.includes(beforeMarker.trim())).toBe(true)
+  }
+})
+
+test('INTEGRATION: truncation with mixed ASCII plus emoji content', () => {
+  const content = 'Regular text with emoji 🚀🔥💻 mixed in. '.repeat(200)
+  const result = truncateToolResultByTokens(content, 100)
+  expect(result.wasTruncated).toBe(true)
+  // Emoji are surrogate pairs in UTF-16; encodeURIComponent throws on a lone surrogate
+  expect(() => encodeURIComponent(result.truncated)).not.toThrow()
+})
+
+// ---------------------------------------------------------------------------
+// Integration 9: Threshold cross-check — all ratios are in valid range
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: all compaction ratios are in (0, 1)', () => {
+  const ratios = [
+    COMPACT_NORMAL_FOLD_RATIO,
+    COMPACT_AGGRESSIVE_FOLD_RATIO,
+    COMPACT_FORCE_SUMMARY_RATIO,
+    COMPACT_PRECHECK_FOLD_RATIO,
+    COMPACT_NORMAL_FOLD_TAIL_RATIO,
+    COMPACT_AGGRESSIVE_FOLD_TAIL_RATIO,
+    MIN_COMPACTION_SAVINGS_RATIO,
+    COMPACT_PRECHECK_FOLD_HYSTERESIS,
+  ]
+  for (const r of ratios) {
+    expect(r).toBeGreaterThan(0)
+    expect(r).toBeLessThan(1)
+  }
+})
+
+test('INTEGRATION: normal fold threshold < aggressive < force_summary < precheck', () => {
+  expect(COMPACT_NORMAL_FOLD_RATIO).toBeLessThan(COMPACT_AGGRESSIVE_FOLD_RATIO)
+  expect(COMPACT_AGGRESSIVE_FOLD_RATIO).toBeLessThan(COMPACT_FORCE_SUMMARY_RATIO)
+  expect(COMPACT_FORCE_SUMMARY_RATIO).toBeLessThan(COMPACT_PRECHECK_FOLD_RATIO)
+})
+
+// ---------------------------------------------------------------------------
+// Integration 10: Savings check never blocks emergency compaction
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: savings check does not block when tokens exceed window', () => {
+  // Emergency: tokens > window → always true regardless of savings ratio
+  for (const w of Object.values(WINDOWS)) {
+    expect(isCompactionWorthwhile(w + 1, w)).toBe(true)
+    expect(isCompactionWorthwhile(w + 1000, w)).toBe(true)
+  }
+})
+
+test('INTEGRATION: savings check criteria consistent with min savings ratio', () => {
+  const w = WINDOWS.MEDIUM
+  const minFraction = MIN_COMPACTION_SAVINGS_RATIO
+
+  // At exact fraction → worthwhile
+  expect(isCompactionWorthwhile(Math.floor(w * minFraction), w)).toBe(true)
+
+  // Slightly below → not worthwhile (but we test 1 token below for small ratios)
+  const belowFraction = Math.floor(w * (minFraction - 0.01))
+  if (belowFraction > 0) {
+    expect(isCompactionWorthwhile(belowFraction, w)).toBe(false)
+  }
+})
+
+// ---------------------------------------------------------------------------
+// Integration 11: Pre-fold + post-fold never overlap (alreadyFoldedThisTurn invariant)
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: pre-fold + post-fold coordination — alreadyFolded prevents second fold', () => {
+  // Simulate a full turn:
+  //   1. Turn starts
+  //   2. Pre-estimation finds 92% → triggers pre-fold
+  //   3. Pre-fold succeeds → alreadyFoldedThisTurn = true
+  //   4. API call runs
+  //   5. Post-response check → should NOT re-fold
+
+  const tracking = makeTracking({ alreadyFoldedThisTurn: false })
+  const w = WINDOWS.SMALL
+
+  // Step 2: pre-estimation at 92%
+  const tokens = thresholdTokens(w, 0.92)
+  expect(shouldPreFold(tracking, tokens, w)).toBe(true)
+
+  // Step 3: after pre-fold, mark
+  tracking.alreadyFoldedThisTurn = true
+
+  // Step 5: post-response check — suppressed
+  expect(shouldPreFold(tracking, tokens, w)).toBe(false)
+})
+
+test('INTEGRATION: next turn resets alreadyFoldedThisTurn (caller responsibility)', () => {
+  // The tracking object is expected to be reset by the caller (query.ts)
+  // at the start of each new turn. Verify the flag is not sticky.
+  const tracking = makeTracking({ alreadyFoldedThisTurn: true })
+  // Caller resets for new turn
+  tracking.alreadyFoldedThisTurn = false
+  expect(shouldPreFold(tracking, thresholdTokens(200_000, 0.95), 200_000)).toBe(true)
+})
+
+// ---------------------------------------------------------------------------
+// Integration 12: Hysteresis prevents thrashing across all window sizes
+// ---------------------------------------------------------------------------
+
+test('INTEGRATION: hysteresis gap is at least 4% of threshold across windows', () => {
+  for (const w of Object.values(WINDOWS)) {
+    const threshold = thresholdTokens(w, COMPACT_PRECHECK_FOLD_RATIO)
+    const hysteresisGap = thresholdTokens(
+      w,
+      COMPACT_PRECHECK_FOLD_RATIO * (1 + COMPACT_PRECHECK_FOLD_HYSTERESIS),
+    ) - threshold
+    // Gap should be ~5% of the threshold value
+    const expectedGap = Math.floor(threshold * COMPACT_PRECHECK_FOLD_HYSTERESIS)
+    expect(hysteresisGap).toBeGreaterThanOrEqual(expectedGap - 1)
+  }
+})
diff --git a/tests/compactionThresholds.test.ts b/tests/compactionThresholds.test.ts
new file mode 100644
index 000000000..a6bdeba23
--- /dev/null
+++ b/tests/compactionThresholds.test.ts
@@ -0,0 +1,323 @@
+import { expect, test } from 'bun:test'
+import {
+  COMPACT_NORMAL_FOLD_RATIO,
+  COMPACT_AGGRESSIVE_FOLD_RATIO,
+  COMPACT_FORCE_SUMMARY_RATIO,
+  COMPACT_PRECHECK_FOLD_RATIO,
+  COMPACT_NORMAL_FOLD_TAIL_RATIO,
+  COMPACT_AGGRESSIVE_FOLD_TAIL_RATIO,
+  COMPACT_PRECHECK_FOLD_HYSTERESIS,
+  MIN_COMPACTION_SAVINGS_RATIO,
+  computeCacheMetrics,
+  isCompactionWorthwhile,
+  needsTurnStartPreFold,
+  shouldPreFold,
+} from '../src/services/compact/autoCompact.js'
+import { truncateToolResultByTokens } from '../src/utils/toolResultStorage.js'
+import type { AutoCompactTrackingState } from '../src/services/compact/autoCompact.js'
+
+// ---------------------------------------------------------------------------
+// Constant validation — ensure thresholds stay at their expected values
+// ---------------------------------------------------------------------------
+
+test('percentage thresholds are correctly ordered', () => {
+  expect(COMPACT_NORMAL_FOLD_RATIO).toBe(0.75)
+  expect(COMPACT_AGGRESSIVE_FOLD_RATIO).toBe(0.78)
+  expect(COMPACT_FORCE_SUMMARY_RATIO).toBe(0.80)
+  expect(COMPACT_PRECHECK_FOLD_RATIO).toBe(0.90)
+
+  // Thresholds must be monotonically increasing
+  expect(COMPACT_NORMAL_FOLD_RATIO).toBeLessThan(COMPACT_AGGRESSIVE_FOLD_RATIO)
+  expect(COMPACT_AGGRESSIVE_FOLD_RATIO).toBeLessThan(
+    COMPACT_FORCE_SUMMARY_RATIO,
+  )
+  expect(COMPACT_FORCE_SUMMARY_RATIO).toBeLessThan(COMPACT_PRECHECK_FOLD_RATIO)
+})
+
+test('tail budget ratios are correctly ordered', () => {
+  expect(COMPACT_NORMAL_FOLD_TAIL_RATIO).toBe(0.20)
+  expect(COMPACT_AGGRESSIVE_FOLD_TAIL_RATIO).toBe(0.10)
+
+  // Normal fold should preserve more tail than aggressive fold
+  expect(COMPACT_AGGRESSIVE_FOLD_TAIL_RATIO).toBeLessThan(
+    COMPACT_NORMAL_FOLD_TAIL_RATIO,
+  )
+})
+
+test('minimum savings ratio is a reasonable value', () => {
+  expect(MIN_COMPACTION_SAVINGS_RATIO).toBe(0.30)
+  expect(MIN_COMPACTION_SAVINGS_RATIO).toBeGreaterThan(0)
+  expect(MIN_COMPACTION_SAVINGS_RATIO).toBeLessThan(1)
+})
+
+test('pre-check hysteresis is a small positive fraction', () => {
+  expect(COMPACT_PRECHECK_FOLD_HYSTERESIS).toBe(0.05)
+  expect(COMPACT_PRECHECK_FOLD_HYSTERESIS).toBeGreaterThan(0)
+  expect(COMPACT_PRECHECK_FOLD_HYSTERESIS).toBeLessThan(0.15)
+})
+
+// ---------------------------------------------------------------------------
+// isCompactionWorthwhile
+// ---------------------------------------------------------------------------
+
+test('isCompactionWorthwhile returns true when most of context is occupied', () => {
+  // 90K tokens in 100K window → 90% occupied → worthwhile
+  expect(isCompactionWorthwhile(90_000, 100_000)).toBe(true)
+})
+
+test('isCompactionWorthwhile returns true at the boundary (30%)', () => {
+  // 30K tokens in 100K window → exactly 30% → still worthwhile
+  expect(isCompactionWorthwhile(30_000, 100_000)).toBe(true)
+})
+
+test('isCompactionWorthwhile returns false when below threshold', () => {
+  // 20K tokens in 100K window → 20% → not worthwhile
+  expect(isCompactionWorthwhile(20_000, 100_000)).toBe(false)
+})
+
+test('isCompactionWorthwhile returns true when tokens exceed window (emergency)', () => {
+  // Emergency: tokens exceed the context window — always worthwhile
+  expect(isCompactionWorthwhile(105_000, 100_000)).toBe(true)
+})
+
+test('isCompactionWorthwhile handles large 1M context window', () => {
+  // 400K tokens in 1M window → 40% → worthwhile
+  expect(isCompactionWorthwhile(400_000, 1_000_000)).toBe(true)
+
+  // 200K tokens in 1M window → 20% → not worthwhile
+  expect(isCompactionWorthwhile(200_000, 1_000_000)).toBe(false)
+
+  // 205K tokens in 200K window → >100% → emergency → worthwhile
+  expect(isCompactionWorthwhile(205_000, 200_000)).toBe(true)
+})
+
+// ---------------------------------------------------------------------------
+// computeCacheMetrics — cache economics tracking (Reasonix SessionStats parity)
+// ---------------------------------------------------------------------------
+
+test('computeCacheMetrics with high cache hit ratio', () => {
+  const result = computeCacheMetrics({
+    input_tokens: 10_000,
+    cache_read_input_tokens: 9_000,
+    cache_creation_input_tokens: 500,
+  })
+  expect(result.cacheHitTokens).toBe(9_000)
+  expect(result.cacheWriteTokens).toBe(500)
+  // miss = input - hit - write = 10000 - 9000 - 500 = 500
+  expect(result.cacheMissTokens).toBe(500)
+  expect(result.totalPromptTokens).toBe(10_000)
+  // ratio = hit / (hit + miss) = 9000 / (9000 + 500) ≈ 0.947
+  expect(result.cacheHitRatio).toBeCloseTo(0.947, 2)
+})
+
+test('computeCacheMetrics with complete cache miss', () => {
+  const result = computeCacheMetrics({
+    input_tokens: 50_000,
+    cache_read_input_tokens: 0,
+    cache_creation_input_tokens: 0,
+  })
+  expect(result.cacheHitTokens).toBe(0)
+  expect(result.cacheMissTokens).toBe(50_000)
+  expect(result.cacheWriteTokens).toBe(0)
+  expect(result.cacheHitRatio).toBe(0)
+})
+
+test('computeCacheMetrics with null fields defaults to zero', () => {
+  const result = computeCacheMetrics({
+    input_tokens: 5_000,
+    cache_read_input_tokens: null,
+    cache_creation_input_tokens: null,
+  })
+  expect(result.cacheHitTokens).toBe(0)
+  expect(result.cacheWriteTokens).toBe(0)
+  expect(result.cacheMissTokens).toBe(5_000)
+})
+
+test('computeCacheMetrics with undefined fields defaults to zero', () => {
+  const result = computeCacheMetrics({
+    input_tokens: 3_000,
+  })
+  expect(result.cacheHitTokens).toBe(0)
+  expect(result.cacheWriteTokens).toBe(0)
+  expect(result.cacheMissTokens).toBe(3_000)
+})
+
+test('computeCacheMetrics handles mixed cache scenario', () => {
+  // 200K total prompt: 150K cache hit, 30K miss, 20K new writes
+  const result = computeCacheMetrics({
+    input_tokens: 200_000,
+    cache_read_input_tokens: 150_000,
+    cache_creation_input_tokens: 20_000,
+  })
+  expect(result.cacheHitTokens).toBe(150_000)
+  expect(result.cacheWriteTokens).toBe(20_000)
+  expect(result.cacheMissTokens).toBe(30_000)
+  // ratio = 150000 / (150000 + 30000) ≈ 0.833
+  expect(result.cacheHitRatio).toBeCloseTo(0.833, 2)
+})
+
+test('computeCacheMetrics guards against negative miss (defensive)', () => {
+  // Edge case: if API reports more hit+write than input (shouldn't happen
+  // but the function should never return negative)
+  const result = computeCacheMetrics({
+    input_tokens: 1_000,
+    cache_read_input_tokens: 800,
+    cache_creation_input_tokens: 300,
+  })
+  // hit + write = 1100 > input = 1000 → miss clamped to 0
+  expect(result.cacheMissTokens).toBe(0)
+  expect(result.cacheHitTokens).toBe(800)
+  expect(result.cacheWriteTokens).toBe(300)
+  expect(result.cacheHitRatio).toBe(1.0) // 800/(800+0) = 1.0
+})
+
+// ---------------------------------------------------------------------------
+// needsTurnStartPreFold — 90% threshold with hysteresis
+// ---------------------------------------------------------------------------
+
+test('needsTurnStartPreFold returns false when below 90% threshold', () => {
+  // 80K tokens in 100K window → 80% → not at 90% threshold
+  expect(needsTurnStartPreFold(80_000, 100_000)).toBe(false)
+})
+
+test('needsTurnStartPreFold returns true at exactly 90%', () => {
+  // 90K tokens in 100K window → exactly 90%
+  expect(needsTurnStartPreFold(90_000, 100_000)).toBe(true)
+})
+
+test('needsTurnStartPreFold returns true well above 90%', () => {
+  expect(needsTurnStartPreFold(95_000, 100_000)).toBe(true)
+})
+
+test('needsTurnStartPreFold with hysteresis: skip when near previous fold', () => {
+  // 92K tokens in 100K window: 92% → above 90% threshold
+  // But we folded at 91K recently → hysteresis threshold = 90K * 1.05 = 94.5K
+  // 92K < 94.5K → hysteresis suppresses re-fold
+  expect(needsTurnStartPreFold(92_000, 100_000, 91_000)).toBe(false)
+})
+
+test('needsTurnStartPreFold with hysteresis: trigger when significantly above', () => {
+  // 96K tokens in 100K window: 96% → above 90% threshold
+  // Folded at 91K recently → hysteresis threshold = 94.5K
+  // 96K > 94.5K → hysteresis does NOT suppress
+  expect(needsTurnStartPreFold(96_000, 100_000, 91_000)).toBe(true)
+})
+
+test('needsTurnStartPreFold with hysteresis under the threshold is fine', () => {
+  // 89K tokens in 100K window: 89% → below 90% threshold
+  // Hysteresis doesn't matter when under threshold
+  expect(needsTurnStartPreFold(89_000, 100_000, 88_000)).toBe(false)
+})
+
+test('needsTurnStartPreFold with large 1M context window', () => {
+  // 920K in 1M window → 92% → above 90%
+  expect(needsTurnStartPreFold(920_000, 1_000_000)).toBe(true)
+
+  // 880K in 1M window → 88% → below 90%
+  expect(needsTurnStartPreFold(880_000, 1_000_000)).toBe(false)
+
+  // Hysteresis: 905K in 1M, folded at 900K
+  // Hysteresis threshold = 900K * 1.05 = 945K, 905K < 945K → suppressed
+  expect(needsTurnStartPreFold(905_000, 1_000_000, 900_000)).toBe(false)
+})
+
+// ---------------------------------------------------------------------------
+// shouldPreFold — respects alreadyFoldedThisTurn
+// ---------------------------------------------------------------------------
+
+function makeTracking(alreadyFolded: boolean): AutoCompactTrackingState {
+  return {
+    compacted: false,
+    turnCounter: 0,
+    turnId: 'test',
+    alreadyFoldedThisTurn: alreadyFolded,
+  }
+}
+
+test('shouldPreFold returns false when already folded this turn', () => {
+  const tracking = makeTracking(true)
+  // 95K in 100K → 95% → would normally trigger, but alreadyFolded suppresses
+  expect(shouldPreFold(tracking, 95_000, 100_000)).toBe(false)
+})
+
+test('shouldPreFold returns true when not yet folded this turn', () => {
+  const tracking = makeTracking(false)
+  expect(shouldPreFold(tracking, 95_000, 100_000)).toBe(true)
+})
+
+test('shouldPreFold falls back to the threshold check when tracking is undefined', () => {
+  // Without tracking there is no alreadyFoldedThisTurn flag to suppress the fold
+  expect(shouldPreFold(undefined, 95_000, 100_000)).toBe(true)
+  // But when under threshold, still false
+  expect(shouldPreFold(undefined, 85_000, 100_000)).toBe(false)
+})
+
+// ---------------------------------------------------------------------------
+// truncateToolResultByTokens
+// ---------------------------------------------------------------------------
+
+test('truncateToolResultByTokens returns content unchanged when under limit', () => {
+  const content = 'short content'
+  const result = truncateToolResultByTokens(content, 100)
+  expect(result.wasTruncated).toBe(false)
+  expect(result.truncated).toBe(content)
+})
+
+test('truncateToolResultByTokens returns content unchanged when exactly at limit', () => {
+  // 400 bytes → ~100 tokens at 4 bytes/token
+  const content = 'A'.repeat(400)
+  const result = truncateToolResultByTokens(content, 100)
+  // May or may not truncate depending on rough estimate — but marker
+  // should not appear when content is small enough
+  if (!result.wasTruncated) {
+    expect(result.truncated).toBe(content)
+  }
+})
+
+test('truncateToolResultByTokens truncates when well above limit', () => {
+  // ~50K chars → ~12,500 tokens at 4 bytes/token
+  const content = 'A'.repeat(50_000)
+  const result = truncateToolResultByTokens(content, 100)
+  expect(result.wasTruncated).toBe(true)
+  expect(result.truncated.length).toBeLessThan(content.length)
+  expect(result.truncated).toContain('Content truncated')
+})
+
+test('truncateToolResultByTokens includes marker in truncated content', () => {
+  const content = 'B'.repeat(10_000)
+  const result = truncateToolResultByTokens(content, 100)
+  expect(result.wasTruncated).toBe(true)
+  expect(result.truncated).toContain(
+    'Content truncated',
+  )
+})
+
+test('truncateToolResultByTokens handles CJK content', () => {
+  // CJK characters are ~1-3 tokens each, so char/4 underestimates tokens.
+  // The function should still gracefully handle and truncate CJK content.
+  const content = '中文测试内容'.repeat(5_000)
+  const result = truncateToolResultByTokens(content, 500)
+  expect(result.wasTruncated).toBe(true)
+  expect(result.truncated.length).toBeLessThan(content.length)
+})
+
+test('truncateToolResultByTokens preserves content integrity', () => {
+  const content = 'Hello World\nThis is a test\n'.repeat(200)
+  const result = truncateToolResultByTokens(content, 100)
+  expect(result.wasTruncated).toBe(true)
+  // Should not start with partial line when possible
+  // (if a newline was found within 70% of the budget)
+  const truncatedPart = result.truncated.replace(
+    /\n\n\[Content truncated.*\]$/s,
+    '',
+  )
+  // Content before the marker should be a prefix of the original
+  expect(content.startsWith(truncatedPart)).toBe(true)
+})
+
+test('truncateToolResultByTokens handles empty content', () => {
+  const result = truncateToolResultByTokens('', 100)
+  expect(result.wasTruncated).toBe(false)
+  expect(result.truncated).toBe('')
+  expect(result.estimatedTokens).toBe(0)
+})