Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/pr-quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ jobs:
with:
bun-version: latest

- name: Install dependencies
run: bun install

- name: Run policy tests
run: bun run check:policy

Expand Down
5 changes: 3 additions & 2 deletions src/constants/system.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,14 @@ export function getCLISyspromptPrefix(options?: {

/**
 * Check if the attribution header is enabled.
 *
 * Disabled by default to preserve prompt cache stability. Resolution order:
 *  1. CLAUDE_CODE_ATTRIBUTION_HEADER set to a value that `isEnvDefinedFalsy`
 *     recognises -> disabled, regardless of the feature flag.
 *  2. CLAUDE_CODE_ATTRIBUTION_HEADER set to 1 / true / yes / on
 *     (case-insensitive, surrounding whitespace ignored) -> enabled.
 *  3. Otherwise the cached GrowthBook flag `tengu_attribution_header` decides,
 *     falling back to `false`. The cached value may be stale.
 *
 * @returns `true` when the attribution header should be emitted.
 */
function isAttributionHeaderEnabled(): boolean {
  const override = process.env.CLAUDE_CODE_ATTRIBUTION_HEADER

  // An explicit opt-out always wins over the remote flag.
  if (isEnvDefinedFalsy(override)) {
    return false
  }

  // Explicit opt-in. Without this branch the env var could only ever disable
  // the header, because the flag default is now `false`.
  // NOTE(review): the accepted spellings are assumed to mirror the project's
  // truthy-env convention; confirm against the shared env helpers.
  if (
    override !== undefined &&
    ['1', 'true', 'yes', 'on'].includes(override.trim().toLowerCase())
  ) {
    return true
  }

  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_attribution_header', false)
}

/**
Expand Down
59 changes: 58 additions & 1 deletion src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,13 @@ import type { CanUseToolFn } from './hooks/useCanUseTool.js'
import { FallbackTriggeredError } from './services/api/withRetry.js'
import {
calculateTokenWarningState,
COMPACT_PRECHECK_FOLD_RATIO,
estimateTurnStartUsage,
getEffectiveContextWindowSize,
isAutoCompactEnabled,
shouldPreFold,
type AutoCompactTrackingState,
type CacheMetrics,
} from './services/compact/autoCompact.js'
import { buildPostCompactMessages } from './services/compact/compact.js'
/* eslint-disable @typescript-eslint/no-require-imports */
Expand Down Expand Up @@ -452,7 +457,47 @@ async function* queryLoop(
)

queryCheckpoint('query_autocompact_start')
const { compactionResult, consecutiveFailures } = await deps.autocompact(

// Turn-start pre-estimation: check whether accumulated context from the
// last turn has pushed us into dangerous territory BEFORE the next API
// call. When the 90% threshold is crossed and we haven't already folded
// this turn, force a pre-fold via the existing autocompact pipeline.
let forcePreFold = false
if (feature('TURN_START_PRE_ESTIMATION')) {
const effectiveWindow = getEffectiveContextWindowSize(
toolUseContext.options.mainLoopModel,
)
const { ratio, estimateTokens } = estimateTurnStartUsage(
messagesForQuery,
effectiveWindow,
)
if (
shouldPreFold(tracking, estimateTokens, effectiveWindow)
) {
forcePreFold = true
logForDebugging(
`turnStartPreEstimate: context at ${(ratio * 100).toFixed(1)}% ` +
`(~${estimateTokens.toLocaleString()} tokens) — forcing pre-fold before API call`,
{ level: 'warn' },
)
logEvent('tengu_turn_start_prefold_triggered', {
estimatedTokens: estimateTokens,
ratio: Math.round(ratio * 100),
})
} else if (ratio >= COMPACT_PRECHECK_FOLD_RATIO) {
// Above threshold but suppressed by alreadyFoldedThisTurn
logForDebugging(
`turnStartPreEstimate: context at ${(ratio * 100).toFixed(1)}% ` +
`but pre-fold suppressed (already folded this turn)`,
)
}
}

const {
compactionResult,
consecutiveFailures,
cacheMetrics,
} = await deps.autocompact(
messagesForQuery,
toolUseContext,
{
Expand All @@ -465,9 +510,20 @@ async function* queryLoop(
querySource,
tracking,
snipTokensFreed,
forcePreFold,
)
queryCheckpoint('query_autocompact_end')

// Report how much of the compaction request was served from the prompt cache.
// The 'query_autocompact_end' checkpoint is already recorded directly after
// the autocompact call above, so it is not emitted a second time here.
if (cacheMetrics) {
  logForDebugging(
    `cacheMetrics.compaction: hit=${cacheMetrics.cacheHitTokens.toLocaleString()} ` +
      `miss=${cacheMetrics.cacheMissTokens.toLocaleString()} ` +
      `ratio=${(cacheMetrics.cacheHitRatio * 100).toFixed(1)}%`,
  )
}

if (compactionResult) {
const {
preCompactTokenCount,
Expand All @@ -491,6 +547,7 @@ async function* queryLoop(
compactionUsage?.cache_read_input_tokens ?? 0,
compactionCacheCreationTokens:
compactionUsage?.cache_creation_input_tokens ?? 0,
compactionCacheHitRatio: cacheMetrics?.cacheHitRatio ?? 0,
compactionTotalTokens: compactionUsage
? compactionUsage.input_tokens +
(compactionUsage.cache_creation_input_tokens ?? 0) +
Expand Down
4 changes: 4 additions & 0 deletions src/server/proxy/handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { openaiResponsesToAnthropic } from './transform/openaiResponsesToAnthrop
import { openaiChatStreamToAnthropic } from './streaming/openaiChatStreamToAnthropic.js'
import { openaiResponsesStreamToAnthropic } from './streaming/openaiResponsesStreamToAnthropic.js'
import type { AnthropicRequest } from './transform/types.js'
import { normalizeModelStringForAPI } from '../../utils/model/model.js'
import { getProxyFetchOptions } from '../../utils/proxy.js'
import { getManualNetworkProxyUrl, loadNetworkSettings } from '../services/networkSettings.js'

Expand Down Expand Up @@ -128,6 +129,9 @@ export async function handleProxyRequest(req: Request, url: URL): Promise<Respon
}

body = ensureClaudeCodeAttribution(body)
// Strip [1m]/[2m] suffix before forwarding to third-party APIs —
// third-party providers don't understand the context-window suffix convention.
body.model = normalizeModelStringForAPI(body.model)

const isStream = body.stream === true
const baseUrl = config.baseUrl.replace(/\/+$/, '')
Expand Down
31 changes: 17 additions & 14 deletions src/server/services/providerService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
OPENAI_OFFICIAL_PROVIDER,
isOpenAIOfficialProviderId,
} from './openaiOfficialProvider.js'
import { normalizeModelStringForAPI } from '../../utils/model/model.js'
import { hahaOpenAIOAuthService } from './hahaOpenAIOAuthService.js'
import {
CURRENT_PROVIDER_INDEX_SCHEMA_VERSION,
Expand Down Expand Up @@ -477,9 +478,10 @@ export class ProviderService {
authStrategy: ProviderAuthStrategy,
networkSettings: NetworkSettings,
): Promise<ProviderTestStepResult> {
const normalizedModelId = normalizeModelStringForAPI(modelId)
const start = Date.now()
try {
const { url, headers, body } = buildDirectTestRequest(base, apiKey, modelId, format, authStrategy)
const { url, headers, body } = buildDirectTestRequest(base, apiKey, normalizedModelId, format, authStrategy)
const proxyOptions = getProxyFetchOptions({ proxyUrl: getManualNetworkProxyUrl(networkSettings) })
const response = await fetch(url, {
method: 'POST',
Expand All @@ -497,22 +499,22 @@ export class ProviderService {
if (resBody?.error && typeof resBody.error === 'object') {
error = ((resBody.error as Record<string, unknown>).message as string) || error
}
return { success: false, latencyMs, error, modelUsed: modelId, httpStatus: response.status }
return { success: false, latencyMs, error, modelUsed: normalizedModelId, httpStatus: response.status }
}

// Validate response structure
const valid = validateResponseBody(resBody, format)
if (!valid.ok) {
return { success: false, latencyMs, error: valid.error, modelUsed: modelId, httpStatus: response.status }
return { success: false, latencyMs, error: valid.error, modelUsed: normalizedModelId, httpStatus: response.status }
}

return { success: true, latencyMs, modelUsed: valid.model || modelId, httpStatus: response.status }
return { success: true, latencyMs, modelUsed: valid.model || normalizedModelId, httpStatus: response.status }
} catch (err: unknown) {
const latencyMs = Date.now() - start
if (err instanceof DOMException && err.name === 'TimeoutError') {
return { success: false, latencyMs, error: `Request timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: modelId }
return { success: false, latencyMs, error: `Request timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: normalizedModelId }
}
return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: modelId }
return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: normalizedModelId }
}
}

Expand All @@ -524,11 +526,12 @@ export class ProviderService {
format: 'openai_chat' | 'openai_responses',
networkSettings: NetworkSettings,
): Promise<ProviderTestStepResult> {
const normalizedModelId = normalizeModelStringForAPI(modelId)
const start = Date.now()
try {
// Build an Anthropic Messages API request (same shape as what CLI sends)
const anthropicReq: AnthropicRequest = {
model: modelId,
model: normalizedModelId,
max_tokens: 64,
messages: [{ role: 'user', content: 'Say "ok" and nothing else.' }],
}
Expand Down Expand Up @@ -557,31 +560,31 @@ export class ProviderService {
if (!response.ok) {
const latencyMs = Date.now() - start
const errText = await response.text().catch(() => '')
return { success: false, latencyMs, modelUsed: modelId, httpStatus: response.status,
return { success: false, latencyMs, modelUsed: normalizedModelId, httpStatus: response.status,
error: `Upstream HTTP ${response.status}: ${errText.slice(0, 200)}` }
}

// Transform response back to Anthropic format
const responseBody = await response.json()
const anthropicRes = format === 'openai_chat'
? openaiChatToAnthropic(responseBody, modelId)
: openaiResponsesToAnthropic(responseBody, modelId)
? openaiChatToAnthropic(responseBody, normalizedModelId)
: openaiResponsesToAnthropic(responseBody, normalizedModelId)

const latencyMs = Date.now() - start

// Validate the final Anthropic response
if (anthropicRes.type !== 'message' || !Array.isArray(anthropicRes.content)) {
return { success: false, latencyMs, modelUsed: modelId,
return { success: false, latencyMs, modelUsed: normalizedModelId,
error: 'Proxy transform produced invalid Anthropic response' }
}

return { success: true, latencyMs, modelUsed: anthropicRes.model || modelId, httpStatus: response.status }
return { success: true, latencyMs, modelUsed: anthropicRes.model || normalizedModelId, httpStatus: response.status }
} catch (err: unknown) {
const latencyMs = Date.now() - start
if (err instanceof DOMException && err.name === 'TimeoutError') {
return { success: false, latencyMs, error: `Proxy pipeline timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: modelId }
return { success: false, latencyMs, error: `Proxy pipeline timed out (${Math.round(networkSettings.aiRequestTimeoutMs / 1000)}s)`, modelUsed: normalizedModelId }
}
return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: modelId }
return { success: false, latencyMs, error: err instanceof Error ? err.message : String(err), modelUsed: normalizedModelId }
}
}
}
Expand Down
Loading
Loading