From d1a86933c14a47920691612385c6383f55ef73a5 Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 00:34:53 +0700 Subject: [PATCH 1/9] feat(settings): add ttftTimeoutMs and ttftCooldownMs defaults --- src/lib/localDb.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib/localDb.js b/src/lib/localDb.js index 08848083..f15ec374 100644 --- a/src/lib/localDb.js +++ b/src/lib/localDb.js @@ -72,6 +72,8 @@ const defaultData = { outboundProxyUrl: "", outboundNoProxy: "", mitmRouterBaseUrl: DEFAULT_MITM_ROUTER_BASE, + ttftTimeoutMs: 0, + ttftCooldownMs: 15000, }, pricing: {} // NEW: pricing configuration }; @@ -110,6 +112,8 @@ function cloneDefaultData() { outboundProxyUrl: "", outboundNoProxy: "", mitmRouterBaseUrl: DEFAULT_MITM_ROUTER_BASE, + ttftTimeoutMs: 0, + ttftCooldownMs: 15000, }, pricing: {}, }; From 778ef982f2f76e37050f93aae7434f8e6a7c93d3 Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 00:37:45 +0700 Subject: [PATCH 2/9] feat(fallback): handle ttft_timeout in checkFallbackError --- .sisyphus/evidence/task-2-fallback.txt | 22 ++++++++++++++++++++++ open-sse/services/accountFallback.js | 6 +++++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 .sisyphus/evidence/task-2-fallback.txt diff --git a/.sisyphus/evidence/task-2-fallback.txt b/.sisyphus/evidence/task-2-fallback.txt new file mode 100644 index 00000000..4bda3b0b --- /dev/null +++ b/.sisyphus/evidence/task-2-fallback.txt @@ -0,0 +1,22 @@ +diff --git a/open-sse/services/accountFallback.js b/open-sse/services/accountFallback.js +index 957655c..fba0c1c 100644 +--- a/open-sse/services/accountFallback.js ++++ b/open-sse/services/accountFallback.js +@@ -18,12 +18,16 @@ export function getQuotaCooldown(backoffLevel = 0) { + * @param {number} backoffLevel - Current backoff level for exponential backoff + * @returns {{ shouldFallback: boolean, cooldownMs: number, newBackoffLevel?: number }} + */ +-export function checkFallbackError(status, errorText, 
backoffLevel = 0) { ++export function checkFallbackError(status, errorText, backoffLevel = 0, options = {}) { + // Check error message FIRST - specific patterns take priority over status codes + if (errorText) { + const errorStr = typeof errorText === "string" ? errorText : JSON.stringify(errorText); + const lowerError = errorStr.toLowerCase(); + ++ if (lowerError.includes("ttft_timeout")) { ++ return { shouldFallback: true, cooldownMs: options?.ttftCooldownMs ?? 15000 }; ++ } ++ + if (lowerError.includes("no credentials")) { + return { shouldFallback: true, cooldownMs: COOLDOWN_MS.notFound }; + } diff --git a/open-sse/services/accountFallback.js b/open-sse/services/accountFallback.js index 957655c3..fba0c1c1 100644 --- a/open-sse/services/accountFallback.js +++ b/open-sse/services/accountFallback.js @@ -18,12 +18,16 @@ export function getQuotaCooldown(backoffLevel = 0) { * @param {number} backoffLevel - Current backoff level for exponential backoff * @returns {{ shouldFallback: boolean, cooldownMs: number, newBackoffLevel?: number }} */ -export function checkFallbackError(status, errorText, backoffLevel = 0) { +export function checkFallbackError(status, errorText, backoffLevel = 0, options = {}) { // Check error message FIRST - specific patterns take priority over status codes if (errorText) { const errorStr = typeof errorText === "string" ? errorText : JSON.stringify(errorText); const lowerError = errorStr.toLowerCase(); + if (lowerError.includes("ttft_timeout")) { + return { shouldFallback: true, cooldownMs: options?.ttftCooldownMs ?? 
15000 }; + } + if (lowerError.includes("no credentials")) { return { shouldFallback: true, cooldownMs: COOLDOWN_MS.notFound }; } From 6353bb7c3d969f2985a59115ea2bb7da034d0ea9 Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 00:38:22 +0700 Subject: [PATCH 3/9] fix(streaming): defer onRequestSuccess to first-chunk arrival --- .sisyphus/evidence/task-3-streaming.txt | 36 +++++++++++++++++++ .../handlers/chatCore/streamingHandler.js | 23 ++++++++++-- 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 .sisyphus/evidence/task-3-streaming.txt diff --git a/.sisyphus/evidence/task-3-streaming.txt b/.sisyphus/evidence/task-3-streaming.txt new file mode 100644 index 00000000..4af28ece --- /dev/null +++ b/.sisyphus/evidence/task-3-streaming.txt @@ -0,0 +1,36 @@ +diff --git a/open-sse/handlers/chatCore/streamingHandler.js b/open-sse/handlers/chatCore/streamingHandler.js +index 03d12c1..c5a3002 100644 +--- a/open-sse/handlers/chatCore/streamingHandler.js ++++ b/open-sse/handlers/chatCore/streamingHandler.js +@@ -40,10 +40,29 @@ function buildTransformStream({ provider, sourceFormat, targetFormat, userAgent, + * Handle streaming response — pipe provider SSE through transform stream to client. 
+ */ + export function handleStreamingResponse({ providerResponse, provider, model, sourceFormat, targetFormat, userAgent, body, stream, translatedBody, finalBody, requestStartTime, connectionId, apiKey, clientRawRequest, onRequestSuccess, reqLogger, toolNameMap, streamController, onStreamComplete }) { +- if (onRequestSuccess) onRequestSuccess(); ++ let responseToStream = providerResponse; ++ if (onRequestSuccess) { ++ let firstChunkFired = false; ++ const original = providerResponse.body; ++ const wrapped = new TransformStream({ ++ transform(chunk, controller) { ++ if (!firstChunkFired) { ++ firstChunkFired = true; ++ onRequestSuccess(); ++ } ++ controller.enqueue(chunk); ++ } ++ }); ++ original.pipeTo(wrapped.writable).catch(() => {}); ++ responseToStream = new Response(wrapped.readable, { ++ status: providerResponse.status, ++ statusText: providerResponse.statusText, ++ headers: providerResponse.headers ++ }); ++ } + + const transformStream = buildTransformStream({ provider, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, model, connectionId, body, onStreamComplete, apiKey }); +- const transformedBody = pipeWithDisconnect(providerResponse, transformStream, streamController); ++ const transformedBody = pipeWithDisconnect(responseToStream, transformStream, streamController); + + const streamDetailId = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`; + saveRequestDetail(buildRequestDetail({ diff --git a/open-sse/handlers/chatCore/streamingHandler.js b/open-sse/handlers/chatCore/streamingHandler.js index 03d12c13..c5a3002d 100644 --- a/open-sse/handlers/chatCore/streamingHandler.js +++ b/open-sse/handlers/chatCore/streamingHandler.js @@ -40,10 +40,29 @@ function buildTransformStream({ provider, sourceFormat, targetFormat, userAgent, * Handle streaming response — pipe provider SSE through transform stream to client. 
*/ export function handleStreamingResponse({ providerResponse, provider, model, sourceFormat, targetFormat, userAgent, body, stream, translatedBody, finalBody, requestStartTime, connectionId, apiKey, clientRawRequest, onRequestSuccess, reqLogger, toolNameMap, streamController, onStreamComplete }) { - if (onRequestSuccess) onRequestSuccess(); + let responseToStream = providerResponse; + if (onRequestSuccess) { + let firstChunkFired = false; + const original = providerResponse.body; + const wrapped = new TransformStream({ + transform(chunk, controller) { + if (!firstChunkFired) { + firstChunkFired = true; + onRequestSuccess(); + } + controller.enqueue(chunk); + } + }); + original.pipeTo(wrapped.writable).catch(() => {}); + responseToStream = new Response(wrapped.readable, { + status: providerResponse.status, + statusText: providerResponse.statusText, + headers: providerResponse.headers + }); + } const transformStream = buildTransformStream({ provider, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, model, connectionId, body, onStreamComplete, apiKey }); - const transformedBody = pipeWithDisconnect(providerResponse, transformStream, streamController); + const transformedBody = pipeWithDisconnect(responseToStream, transformStream, streamController); const streamDetailId = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`; saveRequestDetail(buildRequestDetail({ From b683acc0323d4c8ceb04a6995489f9301ee921f5 Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 00:38:50 +0700 Subject: [PATCH 4/9] feat(chatcore): add TTFT deadline race before streaming response --- .sisyphus/evidence/task-4-chatcore.txt | 73 ++++++++++++++++++++++++++ open-sse/handlers/chatCore.js | 55 ++++++++++++++++++- 2 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 .sisyphus/evidence/task-4-chatcore.txt diff --git a/.sisyphus/evidence/task-4-chatcore.txt b/.sisyphus/evidence/task-4-chatcore.txt new file mode 100644 index 00000000..eabdacf3 --- 
/dev/null +++ b/.sisyphus/evidence/task-4-chatcore.txt @@ -0,0 +1,73 @@ +diff --git a/open-sse/handlers/chatCore.js b/open-sse/handlers/chatCore.js +index fc564d3..5a48af9 100644 +--- a/open-sse/handlers/chatCore.js ++++ b/open-sse/handlers/chatCore.js +@@ -24,7 +24,7 @@ import { detectClientTool, isNativePassthrough } from "../utils/clientDetector.j + * @param {object} options.credentials - Provider credentials + * @param {string} options.sourceFormatOverride - Override detected source format (e.g. "openai-responses") + */ +-export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, sourceFormatOverride }) { ++export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, sourceFormatOverride, ttftTimeoutMs = 0, ttftCooldownMs = 15000 }) { + const { provider, model } = modelInfo; + const requestStartTime = Date.now(); + +@@ -218,7 +218,58 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred + + // Streaming response + const { onStreamComplete } = buildOnStreamComplete({ ...sharedCtx }); +- return handleStreamingResponse({ ...sharedCtx, providerResponse, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, streamController, onStreamComplete }); ++ ++ // TTFT deadline race: if enabled, race first chunk vs deadline before handing off to streaming handler ++ let responseToStream = providerResponse; ++ if (stream && ttftTimeoutMs > 0) { ++ const ttftResult = await raceTtftDeadline(providerResponse, ttftTimeoutMs, streamController); ++ if (ttftResult.timedOut) { ++ trackPendingRequest(model, provider, connectionId, false, true); ++ appendRequestLog({ model, provider, connectionId, status: "TTFT_TIMEOUT" }).catch(() => {}); ++ console.log(`[TTFT] 
${provider}/${model} exceeded ${ttftTimeoutMs}ms`); ++ return createErrorResult(408, "ttft_timeout"); ++ } ++ responseToStream = ttftResult.response; ++ } ++ ++ return handleStreamingResponse({ ...sharedCtx, providerResponse: responseToStream, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, streamController, onStreamComplete }); ++} ++ ++async function raceTtftDeadline(providerResponse, ttftTimeoutMs, streamController) { ++ return new Promise((resolve) => { ++ const timer = setTimeout(() => { ++ streamController.abort(); ++ resolve({ timedOut: true }); ++ }, ttftTimeoutMs); ++ ++ const reader = providerResponse.body.getReader(); ++ reader.read().then(({ value, done }) => { ++ clearTimeout(timer); ++ const newBody = new ReadableStream({ ++ start(controller) { ++ if (!done && value) controller.enqueue(value); ++ if (done) { controller.close(); return; } ++ }, ++ async pull(controller) { ++ const { value: chunk, done: isDone } = await reader.read(); ++ if (isDone) { controller.close(); return; } ++ controller.enqueue(chunk); ++ }, ++ cancel() { reader.cancel(); } ++ }); ++ resolve({ ++ timedOut: false, ++ response: new Response(newBody, { ++ status: providerResponse.status, ++ statusText: providerResponse.statusText, ++ headers: providerResponse.headers ++ }) ++ }); ++ }).catch(() => { ++ clearTimeout(timer); ++ resolve({ timedOut: true }); ++ }); ++ }); + } + + export function isTokenExpiringSoon(expiresAt, bufferMs = 5 * 60 * 1000) { diff --git a/open-sse/handlers/chatCore.js b/open-sse/handlers/chatCore.js index 780a4fee..dba1b214 100644 --- a/open-sse/handlers/chatCore.js +++ b/open-sse/handlers/chatCore.js @@ -24,7 +24,7 @@ import { detectClientTool, isNativePassthrough } from "../utils/clientDetector.j * @param {object} options.credentials - Provider credentials * @param {string} options.sourceFormatOverride - Override detected source format (e.g. 
"openai-responses") */ -export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, sourceFormatOverride, providerThinking }) { +export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, sourceFormatOverride, providerThinking, ttftTimeoutMs = 0, ttftCooldownMs = 15000 }) { const { provider, model } = modelInfo; const requestStartTime = Date.now(); @@ -237,7 +237,58 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred // Streaming response const { onStreamComplete } = buildOnStreamComplete({ ...sharedCtx }); - return handleStreamingResponse({ ...sharedCtx, providerResponse, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, streamController, onStreamComplete }); + + // TTFT deadline race: if enabled, race first chunk vs deadline before handing off to streaming handler + let responseToStream = providerResponse; + if (stream && ttftTimeoutMs > 0) { + const ttftResult = await raceTtftDeadline(providerResponse, ttftTimeoutMs, streamController); + if (ttftResult.timedOut) { + trackPendingRequest(model, provider, connectionId, false, true); + appendRequestLog({ model, provider, connectionId, status: "TTFT_TIMEOUT" }).catch(() => {}); + console.log(`[TTFT] ${provider}/${model} exceeded ${ttftTimeoutMs}ms`); + return createErrorResult(408, "ttft_timeout"); + } + responseToStream = ttftResult.response; + } + + return handleStreamingResponse({ ...sharedCtx, providerResponse: responseToStream, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, streamController, onStreamComplete }); +} + +async function raceTtftDeadline(providerResponse, ttftTimeoutMs, streamController) { + return new Promise((resolve) => { + const timer = setTimeout(() => { + 
streamController.abort(); + resolve({ timedOut: true }); + }, ttftTimeoutMs); + + const reader = providerResponse.body.getReader(); + reader.read().then(({ value, done }) => { + clearTimeout(timer); + const newBody = new ReadableStream({ + start(controller) { + if (!done && value) controller.enqueue(value); + if (done) { controller.close(); return; } + }, + async pull(controller) { + const { value: chunk, done: isDone } = await reader.read(); + if (isDone) { controller.close(); return; } + controller.enqueue(chunk); + }, + cancel() { reader.cancel(); } + }); + resolve({ + timedOut: false, + response: new Response(newBody, { + status: providerResponse.status, + statusText: providerResponse.statusText, + headers: providerResponse.headers + }) + }); + }).catch(() => { + clearTimeout(timer); + resolve({ timedOut: true }); + }); + }); } export function isTokenExpiringSoon(expiresAt, bufferMs = 5 * 60 * 1000) { From b34d922f7a03dddd45efc905a8eb4598ed4ae0e2 Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 00:43:12 +0700 Subject: [PATCH 5/9] feat(chat): thread TTFT settings through to chatCore and fallback --- .sisyphus/evidence/task-5-wiring.txt | 42 ++++++++++++++++++++++++++++ src/sse/handlers/chat.js | 4 ++- src/sse/services/auth.js | 4 +-- 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 .sisyphus/evidence/task-5-wiring.txt diff --git a/.sisyphus/evidence/task-5-wiring.txt b/.sisyphus/evidence/task-5-wiring.txt new file mode 100644 index 00000000..4c8fac51 --- /dev/null +++ b/.sisyphus/evidence/task-5-wiring.txt @@ -0,0 +1,42 @@ +diff --git a/src/sse/handlers/chat.js b/src/sse/handlers/chat.js +index bd45748..0b2c0bd 100644 +--- a/src/sse/handlers/chat.js ++++ b/src/sse/handlers/chat.js +@@ -201,6 +201,8 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re + ccFilterNaming: !!chatSettings.ccFilterNaming, + // Detect source format by endpoint + body + sourceFormatOverride: request?.url ? 
detectFormatByEndpoint(new URL(request.url).pathname, body) : null, ++ ttftTimeoutMs: chatSettings.ttftTimeoutMs || 0, ++ ttftCooldownMs: chatSettings.ttftCooldownMs || 15000, + onCredentialsRefreshed: async (newCreds) => { + await updateProviderCredentials(credentials.connectionId, { + accessToken: newCreds.accessToken, +@@ -217,7 +219,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re + if (result.success) return result.response; + + // Mark account unavailable (auto-calculates cooldown with exponential backoff) +- const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model); ++ const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model, { ttftCooldownMs: chatSettings.ttftCooldownMs || 15000 }); + + if (shouldFallback) { + log.warn("AUTH", `Account ${credentials.connectionName} unavailable (${result.status}), trying fallback`); +diff --git a/src/sse/services/auth.js b/src/sse/services/auth.js +index 3f426c1..1246886 100644 +--- a/src/sse/services/auth.js ++++ b/src/sse/services/auth.js +@@ -168,13 +168,13 @@ export async function getProviderCredentials(provider, excludeConnectionIds = nu + * @param {string|null} model - The specific model that triggered the error + * @returns {{ shouldFallback: boolean, cooldownMs: number }} + */ +-export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = null) { ++export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = null, options = {}) { + if (!connectionId || connectionId === "noauth") return { shouldFallback: false, cooldownMs: 0 }; + const connections = await getProviderConnections({ provider }); + const conn = connections.find(c => c.id === connectionId); + const backoffLevel = conn?.backoffLevel || 0; + +- const { shouldFallback, cooldownMs, newBackoffLevel } = 
checkFallbackError(status, errorText, backoffLevel); ++ const { shouldFallback, cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, backoffLevel, options); + if (!shouldFallback) return { shouldFallback: false, cooldownMs: 0 }; + + const reason = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error"; diff --git a/src/sse/handlers/chat.js b/src/sse/handlers/chat.js index b2e64a1e..b1edef0d 100644 --- a/src/sse/handlers/chat.js +++ b/src/sse/handlers/chat.js @@ -203,6 +203,8 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re providerThinking, // Detect source format by endpoint + body sourceFormatOverride: request?.url ? detectFormatByEndpoint(new URL(request.url).pathname, body) : null, + ttftTimeoutMs: chatSettings.ttftTimeoutMs || 0, + ttftCooldownMs: chatSettings.ttftCooldownMs || 15000, onCredentialsRefreshed: async (newCreds) => { await updateProviderCredentials(credentials.connectionId, { accessToken: newCreds.accessToken, @@ -219,7 +221,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re if (result.success) return result.response; // Mark account unavailable (auto-calculates cooldown with exponential backoff) - const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model); + const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model, { ttftCooldownMs: chatSettings.ttftCooldownMs || 15000 }); if (shouldFallback) { log.warn("AUTH", `Account ${credentials.connectionName} unavailable (${result.status}), trying fallback`); diff --git a/src/sse/services/auth.js b/src/sse/services/auth.js index 9f13ffbe..e4c377f8 100644 --- a/src/sse/services/auth.js +++ b/src/sse/services/auth.js @@ -169,13 +169,13 @@ export async function getProviderCredentials(provider, excludeConnectionIds = nu * @param {string|null} model - The specific model 
that triggered the error * @returns {{ shouldFallback: boolean, cooldownMs: number }} */ -export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = null) { +export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = null, options = {}) { if (!connectionId || connectionId === "noauth") return { shouldFallback: false, cooldownMs: 0 }; const connections = await getProviderConnections({ provider }); const conn = connections.find(c => c.id === connectionId); const backoffLevel = conn?.backoffLevel || 0; - const { shouldFallback, cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, backoffLevel); + const { shouldFallback, cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, backoffLevel, options); if (!shouldFallback) return { shouldFallback: false, cooldownMs: 0 }; const reason = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error"; From 769e89eb832cbdbb3b2330970595b71fc43f02fa Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 00:47:31 +0700 Subject: [PATCH 6/9] feat(ui): add TTFT timeout controls to profile settings --- src/app/(dashboard)/dashboard/profile/page.js | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/src/app/(dashboard)/dashboard/profile/page.js b/src/app/(dashboard)/dashboard/profile/page.js index 27310276..1bd1b97e 100644 --- a/src/app/(dashboard)/dashboard/profile/page.js +++ b/src/app/(dashboard)/dashboard/profile/page.js @@ -223,6 +223,36 @@ export default function ProfilePage() { } }; + const updateTtftTimeout = async (val) => { + const num = parseInt(val); + if (isNaN(num) || num < 0) return; + try { + const res = await fetch("/api/settings", { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ ttftTimeoutMs: num }), + }); + if (res.ok) setSettings(prev => ({ ...prev, ttftTimeoutMs: num })); + } catch (err) { + 
console.error("Failed to update TTFT timeout:", err); + } + }; + + const updateTtftCooldown = async (val) => { + const num = parseInt(val); + if (isNaN(num) || num < 1000) return; + try { + const res = await fetch("/api/settings", { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ ttftCooldownMs: num }), + }); + if (res.ok) setSettings(prev => ({ ...prev, ttftCooldownMs: num })); + } catch (err) { + console.error("Failed to update TTFT cooldown:", err); + } + }; + const updateRequireLogin = async (requireLogin) => { try { const res = await fetch("/api/settings", { @@ -548,6 +578,44 @@ export default function ProfilePage() { /> + {/* TTFT Timeout */} +
+
+

TTFT Timeout (ms)

+

+ Maximum time to wait for first token (0 = disabled) +

+
+ updateTtftTimeout(e.target.value)} + disabled={loading} + className="w-24 text-center" + /> +
+ + {/* TTFT Cooldown */} +
+
+

TTFT Cooldown (ms)

+

+ How long to lock slow providers after a timeout +

+
+ updateTtftCooldown(e.target.value)} + disabled={loading} + className="w-24 text-center" + /> +
+

{settings.fallbackStrategy === "round-robin" ? `Currently distributing requests across all available accounts with ${settings.stickyRoundRobinLimit || 3} calls per account.` From e126cc30590dd3ddb7643440780ddcd2c1f38d0f Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 02:35:17 +0700 Subject: [PATCH 7/9] fix(chatcore): align TTFT timeout with request-start timing Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- open-sse/handlers/chatCore.js | 40 ++++++++++++++++--- .../handlers/chatCore/streamingHandler.js | 6 +-- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/open-sse/handlers/chatCore.js b/open-sse/handlers/chatCore.js index dba1b214..e82f2087 100644 --- a/open-sse/handlers/chatCore.js +++ b/open-sse/handlers/chatCore.js @@ -236,22 +236,52 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred } // Streaming response - const { onStreamComplete } = buildOnStreamComplete({ ...sharedCtx }); + const streamDetailId = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`; + const { onStreamComplete } = buildOnStreamComplete({ ...sharedCtx, streamDetailId }); - // TTFT deadline race: if enabled, race first chunk vs deadline before handing off to streaming handler + // TTFT deadline race: if enabled, measure from full request start → first token let responseToStream = providerResponse; if (stream && ttftTimeoutMs > 0) { - const ttftResult = await raceTtftDeadline(providerResponse, ttftTimeoutMs, streamController); + const elapsedBeforeStreamMs = Date.now() - requestStartTime; + const remainingTtftMs = ttftTimeoutMs - elapsedBeforeStreamMs; + + if (remainingTtftMs <= 0) { + streamController.abort(); + trackPendingRequest(model, provider, connectionId, false, true); + appendRequestLog({ model, provider, connectionId, status: "TTFT_TIMEOUT" }).catch(() => {}); + saveRequestDetail(buildRequestDetail({ + provider, model, connectionId, + latency: { ttft: 
Date.now() - requestStartTime, total: Date.now() - requestStartTime }, + tokens: { prompt_tokens: 0, completion_tokens: 0 }, + request: extractRequestConfig(body, stream), + providerRequest: finalBody || translatedBody || null, + response: { error: "ttft_timeout", message: "Timed out before first token; fell back to the next account.", thinking: null }, + status: "error" + }, { id: streamDetailId })).catch(() => {}); + console.log(`[TTFT] ${provider}/${model} exceeded ${ttftTimeoutMs}ms before first token`); + return createErrorResult(408, "ttft_timeout"); + } + + const ttftResult = await raceTtftDeadline(providerResponse, remainingTtftMs, streamController); if (ttftResult.timedOut) { trackPendingRequest(model, provider, connectionId, false, true); appendRequestLog({ model, provider, connectionId, status: "TTFT_TIMEOUT" }).catch(() => {}); - console.log(`[TTFT] ${provider}/${model} exceeded ${ttftTimeoutMs}ms`); + saveRequestDetail(buildRequestDetail({ + provider, model, connectionId, + latency: { ttft: Date.now() - requestStartTime, total: Date.now() - requestStartTime }, + tokens: { prompt_tokens: 0, completion_tokens: 0 }, + request: extractRequestConfig(body, stream), + providerRequest: finalBody || translatedBody || null, + response: { error: "ttft_timeout", message: "Timed out before first token; fell back to the next account.", thinking: null }, + status: "error" + }, { id: streamDetailId })).catch(() => {}); + console.log(`[TTFT] ${provider}/${model} exceeded ${ttftTimeoutMs}ms before first token`); return createErrorResult(408, "ttft_timeout"); } responseToStream = ttftResult.response; } - return handleStreamingResponse({ ...sharedCtx, providerResponse: responseToStream, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, streamController, onStreamComplete }); + return handleStreamingResponse({ ...sharedCtx, providerResponse: responseToStream, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, streamController, onStreamComplete, 
streamDetailId }); } async function raceTtftDeadline(providerResponse, ttftTimeoutMs, streamController) { diff --git a/open-sse/handlers/chatCore/streamingHandler.js b/open-sse/handlers/chatCore/streamingHandler.js index c5a3002d..6e6813f4 100644 --- a/open-sse/handlers/chatCore/streamingHandler.js +++ b/open-sse/handlers/chatCore/streamingHandler.js @@ -39,7 +39,7 @@ function buildTransformStream({ provider, sourceFormat, targetFormat, userAgent, /** * Handle streaming response — pipe provider SSE through transform stream to client. */ -export function handleStreamingResponse({ providerResponse, provider, model, sourceFormat, targetFormat, userAgent, body, stream, translatedBody, finalBody, requestStartTime, connectionId, apiKey, clientRawRequest, onRequestSuccess, reqLogger, toolNameMap, streamController, onStreamComplete }) { +export function handleStreamingResponse({ providerResponse, provider, model, sourceFormat, targetFormat, userAgent, body, stream, translatedBody, finalBody, requestStartTime, connectionId, apiKey, clientRawRequest, onRequestSuccess, reqLogger, toolNameMap, streamController, onStreamComplete, streamDetailId }) { let responseToStream = providerResponse; if (onRequestSuccess) { let firstChunkFired = false; @@ -64,7 +64,6 @@ export function handleStreamingResponse({ providerResponse, provider, model, sou const transformStream = buildTransformStream({ provider, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, model, connectionId, body, onStreamComplete, apiKey }); const transformedBody = pipeWithDisconnect(responseToStream, transformStream, streamController); - const streamDetailId = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`; saveRequestDetail(buildRequestDetail({ provider, model, connectionId, latency: { ttft: 0, total: Date.now() - requestStartTime }, @@ -87,8 +86,7 @@ export function handleStreamingResponse({ providerResponse, provider, model, sou /** * Build onStreamComplete callback for streaming usage 
tracking. */ -export function buildOnStreamComplete({ provider, model, connectionId, apiKey, requestStartTime, body, stream, finalBody, translatedBody, clientRawRequest }) { - const streamDetailId = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`; +export function buildOnStreamComplete({ provider, model, connectionId, apiKey, requestStartTime, body, stream, finalBody, translatedBody, clientRawRequest, streamDetailId }) { const onStreamComplete = (contentObj, usage, ttftAt) => { const latency = { From ff27be9b01ecd30efb430b2397e356d5f4a40879 Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 02:35:36 +0700 Subject: [PATCH 8/9] feat(usage): show TTFT timeout fallback message in request details Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- .../dashboard/usage/components/RequestDetailsTab.js | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/app/(dashboard)/dashboard/usage/components/RequestDetailsTab.js b/src/app/(dashboard)/dashboard/usage/components/RequestDetailsTab.js index 4f527361..2e7c0e71 100644 --- a/src/app/(dashboard)/dashboard/usage/components/RequestDetailsTab.js +++ b/src/app/(dashboard)/dashboard/usage/components/RequestDetailsTab.js @@ -285,6 +285,11 @@ export default function RequestDetailsTab() {

TTFT: {detail.latency?.ttft || 0}ms
Total: {detail.latency?.total || 0}ms
+ {detail.response?.error === "ttft_timeout" && ( +
+ Timed out before first token; fell back to the next account.
+ )}
@@ -397,6 +402,11 @@ export default function RequestDetailsTab() { )} + {selectedDetail.response?.error === "ttft_timeout" && ( +
+ Timed out before first token; fell back to the next account. +
+ )} {selectedDetail.response?.thinking && (

From 9a2226ed64d911a4755ae02ba3ed7ef8b7e714a4 Mon Sep 17 00:00:00 2001 From: NaNomicon Date: Mon, 13 Apr 2026 13:58:46 +0700 Subject: [PATCH 9/9] chore: remove planning evidence from TTFT branch --- .sisyphus/evidence/task-2-fallback.txt | 22 -------- .sisyphus/evidence/task-3-streaming.txt | 36 ------------ .sisyphus/evidence/task-4-chatcore.txt | 73 ------------------------- .sisyphus/evidence/task-5-wiring.txt | 42 -------------- 4 files changed, 173 deletions(-) delete mode 100644 .sisyphus/evidence/task-2-fallback.txt delete mode 100644 .sisyphus/evidence/task-3-streaming.txt delete mode 100644 .sisyphus/evidence/task-4-chatcore.txt delete mode 100644 .sisyphus/evidence/task-5-wiring.txt diff --git a/.sisyphus/evidence/task-2-fallback.txt b/.sisyphus/evidence/task-2-fallback.txt deleted file mode 100644 index 4bda3b0b..00000000 --- a/.sisyphus/evidence/task-2-fallback.txt +++ /dev/null @@ -1,22 +0,0 @@ -diff --git a/open-sse/services/accountFallback.js b/open-sse/services/accountFallback.js -index 957655c..fba0c1c 100644 ---- a/open-sse/services/accountFallback.js -+++ b/open-sse/services/accountFallback.js -@@ -18,12 +18,16 @@ export function getQuotaCooldown(backoffLevel = 0) { - * @param {number} backoffLevel - Current backoff level for exponential backoff - * @returns {{ shouldFallback: boolean, cooldownMs: number, newBackoffLevel?: number }} - */ --export function checkFallbackError(status, errorText, backoffLevel = 0) { -+export function checkFallbackError(status, errorText, backoffLevel = 0, options = {}) { - // Check error message FIRST - specific patterns take priority over status codes - if (errorText) { - const errorStr = typeof errorText === "string" ? errorText : JSON.stringify(errorText); - const lowerError = errorStr.toLowerCase(); - -+ if (lowerError.includes("ttft_timeout")) { -+ return { shouldFallback: true, cooldownMs: options?.ttftCooldownMs ?? 
15000 }; -+ } -+ - if (lowerError.includes("no credentials")) { - return { shouldFallback: true, cooldownMs: COOLDOWN_MS.notFound }; - } diff --git a/.sisyphus/evidence/task-3-streaming.txt b/.sisyphus/evidence/task-3-streaming.txt deleted file mode 100644 index 4af28ece..00000000 --- a/.sisyphus/evidence/task-3-streaming.txt +++ /dev/null @@ -1,36 +0,0 @@ -diff --git a/open-sse/handlers/chatCore/streamingHandler.js b/open-sse/handlers/chatCore/streamingHandler.js -index 03d12c1..c5a3002 100644 ---- a/open-sse/handlers/chatCore/streamingHandler.js -+++ b/open-sse/handlers/chatCore/streamingHandler.js -@@ -40,10 +40,29 @@ function buildTransformStream({ provider, sourceFormat, targetFormat, userAgent, - * Handle streaming response — pipe provider SSE through transform stream to client. - */ - export function handleStreamingResponse({ providerResponse, provider, model, sourceFormat, targetFormat, userAgent, body, stream, translatedBody, finalBody, requestStartTime, connectionId, apiKey, clientRawRequest, onRequestSuccess, reqLogger, toolNameMap, streamController, onStreamComplete }) { -- if (onRequestSuccess) onRequestSuccess(); -+ let responseToStream = providerResponse; -+ if (onRequestSuccess) { -+ let firstChunkFired = false; -+ const original = providerResponse.body; -+ const wrapped = new TransformStream({ -+ transform(chunk, controller) { -+ if (!firstChunkFired) { -+ firstChunkFired = true; -+ onRequestSuccess(); -+ } -+ controller.enqueue(chunk); -+ } -+ }); -+ original.pipeTo(wrapped.writable).catch(() => {}); -+ responseToStream = new Response(wrapped.readable, { -+ status: providerResponse.status, -+ statusText: providerResponse.statusText, -+ headers: providerResponse.headers -+ }); -+ } - - const transformStream = buildTransformStream({ provider, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, model, connectionId, body, onStreamComplete, apiKey }); -- const transformedBody = pipeWithDisconnect(providerResponse, transformStream, 
streamController); -+ const transformedBody = pipeWithDisconnect(responseToStream, transformStream, streamController); - - const streamDetailId = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`; - saveRequestDetail(buildRequestDetail({ diff --git a/.sisyphus/evidence/task-4-chatcore.txt b/.sisyphus/evidence/task-4-chatcore.txt deleted file mode 100644 index eabdacf3..00000000 --- a/.sisyphus/evidence/task-4-chatcore.txt +++ /dev/null @@ -1,73 +0,0 @@ -diff --git a/open-sse/handlers/chatCore.js b/open-sse/handlers/chatCore.js -index fc564d3..5a48af9 100644 ---- a/open-sse/handlers/chatCore.js -+++ b/open-sse/handlers/chatCore.js -@@ -24,7 +24,7 @@ import { detectClientTool, isNativePassthrough } from "../utils/clientDetector.j - * @param {object} options.credentials - Provider credentials - * @param {string} options.sourceFormatOverride - Override detected source format (e.g. "openai-responses") - */ --export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, sourceFormatOverride }) { -+export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, sourceFormatOverride, ttftTimeoutMs = 0, ttftCooldownMs = 15000 }) { - const { provider, model } = modelInfo; - const requestStartTime = Date.now(); - -@@ -218,7 +218,58 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred - - // Streaming response - const { onStreamComplete } = buildOnStreamComplete({ ...sharedCtx }); -- return handleStreamingResponse({ ...sharedCtx, providerResponse, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, streamController, onStreamComplete }); -+ -+ // TTFT deadline race: if enabled, race first chunk vs deadline before handing off to streaming handler -+ let 
responseToStream = providerResponse; -+ if (stream && ttftTimeoutMs > 0) { -+ const ttftResult = await raceTtftDeadline(providerResponse, ttftTimeoutMs, streamController); -+ if (ttftResult.timedOut) { -+ trackPendingRequest(model, provider, connectionId, false, true); -+ appendRequestLog({ model, provider, connectionId, status: "TTFT_TIMEOUT" }).catch(() => {}); -+ console.log(`[TTFT] ${provider}/${model} exceeded ${ttftTimeoutMs}ms`); -+ return createErrorResult(408, "ttft_timeout"); -+ } -+ responseToStream = ttftResult.response; -+ } -+ -+ return handleStreamingResponse({ ...sharedCtx, providerResponse: responseToStream, sourceFormat, targetFormat, userAgent, reqLogger, toolNameMap, streamController, onStreamComplete }); -+} -+ -+async function raceTtftDeadline(providerResponse, ttftTimeoutMs, streamController) { -+ return new Promise((resolve) => { -+ const timer = setTimeout(() => { -+ streamController.abort(); -+ resolve({ timedOut: true }); -+ }, ttftTimeoutMs); -+ -+ const reader = providerResponse.body.getReader(); -+ reader.read().then(({ value, done }) => { -+ clearTimeout(timer); -+ const newBody = new ReadableStream({ -+ start(controller) { -+ if (!done && value) controller.enqueue(value); -+ if (done) { controller.close(); return; } -+ }, -+ async pull(controller) { -+ const { value: chunk, done: isDone } = await reader.read(); -+ if (isDone) { controller.close(); return; } -+ controller.enqueue(chunk); -+ }, -+ cancel() { reader.cancel(); } -+ }); -+ resolve({ -+ timedOut: false, -+ response: new Response(newBody, { -+ status: providerResponse.status, -+ statusText: providerResponse.statusText, -+ headers: providerResponse.headers -+ }) -+ }); -+ }).catch(() => { -+ clearTimeout(timer); -+ resolve({ timedOut: true }); -+ }); -+ }); - } - - export function isTokenExpiringSoon(expiresAt, bufferMs = 5 * 60 * 1000) { diff --git a/.sisyphus/evidence/task-5-wiring.txt b/.sisyphus/evidence/task-5-wiring.txt deleted file mode 100644 index 4c8fac51..00000000 
--- a/.sisyphus/evidence/task-5-wiring.txt +++ /dev/null @@ -1,42 +0,0 @@ -diff --git a/src/sse/handlers/chat.js b/src/sse/handlers/chat.js -index bd45748..0b2c0bd 100644 ---- a/src/sse/handlers/chat.js -+++ b/src/sse/handlers/chat.js -@@ -201,6 +201,8 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re - ccFilterNaming: !!chatSettings.ccFilterNaming, - // Detect source format by endpoint + body - sourceFormatOverride: request?.url ? detectFormatByEndpoint(new URL(request.url).pathname, body) : null, -+ ttftTimeoutMs: chatSettings.ttftTimeoutMs || 0, -+ ttftCooldownMs: chatSettings.ttftCooldownMs || 15000, - onCredentialsRefreshed: async (newCreds) => { - await updateProviderCredentials(credentials.connectionId, { - accessToken: newCreds.accessToken, -@@ -217,7 +219,7 @@ async function handleSingleModelChat(body, modelStr, clientRawRequest = null, re - if (result.success) return result.response; - - // Mark account unavailable (auto-calculates cooldown with exponential backoff) -- const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model); -+ const { shouldFallback } = await markAccountUnavailable(credentials.connectionId, result.status, result.error, provider, model, { ttftCooldownMs: chatSettings.ttftCooldownMs || 15000 }); - - if (shouldFallback) { - log.warn("AUTH", `Account ${credentials.connectionName} unavailable (${result.status}), trying fallback`); -diff --git a/src/sse/services/auth.js b/src/sse/services/auth.js -index 3f426c1..1246886 100644 ---- a/src/sse/services/auth.js -+++ b/src/sse/services/auth.js -@@ -168,13 +168,13 @@ export async function getProviderCredentials(provider, excludeConnectionIds = nu - * @param {string|null} model - The specific model that triggered the error - * @returns {{ shouldFallback: boolean, cooldownMs: number }} - */ --export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = 
null) { -+export async function markAccountUnavailable(connectionId, status, errorText, provider = null, model = null, options = {}) { - if (!connectionId || connectionId === "noauth") return { shouldFallback: false, cooldownMs: 0 }; - const connections = await getProviderConnections({ provider }); - const conn = connections.find(c => c.id === connectionId); - const backoffLevel = conn?.backoffLevel || 0; - -- const { shouldFallback, cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, backoffLevel); -+ const { shouldFallback, cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, backoffLevel, options); - if (!shouldFallback) return { shouldFallback: false, cooldownMs: 0 }; - - const reason = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error";