diff --git a/convex/ai/resilience.ts b/convex/ai/resilience.ts index dce6bebe..a6fdbf75 100644 --- a/convex/ai/resilience.ts +++ b/convex/ai/resilience.ts @@ -333,10 +333,16 @@ async function finalizePendingMessages( }); for (const message of result.page) { if (message.status !== "pending") continue; - await ctx.runMutation(components.agent.messages.finalizeMessage, { - messageId: message._id, - result: { status: "failed", error: reason }, - }); + try { + await ctx.runMutation(components.agent.messages.finalizeMessage, { + messageId: message._id, + result: { status: "failed", error: reason }, + }); + } catch { + // The @convex-dev/agent library may have already finalized this message + // internally when the stream failed (before our retry logic ran). Ignore + // the error and continue so any remaining pending messages are processed. + } } } diff --git a/convex/ai/resilienceFinalize.test.ts b/convex/ai/resilienceFinalize.test.ts index d497c6c1..6f95b60e 100644 --- a/convex/ai/resilienceFinalize.test.ts +++ b/convex/ai/resilienceFinalize.test.ts @@ -13,6 +13,15 @@ describe("getFinalizeCodeForError", () => { expect(finalizeCode).not.toContain("high demand"); }); + it("maps 'model is currently overloaded' to provider_overload", () => { + const error = new Error("The model is currently overloaded. Please try again later."); + + const finalizeCode = getFinalizeCodeForError(error); + + expect(finalizeCode).toBe("provider_overload"); + expect(finalizeCode).not.toContain("overloaded"); + }); + it("uses the Error name for non-transient errors", () => { const error = new Error("database blew up unexpectedly"); @@ -27,4 +36,23 @@ describe("getFinalizeCodeForError", () => { expect(finalizeCode).toBe("unknown_error"); }); + + it("returns a sanitized code (never the raw error message) for every transient kind", () => { + // Ensure no transient error leaks its raw message into the finalize reason. 
+ const cases: [string, string][] = [ + ["This model is currently experiencing high demand.", "provider_overload"], + ["The model is currently overloaded.", "provider_overload"], + ["Service busy — please try again later.", "provider_overload"], + ["Request timed out after 180s", "timeout"], + ["fetch failed", "network"], + ]; + + for (const [msg, expectedCode] of cases) { + const error = msg === "fetch failed" ? new TypeError(msg) : new Error(msg); + if (msg.includes("timed out")) error.name = "TimeoutError"; + const code = getFinalizeCodeForError(error); + expect(code, `expected sanitized code for: ${msg}`).toBe(expectedCode); + expect(code, `raw message leaked for: ${msg}`).not.toContain(msg.slice(0, 10)); + } + }); }); diff --git a/src/lib/posthogBeforeSend.test.ts b/src/lib/posthogBeforeSend.test.ts index 8d59a855..ad7048a7 100644 --- a/src/lib/posthogBeforeSend.test.ts +++ b/src/lib/posthogBeforeSend.test.ts @@ -100,6 +100,20 @@ describe("shouldDropPosthogEvent", () => { expect(dropped).toBe(true); }); + it("drops Gemini overloaded errors (alternate 503 phrasing)", () => { + const event = makeEvent({ + properties: { + $exception_values: [ + { value: "The model is currently overloaded. Please try again later." }, + ], + }, + }); + + const dropped = shouldDropPosthogEvent(event); + + expect(dropped).toBe(true); + }); + it("drops Firefox reader-mode injection errors", () => { const event = makeEvent({ properties: { diff --git a/src/lib/posthogBeforeSend.ts b/src/lib/posthogBeforeSend.ts index f1abbddd..f319afbe 100644 --- a/src/lib/posthogBeforeSend.ts +++ b/src/lib/posthogBeforeSend.ts @@ -37,6 +37,8 @@ const SUPPRESSED_MESSAGE_SUBSTRINGS: readonly string[] = [ "function call turn comes immediately after", "exceeded your current quota", "model is currently experiencing high demand", + // Gemini's alternative 503 overload phrasing (mirrors sentryBeforeSend.ts). 
+ "model is currently overloaded", "RESOURCE_EXHAUSTED", // Gemini paid-tier billing exhaustion: surfaces as "Your prepayment credits // are depleted." for both BYOK and house-key users. classifyByokError maps diff --git a/src/lib/sentryBeforeSend.test.ts b/src/lib/sentryBeforeSend.test.ts index 73bb9270..c6da1f2d 100644 --- a/src/lib/sentryBeforeSend.test.ts +++ b/src/lib/sentryBeforeSend.test.ts @@ -84,6 +84,52 @@ describe("shouldDropSentryEvent", () => { expect(shouldDropSentryEvent(eventWithValue(payload), hintWithError(payload))).toBe(true); }); + it("drops Gemini overloaded errors (alternate 503 phrasing)", () => { + const payload = "The model is currently overloaded. Please try again later."; + expect(shouldDropSentryEvent(eventWithValue(payload), hintWithError(payload))).toBe(true); + }); + + it("drops high-demand error when buried in AI SDK wrapper via Error.cause", () => { + // The Vercel AI SDK wraps provider errors: outer.message is generic while + // the provider-specific text lives on Error.cause. + const inner = new Error( + "This model is currently experiencing high demand. Spikes in demand are usually temporary. 
Please try again later.", + ); + const outer = new Error("Error reading UI message stream"); + (outer as Error & { cause?: unknown }).cause = inner; + + const genericEvent = eventWithValue("Error reading UI message stream"); + expect(shouldDropSentryEvent(genericEvent, { originalException: outer })).toBe(true); + }); + + it("drops high-demand error buried two levels deep in cause chain", () => { + const root = new Error("This model is currently experiencing high demand."); + const mid = new Error("Failed to process stream chunk"); + (mid as Error & { cause?: unknown }).cause = root; + const outer = new Error("Unhandled stream error"); + (outer as Error & { cause?: unknown }).cause = mid; + + const genericEvent = eventWithValue("Unhandled stream error"); + expect(shouldDropSentryEvent(genericEvent, { originalException: outer })).toBe(true); + }); + + it("drops error when originalException is a non-Error object with a message property", () => { + // Some SDK error types don't extend Error but carry a .message field. + const fakeError = { message: "This model is currently experiencing high demand." 
}; + const genericEvent = eventWithValue("Unknown error"); + expect(shouldDropSentryEvent(genericEvent, { originalException: fakeError as Error })).toBe( + true, + ); + }); + + it("keeps real errors that happen to have an innocent cause chain", () => { + const cause = new Error("Some benign internal detail"); + const outer = new Error("Database connection failed"); + (outer as Error & { cause?: unknown }).cause = cause; + const event = eventWithValue("Database connection failed"); + expect(shouldDropSentryEvent(event, { originalException: outer })).toBe(false); + }); + it("drops Gemini RESOURCE_EXHAUSTED errors", () => { const payload = "Error: 429 RESOURCE_EXHAUSTED"; expect(shouldDropSentryEvent(eventWithValue(payload), hintWithError(payload))).toBe(true); diff --git a/src/lib/sentryBeforeSend.ts b/src/lib/sentryBeforeSend.ts index 60dfe399..54e8c0f4 100644 --- a/src/lib/sentryBeforeSend.ts +++ b/src/lib/sentryBeforeSend.ts @@ -36,6 +36,10 @@ const SUPPRESSED_MESSAGE_SUBSTRINGS: readonly string[] = [ // which is what Sentry captures here. "function call turn comes immediately after", "model is currently experiencing high demand", + // Gemini's alternative 503 overload phrasing. Both variants map to the same + // provider_overload transient kind on the backend; the client stream consumer + // surfaces whichever string was in the streaming error chunk. + "model is currently overloaded", "RESOURCE_EXHAUSTED", // Gemini free-tier quota errors. The leading "You exceeded" prefix is // Gemini's exact phrasing (capitalized "You exceeded ..."), so it covers @@ -50,17 +54,35 @@ const SUPPRESSED_MESSAGE_SUBSTRINGS: readonly string[] = [ "credits are depleted", ]; -// Collect every candidate message from the hint and event so that quota errors -// nested in event.exception.values are not missed when hint.originalException -// carries a generic wrapper message. 
+// Collect every candidate message from the hint and event so that provider +// errors nested inside AI SDK wrapper errors are not missed. +// +// The Vercel AI SDK sometimes wraps provider errors: the outer Error has a +// generic message (e.g. "Error reading stream") while the provider-specific +// text (e.g. "This model is currently experiencing high demand") lives on +// Error.cause. We walk the full cause chain so the suppression list matches +// regardless of how many wrappers the SDK adds. function errorMessages(event: ErrorEvent, hint: EventHint): string[] { const messages: string[] = []; const hintError = hint.originalException; - if (hintError instanceof Error && typeof hintError.message === "string") { - messages.push(hintError.message); + if (hintError instanceof Error) { + if (typeof hintError.message === "string") { + messages.push(hintError.message); + } + // Walk the cause chain — provider errors are often buried here. + let cause: unknown = hintError.cause; + while (cause instanceof Error) { + if (typeof cause.message === "string") messages.push(cause.message); + cause = cause.cause; + } } else if (typeof hintError === "string") { messages.push(hintError); + } else if (hintError !== null && typeof hintError === "object") { + // Some SDK error classes don't extend Error but carry a message property + // (duck-typed Error interface). Extract it defensively. + const msg = (hintError as Record<string, unknown>).message; + if (typeof msg === "string") messages.push(msg); } const values = event.exception?.values;