Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/conversation/window.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,21 @@ export function stripOlderReasoning(messages: LanguageModelV3Message[]): Languag
return changed ? out : messages;
}

/**
 * Choose how reasoning blocks in the history are replayed for a provider.
 *
 * Stripping older reasoning is an Anthropic-only optimization: older thinking
 * signatures are safe to omit there and would otherwise grow the prompt
 * quickly. OpenAI Responses API and Gemini can require reasoning/thought
 * metadata to stay paired with replayed tool calls, so every other provider
 * gets its history back untouched.
 *
 * @param messages - Conversation history to prepare for replay.
 * @param provider - Provider identifier; only the exact string "anthropic"
 *   enables stripping.
 * @returns The result of `stripOlderReasoning(messages)` for Anthropic;
 *   otherwise the same `messages` array that was passed in.
 */
export function applyReasoningReplayPolicy(
  messages: LanguageModelV3Message[],
  provider: string,
): LanguageModelV3Message[] {
  // Non-Anthropic providers: hand the history back as-is (same reference).
  if (provider !== "anthropic") {
    return messages;
  }
  return stripOlderReasoning(messages);
}

/**
* Limit conversation history by message group count.
* Keeps the first message (initial user request) plus the most recent
Expand Down
16 changes: 11 additions & 5 deletions src/runtime/runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ import type {
ConversationStore,
ParticipantInfo,
} from "../conversation/types.ts";
import { sliceHistory, stripOlderReasoning, windowMessages } from "../conversation/window.ts";
import {
applyReasoningReplayPolicy,
sliceHistory,
windowMessages,
} from "../conversation/window.ts";
import { AgentEngine } from "../engine/engine.ts";
import { estimateMessageTokens, estimateToolDescriptionTokens } from "../engine/token-estimate.ts";
import type {
Expand All @@ -46,6 +50,7 @@ import { createIdentityProvider } from "../identity/provider.ts";
import { DEV_IDENTITY } from "../identity/providers/dev.ts";
import { UserStore } from "../identity/user.ts";
import { InstructionsStore } from "../instructions/index.ts";
import { getProviderFromModel } from "../model/catalog.ts";
import { buildModelResolver, resolveModelString } from "../model/registry.ts";
import { PermissionStore } from "../permissions/permission-store.ts";
import type { Layer3SkillEntry, PromptAppInfo } from "../prompt/compose.ts";
Expand Down Expand Up @@ -963,9 +968,10 @@ export class Runtime {

// Per-request hooks: inherit `beforeToolCall` from the runtime-level
// hooks; compose `transformContext` here so the windowing budget is
// the one we just resolved for THIS call. The order (slice → strip
// older reasoning → window by token budget) is preserved.
// the one we just resolved for THIS call. The order (slice → apply
// provider replay policy → window by token budget) is preserved.
const maxHistoryMessages = this.config.maxHistoryMessages ?? DEFAULT_MAX_HISTORY_MESSAGES;
const replayProvider = getProviderFromModel(resolvedModelString);
const perRequestHooks: EngineHooks = {
...this.hooks,
transformContext: (historyMessages, opts) => {
Expand All @@ -977,8 +983,8 @@ export class Runtime {
const budget =
attempt > 0 ? Math.floor(messageBudget.budget / (1 << attempt)) : messageBudget.budget;
const sliced = sliceHistory(historyMessages, maxHistoryMessages);
const reasoningStripped = stripOlderReasoning(sliced);
return windowMessages(reasoningStripped, budget);
const replayReady = applyReasoningReplayPolicy(sliced, replayProvider);
return windowMessages(replayReady, budget);
},
};

Expand Down
60 changes: 60 additions & 0 deletions test/unit/window.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { describe, expect, it } from "bun:test";
import type { LanguageModelV3Message } from "@ai-sdk/provider";
import {
applyReasoningReplayPolicy,
sliceHistory,
stripOlderReasoning,
windowMessages,
Expand Down Expand Up @@ -460,3 +461,62 @@ describe("stripOlderReasoning", () => {
expect(result).toBe(msgs);
});
});

describe("applyReasoningReplayPolicy", () => {
  // Builds a fresh history on every call: a user request, an assistant turn
  // holding a reasoning part plus a tool call that carries Google thought
  // metadata, the matching tool result, and a later assistant turn that
  // reasons again. Returning new objects each time lets a test compare the
  // policy's output against an untouched copy.
  const replayHistoryWithToolCall = (): LanguageModelV3Message[] => [
    textMsg("user", "do something"),
    {
      role: "assistant",
      content: [
        { type: "reasoning" as const, text: "considering options" },
        {
          type: "tool-call" as const,
          toolCallId: "call_1",
          toolName: "search",
          input: { q: "x" },
          providerOptions: {
            google: { thoughtSignature: "opaque-signature" },
          },
        },
      ],
    },
    toolResultMsg("call_1"),
    assistantWithReasoning("now reasoning again", "done"),
  ];

  it("uses Anthropic's older-reasoning stripping policy", () => {
    const msgs = replayHistoryWithToolCall();
    const result = applyReasoningReplayPolicy(msgs, "anthropic");

    // The older assistant turn loses its reasoning part; the tool call and
    // its providerOptions are expected to survive unchanged.
    expect(result[1]).toEqual({
      role: "assistant",
      content: [
        {
          type: "tool-call",
          toolCallId: "call_1",
          toolName: "search",
          input: { q: "x" },
          providerOptions: {
            google: { thoughtSignature: "opaque-signature" },
          },
        },
      ],
    });
  });

  it("preserves OpenAI reasoning paired with replayed tool calls", () => {
    const msgs = replayHistoryWithToolCall();
    const pristine = replayHistoryWithToolCall();
    const result = applyReasoningReplayPolicy(msgs, "openai");

    // Pass-through: the very same array instance comes back.
    expect(result).toBe(msgs);
    // Compare with an independently built copy. Comparing against `msgs`
    // would be a tautology once `result === msgs` and could not catch an
    // in-place mutation of the history.
    expect(result[1]).toEqual(pristine[1]!);
  });

  it("preserves Gemini reasoning and thought metadata paired with replayed tool calls", () => {
    const msgs = replayHistoryWithToolCall();
    const pristine = replayHistoryWithToolCall();
    const result = applyReasoningReplayPolicy(msgs, "google");

    // Pass-through: the very same array instance comes back.
    expect(result).toBe(msgs);
    // Compare with an independently built copy so that in-place mutation of
    // the reasoning part or the thoughtSignature metadata would fail here.
    expect(result[1]).toEqual(pristine[1]!);
  });
});