Commit 13caf2a

Merge pull request #7492 from ferenci84/feature/openrouter-anthropic-caching
Add Anthropic caching support to OpenRouter LLM implementation
2 parents 1f6276d + 4345b16 commit 13caf2a

File tree

2 files changed: +359 −0 lines changed
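
This change teaches the OpenRouter provider to emit Anthropic `cache_control` markers when a Claude model is in use. For orientation, here is a minimal sketch of switching the feature on when constructing the provider directly; the option names match those exercised in the tests below, and the API key and import path are placeholders:

```ts
import OpenRouter from "core/llm/llms/OpenRouter";

// Minimal sketch; option names mirror the diff and tests below.
const llm = new OpenRouter({
  model: "anthropic/claude-3.5-sonnet", // caching only applies to Claude models
  apiKey: "sk-or-...",                  // placeholder key
  cacheBehavior: {
    cacheSystemMessage: true, // annotate the system message
    cacheConversation: true,  // annotate the last two user messages
  },
});
```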

core/llm/llms/OpenRouter.ts

Lines changed: 123 additions & 0 deletions
@@ -1,3 +1,5 @@
+import { ChatCompletionCreateParams } from "openai/resources/index";
+
 import { LLMOptions } from "../../index.js";
 import { osModelsEditPrompt } from "../templates/edit.js";
 
@@ -13,6 +15,127 @@ class OpenRouter extends OpenAI {
     },
     useLegacyCompletionsEndpoint: false,
   };
+
+  /**
+   * Detect if the model is an Anthropic/Claude model
+   */
+  private isAnthropicModel(model?: string): boolean {
+    if (!model) return false;
+    const modelLower = model.toLowerCase();
+    return modelLower.includes("claude");
+  }
+
+  /**
+   * Add cache_control to message content for Anthropic models
+   */
+  private addCacheControlToContent(content: any, addCaching: boolean): any {
+    if (!addCaching) return content;
+
+    if (typeof content === "string") {
+      return [
+        {
+          type: "text",
+          text: content,
+          cache_control: { type: "ephemeral" },
+        },
+      ];
+    }
+
+    if (Array.isArray(content)) {
+      // For array content, add cache_control to the last text item
+      return content.map((part, idx) => {
+        if (part.type === "text" && idx === content.length - 1) {
+          return {
+            ...part,
+            cache_control: { type: "ephemeral" },
+          };
+        }
+        return part;
+      });
+    }
+
+    return content;
+  }
+
+  /**
+   * Override modifyChatBody to add Anthropic caching when appropriate
+   */
+  protected modifyChatBody(
+    body: ChatCompletionCreateParams,
+  ): ChatCompletionCreateParams {
+    // First apply parent modifications
+    body = super.modifyChatBody(body);
+
+    // Check if we should apply Anthropic caching
+    if (
+      !this.isAnthropicModel(body.model) ||
+      (!this.cacheBehavior && !this.completionOptions.promptCaching)
+    ) {
+      return body;
+    }
+
+    const shouldCacheConversation =
+      this.cacheBehavior?.cacheConversation ||
+      this.completionOptions.promptCaching;
+    const shouldCacheSystemMessage =
+      this.cacheBehavior?.cacheSystemMessage ||
+      this.completionOptions.promptCaching;
+
+    if (!shouldCacheConversation && !shouldCacheSystemMessage) {
+      return body;
+    }
+
+    // Follow the same logic as Anthropic.ts: filter out system messages first
+    const filteredMessages = body.messages.filter(
+      (m: any) => m.role !== "system" && !!m.content,
+    );
+
+    // Find the last two user message indices from the filtered array
+    const lastTwoUserMsgIndices = filteredMessages
+      .map((msg: any, index: number) => (msg.role === "user" ? index : -1))
+      .filter((index: number) => index !== -1)
+      .slice(-2);
+
+    // Create a mapping from filtered indices to original indices
+    let filteredIndex = 0;
+    const filteredToOriginalIndexMap: number[] = [];
+    body.messages.forEach((msg: any, originalIndex: number) => {
+      if (msg.role !== "system" && !!msg.content) {
+        filteredToOriginalIndexMap[filteredIndex] = originalIndex;
+        filteredIndex++;
+      }
+    });
+
+    // Modify messages to add cache_control
+    body.messages = body.messages.map((message: any, idx) => {
+      // Handle system message caching
+      if (message.role === "system" && shouldCacheSystemMessage) {
+        return {
+          ...message,
+          content: this.addCacheControlToContent(message.content, true),
+        };
+      }
+
+      // Handle conversation caching: check whether this message's index
+      // in the filtered array is one of the last two user messages
+      const filteredIdx = filteredToOriginalIndexMap.indexOf(idx);
+      if (
+        message.role === "user" &&
+        shouldCacheConversation &&
+        filteredIdx !== -1 &&
+        lastTwoUserMsgIndices.includes(filteredIdx)
+      ) {
+        return {
+          ...message,
+          content: this.addCacheControlToContent(message.content, true),
+        };
+      }
+
+      return message;
+    });
+
+    return body;
+  }
 }
 
 export default OpenRouter;
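
To make the effect of the override concrete, here is a before/after sketch of a single message, following the expectations encoded in the tests below: string content is wrapped into a one-element text array carrying an ephemeral `cache_control` marker.

```ts
// Before modifyChatBody (plain string content):
const before = { role: "user", content: "Hello" };

// After modifyChatBody, when this is one of the last two user messages
// (or the system message) and caching is enabled:
const after = {
  role: "user",
  content: [
    { type: "text", text: "Hello", cache_control: { type: "ephemeral" } },
  ],
};
```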

core/llm/llms/OpenRouter.vitest.ts

Lines changed: 236 additions & 0 deletions
@@ -0,0 +1,236 @@
+import { ChatCompletionCreateParams } from "openai/resources/index";
+import { describe, expect, it } from "vitest";
+
+import OpenRouter from "./OpenRouter";
+
+describe("OpenRouter Anthropic Caching", () => {
+  it("should detect Anthropic models correctly", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+    });
+
+    // Test private method through modifyChatBody
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [],
+    };
+
+    // Should not throw
+    openRouter["modifyChatBody"](body);
+  });
+
+  it("should add cache_control to user messages when caching is enabled", () => {
+    const openRouter = new OpenRouter({
+      model: "anthropic/claude-3.5-sonnet",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: false,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "anthropic/claude-3.5-sonnet",
+      messages: [
+        { role: "user", content: "First message" },
+        { role: "assistant", content: "Response" },
+        { role: "user", content: "Second message" },
+        { role: "assistant", content: "Another response" },
+        { role: "user", content: "Third message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Check that the last two user messages have cache_control
+    const userMessages = modifiedBody.messages.filter(
+      (msg: any) => msg.role === "user",
+    );
+
+    // First user message should not have cache_control
+    expect(userMessages[0].content).toBe("First message");
+
+    // Last two user messages should have cache_control
+    expect(userMessages[1].content).toEqual([
+      {
+        type: "text",
+        text: "Second message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    expect(userMessages[2].content).toEqual([
+      {
+        type: "text",
+        text: "Third message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+  });
+
+  it("should correctly handle cache_control with system messages present", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        { role: "system", content: "You are a helpful assistant" },
+        { role: "user", content: "First user message" },
+        { role: "assistant", content: "First assistant response" },
+        { role: "user", content: "Second user message" },
+        { role: "assistant", content: "Second assistant response" },
+        { role: "user", content: "Third user message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // System message should have cache_control
+    expect(modifiedBody.messages[0]).toEqual({
+      role: "system",
+      content: [
+        {
+          type: "text",
+          text: "You are a helpful assistant",
+          cache_control: { type: "ephemeral" },
+        },
+      ],
+    });
+
+    // Check user messages - should follow Anthropic filtering logic
+    const userMessages = modifiedBody.messages.filter(
+      (msg: any) => msg.role === "user",
+    );
+
+    // First user message should NOT have cache_control (only last 2)
+    expect(userMessages[0].content).toBe("First user message");
+
+    // Last two user messages should have cache_control
+    expect(userMessages[1].content).toEqual([
+      {
+        type: "text",
+        text: "Second user message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    expect(userMessages[2].content).toEqual([
+      {
+        type: "text",
+        text: "Third user message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    // Assistant messages should remain unchanged
+    expect(modifiedBody.messages[2].content).toBe("First assistant response");
+    expect(modifiedBody.messages[4].content).toBe("Second assistant response");
+  });
+
+  it("should add cache_control to system message when caching is enabled", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: false,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        { role: "system", content: "You are a helpful assistant" },
+        { role: "user", content: "Hello" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // System message should have cache_control
+    expect(modifiedBody.messages[0]).toEqual({
+      role: "system",
+      content: [
+        {
+          type: "text",
+          text: "You are a helpful assistant",
+          cache_control: { type: "ephemeral" },
+        },
+      ],
+    });
+
+    // User message should remain unchanged
+    expect(modifiedBody.messages[1]).toEqual({
+      role: "user",
+      content: "Hello",
+    });
+  });
+
+  it("should handle array content correctly", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: false,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "First part" },
+            { type: "text", text: "Second part" },
+          ],
+        },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Only the last text part should have cache_control
+    expect(modifiedBody.messages[0].content).toEqual([
+      { type: "text", text: "First part" },
+      {
+        type: "text",
+        text: "Second part",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+  });
+
+  it("should not modify messages for non-Anthropic models", () => {
+    const openRouter = new OpenRouter({
+      model: "gpt-4o",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "System message" },
+        { role: "user", content: "User message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Messages should remain unchanged
+    expect(modifiedBody.messages).toEqual(body.messages);
+  });
+});
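
Note that the tests cover the `cacheBehavior` path; the implementation also honors `completionOptions.promptCaching`, which enables both system-message and conversation caching at once. A hedged sketch of that alternative, with the option shape inferred from the diff's `this.completionOptions.promptCaching` check:

```ts
// Sketch only: promptCaching is read from completionOptions in the diff above;
// the exact option shape here is an assumption based on that usage.
const llm = new OpenRouter({
  model: "anthropic/claude-3.5-sonnet",
  apiKey: "test-key",
  completionOptions: { promptCaching: true },
});
```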
