Commit 13caf2a

Merge pull request #7492 from ferenci84/feature/openrouter-anthropic-caching
Add Anthropic caching support to OpenRouter LLM implementation
2 parents 1f6276d + 4345b16 commit 13caf2a

File tree

2 files changed: +359 −0 lines changed
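
This change teaches the OpenRouter provider to emit Anthropic `cache_control` markers when a Claude model is in use. For orientation, here is a minimal sketch of switching the feature on when constructing the provider directly; the option names match those exercised in the tests below, and the API key and import path are placeholders:

```ts
import OpenRouter from "core/llm/llms/OpenRouter";

// Minimal sketch; option names mirror the diff and tests below.
const llm = new OpenRouter({
  model: "anthropic/claude-3.5-sonnet", // caching only applies to Claude models
  apiKey: "sk-or-...",                  // placeholder key
  cacheBehavior: {
    cacheSystemMessage: true, // annotate the system message
    cacheConversation: true,  // annotate the last two user messages
  },
});
```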

core/llm/llms/OpenRouter.ts

Lines changed: 123 additions & 0 deletions
@@ -1,3 +1,5 @@
+import { ChatCompletionCreateParams } from "openai/resources/index";
+
 import { LLMOptions } from "../../index.js";
 import { osModelsEditPrompt } from "../templates/edit.js";
 
@@ -13,6 +15,127 @@ class OpenRouter extends OpenAI {
     },
     useLegacyCompletionsEndpoint: false,
   };
+
+  /**
+   * Detect if the model is an Anthropic/Claude model
+   */
+  private isAnthropicModel(model?: string): boolean {
+    if (!model) return false;
+    const modelLower = model.toLowerCase();
+    return modelLower.includes("claude");
+  }
+
+  /**
+   * Add cache_control to message content for Anthropic models
+   */
+  private addCacheControlToContent(content: any, addCaching: boolean): any {
+    if (!addCaching) return content;
+
+    if (typeof content === "string") {
+      return [
+        {
+          type: "text",
+          text: content,
+          cache_control: { type: "ephemeral" },
+        },
+      ];
+    }
+
+    if (Array.isArray(content)) {
+      // For array content, add cache_control to the last text item
+      return content.map((part, idx) => {
+        if (part.type === "text" && idx === content.length - 1) {
+          return {
+            ...part,
+            cache_control: { type: "ephemeral" },
+          };
+        }
+        return part;
+      });
+    }
+
+    return content;
+  }
+
+  /**
+   * Override modifyChatBody to add Anthropic caching when appropriate
+   */
+  protected modifyChatBody(
+    body: ChatCompletionCreateParams,
+  ): ChatCompletionCreateParams {
+    // First apply parent modifications
+    body = super.modifyChatBody(body);
+
+    // Check if we should apply Anthropic caching
+    if (
+      !this.isAnthropicModel(body.model) ||
+      (!this.cacheBehavior && !this.completionOptions.promptCaching)
+    ) {
+      return body;
+    }
+
+    const shouldCacheConversation =
+      this.cacheBehavior?.cacheConversation ||
+      this.completionOptions.promptCaching;
+    const shouldCacheSystemMessage =
+      this.cacheBehavior?.cacheSystemMessage ||
+      this.completionOptions.promptCaching;
+
+    if (!shouldCacheConversation && !shouldCacheSystemMessage) {
+      return body;
+    }
+
+    // Follow the same logic as Anthropic.ts: filter out system messages first
+    const filteredMessages = body.messages.filter(
+      (m: any) => m.role !== "system" && !!m.content,
+    );
+
+    // Find the last two user message indices from the filtered array
+    const lastTwoUserMsgIndices = filteredMessages
+      .map((msg: any, index: number) => (msg.role === "user" ? index : -1))
+      .filter((index: number) => index !== -1)
+      .slice(-2);
+
+    // Create a mapping from filtered indices to original indices
+    let filteredIndex = 0;
+    const filteredToOriginalIndexMap: number[] = [];
+    body.messages.forEach((msg: any, originalIndex: number) => {
+      if (msg.role !== "system" && !!msg.content) {
+        filteredToOriginalIndexMap[filteredIndex] = originalIndex;
+        filteredIndex++;
+      }
+    });
+
+    // Modify messages to add cache_control
+    body.messages = body.messages.map((message: any, idx) => {
+      // Handle system message caching
+      if (message.role === "system" && shouldCacheSystemMessage) {
+        return {
+          ...message,
+          content: this.addCacheControlToContent(message.content, true),
+        };
+      }
+
+      // Handle conversation caching: check whether this message's index
+      // in the filtered array is one of the last two user messages
+      const filteredIdx = filteredToOriginalIndexMap.indexOf(idx);
+      if (
+        message.role === "user" &&
+        shouldCacheConversation &&
+        filteredIdx !== -1 &&
+        lastTwoUserMsgIndices.includes(filteredIdx)
+      ) {
+        return {
+          ...message,
+          content: this.addCacheControlToContent(message.content, true),
+        };
+      }
+
+      return message;
+    });
+
+    return body;
+  }
 }
 
 export default OpenRouter;
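
To make the effect of the override concrete, here is a before/after sketch of a single message, following the expectations encoded in the tests below: string content is wrapped into a one-element text array carrying an ephemeral `cache_control` marker.

```ts
// Before modifyChatBody (plain string content):
const before = { role: "user", content: "Hello" };

// After modifyChatBody, when this is one of the last two user messages
// (or the system message) and caching is enabled:
const after = {
  role: "user",
  content: [
    { type: "text", text: "Hello", cache_control: { type: "ephemeral" } },
  ],
};
```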

core/llm/llms/OpenRouter.vitest.ts

Lines changed: 236 additions & 0 deletions
@@ -0,0 +1,236 @@
+import { ChatCompletionCreateParams } from "openai/resources/index";
+import { describe, expect, it } from "vitest";
+
+import OpenRouter from "./OpenRouter";
+
+describe("OpenRouter Anthropic Caching", () => {
+  it("should detect Anthropic models correctly", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+    });
+
+    // Test private method through modifyChatBody
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [],
+    };
+
+    // Should not throw
+    openRouter["modifyChatBody"](body);
+  });
+
+  it("should add cache_control to user messages when caching is enabled", () => {
+    const openRouter = new OpenRouter({
+      model: "anthropic/claude-3.5-sonnet",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: false,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "anthropic/claude-3.5-sonnet",
+      messages: [
+        { role: "user", content: "First message" },
+        { role: "assistant", content: "Response" },
+        { role: "user", content: "Second message" },
+        { role: "assistant", content: "Another response" },
+        { role: "user", content: "Third message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Check that the last two user messages have cache_control
+    const userMessages = modifiedBody.messages.filter(
+      (msg: any) => msg.role === "user",
+    );
+
+    // First user message should not have cache_control
+    expect(userMessages[0].content).toBe("First message");
+
+    // Last two user messages should have cache_control
+    expect(userMessages[1].content).toEqual([
+      {
+        type: "text",
+        text: "Second message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    expect(userMessages[2].content).toEqual([
+      {
+        type: "text",
+        text: "Third message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+  });
+
+  it("should correctly handle cache_control with system messages present", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        { role: "system", content: "You are a helpful assistant" },
+        { role: "user", content: "First user message" },
+        { role: "assistant", content: "First assistant response" },
+        { role: "user", content: "Second user message" },
+        { role: "assistant", content: "Second assistant response" },
+        { role: "user", content: "Third user message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // System message should have cache_control
+    expect(modifiedBody.messages[0]).toEqual({
+      role: "system",
+      content: [
+        {
+          type: "text",
+          text: "You are a helpful assistant",
+          cache_control: { type: "ephemeral" },
+        },
+      ],
+    });
+
+    // Check user messages - should follow Anthropic filtering logic
+    const userMessages = modifiedBody.messages.filter(
+      (msg: any) => msg.role === "user",
+    );
+
+    // First user message should NOT have cache_control (only last 2)
+    expect(userMessages[0].content).toBe("First user message");
+
+    // Last two user messages should have cache_control
+    expect(userMessages[1].content).toEqual([
+      {
+        type: "text",
+        text: "Second user message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    expect(userMessages[2].content).toEqual([
+      {
+        type: "text",
+        text: "Third user message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    // Assistant messages should remain unchanged
+    expect(modifiedBody.messages[2].content).toBe("First assistant response");
+    expect(modifiedBody.messages[4].content).toBe("Second assistant response");
+  });
+
+  it("should add cache_control to system message when caching is enabled", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: false,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        { role: "system", content: "You are a helpful assistant" },
+        { role: "user", content: "Hello" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // System message should have cache_control
+    expect(modifiedBody.messages[0]).toEqual({
+      role: "system",
+      content: [
+        {
+          type: "text",
+          text: "You are a helpful assistant",
+          cache_control: { type: "ephemeral" },
+        },
+      ],
+    });
+
+    // User message should remain unchanged
+    expect(modifiedBody.messages[1]).toEqual({
+      role: "user",
+      content: "Hello",
+    });
+  });
+
+  it("should handle array content correctly", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: false,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "First part" },
+            { type: "text", text: "Second part" },
+          ],
+        },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Only the last text part should have cache_control
+    expect(modifiedBody.messages[0].content).toEqual([
+      { type: "text", text: "First part" },
+      {
+        type: "text",
+        text: "Second part",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+  });
+
+  it("should not modify messages for non-Anthropic models", () => {
+    const openRouter = new OpenRouter({
+      model: "gpt-4o",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "System message" },
+        { role: "user", content: "User message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Messages should remain unchanged
+    expect(modifiedBody.messages).toEqual(body.messages);
+  });
+});
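
Note that the tests cover the `cacheBehavior` path; the implementation also honors `completionOptions.promptCaching`, which enables both system-message and conversation caching at once. A hedged sketch of that alternative, with the option shape inferred from the diff's `this.completionOptions.promptCaching` check:

```ts
// Sketch only: promptCaching is read from completionOptions in the diff above;
// the exact option shape here is an assumption based on that usage.
const llm = new OpenRouter({
  model: "anthropic/claude-3.5-sonnet",
  apiKey: "test-key",
  completionOptions: { promptCaching: true },
});
```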
