danny-avila · danny-avila · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · chatgpt-codex-connector
diff --git a/src/messages/cache.test.ts b/src/messages/cache.test.ts
@@ -13,6 +13,7 @@ import {
   stripBedrockCacheControl,
   addBedrockCacheControl,
   addCacheControl,
+  addCacheControlToStablePrefixMessages,
 } from './cache';
 import { _convertMessagesToOpenAIParams } from '@/llm/openai/utils';
 import { toLangChainContent } from './langchain';
@@ -767,6 +768,127 @@ describe('addBedrockCacheControl (Bedrock cache checkpoints)', () => {
   });
 });
 
+describe('synthetic skill/meta messages are not cache-anchored', () => {
+  const hasAnthropicMarker = (m: BaseMessage): boolean =>
+    Array.isArray(m.content) &&
+    m.content.some((block) => 'cache_control' in block);
+
+  const hasBedrockCachePoint = (m: BaseMessage): boolean =>
+    Array.isArray(m.content) &&
+    m.content.some((block) => 'cachePoint' in block);
+
+  const skillBody = (skillName: string, content = 'SKILL BODY'): HumanMessage =>
+    new HumanMessage({
+      content,
+      additional_kwargs: { isMeta: true, source: 'skill', skillName },
+    });
+
+  it('Anthropic: skips a trailing synthetic skill message; markers land on the real user messages', () => {
+    const messages: BaseMessage[] = [
+      new HumanMessage('First real question'),
+      new AIMessage('Answer'),
+      new HumanMessage('Second real question'),
+      skillBody('pdf-analyzer'),
+    ];
+
+    const result = addCacheControl<BaseMessage>(messages);
+
+    expect(hasAnthropicMarker(result[3])).toBe(false);
+    expect(hasAnthropicMarker(result[2])).toBe(true);
+    expect(hasAnthropicMarker(result[0])).toBe(true);
+  });
+
+  it('Anthropic: strips a stale marker from a synthetic skill message without re-adding one', () => {
+    const stale = new HumanMessage({
+      content: toLangChainContent([
+        {
+          type: 'text',
+          text: 'SKILL BODY',
+          cache_control: { type: 'ephemeral' },
+        } as MessageContentComplex,
+      ]),
+      additional_kwargs: {
+        isMeta: true,
+        source: 'skill',
+        skillName: 'pdf-analyzer',
+      },
+    });
+    const messages: BaseMessage[] = [
+      new HumanMessage('Real question'),
+      new AIMessage('Answer'),
+      stale,
+    ];
+
+    const result = addCacheControl<BaseMessage>(messages);
+
+    expect(hasAnthropicMarker(result[2])).toBe(false);
+    expect(hasAnthropicMarker(result[0])).toBe(true);
+  });
+
+  it('Anthropic: detects skill messages by additional_kwargs.source even without isMeta', () => {
+    const messages: BaseMessage[] = [
+      new HumanMessage('Real question'),
+      new AIMessage('Answer'),
+      new HumanMessage({
+        content: 'SKILL BODY',
+        additional_kwargs: { source: 'skill', skillName: 'pdf-analyzer' },
+      }),
+    ];
+
+    const result = addCacheControl<BaseMessage>(messages);
+
+    expect(hasAnthropicMarker(result[2])).toBe(false);
+    expect(hasAnthropicMarker(result[0])).toBe(true);
+  });
+
+  it('Bedrock: skips a trailing synthetic skill message; cachePoints land on the real user messages', () => {
+    const messages: BaseMessage[] = [
+      new HumanMessage('First real question'),
+      new AIMessage('Answer'),
+      new HumanMessage('Second real question'),
+      skillBody('pdf-analyzer'),
+    ];
+
+    const result = addBedrockCacheControl<BaseMessage>(messages);
+
+    expect(hasBedrockCachePoint(result[3])).toBe(false);
+    expect(hasBedrockCachePoint(result[2])).toBe(true);
+    expect(hasBedrockCachePoint(result[0])).toBe(true);
+  });
+
+  it('stable-prefix fallback: anchors the real user message, not a synthetic skill message', () => {
+    // Mirrors AgentContext's dynamic-tail path: the only assistant message is a
+    // skill-only tool call (no text), so the assistant-only pass adds no marker
+    // and the cacheable fallback runs. It must skip the reconstructed skill
+    // HumanMessage and anchor the real user message instead.
+    const messages: BaseMessage[] = [
+      new HumanMessage('Real stable question'),
+      new AIMessage({
+        content: toLangChainContent([
+          {
+            type: 'tool_use',
+            id: 'call_1',
+            name: 'skill',
+            input: { skillName: 'pdf-analyzer' },
+          } as MessageContentComplex,
+        ]),
+        tool_calls: [
+          { id: 'call_1', name: 'skill', args: { skillName: 'pdf-analyzer' } },
+        ],
+      }),
+      skillBody('pdf-analyzer'),
+    ];
+
+    const result = addCacheControlToStablePrefixMessages<BaseMessage>(
+      messages,
+      2
+    );
+
+    expect(hasAnthropicMarker(result[2])).toBe(false);
+    expect(hasAnthropicMarker(result[0])).toBe(true);
+  });
+});
+
 describe('stripAnthropicCacheControl', () => {
   it('removes cache_control fields from content blocks', () => {
     const messages: TestMsg[] = [

diff --git a/src/messages/cache.ts b/src/messages/cache.ts
@@ -182,6 +182,7 @@ export function addCacheControl<T extends AnthropicMessage | BaseMessage>(
     const needsCacheAdd =
       userMessagesModified < 2 &&
       isUserMessage &&
+      !isSyntheticMetaMessage(originalMessage) &&
       (typeof content === 'string' || hasArrayContent);
 
     // Skip messages that don't need any work
@@ -263,6 +264,26 @@ function getMessageRole(message: MessageWithContent): string | undefined {
   return undefined;
 }
 
+const SKILL_MESSAGE_SOURCE = 'skill';
+
+/**
+ * Synthetic skill/meta messages (reconstructed skill bodies, primed SKILL.md
+ * instructions) are re-injected every turn and are not stable conversation
+ * turns. They must not anchor a fresh prompt-cache marker — doing so pins the
+ * cache to a volatile/duplicated prefix. Stale markers are still stripped from
+ * them; only the *adding* of new markers is suppressed. Detected via
+ * `additional_kwargs.isMeta === true` or `additional_kwargs.source === 'skill'`.
+ */
+function isSyntheticMetaMessage(message: MessageWithContent): boolean {
+  const { additional_kwargs: kwargs } = message as {
+    additional_kwargs?: { isMeta?: unknown; source?: unknown };
+  };
+  if (kwargs == null) {
+    return false;
+  }
+  return kwargs.isMeta === true || kwargs.source === SKILL_MESSAGE_SOURCE;
+}
+
 function isCacheableConversationMessage(message: MessageWithContent): boolean {
   const role = getMessageRole(message);
   return (
@@ -305,7 +326,9 @@ function addCacheControlToRecentMessages<
     const content = originalMessage.content;
     const hasArrayContent = Array.isArray(content);
     const canAddCache =
-      cachePointsAdded < maxCachePoints && canUseMessage(originalMessage);
+      cachePointsAdded < maxCachePoints &&
+      canUseMessage(originalMessage) &&
+      !isSyntheticMetaMessage(originalMessage);
 
     if (!canAddCache && !hasArrayContent) {
       continue;
@@ -536,6 +559,7 @@ export function addBedrockCacheControl<
       isUserMessage &&
       !isToolMessage &&
       !isEmptyString &&
+      !isSyntheticMetaMessage(originalMessage) &&
       (typeof content === 'string' || hasArrayContent);
 
     if (!needsCacheAdd && !hasArrayContent && !hasSerializationProps) {

diff --git a/src/messages/format.ts b/src/messages/format.ts
@@ -334,6 +334,10 @@ interface FormatAssistantMessageOptions {
 
 interface FormatAgentMessagesOptions {
   provider?: Providers;
+  /** Skill names already primed fresh this turn (manual/always-apply). Their
+   *  historical `skill` tool_calls are not reconstructed into a HumanMessage,
+   *  so the same SKILL.md body is not injected twice in one request. */
+  skipSkillBodyNames?: Set<string>;
 }
 
 function extractReasoningContent(
@@ -1159,6 +1163,7 @@ function extractSkillName(args: unknown): string | undefined {
  * @param indexTokenCountMap - Optional map of message indices to token counts.
  * @param tools - Optional set of tool names that are allowed in the request.
  * @param skills - Optional map of skill name to body for reconstructing skill HumanMessages.
+ * @param options - Optional formatting options (provider, skipSkillBodyNames).
  * @returns - Object containing formatted messages and updated indexTokenCountMap if provided.
  */
 export const formatAgentMessages = (
@@ -1445,7 +1450,11 @@ export const formatAgentMessages = (
     const endMessageIndex = messages.length;
 
     if (pendingSkillNames?.size != null && pendingSkillNames.size > 0) {
+      const skipSkillBodyNames = options?.skipSkillBodyNames;
       for (const skillName of pendingSkillNames) {
+        if (skipSkillBodyNames != null && skipSkillBodyNames.has(skillName)) {
+          continue;
+        }
         const body = skills?.get(skillName) ?? '';
         if (body) {
           messages.push(

diff --git a/src/messages/formatAgentMessages.skills.test.ts b/src/messages/formatAgentMessages.skills.test.ts
@@ -410,4 +410,104 @@ describe('formatAgentMessages skill body reconstruction', () => {
       expect(assistantTotal).toBe(500);
     });
   });
+
+  describe('skipSkillBodyNames (fresh-prime dedupe)', () => {
+    const tools = new Set([Constants.SKILL_TOOL]);
+
+    const injectedSkillBodies = (
+      messages: ReturnType<typeof formatAgentMessages>['messages']
+    ) =>
+      messages.filter(
+        (m) =>
+          m instanceof HumanMessage &&
+          (m as HumanMessage).additional_kwargs.source === 'skill'
+      );
+
+    it('does NOT reconstruct body when skill name is in skipSkillBodyNames', () => {
+      const payload: TPayload = [
+        { role: 'user', content: 'Review my code' },
+        {
+          role: 'assistant',
+          content: [skillToolCall('call_1', 'code-review')],
+        },
+      ];
+
+      const { messages } = formatAgentMessages(
+        payload,
+        undefined,
+        tools,
+        skillBodies,
+        { skipSkillBodyNames: new Set(['code-review']) }
+      );
+
+      expect(injectedSkillBodies(messages)).toHaveLength(0);
+    });
+
+    it('reconstructs only names NOT in skipSkillBodyNames', () => {
+      const payload: TPayload = [
+        { role: 'user', content: 'Go' },
+        {
+          role: 'assistant',
+          content: [
+            skillToolCall('call_1', 'pdf-analyzer'),
+            skillToolCall('call_2', 'code-review'),
+          ],
+        },
+      ];
+
+      const { messages } = formatAgentMessages(
+        payload,
+        undefined,
+        tools,
+        skillBodies,
+        { skipSkillBodyNames: new Set(['code-review']) }
+      );
+
+      const injected = injectedSkillBodies(messages);
+      expect(injected).toHaveLength(1);
+      expect((injected[0] as HumanMessage).additional_kwargs.skillName).toBe(
+        'pdf-analyzer'
+      );
+    });
+
+    it('reconstructs normally when skipSkillBodyNames is empty', () => {
+      const payload: TPayload = [
+        { role: 'user', content: 'Review my code' },
+        {
+          role: 'assistant',
+          content: [skillToolCall('call_1', 'code-review')],
+        },
+      ];
+
+      const { messages } = formatAgentMessages(
+        payload,
+        undefined,
+        tools,
+        skillBodies,
+        { skipSkillBodyNames: new Set() }
+      );
+
+      expect(injectedSkillBodies(messages)).toHaveLength(1);
+    });
+
+    it('skips in the non-tools-filtering path too', () => {
+      const payload: TPayload = [
+        { role: 'user', content: 'Review my code' },
+        {
+          role: 'assistant',
+          content: [skillToolCall('call_1', 'code-review')],
+        },
+      ];
+
+      const { messages } = formatAgentMessages(
+        payload,
+        undefined,
+        undefined,
+        skillBodies,
+        { skipSkillBodyNames: new Set(['code-review']) }
+      );
+
+      expect(injectedSkillBodies(messages)).toHaveLength(0);
+    });
+  });
 });