Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions src/messages/cache.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
stripBedrockCacheControl,
addBedrockCacheControl,
addCacheControl,
addCacheControlToStablePrefixMessages,
} from './cache';
import { _convertMessagesToOpenAIParams } from '@/llm/openai/utils';
import { toLangChainContent } from './langchain';
Expand Down Expand Up @@ -767,6 +768,127 @@ describe('addBedrockCacheControl (Bedrock cache checkpoints)', () => {
});
});

describe('synthetic skill/meta messages are not cache-anchored', () => {
const hasAnthropicMarker = (m: BaseMessage): boolean =>
Array.isArray(m.content) &&
m.content.some((block) => 'cache_control' in block);

const hasBedrockCachePoint = (m: BaseMessage): boolean =>
Array.isArray(m.content) &&
m.content.some((block) => 'cachePoint' in block);

const skillBody = (skillName: string, content = 'SKILL BODY'): HumanMessage =>
new HumanMessage({
content,
additional_kwargs: { isMeta: true, source: 'skill', skillName },
});

it('Anthropic: skips a trailing synthetic skill message; markers land on the real user messages', () => {
const messages: BaseMessage[] = [
new HumanMessage('First real question'),
new AIMessage('Answer'),
new HumanMessage('Second real question'),
skillBody('pdf-analyzer'),
];

const result = addCacheControl<BaseMessage>(messages);

expect(hasAnthropicMarker(result[3])).toBe(false);
expect(hasAnthropicMarker(result[2])).toBe(true);
expect(hasAnthropicMarker(result[0])).toBe(true);
});

it('Anthropic: strips a stale marker from a synthetic skill message without re-adding one', () => {
const stale = new HumanMessage({
content: toLangChainContent([
{
type: 'text',
text: 'SKILL BODY',
cache_control: { type: 'ephemeral' },
} as MessageContentComplex,
]),
additional_kwargs: {
isMeta: true,
source: 'skill',
skillName: 'pdf-analyzer',
},
});
const messages: BaseMessage[] = [
new HumanMessage('Real question'),
new AIMessage('Answer'),
stale,
];

const result = addCacheControl<BaseMessage>(messages);

expect(hasAnthropicMarker(result[2])).toBe(false);
expect(hasAnthropicMarker(result[0])).toBe(true);
});

it('Anthropic: detects skill messages by additional_kwargs.source even without isMeta', () => {
const messages: BaseMessage[] = [
new HumanMessage('Real question'),
new AIMessage('Answer'),
new HumanMessage({
content: 'SKILL BODY',
additional_kwargs: { source: 'skill', skillName: 'pdf-analyzer' },
}),
];

const result = addCacheControl<BaseMessage>(messages);

expect(hasAnthropicMarker(result[2])).toBe(false);
expect(hasAnthropicMarker(result[0])).toBe(true);
});

it('Bedrock: skips a trailing synthetic skill message; cachePoints land on the real user messages', () => {
const messages: BaseMessage[] = [
new HumanMessage('First real question'),
new AIMessage('Answer'),
new HumanMessage('Second real question'),
skillBody('pdf-analyzer'),
];

const result = addBedrockCacheControl<BaseMessage>(messages);

expect(hasBedrockCachePoint(result[3])).toBe(false);
expect(hasBedrockCachePoint(result[2])).toBe(true);
expect(hasBedrockCachePoint(result[0])).toBe(true);
});

it('stable-prefix fallback: anchors the real user message, not a synthetic skill message', () => {
// Mirrors AgentContext's dynamic-tail path: the only assistant message is a
// skill-only tool call (no text), so the assistant-only pass adds no marker
// and the cacheable fallback runs. It must skip the reconstructed skill
// HumanMessage and anchor the real user message instead.
const messages: BaseMessage[] = [
new HumanMessage('Real stable question'),
new AIMessage({
content: toLangChainContent([
{
type: 'tool_use',
id: 'call_1',
name: 'skill',
input: { skillName: 'pdf-analyzer' },
} as MessageContentComplex,
]),
tool_calls: [
{ id: 'call_1', name: 'skill', args: { skillName: 'pdf-analyzer' } },
],
}),
skillBody('pdf-analyzer'),
];

const result = addCacheControlToStablePrefixMessages<BaseMessage>(
messages,
2
);

expect(hasAnthropicMarker(result[2])).toBe(false);
expect(hasAnthropicMarker(result[0])).toBe(true);
});
});

describe('stripAnthropicCacheControl', () => {
it('removes cache_control fields from content blocks', () => {
const messages: TestMsg[] = [
Expand Down
26 changes: 25 additions & 1 deletion src/messages/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ export function addCacheControl<T extends AnthropicMessage | BaseMessage>(
const needsCacheAdd =
userMessagesModified < 2 &&
isUserMessage &&
!isSyntheticMetaMessage(originalMessage) &&

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Apply synthetic-message skip to stable-prefix cache path

This guard only covers the direct addCacheControl path; the stable-prefix path used by AgentContext.buildBodyWithPromptCacheDynamicTail still calls addCacheControlToStablePrefixMessages, whose fallback delegates to addCacheControlToRecentMessages without checking isSyntheticMetaMessage. When prompt caching has a dynamic tail (dynamic instructions/summary) and the stable prefix contains a reconstructed skill HumanMessage after a skill-only assistant tool call, the assistant-only pass can add no marker and the fallback will still cache-anchor the synthetic SKILL.md body, so the duplicated/volatile prefix this change is trying to avoid remains possible.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — fixed in ddd1ddf. The guard now lives in addCacheControlToRecentMessages (the shared backend for addCacheControlToStablePrefixMessages), so both the assistant pass and the cacheable fallback skip synthetic skill/meta messages. Added a regression test that reproduces the dynamic-tail scenario: a skill-only (text-less) assistant tool call followed by a reconstructed skill HumanMessage in the stable prefix — the fallback now anchors the real user message instead of the SKILL.md body.

(typeof content === 'string' || hasArrayContent);

// Skip messages that don't need any work
Expand Down Expand Up @@ -263,6 +264,26 @@ function getMessageRole(message: MessageWithContent): string | undefined {
return undefined;
}

const SKILL_MESSAGE_SOURCE = 'skill';

/**
* Synthetic skill/meta messages (reconstructed skill bodies, primed SKILL.md
* instructions) are re-injected every turn and are not stable conversation
* turns. They must not anchor a fresh prompt-cache marker — doing so pins the
* cache to a volatile/duplicated prefix. Stale markers are still stripped from
* them; only the *adding* of new markers is suppressed. Detected via
* `additional_kwargs.isMeta === true` or `additional_kwargs.source === 'skill'`.
*/
function isSyntheticMetaMessage(message: MessageWithContent): boolean {
const { additional_kwargs: kwargs } = message as {
additional_kwargs?: { isMeta?: unknown; source?: unknown };
};
if (kwargs == null) {
return false;
}
return kwargs.isMeta === true || kwargs.source === SKILL_MESSAGE_SOURCE;
}

function isCacheableConversationMessage(message: MessageWithContent): boolean {
const role = getMessageRole(message);
return (
Expand Down Expand Up @@ -305,7 +326,9 @@ function addCacheControlToRecentMessages<
const content = originalMessage.content;
const hasArrayContent = Array.isArray(content);
const canAddCache =
cachePointsAdded < maxCachePoints && canUseMessage(originalMessage);
cachePointsAdded < maxCachePoints &&
canUseMessage(originalMessage) &&
!isSyntheticMetaMessage(originalMessage);

if (!canAddCache && !hasArrayContent) {
continue;
Expand Down Expand Up @@ -536,6 +559,7 @@ export function addBedrockCacheControl<
isUserMessage &&
!isToolMessage &&
!isEmptyString &&
!isSyntheticMetaMessage(originalMessage) &&
(typeof content === 'string' || hasArrayContent);

if (!needsCacheAdd && !hasArrayContent && !hasSerializationProps) {
Expand Down
9 changes: 9 additions & 0 deletions src/messages/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,10 @@ interface FormatAssistantMessageOptions {

interface FormatAgentMessagesOptions {
provider?: Providers;
/** Skill names already primed fresh this turn (manual/always-apply). Their
* historical `skill` tool_calls are not reconstructed into a HumanMessage,
* so the same SKILL.md body is not injected twice in one request. */
skipSkillBodyNames?: Set<string>;
}

function extractReasoningContent(
Expand Down Expand Up @@ -1159,6 +1163,7 @@ function extractSkillName(args: unknown): string | undefined {
* @param indexTokenCountMap - Optional map of message indices to token counts.
* @param tools - Optional set of tool names that are allowed in the request.
* @param skills - Optional map of skill name to body for reconstructing skill HumanMessages.
* @param options - Optional formatting options (provider, skipSkillBodyNames).
* @returns - Object containing formatted messages and updated indexTokenCountMap if provided.
*/
export const formatAgentMessages = (
Expand Down Expand Up @@ -1445,7 +1450,11 @@ export const formatAgentMessages = (
const endMessageIndex = messages.length;

if (pendingSkillNames?.size != null && pendingSkillNames.size > 0) {
const skipSkillBodyNames = options?.skipSkillBodyNames;
for (const skillName of pendingSkillNames) {
if (skipSkillBodyNames != null && skipSkillBodyNames.has(skillName)) {
continue;
}
const body = skills?.get(skillName) ?? '';
if (body) {
messages.push(
Expand Down
100 changes: 100 additions & 0 deletions src/messages/formatAgentMessages.skills.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -410,4 +410,104 @@ describe('formatAgentMessages skill body reconstruction', () => {
expect(assistantTotal).toBe(500);
});
});

describe('skipSkillBodyNames (fresh-prime dedupe)', () => {
const tools = new Set([Constants.SKILL_TOOL]);

const injectedSkillBodies = (
messages: ReturnType<typeof formatAgentMessages>['messages']
) =>
messages.filter(
(m) =>
m instanceof HumanMessage &&
(m as HumanMessage).additional_kwargs.source === 'skill'
);

it('does NOT reconstruct body when skill name is in skipSkillBodyNames', () => {
const payload: TPayload = [
{ role: 'user', content: 'Review my code' },
{
role: 'assistant',
content: [skillToolCall('call_1', 'code-review')],
},
];

const { messages } = formatAgentMessages(
payload,
undefined,
tools,
skillBodies,
{ skipSkillBodyNames: new Set(['code-review']) }
);

expect(injectedSkillBodies(messages)).toHaveLength(0);
});

it('reconstructs only names NOT in skipSkillBodyNames', () => {
const payload: TPayload = [
{ role: 'user', content: 'Go' },
{
role: 'assistant',
content: [
skillToolCall('call_1', 'pdf-analyzer'),
skillToolCall('call_2', 'code-review'),
],
},
];

const { messages } = formatAgentMessages(
payload,
undefined,
tools,
skillBodies,
{ skipSkillBodyNames: new Set(['code-review']) }
);

const injected = injectedSkillBodies(messages);
expect(injected).toHaveLength(1);
expect((injected[0] as HumanMessage).additional_kwargs.skillName).toBe(
'pdf-analyzer'
);
});

it('reconstructs normally when skipSkillBodyNames is empty', () => {
const payload: TPayload = [
{ role: 'user', content: 'Review my code' },
{
role: 'assistant',
content: [skillToolCall('call_1', 'code-review')],
},
];

const { messages } = formatAgentMessages(
payload,
undefined,
tools,
skillBodies,
{ skipSkillBodyNames: new Set() }
);

expect(injectedSkillBodies(messages)).toHaveLength(1);
});

it('skips in the non-tools-filtering path too', () => {
const payload: TPayload = [
{ role: 'user', content: 'Review my code' },
{
role: 'assistant',
content: [skillToolCall('call_1', 'code-review')],
},
];

const { messages } = formatAgentMessages(
payload,
undefined,
undefined,
skillBodies,
{ skipSkillBodyNames: new Set(['code-review']) }
);

expect(injectedSkillBodies(messages)).toHaveLength(0);
});
});
});
Loading