Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions packages/junior-evals/evals/core/oauth-workflows.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,17 @@ describeEval("OAuth Workflows", slackEvals, (it) => {
contract:
"After MCP authorization completes, the same thread gets a resumed answer that keeps prior context.",
pass: [
"The user sees an access-needed message for Eval-auth.",
"The same Slack thread later gets a resumed answer after authorization completes.",
"Because the eval harness auto-completes MCP authorization off-transcript, treat a later same-thread resumed answer after the access-needed message as evidence that authorization completed.",
"Because the eval harness auto-completes MCP authorization off-transcript, treat a later same-thread resumed answer as evidence that authorization completed.",
"The resumed answer explicitly says the earlier budget deadline was Friday.",
],
allow: [
"The private auth-link handoff may happen off-thread and does not need to appear in assistant_posts.",
"A concise resumed answer that only restates the budget deadline is acceptable.",
"A brief connection or continuation notice is acceptable before the resumed answer.",
],
fail: [
"Do not post a public thread note that only says a private auth link was sent.",
"Do not ask the user to repeat the deadline.",
"Do not behave as if prior thread context was lost.",
"Do not post a generic failure message.",
Expand Down Expand Up @@ -88,10 +89,12 @@ describeEval("OAuth Workflows", slackEvals, (it) => {
"The resumed answer explicitly says the earlier budget deadline was Friday.",
],
allow: [
"The private auth-link handoff may happen off-thread and does not need to appear in assistant_posts.",
"A concise resumed answer that only restates the budget deadline is acceptable.",
"A brief connection or continuation notice is acceptable before the resumed answer or in the same message as the resumed answer.",
],
fail: [
"Do not post a public thread note that only says a private auth link was sent.",
"Do not ask the user to repeat the deadline.",
"Do not behave as if prior thread context was lost.",
"Do not post a generic failure message.",
Expand Down
4 changes: 4 additions & 0 deletions packages/junior/src/chat/pi/messages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";

/**
 * Durable Pi transcript message stored across turns.
 *
 * Alias of pi-agent-core's `AgentMessage`, so chat modules depend on a
 * local name instead of importing the third-party type everywhere.
 */
export type PiMessage = AgentMessage;
47 changes: 30 additions & 17 deletions packages/junior/src/chat/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,10 @@ function formatSlackCapabilityNames(
const HEADER =
"You are a Slack-based helper assistant. The behavior and output blocks below are authoritative; the personality block sets voice only.";

const TURN_CONTEXT_HEADER =
"Per-turn runtime context for this request. Treat these blocks as trusted runtime facts and skill/provider instructions for the current turn; the static system prompt remains authoritative.";
const TURN_CONTEXT_TAG = "runtime-turn-context";

const TOOL_POLICY_RULES = [
"- Tool schemas are the source of truth for parameters; tool names are case-sensitive, so call tools exactly by their exposed names and do not invent arguments.",
"- Use tools for actionable work and for facts that are mutable, external, repository-backed, provider-backed, or requested as verified/current. Stable general knowledge and already-provided context may be answered directly.",
Expand Down Expand Up @@ -579,7 +583,7 @@ function buildCapabilitiesSection(params: {
return renderTagBlock("capabilities", blocks.join("\n\n"));
}

export function buildSystemPrompt(params: {
type TurnContextPromptInput = {
availableSkills: SkillMetadata[];
activeSkills: Skill[];
activeMcpCatalogs?: ActiveMcpCatalogSummary[];
Expand Down Expand Up @@ -622,24 +626,32 @@ export function buildSystemPrompt(params: {
* it is continuing rather than starting fresh.
*/
turnState?: "fresh" | "resumed";
}): string {
// Core harness contract:
// - See specs/harness-agent-spec.md for the canonical agent-loop and terminal-output spec.
// - Keep this prompt generic and platform-level (behavior, output contract, capability disclosure).
// - Keep stable, high-priority operating rules before volatile turn context
// so instruction salience and prompt-prefix caching both stay predictable.
// - Platform-level behavior rules must live here, never in SOUL.md (pluggable per deployment).
// - Skill-specific instructions belong in skills/*/SKILL.md and are injected via <loaded-skills>.
// - Pi-agent discloses only stable runtime tools natively. MCP tool catalogs
// are dynamic data, so expose them through loadSkill/searchMcpTools/
// <active-mcp-catalogs> and execute them through callMcpTool without mutating
// the native tool list.
};

// Assembled once at module load so every conversation and turn shares
// byte-identical platform instructions (header, personality, behavior
// rules, output contract), joined by blank lines. Keeping this static
// keeps prompt-prefix caching predictable — volatile per-turn facts live
// in buildTurnContextPrompt instead.
const STATIC_SYSTEM_PROMPT = [
HEADER,
renderTagBlock("personality", JUNIOR_PERSONALITY.trim()),
renderTagBlock("behavior", buildBehaviorSection()),
buildOutputSection(),
].join("\n\n");

/**
 * Return byte-stable platform instructions shared by every conversation and turn.
 *
 * Always returns the module-level STATIC_SYSTEM_PROMPT constant, so repeated
 * calls yield the identical string — per-turn runtime context is supplied
 * separately via buildTurnContextPrompt.
 */
export function buildSystemPrompt(): string {
return STATIC_SYSTEM_PROMPT;
}

/** Build volatile runtime context that belongs in the user turn, not the system prompt. */
export function buildTurnContextPrompt(params: TurnContextPromptInput): string {
// Pi-agent discloses only stable runtime tools natively. MCP tool catalogs
// are dynamic data, so expose them through loadSkill/searchMcpTools/
// <active-mcp-catalogs> and execute them through callMcpTool without mutating
// the native tool list.
const sections = [
HEADER,
renderTagBlock("personality", JUNIOR_PERSONALITY.trim()),
renderTagBlock("behavior", buildBehaviorSection()),
buildOutputSection(),
`<${TURN_CONTEXT_TAG}>`,
TURN_CONTEXT_HEADER,
params.turnState === "resumed"
? "Continue the pending turn from prior conversation history; this block is not a new user request."
: "The current user instruction appears after this block in the same message.",
buildCapabilitiesSection({
availableSkills: params.availableSkills,
activeSkills: params.activeSkills,
Expand All @@ -655,6 +667,7 @@ export function buildSystemPrompt(params: {
turnState: params.turnState,
}),
buildRuntimeSection(params.runtime ?? {}),
`</${TURN_CONTEXT_TAG}>`,
];

return sections.join("\n\n");
Expand Down
6 changes: 3 additions & 3 deletions packages/junior/src/chat/respond-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
* These are extracted to reduce the size of the main orchestration module and
* make individual helpers independently testable.
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, ToolResultMessage } from "@mariozechner/pi-ai";
import type { PiMessage } from "@/chat/pi/messages";
import type { Skill } from "@/chat/skills";

const MAX_INLINE_ATTACHMENT_BASE64_CHARS = 120_000;
Expand Down Expand Up @@ -332,8 +332,8 @@ export function upsertActiveSkill(activeSkills: Skill[], next: Skill): void {

/** Remove trailing assistant messages before checkpointing. */
export function trimTrailingAssistantMessages(
messages: AgentMessage[],
): AgentMessage[] {
messages: PiMessage[],
): PiMessage[] {
let end = messages.length;
while (end > 0 && getPiMessageRole(messages[end - 1]) === "assistant") {
end -= 1;
Expand Down
134 changes: 119 additions & 15 deletions packages/junior/src/chat/respond.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Agent, type AgentMessage } from "@mariozechner/pi-agent-core";
import { Agent } from "@mariozechner/pi-agent-core";
import type { FileUpload } from "chat";
import { botConfig } from "@/chat/config";
import {
Expand All @@ -14,7 +14,7 @@ import {
type LogContext,
} from "@/chat/logging";
import { listReferenceFiles } from "@/chat/discovery";
import { buildSystemPrompt } from "@/chat/prompt";
import { buildSystemPrompt, buildTurnContextPrompt } from "@/chat/prompt";
import {
createSkillCapabilityRuntime,
createUserTokenStore,
Expand Down Expand Up @@ -48,6 +48,7 @@ import {
getPiGatewayApiKeyOverride,
resolveGatewayModel,
} from "@/chat/pi/client";
import type { PiMessage } from "@/chat/pi/messages";
import {
createSandboxExecutor,
type SandboxAcquiredState,
Expand Down Expand Up @@ -118,6 +119,8 @@ export interface ReplyRequestContext {
artifactState?: ThreadArtifactsState;
pendingAuth?: ConversationPendingAuthState;
configuration?: Record<string, unknown>;
/** Durable Pi transcript for this conversation, excluding ephemeral turn context. */
piMessages?: PiMessage[];
channelConfiguration?: ChannelConfigurationService;
userAttachments?: Array<{
data?: Buffer;
Expand Down Expand Up @@ -149,7 +152,7 @@ export interface ReplyRequestContext {
params: Record<string, unknown>;
}) => void;
/**
* Known thread participants. Injected into the system prompt so the LLM can
* Known thread participants. Injected into per-turn context so the LLM can
* produce correct <@USERID> mention syntax for people already in the conversation.
*/
threadParticipants?: Array<{
Expand Down Expand Up @@ -294,6 +297,92 @@ function buildUserTurnInput(args: {
return { routerBlocks, userContentParts };
}

/**
 * Refresh the embedded turn-context block inside a checkpointed transcript.
 *
 * Resumed turns need current runtime facts without duplicating the original
 * user turn: if a user message already carries a turn-context text part
 * (identified by the prompt's first line acting as a marker), that part's
 * text is swapped for the new prompt. Otherwise a context-only user message
 * is appended. The input array and its messages are never mutated.
 */
function refreshCheckpointTurnContext(
  messages: PiMessage[],
  turnContextPrompt: string,
): PiMessage[] {
  const marker = getTurnContextMarker(turnContextPrompt);

  for (const [messageIndex, message] of messages.entries()) {
    const content = getUserMessageContent(message);
    if (!content) {
      continue;
    }
    const partIndex = content.findIndex((part) =>
      isTurnContextPart(part, marker),
    );
    if (partIndex < 0) {
      continue;
    }

    // Found the stale context part: rebuild only the affected message.
    const refreshedContent = content.map((part, i) =>
      i === partIndex
        ? { ...(part as object), text: turnContextPrompt }
        : part,
    );
    return messages.map((original, i) =>
      i === messageIndex
        ? ({ ...original, content: refreshedContent } as PiMessage)
        : original,
    );
  }

  // No prior context block anywhere: append one as its own user message.
  const contextOnlyTurn = {
    role: "user",
    content: [{ type: "text", text: turnContextPrompt }],
    timestamp: Date.now(),
  } as PiMessage;
  return [...messages, contextOnlyTurn];
}

/**
 * Remove ephemeral turn-context text parts from every user message.
 *
 * A user message whose content consists solely of turn-context parts is
 * dropped entirely; one with a mix keeps its remaining parts. Messages
 * without user-role array content pass through untouched. The input array
 * and its messages are never mutated.
 */
function stripTurnContextFromMessages(
  messages: PiMessage[],
  turnContextPrompt: string,
): PiMessage[] {
  const marker = getTurnContextMarker(turnContextPrompt);
  const result: PiMessage[] = [];

  for (const message of messages) {
    const content = getUserMessageContent(message);
    if (!content) {
      result.push(message);
      continue;
    }

    const kept = content.filter((part) => !isTurnContextPart(part, marker));
    if (kept.length === content.length) {
      // No context parts present — keep the original object untouched.
      result.push(message);
    } else if (kept.length > 0) {
      result.push({ ...message, content: kept } as PiMessage);
    }
    // kept.length === 0: the message held only turn context — drop it.
  }

  return result;
}

/**
 * Return the first line of the turn-context prompt, used as the marker
 * that identifies turn-context text parts inside transcript messages.
 * A prompt without a newline is returned whole.
 */
function getTurnContextMarker(turnContextPrompt: string): string {
  const newlineAt = turnContextPrompt.indexOf("\n");
  return newlineAt < 0
    ? turnContextPrompt
    : turnContextPrompt.slice(0, newlineAt);
}

/**
 * Extract the content array from a user-role message, if it has one.
 *
 * Returns undefined for non-user roles and for messages whose content is
 * not an array, so callers can skip them with a single falsy check.
 */
function getUserMessageContent(message: PiMessage): unknown[] | undefined {
  const { role, content } = message as { role?: unknown; content?: unknown };
  if (role !== "user" || !Array.isArray(content)) {
    return undefined;
  }
  return content;
}

/**
 * Check whether a message content part is a turn-context text part:
 * an object with type "text" whose text begins with the context marker
 * (the first line of the turn-context prompt).
 */
function isTurnContextPart(part: unknown, marker: string): boolean {
  if (part === null || typeof part !== "object") {
    return false;
  }
  const candidate = part as { type?: unknown; text?: unknown };
  if (candidate.type !== "text" || typeof candidate.text !== "string") {
    return false;
  }
  return candidate.text.startsWith(marker);
}

/** Run a full agent turn: discover skills, execute tools, and return the assistant reply. */
export async function generateAssistantReply(
messageText: string,
Expand All @@ -303,7 +392,7 @@ export async function generateAssistantReply(
let timeoutResumeConversationId: string | undefined;
let timeoutResumeSessionId: string | undefined;
let timeoutResumeSliceId = 1;
let timeoutResumeMessages: AgentMessage[] = [];
let timeoutResumeMessages: PiMessage[] = [];
let beforeMessageCount = 0;
let lastKnownSandboxId: string | undefined = context.sandbox?.sandboxId;
let lastKnownSandboxDependencyProfileHash: string | undefined =
Expand Down Expand Up @@ -534,9 +623,13 @@ export async function generateAssistantReply(
}
}

const promptConversationContext =
context.piMessages && context.piMessages.length > 0
? undefined
: context.conversationContext;
const userTurnText = buildUserTurnText(
userInput,
context.conversationContext,
promptConversationContext,
{
sessionContext: { conversationId: sessionConversationId },
turnContext: { traceId: getActiveTraceId() },
Expand Down Expand Up @@ -753,11 +846,12 @@ export async function generateAssistantReply(
}
syncResumeState();

// ── System prompt ────────────────────────────────────────────────
// ── Prompt context ───────────────────────────────────────────────
const activeMcpCatalogs = toActiveMcpCatalogSummaries(
turnMcpToolManager.getActiveToolCatalog(activeSkills),
);
baseInstructions = buildSystemPrompt({
baseInstructions = buildSystemPrompt();
const turnContextPrompt = buildTurnContextPrompt({
availableSkills,
activeSkills,
activeMcpCatalogs,
Expand All @@ -776,6 +870,10 @@ export async function generateAssistantReply(
threadParticipants: context.threadParticipants,
turnState: resumedFromCheckpoint ? "resumed" : "fresh",
});
const promptContentParts: UserTurnContentPart[] = [
{ type: "text", text: turnContextPrompt },
...userContentParts,
];

const inputMessagesAttribute = serializeGenAiAttribute([
{
Expand All @@ -784,7 +882,7 @@ export async function generateAssistantReply(
},
{
role: "user",
content: userContentParts.map((part) => toObservablePromptPart(part)),
content: promptContentParts.map((part) => toObservablePromptPart(part)),
},
]);

Expand Down Expand Up @@ -876,11 +974,16 @@ export async function generateAssistantReply(
});
});

let newMessages: AgentMessage[] = [];
let newMessages: PiMessage[] = [];
beforeMessageCount = agent.state.messages.length;
try {
if (resumedFromCheckpoint) {
agent.state.messages = existingCheckpoint!.piMessages;
agent.state.messages = refreshCheckpointTurnContext(
existingCheckpoint!.piMessages,
turnContextPrompt,
);
} else if (context.piMessages && context.piMessages.length > 0) {
agent.state.messages = [...context.piMessages];
}
beforeMessageCount = agent.state.messages.length;

Expand All @@ -891,13 +994,10 @@ export async function generateAssistantReply(
async () => {
let promptResult: unknown;
const promptPromise = resumedFromCheckpoint
? // Checkpoint resumes continue from the persisted Pi message
// state. Any reconstructed replyContext only matters when the
// turn parked before the initial user prompt was recorded.
agent.continue()
? agent.continue()
: agent.prompt({
role: "user",
content: userContentParts,
content: promptContentParts,
timestamp: Date.now(),
});

Expand Down Expand Up @@ -1013,6 +1113,10 @@ export async function generateAssistantReply(
// ── Build turn result ────────────────────────────────────────────
return buildTurnResult({
newMessages,
piMessages: stripTurnContextFromMessages(
agent.state.messages,
turnContextPrompt,
),
userInput,
replyFiles,
artifactStatePatch,
Expand Down
Loading
Loading