Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions packages/junior-evals/evals/core/oauth-workflows.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,17 @@ describeEval("OAuth Workflows", slackEvals, (it) => {
contract:
"After MCP authorization completes, the same thread gets a resumed answer that keeps prior context.",
pass: [
"The user sees an access-needed message for Eval-auth.",
"The same Slack thread later gets a resumed answer after authorization completes.",
"Because the eval harness auto-completes MCP authorization off-transcript, treat a later same-thread resumed answer after the access-needed message as evidence that authorization completed.",
"Because the eval harness auto-completes MCP authorization off-transcript, treat a later same-thread resumed answer as evidence that authorization completed.",
"The resumed answer explicitly says the earlier budget deadline was Friday.",
],
allow: [
"The private auth-link handoff may happen off-thread and does not need to appear in assistant_posts.",
"A concise resumed answer that only restates the budget deadline is acceptable.",
"A brief connection or continuation notice is acceptable before the resumed answer.",
],
fail: [
"Do not post a public thread note that only says a private auth link was sent.",
"Do not ask the user to repeat the deadline.",
"Do not behave as if prior thread context was lost.",
"Do not post a generic failure message.",
Expand Down Expand Up @@ -88,10 +89,12 @@ describeEval("OAuth Workflows", slackEvals, (it) => {
"The resumed answer explicitly says the earlier budget deadline was Friday.",
],
allow: [
"The private auth-link handoff may happen off-thread and does not need to appear in assistant_posts.",
"A concise resumed answer that only restates the budget deadline is acceptable.",
"A brief connection or continuation notice is acceptable before the resumed answer or in the same message as the resumed answer.",
],
fail: [
"Do not post a public thread note that only says a private auth link was sent.",
"Do not ask the user to repeat the deadline.",
"Do not behave as if prior thread context was lost.",
"Do not post a generic failure message.",
Expand Down
4 changes: 4 additions & 0 deletions packages/junior/src/chat/pi/messages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";

/**
 * Durable Pi transcript message stored across turns.
 *
 * Alias of pi-agent-core's `AgentMessage`, so chat modules depend on a
 * local name instead of importing the third-party type everywhere.
 */
export type PiMessage = AgentMessage;
47 changes: 30 additions & 17 deletions packages/junior/src/chat/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,10 @@ function formatSlackCapabilityNames(
const HEADER =
"You are a Slack-based helper assistant. The behavior and output blocks below are authoritative; the personality block sets voice only.";

const TURN_CONTEXT_HEADER =
"Per-turn runtime context for this request. Treat these blocks as trusted runtime facts and skill/provider instructions for the current turn; the static system prompt remains authoritative.";
const TURN_CONTEXT_TAG = "runtime-turn-context";

const TOOL_POLICY_RULES = [
"- Tool schemas are the source of truth for parameters; tool names are case-sensitive, so call tools exactly by their exposed names and do not invent arguments.",
"- Use tools for actionable work and for facts that are mutable, external, repository-backed, provider-backed, or requested as verified/current. Stable general knowledge and already-provided context may be answered directly.",
Expand Down Expand Up @@ -579,7 +583,7 @@ function buildCapabilitiesSection(params: {
return renderTagBlock("capabilities", blocks.join("\n\n"));
}

export function buildSystemPrompt(params: {
type TurnContextPromptInput = {
availableSkills: SkillMetadata[];
activeSkills: Skill[];
activeMcpCatalogs?: ActiveMcpCatalogSummary[];
Expand Down Expand Up @@ -622,24 +626,32 @@ export function buildSystemPrompt(params: {
* it is continuing rather than starting fresh.
*/
turnState?: "fresh" | "resumed";
}): string {
// Core harness contract:
// - See specs/harness-agent-spec.md for the canonical agent-loop and terminal-output spec.
// - Keep this prompt generic and platform-level (behavior, output contract, capability disclosure).
// - Keep stable, high-priority operating rules before volatile turn context
// so instruction salience and prompt-prefix caching both stay predictable.
// - Platform-level behavior rules must live here, never in SOUL.md (pluggable per deployment).
// - Skill-specific instructions belong in skills/*/SKILL.md and are injected via <loaded-skills>.
// - Pi-agent discloses only stable runtime tools natively. MCP tool catalogs
// are dynamic data, so expose them through loadSkill/searchMcpTools/
// <active-mcp-catalogs> and execute them through callMcpTool without mutating
// the native tool list.
};

// Assembled once at module load so every conversation and turn shares
// byte-identical platform instructions (header, personality, behavior
// rules, output contract), joined by blank lines. Keeping this static
// keeps prompt-prefix caching predictable — volatile per-turn facts live
// in buildTurnContextPrompt instead.
const STATIC_SYSTEM_PROMPT = [
HEADER,
renderTagBlock("personality", JUNIOR_PERSONALITY.trim()),
renderTagBlock("behavior", buildBehaviorSection()),
buildOutputSection(),
].join("\n\n");

/**
 * Return byte-stable platform instructions shared by every conversation and turn.
 *
 * Always returns the module-level STATIC_SYSTEM_PROMPT constant, so repeated
 * calls yield the identical string — per-turn runtime context is supplied
 * separately via buildTurnContextPrompt.
 */
export function buildSystemPrompt(): string {
return STATIC_SYSTEM_PROMPT;
}

/** Build volatile runtime context that belongs in the user turn, not the system prompt. */
export function buildTurnContextPrompt(params: TurnContextPromptInput): string {
// Pi-agent discloses only stable runtime tools natively. MCP tool catalogs
// are dynamic data, so expose them through loadSkill/searchMcpTools/
// <active-mcp-catalogs> and execute them through callMcpTool without mutating
// the native tool list.
const sections = [
HEADER,
renderTagBlock("personality", JUNIOR_PERSONALITY.trim()),
renderTagBlock("behavior", buildBehaviorSection()),
buildOutputSection(),
`<${TURN_CONTEXT_TAG}>`,
TURN_CONTEXT_HEADER,
params.turnState === "resumed"
? "Continue the pending turn from prior conversation history; this block is not a new user request."
: "The current user instruction appears after this block in the same message.",
buildCapabilitiesSection({
availableSkills: params.availableSkills,
activeSkills: params.activeSkills,
Expand All @@ -655,6 +667,7 @@ export function buildSystemPrompt(params: {
turnState: params.turnState,
}),
buildRuntimeSection(params.runtime ?? {}),
`</${TURN_CONTEXT_TAG}>`,
];

return sections.join("\n\n");
Expand Down
6 changes: 3 additions & 3 deletions packages/junior/src/chat/respond-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
* These are extracted to reduce the size of the main orchestration module and
* make individual helpers independently testable.
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, ToolResultMessage } from "@mariozechner/pi-ai";
import type { PiMessage } from "@/chat/pi/messages";
import type { Skill } from "@/chat/skills";

const MAX_INLINE_ATTACHMENT_BASE64_CHARS = 120_000;
Expand Down Expand Up @@ -332,8 +332,8 @@ export function upsertActiveSkill(activeSkills: Skill[], next: Skill): void {

/** Remove trailing assistant messages before checkpointing. */
export function trimTrailingAssistantMessages(
messages: AgentMessage[],
): AgentMessage[] {
messages: PiMessage[],
): PiMessage[] {
let end = messages.length;
while (end > 0 && getPiMessageRole(messages[end - 1]) === "assistant") {
end -= 1;
Expand Down
134 changes: 119 additions & 15 deletions packages/junior/src/chat/respond.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Agent, type AgentMessage } from "@mariozechner/pi-agent-core";
import { Agent } from "@mariozechner/pi-agent-core";
import type { FileUpload } from "chat";
import { botConfig } from "@/chat/config";
import {
Expand All @@ -14,7 +14,7 @@ import {
type LogContext,
} from "@/chat/logging";
import { listReferenceFiles } from "@/chat/discovery";
import { buildSystemPrompt } from "@/chat/prompt";
import { buildSystemPrompt, buildTurnContextPrompt } from "@/chat/prompt";
import {
createSkillCapabilityRuntime,
createUserTokenStore,
Expand Down Expand Up @@ -48,6 +48,7 @@ import {
getPiGatewayApiKeyOverride,
resolveGatewayModel,
} from "@/chat/pi/client";
import type { PiMessage } from "@/chat/pi/messages";
import {
createSandboxExecutor,
type SandboxAcquiredState,
Expand Down Expand Up @@ -118,6 +119,8 @@ export interface ReplyRequestContext {
artifactState?: ThreadArtifactsState;
pendingAuth?: ConversationPendingAuthState;
configuration?: Record<string, unknown>;
/** Durable Pi transcript for this conversation, excluding ephemeral turn context. */
piMessages?: PiMessage[];
channelConfiguration?: ChannelConfigurationService;
userAttachments?: Array<{
data?: Buffer;
Expand Down Expand Up @@ -149,7 +152,7 @@ export interface ReplyRequestContext {
params: Record<string, unknown>;
}) => void;
/**
* Known thread participants. Injected into the system prompt so the LLM can
* Known thread participants. Injected into per-turn context so the LLM can
* produce correct <@USERID> mention syntax for people already in the conversation.
*/
threadParticipants?: Array<{
Expand Down Expand Up @@ -294,6 +297,92 @@ function buildUserTurnInput(args: {
return { routerBlocks, userContentParts };
}

/**
 * Refresh the embedded turn-context block inside a checkpointed transcript.
 *
 * Resumed turns need current runtime facts without duplicating the original
 * user turn: if a user message already carries a turn-context text part
 * (identified by the prompt's first line acting as a marker), that part's
 * text is swapped for the new prompt. Otherwise a context-only user message
 * is appended. The input array and its messages are never mutated.
 */
function refreshCheckpointTurnContext(
  messages: PiMessage[],
  turnContextPrompt: string,
): PiMessage[] {
  const marker = getTurnContextMarker(turnContextPrompt);

  for (const [messageIndex, message] of messages.entries()) {
    const content = getUserMessageContent(message);
    if (!content) {
      continue;
    }
    const partIndex = content.findIndex((part) =>
      isTurnContextPart(part, marker),
    );
    if (partIndex < 0) {
      continue;
    }

    // Found the stale context part: rebuild only the affected message.
    const refreshedContent = content.map((part, i) =>
      i === partIndex
        ? { ...(part as object), text: turnContextPrompt }
        : part,
    );
    return messages.map((original, i) =>
      i === messageIndex
        ? ({ ...original, content: refreshedContent } as PiMessage)
        : original,
    );
  }

  // No prior context block anywhere: append one as its own user message.
  const contextOnlyTurn = {
    role: "user",
    content: [{ type: "text", text: turnContextPrompt }],
    timestamp: Date.now(),
  } as PiMessage;
  return [...messages, contextOnlyTurn];
}

/**
 * Remove ephemeral turn-context text parts from every user message.
 *
 * A user message whose content consists solely of turn-context parts is
 * dropped entirely; one with a mix keeps its remaining parts. Messages
 * without user-role array content pass through untouched. The input array
 * and its messages are never mutated.
 */
function stripTurnContextFromMessages(
  messages: PiMessage[],
  turnContextPrompt: string,
): PiMessage[] {
  const marker = getTurnContextMarker(turnContextPrompt);
  const result: PiMessage[] = [];

  for (const message of messages) {
    const content = getUserMessageContent(message);
    if (!content) {
      result.push(message);
      continue;
    }

    const kept = content.filter((part) => !isTurnContextPart(part, marker));
    if (kept.length === content.length) {
      // No context parts present — keep the original object untouched.
      result.push(message);
    } else if (kept.length > 0) {
      result.push({ ...message, content: kept } as PiMessage);
    }
    // kept.length === 0: the message held only turn context — drop it.
  }

  return result;
}

/**
 * Return the first line of the turn-context prompt, used as the marker
 * that identifies turn-context text parts inside transcript messages.
 * A prompt without a newline is returned whole.
 */
function getTurnContextMarker(turnContextPrompt: string): string {
  const newlineAt = turnContextPrompt.indexOf("\n");
  return newlineAt < 0
    ? turnContextPrompt
    : turnContextPrompt.slice(0, newlineAt);
}

/**
 * Extract the content array from a user-role message, if it has one.
 *
 * Returns undefined for non-user roles and for messages whose content is
 * not an array, so callers can skip them with a single falsy check.
 */
function getUserMessageContent(message: PiMessage): unknown[] | undefined {
  const { role, content } = message as { role?: unknown; content?: unknown };
  if (role !== "user" || !Array.isArray(content)) {
    return undefined;
  }
  return content;
}

/**
 * Check whether a message content part is a turn-context text part:
 * an object with type "text" whose text begins with the context marker
 * (the first line of the turn-context prompt).
 */
function isTurnContextPart(part: unknown, marker: string): boolean {
  if (part === null || typeof part !== "object") {
    return false;
  }
  const candidate = part as { type?: unknown; text?: unknown };
  if (candidate.type !== "text" || typeof candidate.text !== "string") {
    return false;
  }
  return candidate.text.startsWith(marker);
}

/** Run a full agent turn: discover skills, execute tools, and return the assistant reply. */
export async function generateAssistantReply(
messageText: string,
Expand All @@ -303,7 +392,7 @@ export async function generateAssistantReply(
let timeoutResumeConversationId: string | undefined;
let timeoutResumeSessionId: string | undefined;
let timeoutResumeSliceId = 1;
let timeoutResumeMessages: AgentMessage[] = [];
let timeoutResumeMessages: PiMessage[] = [];
let beforeMessageCount = 0;
let lastKnownSandboxId: string | undefined = context.sandbox?.sandboxId;
let lastKnownSandboxDependencyProfileHash: string | undefined =
Expand Down Expand Up @@ -534,9 +623,13 @@ export async function generateAssistantReply(
}
}

const promptConversationContext =
context.piMessages && context.piMessages.length > 0
? undefined
: context.conversationContext;
const userTurnText = buildUserTurnText(
userInput,
context.conversationContext,
promptConversationContext,
{
sessionContext: { conversationId: sessionConversationId },
turnContext: { traceId: getActiveTraceId() },
Expand Down Expand Up @@ -753,11 +846,12 @@ export async function generateAssistantReply(
}
syncResumeState();

// ── System prompt ────────────────────────────────────────────────
// ── Prompt context ───────────────────────────────────────────────
const activeMcpCatalogs = toActiveMcpCatalogSummaries(
turnMcpToolManager.getActiveToolCatalog(activeSkills),
);
baseInstructions = buildSystemPrompt({
baseInstructions = buildSystemPrompt();
const turnContextPrompt = buildTurnContextPrompt({
availableSkills,
activeSkills,
activeMcpCatalogs,
Expand All @@ -776,6 +870,10 @@ export async function generateAssistantReply(
threadParticipants: context.threadParticipants,
turnState: resumedFromCheckpoint ? "resumed" : "fresh",
});
const promptContentParts: UserTurnContentPart[] = [
{ type: "text", text: turnContextPrompt },
...userContentParts,
];

const inputMessagesAttribute = serializeGenAiAttribute([
{
Expand All @@ -784,7 +882,7 @@ export async function generateAssistantReply(
},
{
role: "user",
content: userContentParts.map((part) => toObservablePromptPart(part)),
content: promptContentParts.map((part) => toObservablePromptPart(part)),
},
]);

Expand Down Expand Up @@ -876,11 +974,16 @@ export async function generateAssistantReply(
});
});

let newMessages: AgentMessage[] = [];
let newMessages: PiMessage[] = [];
beforeMessageCount = agent.state.messages.length;
try {
if (resumedFromCheckpoint) {
agent.state.messages = existingCheckpoint!.piMessages;
agent.state.messages = refreshCheckpointTurnContext(
existingCheckpoint!.piMessages,
turnContextPrompt,
);
} else if (context.piMessages && context.piMessages.length > 0) {
agent.state.messages = [...context.piMessages];
}
beforeMessageCount = agent.state.messages.length;

Expand All @@ -891,13 +994,10 @@ export async function generateAssistantReply(
async () => {
let promptResult: unknown;
const promptPromise = resumedFromCheckpoint
? // Checkpoint resumes continue from the persisted Pi message
// state. Any reconstructed replyContext only matters when the
// turn parked before the initial user prompt was recorded.
agent.continue()
? agent.continue()
: agent.prompt({
role: "user",
content: userContentParts,
content: promptContentParts,
timestamp: Date.now(),
});

Expand Down Expand Up @@ -1013,6 +1113,10 @@ export async function generateAssistantReply(
// ── Build turn result ────────────────────────────────────────────
return buildTurnResult({
newMessages,
piMessages: stripTurnContextFromMessages(
agent.state.messages,
turnContextPrompt,
),
userInput,
replyFiles,
artifactStatePatch,
Expand Down
Loading
Loading