diff --git a/packages/junior-evals/evals/core/oauth-workflows.eval.ts b/packages/junior-evals/evals/core/oauth-workflows.eval.ts index a90c2c9b..f766d435 100644 --- a/packages/junior-evals/evals/core/oauth-workflows.eval.ts +++ b/packages/junior-evals/evals/core/oauth-workflows.eval.ts @@ -34,16 +34,17 @@ describeEval("OAuth Workflows", slackEvals, (it) => { contract: "After MCP authorization completes, the same thread gets a resumed answer that keeps prior context.", pass: [ - "The user sees an access-needed message for Eval-auth.", "The same Slack thread later gets a resumed answer after authorization completes.", - "Because the eval harness auto-completes MCP authorization off-transcript, treat a later same-thread resumed answer after the access-needed message as evidence that authorization completed.", + "Because the eval harness auto-completes MCP authorization off-transcript, treat a later same-thread resumed answer as evidence that authorization completed.", "The resumed answer explicitly says the earlier budget deadline was Friday.", ], allow: [ + "The private auth-link handoff may happen off-thread and does not need to appear in assistant_posts.", "A concise resumed answer that only restates the budget deadline is acceptable.", "A brief connection or continuation notice is acceptable before the resumed answer.", ], fail: [ + "Do not post a public thread note that only says a private auth link was sent.", "Do not ask the user to repeat the deadline.", "Do not behave as if prior thread context was lost.", "Do not post a generic failure message.", @@ -88,10 +89,12 @@ describeEval("OAuth Workflows", slackEvals, (it) => { "The resumed answer explicitly says the earlier budget deadline was Friday.", ], allow: [ + "The private auth-link handoff may happen off-thread and does not need to appear in assistant_posts.", "A concise resumed answer that only restates the budget deadline is acceptable.", "A brief connection or continuation notice is acceptable before the resumed answer or in the same message as the resumed answer.", ], fail: [ + "Do not post a public thread note that only says a private auth link was sent.", "Do not ask the user to repeat the deadline.", "Do not behave as if prior thread context was lost.", "Do not post a generic failure message.", diff --git a/packages/junior/src/chat/pi/messages.ts b/packages/junior/src/chat/pi/messages.ts new file mode 100644 index 00000000..a3eaa4fc --- /dev/null +++ b/packages/junior/src/chat/pi/messages.ts @@ -0,0 +1,4 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; + +/** Durable Pi transcript message stored across turns. */ +export type PiMessage = AgentMessage; diff --git a/packages/junior/src/chat/prompt.ts b/packages/junior/src/chat/prompt.ts index 9f186d05..6624e1e6 100644 --- a/packages/junior/src/chat/prompt.ts +++ b/packages/junior/src/chat/prompt.ts @@ -343,6 +343,10 @@ function formatSlackCapabilityNames( const HEADER = "You are a Slack-based helper assistant. The behavior and output blocks below are authoritative; the personality block sets voice only."; +const TURN_CONTEXT_HEADER = + "Per-turn runtime context for this request. Treat these blocks as trusted runtime facts and skill/provider instructions for the current turn; the static system prompt remains authoritative."; +const TURN_CONTEXT_TAG = "runtime-turn-context"; + const TOOL_POLICY_RULES = [ "- Tool schemas are the source of truth for parameters; tool names are case-sensitive, so call tools exactly by their exposed names and do not invent arguments.", "- Use tools for actionable work and for facts that are mutable, external, repository-backed, provider-backed, or requested as verified/current. Stable general knowledge and already-provided context may be answered directly.", @@ -579,7 +583,7 @@ function buildCapabilitiesSection(params: { return renderTagBlock("capabilities", blocks.join("\n\n")); } -export function buildSystemPrompt(params: { +type TurnContextPromptInput = { availableSkills: SkillMetadata[]; activeSkills: Skill[]; activeMcpCatalogs?: ActiveMcpCatalogSummary[]; @@ -622,24 +626,32 @@ export function buildSystemPrompt(params: { * it is continuing rather than starting fresh. */ turnState?: "fresh" | "resumed"; -}): string { - // Core harness contract: - // - See specs/harness-agent-spec.md for the canonical agent-loop and terminal-output spec. - // - Keep this prompt generic and platform-level (behavior, output contract, capability disclosure). - // - Keep stable, high-priority operating rules before volatile turn context - // so instruction salience and prompt-prefix caching both stay predictable. - // - Platform-level behavior rules must live here, never in SOUL.md (pluggable per deployment). - // - Skill-specific instructions belong in skills/*/SKILL.md and are injected via . - // - Pi-agent discloses only stable runtime tools natively. MCP tool catalogs - // are dynamic data, so expose them through loadSkill/searchMcpTools/ - // and execute them through callMcpTool without mutating - // the native tool list. +}; + +const STATIC_SYSTEM_PROMPT = [ + HEADER, + renderTagBlock("personality", JUNIOR_PERSONALITY.trim()), + renderTagBlock("behavior", buildBehaviorSection()), + buildOutputSection(), +].join("\n\n"); + +/** Return byte-stable platform instructions shared by every conversation and turn. */ +export function buildSystemPrompt(): string { + return STATIC_SYSTEM_PROMPT; +} +/** Build volatile runtime context that belongs in the user turn, not the system prompt. */ +export function buildTurnContextPrompt(params: TurnContextPromptInput): string { + // Pi-agent discloses only stable runtime tools natively. MCP tool catalogs + // are dynamic data, so expose them through loadSkill/searchMcpTools/ + // and execute them through callMcpTool without mutating + // the native tool list. const sections = [ - HEADER, - renderTagBlock("personality", JUNIOR_PERSONALITY.trim()), - renderTagBlock("behavior", buildBehaviorSection()), - buildOutputSection(), + `<${TURN_CONTEXT_TAG}>`, + TURN_CONTEXT_HEADER, + params.turnState === "resumed" + ? "Continue the pending turn from prior conversation history; this block is not a new user request." + : "The current user instruction appears after this block in the same message.", buildCapabilitiesSection({ availableSkills: params.availableSkills, activeSkills: params.activeSkills, @@ -655,6 +667,7 @@ export function buildSystemPrompt(params: { turnState: params.turnState, }), buildRuntimeSection(params.runtime ?? {}), + ``, ]; return sections.join("\n\n"); diff --git a/packages/junior/src/chat/respond-helpers.ts b/packages/junior/src/chat/respond-helpers.ts index 1254a4ff..f343a552 100644 --- a/packages/junior/src/chat/respond-helpers.ts +++ b/packages/junior/src/chat/respond-helpers.ts @@ -4,8 +4,8 @@ * These are extracted to reduce the size of the main orchestration module and * make individual helpers independently testable. */ -import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { AssistantMessage, ToolResultMessage } from "@mariozechner/pi-ai"; +import type { PiMessage } from "@/chat/pi/messages"; import type { Skill } from "@/chat/skills"; const MAX_INLINE_ATTACHMENT_BASE64_CHARS = 120_000; @@ -332,8 +332,8 @@ export function upsertActiveSkill(activeSkills: Skill[], next: Skill): void { /** Remove trailing assistant messages before checkpointing. */ export function trimTrailingAssistantMessages( - messages: AgentMessage[], -): AgentMessage[] { + messages: PiMessage[], +): PiMessage[] { let end = messages.length; while (end > 0 && getPiMessageRole(messages[end - 1]) === "assistant") { end -= 1; diff --git a/packages/junior/src/chat/respond.ts b/packages/junior/src/chat/respond.ts index 76440f00..c4e8509e 100644 --- a/packages/junior/src/chat/respond.ts +++ b/packages/junior/src/chat/respond.ts @@ -1,4 +1,4 @@ -import { Agent, type AgentMessage } from "@mariozechner/pi-agent-core"; +import { Agent } from "@mariozechner/pi-agent-core"; import type { FileUpload } from "chat"; import { botConfig } from "@/chat/config"; import { @@ -14,7 +14,7 @@ import { type LogContext, } from "@/chat/logging"; import { listReferenceFiles } from "@/chat/discovery"; -import { buildSystemPrompt } from "@/chat/prompt"; +import { buildSystemPrompt, buildTurnContextPrompt } from "@/chat/prompt"; import { createSkillCapabilityRuntime, createUserTokenStore, @@ -48,6 +48,7 @@ import { getPiGatewayApiKeyOverride, resolveGatewayModel, } from "@/chat/pi/client"; +import type { PiMessage } from "@/chat/pi/messages"; import { createSandboxExecutor, type SandboxAcquiredState, @@ -118,6 +119,8 @@ export interface ReplyRequestContext { artifactState?: ThreadArtifactsState; pendingAuth?: ConversationPendingAuthState; configuration?: Record; + /** Durable Pi transcript for this conversation, excluding ephemeral turn context. */ + piMessages?: PiMessage[]; channelConfiguration?: ChannelConfigurationService; userAttachments?: Array<{ data?: Buffer; @@ -149,7 +152,7 @@ export interface ReplyRequestContext { params: Record; }) => void; /** - * Known thread participants. Injected into the system prompt so the LLM can + * Known thread participants. Injected into per-turn context so the LLM can * produce correct <@USERID> mention syntax for people already in the conversation. */ threadParticipants?: Array<{ @@ -294,6 +297,92 @@ function buildUserTurnInput(args: { return { routerBlocks, userContentParts }; } +function refreshCheckpointTurnContext( + messages: PiMessage[], + turnContextPrompt: string, +): PiMessage[] { + // Resumes need fresh runtime facts without duplicating the original user turn. + const marker = getTurnContextMarker(turnContextPrompt); + for (let index = 0; index < messages.length; index += 1) { + const content = getUserMessageContent(messages[index]); + if (!content) { + continue; + } + const contextIndex = content.findIndex((part) => + isTurnContextPart(part, marker), + ); + if (contextIndex < 0) { + continue; + } + + const updatedMessages = [...messages]; + const updatedContent = [...content]; + updatedContent[contextIndex] = { + ...(updatedContent[contextIndex] as object), + text: turnContextPrompt, + }; + updatedMessages[index] = { + ...messages[index], + content: updatedContent, + } as PiMessage; + return updatedMessages; + } + + return [ + ...messages, + { + role: "user", + content: [{ type: "text", text: turnContextPrompt }], + timestamp: Date.now(), + } as PiMessage, + ]; +} + +function stripTurnContextFromMessages( + messages: PiMessage[], + turnContextPrompt: string, +): PiMessage[] { + const marker = getTurnContextMarker(turnContextPrompt); + return messages.flatMap((message) => { + const content = getUserMessageContent(message); + if (!content) { + return [message]; + } + + const strippedContent = content.filter( + (part) => !isTurnContextPart(part, marker), + ); + if (strippedContent.length === content.length) { + return [message]; + } + if (strippedContent.length === 0) { + return []; + } + return [{ ...message, content: strippedContent } as PiMessage]; + }); +} + +function getTurnContextMarker(turnContextPrompt: string): string { + return turnContextPrompt.split("\n", 1)[0]; +} + +function getUserMessageContent(message: PiMessage): unknown[] | undefined { + const record = message as { role?: unknown; content?: unknown }; + return record.role === "user" && Array.isArray(record.content) + ? record.content + : undefined; +} + +function isTurnContextPart(part: unknown, marker: string): boolean { + return ( + part !== null && + typeof part === "object" && + (part as { type?: unknown }).type === "text" && + typeof (part as { text?: unknown }).text === "string" && + (part as { text: string }).text.startsWith(marker) + ); +} + /** Run a full agent turn: discover skills, execute tools, and return the assistant reply. */ export async function generateAssistantReply( messageText: string, @@ -303,7 +392,7 @@ export async function generateAssistantReply( let timeoutResumeConversationId: string | undefined; let timeoutResumeSessionId: string | undefined; let timeoutResumeSliceId = 1; - let timeoutResumeMessages: AgentMessage[] = []; + let timeoutResumeMessages: PiMessage[] = []; let beforeMessageCount = 0; let lastKnownSandboxId: string | undefined = context.sandbox?.sandboxId; let lastKnownSandboxDependencyProfileHash: string | undefined = @@ -534,9 +623,13 @@ export async function generateAssistantReply( } } + const promptConversationContext = + context.piMessages && context.piMessages.length > 0 + ? undefined + : context.conversationContext; const userTurnText = buildUserTurnText( userInput, - context.conversationContext, + promptConversationContext, { sessionContext: { conversationId: sessionConversationId }, turnContext: { traceId: getActiveTraceId() }, @@ -753,11 +846,12 @@ export async function generateAssistantReply( } syncResumeState(); - // ── System prompt ──────────────────────────────────────────────── + // ── Prompt context ─────────────────────────────────────────────── const activeMcpCatalogs = toActiveMcpCatalogSummaries( turnMcpToolManager.getActiveToolCatalog(activeSkills), ); - baseInstructions = buildSystemPrompt({ + baseInstructions = buildSystemPrompt(); + const turnContextPrompt = buildTurnContextPrompt({ availableSkills, activeSkills, activeMcpCatalogs, @@ -776,6 +870,10 @@ export async function generateAssistantReply( threadParticipants: context.threadParticipants, turnState: resumedFromCheckpoint ? "resumed" : "fresh", }); + const promptContentParts: UserTurnContentPart[] = [ + { type: "text", text: turnContextPrompt }, + ...userContentParts, + ]; const inputMessagesAttribute = serializeGenAiAttribute([ { @@ -784,7 +882,7 @@ export async function generateAssistantReply( }, { role: "user", - content: userContentParts.map((part) => toObservablePromptPart(part)), + content: promptContentParts.map((part) => toObservablePromptPart(part)), }, ]); @@ -876,11 +974,16 @@ export async function generateAssistantReply( }); }); - let newMessages: AgentMessage[] = []; + let newMessages: PiMessage[] = []; beforeMessageCount = agent.state.messages.length; try { if (resumedFromCheckpoint) { - agent.state.messages = existingCheckpoint!.piMessages; + agent.state.messages = refreshCheckpointTurnContext( + existingCheckpoint!.piMessages, + turnContextPrompt, + ); + } else if (context.piMessages && context.piMessages.length > 0) { + agent.state.messages = [...context.piMessages]; } beforeMessageCount = agent.state.messages.length; @@ -891,13 +994,10 @@ export async function generateAssistantReply( async () => { let promptResult: unknown; const promptPromise = resumedFromCheckpoint - ? // Checkpoint resumes continue from the persisted Pi message - // state. Any reconstructed replyContext only matters when the - // turn parked before the initial user prompt was recorded. - agent.continue() + ? agent.continue() : agent.prompt({ role: "user", - content: userContentParts, + content: promptContentParts, timestamp: Date.now(), }); @@ -1013,6 +1113,10 @@ export async function generateAssistantReply( // ── Build turn result ──────────────────────────────────────────── return buildTurnResult({ newMessages, + piMessages: stripTurnContextFromMessages( + agent.state.messages, + turnContextPrompt, + ), userInput, replyFiles, artifactStatePatch, diff --git a/packages/junior/src/chat/runtime/auth-pause-reply.ts b/packages/junior/src/chat/runtime/auth-pause-reply.ts deleted file mode 100644 index b6497724..00000000 --- a/packages/junior/src/chat/runtime/auth-pause-reply.ts +++ /dev/null @@ -1,140 +0,0 @@ -import { botConfig } from "@/chat/config"; -import { - getPersistedThreadState, - persistThreadStateById, -} from "@/chat/runtime/thread-state"; -import { isRetryableTurnError, markTurnCompleted } from "@/chat/runtime/turn"; -import { - generateConversationId, - markConversationMessage, - normalizeConversationText, - updateConversationStats, - upsertConversationMessage, -} from "@/chat/services/conversation-memory"; -import { buildAuthPauseReplyText } from "@/chat/services/pending-auth"; -import type { TurnThinkingSelection } from "@/chat/services/turn-thinking-level"; -import { - buildSlackReplyBlocks, - buildSlackReplyFooter, - type SlackMessageBlock, -} from "@/chat/slack/footer"; -import { postSlackMessage } from "@/chat/slack/outbound"; -import { - coerceThreadConversationState, - type ThreadConversationState, -} from "@/chat/state/conversation"; -import { getTurnUserMessageId } from "@/chat/runtime/turn-user-message"; -import type { AgentTurnUsage } from "@/chat/usage"; - -/** Build the Slack text + footer blocks for an auth-pause reply. */ -function buildAuthPauseSlackMessage(args: { - conversationId?: string; - durationMs?: number; - text: string; - thinkingLevel?: TurnThinkingSelection["thinkingLevel"]; - usage?: AgentTurnUsage; -}): { blocks?: SlackMessageBlock[]; text: string } { - const footer = buildSlackReplyFooter({ - conversationId: args.conversationId, - durationMs: args.durationMs, - thinkingLevel: args.thinkingLevel, - usage: args.usage, - }); - const blocks = buildSlackReplyBlocks(args.text, footer); - return blocks ? { text: args.text, blocks } : { text: args.text }; -} - -/** Persist a visible auth-pause note as the completed reply for the turn. */ -export function completeAuthPauseTurn(args: { - conversation: ThreadConversationState; - sessionId: string; - text: string; -}): void { - markConversationMessage( - args.conversation, - getTurnUserMessageId(args.conversation, args.sessionId), - { - replied: true, - skippedReason: undefined, - }, - ); - upsertConversationMessage(args.conversation, { - id: generateConversationId("assistant"), - role: "assistant", - text: normalizeConversationText(args.text) || "[empty response]", - createdAtMs: Date.now(), - author: { - userName: botConfig.userName, - isBot: true, - }, - meta: { - replied: true, - }, - }); - markTurnCompleted({ - conversation: args.conversation, - nowMs: Date.now(), - sessionId: args.sessionId, - updateConversationStats, - }); -} - -/** Reload thread state, mark the auth-pause note as the turn's reply, and persist. */ -export async function persistAuthPauseReplyState(args: { - sessionId: string; - text: string; - threadStateId: string; -}): Promise { - const currentState = await getPersistedThreadState(args.threadStateId); - const conversation = coerceThreadConversationState(currentState); - completeAuthPauseTurn({ - conversation, - sessionId: args.sessionId, - text: args.text, - }); - await persistThreadStateById(args.threadStateId, { conversation }); -} - -/** - * Deliver the visible "I sent you a private link" reply for an auth-pause - * resume and mark the turn as completed in persisted state. - * - * Used by every resume/callback path that surfaces an `onAuthPause` error. - * Text and footer metadata come from the retryable error when available; - * `fallbackProvider` only applies when a non-retryable error is surfaced. - */ -export async function deliverAuthPauseReply(args: { - channelId: string; - conversationId?: string; - error: unknown; - fallbackProvider?: string; - sessionId: string; - threadStateId: string; - threadTs: string; -}): Promise { - const retryable = isRetryableTurnError(args.error) ? args.error : undefined; - const text = retryable - ? buildAuthPauseReplyText({ - disposition: retryable.metadata?.authDisposition, - provider: retryable.metadata?.authProvider, - }) - : buildAuthPauseReplyText({ provider: args.fallbackProvider }); - const message = buildAuthPauseSlackMessage({ - conversationId: args.conversationId, - durationMs: retryable?.metadata?.authDurationMs, - text, - thinkingLevel: retryable?.metadata?.authThinkingLevel, - usage: retryable?.metadata?.authUsage, - }); - await postSlackMessage({ - channelId: args.channelId, - threadTs: args.threadTs, - text: message.text, - ...(message.blocks ? { blocks: message.blocks } : {}), - }); - await persistAuthPauseReplyState({ - sessionId: args.sessionId, - text, - threadStateId: args.threadStateId, - }); -} diff --git a/packages/junior/src/chat/runtime/auth-pause-state.ts b/packages/junior/src/chat/runtime/auth-pause-state.ts new file mode 100644 index 00000000..7b5965c5 --- /dev/null +++ b/packages/junior/src/chat/runtime/auth-pause-state.ts @@ -0,0 +1,49 @@ +import { + getPersistedThreadState, + persistThreadStateById, +} from "@/chat/runtime/thread-state"; +import { markTurnCompleted } from "@/chat/runtime/turn"; +import { getTurnUserMessageId } from "@/chat/runtime/turn-user-message"; +import { + markConversationMessage, + updateConversationStats, +} from "@/chat/services/conversation-memory"; +import { + coerceThreadConversationState, + type ThreadConversationState, +} from "@/chat/state/conversation"; + +/** Mark an auth-paused turn complete after private authorization link delivery. */ +export function completeAuthPauseTurn(args: { + conversation: ThreadConversationState; + sessionId: string; +}): void { + markConversationMessage( + args.conversation, + getTurnUserMessageId(args.conversation, args.sessionId), + { + replied: true, + skippedReason: undefined, + }, + ); + markTurnCompleted({ + conversation: args.conversation, + nowMs: Date.now(), + sessionId: args.sessionId, + updateConversationStats, + }); +} + +/** Reload thread state, mark the auth pause as parked, and persist it. */ +export async function persistAuthPauseTurnState(args: { + sessionId: string; + threadStateId: string; +}): Promise { + const currentState = await getPersistedThreadState(args.threadStateId); + const conversation = coerceThreadConversationState(currentState); + completeAuthPauseTurn({ + conversation, + sessionId: args.sessionId, + }); + await persistThreadStateById(args.threadStateId, { conversation }); +} diff --git a/packages/junior/src/chat/runtime/reply-executor.ts b/packages/junior/src/chat/runtime/reply-executor.ts index fb78a06d..5fb732d2 100644 --- a/packages/junior/src/chat/runtime/reply-executor.ts +++ b/packages/junior/src/chat/runtime/reply-executor.ts @@ -33,7 +33,7 @@ import { persistThreadState, mergeArtifactsState, } from "@/chat/runtime/thread-state"; -import { completeAuthPauseTurn } from "@/chat/runtime/auth-pause-reply"; +import { completeAuthPauseTurn } from "@/chat/runtime/auth-pause-state"; import { buildThreadParticipants } from "@/chat/runtime/thread-participants"; import type { PreparedTurnState } from "@/chat/runtime/turn-preparation"; import { @@ -44,10 +44,7 @@ import { generateConversationId, updateConversationStats, } from "@/chat/services/conversation-memory"; -import { - applyPendingAuthUpdate, - buildAuthPauseReplyText, -} from "@/chat/services/pending-auth"; +import { applyPendingAuthUpdate } from "@/chat/services/pending-auth"; import { countPotentialImageAttachments, hasPotentialImageAttachment, @@ -321,6 +318,7 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { conversationContext: preparedState.routingContext ?? preparedState.conversationContext, artifactState: preparedState.artifacts, + piMessages: preparedState.conversation.piMessages, pendingAuth: preparedState.conversation.processing.pendingAuth, configuration: preparedState.configuration, channelConfiguration: preparedState.channelConfiguration, @@ -451,6 +449,9 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { replied: true, }, }); + if (reply.piMessages) { + preparedState.conversation.piMessages = reply.piMessages; + } const artifactStatePatch: Partial = reply.artifactStatePatch ? { ...reply.artifactStatePatch } : {}; @@ -588,39 +589,9 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { isRetryableTurnError(error, "mcp_auth_resume") || isRetryableTurnError(error, "plugin_auth_resume") ) { - const authPauseText = buildAuthPauseReplyText({ - disposition: error.metadata?.authDisposition, - provider: error.metadata?.authProvider, - }); - const authPauseFooter = buildSlackReplyFooter({ - conversationId, - durationMs: error.metadata?.authDurationMs, - thinkingLevel: error.metadata?.authThinkingLevel, - usage: error.metadata?.authUsage, - }); - const useSlackFooterForAuthPause = - Boolean(authPauseFooter) && - Boolean(channelId && threadTs) && - (thread.adapter as { name?: string } | undefined)?.name === - "slack"; - if (useSlackFooterForAuthPause && channelId && threadTs) { - await beforeFirstResponsePost(); - await postSlackApiReplyPosts({ - channelId, - threadTs, - footer: authPauseFooter, - posts: [{ stage: "thread_reply", text: authPauseText }], - }); - } else { - await postThreadReply( - buildSlackOutputMessage(authPauseText), - "thread_reply", - ); - } completeAuthPauseTurn({ conversation: preparedState.conversation, sessionId: error.metadata?.sessionId ?? turnId, - text: authPauseText, }); await persistThreadState(thread, { conversation: preparedState.conversation, diff --git a/packages/junior/src/chat/services/pending-auth.ts b/packages/junior/src/chat/services/pending-auth.ts index 6b8bc55f..9f57e9fa 100644 --- a/packages/junior/src/chat/services/pending-auth.ts +++ b/packages/junior/src/chat/services/pending-auth.ts @@ -1,8 +1,4 @@ -import { formatProviderLabel } from "@/chat/oauth-flow"; -import type { - AuthorizationPauseDisposition, - AuthorizationPauseKind, -} from "@/chat/services/auth-pause"; +import type { AuthorizationPauseKind } from "@/chat/services/auth-pause"; import type { ConversationPendingAuthState, ThreadConversationState, @@ -38,22 +34,6 @@ export function canReusePendingAuthLink(args: { ); } -export function buildAuthPauseReplyText(args: { - disposition?: AuthorizationPauseDisposition; - provider?: string; -}): string { - const providerLabel = args.provider ? formatProviderLabel(args.provider) : ""; - if (args.disposition === "link_already_sent") { - return providerLabel - ? `I still need your ${providerLabel} access to continue. I already sent you a private link.` - : "I still need additional access to continue. I already sent you a private link."; - } - - return providerLabel - ? `I need your ${providerLabel} access to continue. I sent you a private link.` - : "I need additional access to continue. I sent you a private link."; -} - export function getConversationPendingAuth(args: { conversation: ThreadConversationState; kind: AuthorizationPauseKind; diff --git a/packages/junior/src/chat/services/turn-checkpoint.ts b/packages/junior/src/chat/services/turn-checkpoint.ts index 087fb270..6b3f42d2 100644 --- a/packages/junior/src/chat/services/turn-checkpoint.ts +++ b/packages/junior/src/chat/services/turn-checkpoint.ts @@ -1,10 +1,10 @@ -import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { getAgentTurnSessionCheckpoint, upsertAgentTurnSessionCheckpoint, type AgentTurnSessionCheckpoint, } from "@/chat/state/turn-session-store"; import { logException } from "@/chat/logging"; +import type { PiMessage } from "@/chat/pi/messages"; import { trimTrailingAssistantMessages } from "@/chat/respond-helpers"; export interface TurnCheckpointContext { @@ -48,7 +48,7 @@ export async function persistCompletedCheckpoint(args: { conversationId: string; sessionId: string; sliceId: number; - allMessages: AgentMessage[]; + allMessages: PiMessage[]; loadedSkillNames: string[]; }): Promise { await upsertAgentTurnSessionCheckpoint({ @@ -69,7 +69,7 @@ export async function persistAuthPauseCheckpoint(args: { conversationId: string; sessionId: string; currentSliceId: number; - messages: AgentMessage[]; + messages: PiMessage[]; loadedSkillNames: string[]; errorMessage: string; logContext: { @@ -135,7 +135,7 @@ export async function persistTimeoutCheckpoint(args: { conversationId: string; sessionId: string; currentSliceId: number; - messages: AgentMessage[]; + messages: PiMessage[]; loadedSkillNames: string[]; errorMessage: string; logContext: { diff --git a/packages/junior/src/chat/services/turn-result.ts b/packages/junior/src/chat/services/turn-result.ts index 2c2d749e..33d3bd75 100644 --- a/packages/junior/src/chat/services/turn-result.ts +++ b/packages/junior/src/chat/services/turn-result.ts @@ -1,6 +1,7 @@ import type { FileUpload } from "chat"; import { botConfig } from "@/chat/config"; import { logInfo, logWarn } from "@/chat/logging"; +import type { PiMessage } from "@/chat/pi/messages"; import type { LogContext } from "@/chat/logging"; import type { TurnThinkingSelection } from "@/chat/services/turn-thinking-level"; import type { AgentTurnUsage } from "@/chat/usage"; @@ -48,6 +49,7 @@ export interface AssistantReply { text: string; files?: FileUpload[]; artifactStatePatch?: Partial; + piMessages?: PiMessage[]; deliveryPlan?: ReplyDeliveryPlan; deliveryMode?: "thread" | "channel_only"; sandboxId?: string; @@ -57,6 +59,7 @@ export interface AssistantReply { export interface TurnResultInput { newMessages: unknown[]; + piMessages?: PiMessage[]; userInput: string; replyFiles: FileUpload[]; artifactStatePatch: Partial; @@ -108,6 +111,7 @@ function buildBriefPostCanvasReply( export function buildTurnResult(input: TurnResultInput): AssistantReply { const { newMessages, + piMessages, userInput, replyFiles, artifactStatePatch, @@ -271,6 +275,7 @@ export function buildTurnResult(input: TurnResultInput): AssistantReply { Object.keys(artifactStatePatch).length > 0 ? artifactStatePatch : undefined, + piMessages, deliveryPlan, deliveryMode, sandboxId, diff --git a/packages/junior/src/chat/state/conversation.ts b/packages/junior/src/chat/state/conversation.ts index fba4c564..bb87d997 100644 --- a/packages/junior/src/chat/state/conversation.ts +++ b/packages/junior/src/chat/state/conversation.ts @@ -1,4 +1,5 @@ import { isRecord, toOptionalNumber, toOptionalString } from "@/chat/coerce"; +import type { PiMessage } from "@/chat/pi/messages"; import type { AuthorizationPauseKind } from "@/chat/services/auth-pause"; type ConversationRole = "assistant" | "system" | "user"; @@ -77,6 +78,7 @@ export interface ThreadConversationState { backfill: ConversationBackfillState; compactions: ConversationCompaction[]; messages: ConversationMessage[]; + piMessages: PiMessage[]; processing: ConversationProcessingState; schemaVersion: 1; stats: ConversationStats; @@ -172,6 +174,7 @@ function defaultConversationState(): ThreadConversationState { return { schemaVersion: 1, messages: [], + piMessages: [], compactions: [], backfill: {}, processing: {}, @@ -331,6 +334,9 @@ export function coerceThreadConversationState( return { schemaVersion: 1, messages, + piMessages: Array.isArray(rawConversation.piMessages) + ? (rawConversation.piMessages as PiMessage[]) + : [], compactions, backfill, processing, diff --git a/packages/junior/src/chat/state/turn-session-store.ts b/packages/junior/src/chat/state/turn-session-store.ts index e7e5a7bc..11f87eec 100644 --- a/packages/junior/src/chat/state/turn-session-store.ts +++ b/packages/junior/src/chat/state/turn-session-store.ts @@ -1,5 +1,5 @@ -import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { isRecord } from "@/chat/coerce"; +import type { PiMessage } from "@/chat/pi/messages"; import { getStateAdapter } from "./adapter"; const AGENT_TURN_SESSION_PREFIX = "junior:agent_turn_session"; @@ -19,7 +19,7 @@ export interface AgentTurnSessionCheckpoint { conversationId: string; errorMessage?: string; loadedSkillNames?: string[]; - piMessages: AgentMessage[]; + piMessages: PiMessage[]; resumeReason?: AgentTurnResumeReason; resumedFromSliceId?: number; sessionId: string; @@ -82,7 +82,7 @@ function parseAgentTurnSessionCheckpoint( state: status, updatedAtMs, piMessages: Array.isArray(parsed.piMessages) - ? (parsed.piMessages as AgentMessage[]) + ? (parsed.piMessages as PiMessage[]) : [], ...(Array.isArray(parsed.loadedSkillNames) ? { @@ -123,7 +123,7 @@ export async function upsertAgentTurnSessionCheckpoint(args: { sessionId: string; sliceId: number; state: AgentTurnSessionStatus; - piMessages: AgentMessage[]; + piMessages: PiMessage[]; loadedSkillNames?: string[]; resumeReason?: AgentTurnResumeReason; errorMessage?: string; diff --git a/packages/junior/src/handlers/mcp-oauth-callback.ts b/packages/junior/src/handlers/mcp-oauth-callback.ts index 046d4cce..1b12f681 100644 --- a/packages/junior/src/handlers/mcp-oauth-callback.ts +++ b/packages/junior/src/handlers/mcp-oauth-callback.ts @@ -30,7 +30,7 @@ import { } from "@/chat/services/conversation-memory"; import { coerceThreadArtifactsState } from "@/chat/state/artifacts"; import { resumeAuthorizedRequest } from "@/chat/slack/resume"; -import { deliverAuthPauseReply } from "@/chat/runtime/auth-pause-reply"; +import { persistAuthPauseTurnState } from "@/chat/runtime/auth-pause-state"; import { applyPendingAuthUpdate, clearPendingAuth, @@ -135,6 +135,9 @@ async function persistCompletedReplyState( replied: true, }, }); + if (reply.piMessages) { + conversation.piMessages = reply.piMessages; + } markTurnCompleted({ conversation, nowMs: Date.now(), @@ -251,6 +254,7 @@ async function resumeAuthorizedMcpTurn(args: { authSession.channelId, conversationContext, artifactState: artifacts, + piMessages: conversation.piMessages, configuration: authSession.configuration, pendingAuth, channelConfiguration, @@ -309,19 +313,19 @@ async function resumeAuthorizedMcpTurn(args: { } }, onAuthPause: async (error) => { - await deliverAuthPauseReply({ - channelId: authSession.channelId!, - conversationId: authSession.conversationId, - error, - fallbackProvider: provider, + await persistAuthPauseTurnState({ sessionId: resolvedSessionId, threadStateId: `slack:${authSession.channelId!}:${authSession.threadTs!}`, - threadTs: authSession.threadTs!, }); logWarn( "mcp_oauth_callback_resume_reparked_for_auth", {}, - { "app.credential.provider": provider }, + { + "app.credential.provider": provider, + ...(isRetryableTurnError(error) + ? { "app.turn.retryable_reason": error.reason } + : {}), + }, "Resumed MCP turn requested another authorization flow", ); }, diff --git a/packages/junior/src/handlers/oauth-callback.ts b/packages/junior/src/handlers/oauth-callback.ts index b6478de1..5032fd2f 100644 --- a/packages/junior/src/handlers/oauth-callback.ts +++ b/packages/junior/src/handlers/oauth-callback.ts @@ -21,7 +21,7 @@ import { resumeAuthorizedRequest, resumeSlackTurn, } from "@/chat/slack/resume"; -import { deliverAuthPauseReply } from "@/chat/runtime/auth-pause-reply"; +import { persistAuthPauseTurnState } from "@/chat/runtime/auth-pause-state"; import { logException, logInfo } from "@/chat/logging"; import { htmlCallbackResponse } from "@/handlers/oauth-html"; import { @@ -64,6 +64,7 @@ import { canScheduleTurnTimeoutResume, scheduleTurnTimeoutResume, } from "@/chat/services/timeout-resume"; +import type { AssistantReply } from "@/chat/respond"; /** * OAuth callback contract for `@sentry/junior`. @@ -80,21 +81,6 @@ function htmlErrorResponse( return htmlCallbackResponse(escapeXml(title), escapeXml(message), status); } -async function buildResumeConversationContext( - channelId: string, - threadTs: string, -): Promise { - const conversation = coerceThreadConversationState( - await getPersistedThreadState(`slack:${channelId}:${threadTs}`), - ); - const latestUserMessageId = [...conversation.messages] - .reverse() - .find((message) => message.role === "user")?.id; - return buildConversationContext(conversation, { - excludeMessageId: latestUserMessageId, - }); -} - async function buildCheckpointConversationContext( conversationId: string, sessionId: string, @@ -111,12 +97,7 @@ async function buildCheckpointConversationContext( async function persistCompletedOAuthReplyState(args: { conversationId: string; sessionId: string; - reply: { - text: string; - sandboxId?: string; - sandboxDependencyProfileHash?: string; - artifactStatePatch?: Record; - }; + reply: AssistantReply; }): Promise { const currentState = await getPersistedThreadState(args.conversationId); const conversation = coerceThreadConversationState(currentState); @@ -144,6 +125,9 @@ async function persistCompletedOAuthReplyState(args: { replied: true, }, }); + if (args.reply.piMessages) { + conversation.piMessages = args.reply.piMessages; + } markTurnCompleted({ conversation, nowMs: Date.now(), @@ -291,6 +275,7 @@ async function resumeCheckpointedOAuthTurn( pendingAuth, conversationContext, channelConfiguration, + piMessages: conversation.piMessages, sandbox: getPersistedSandboxState(currentState), threadParticipants: buildThreadParticipants(conversation.messages), onAuthPending: async (nextPendingAuth) => { @@ -335,15 +320,10 @@ async function resumeCheckpointedOAuthTurn( sessionId: resolvedSessionId, }); }, - onAuthPause: async (error) => { - await deliverAuthPauseReply({ - channelId: stored.channelId!, - conversationId: stored.resumeConversationId, - error, - fallbackProvider: stored.provider, + onAuthPause: async () => { + await persistAuthPauseTurnState({ sessionId: resolvedSessionId, threadStateId: stored.resumeConversationId!, - threadTs: stored.threadTs!, }); }, onTimeoutPause: async (error) => { @@ -378,10 +358,16 @@ async function resumePendingOAuthMessage( ): Promise { if (!stored.pendingMessage || !stored.channelId || !stored.threadTs) return; - const conversationContext = await buildResumeConversationContext( - stored.channelId, - stored.threadTs, + const threadId = `slack:${stored.channelId}:${stored.threadTs}`; + const conversation = coerceThreadConversationState( + await getPersistedThreadState(threadId), ); + const latestUserMessageId = [...conversation.messages] + .reverse() + .find((message) => message.role === "user")?.id; + const conversationContext = buildConversationContext(conversation, { + excludeMessageId: latestUserMessageId, + }); await resumeAuthorizedRequest({ messageText: stored.pendingMessage, channelId: stored.channelId, @@ -391,6 +377,7 @@ async function resumePendingOAuthMessage( replyContext: { requester: { userId: stored.userId }, conversationContext, + piMessages: conversation.piMessages, configuration: stored.configuration, }, onSuccess: async (reply) => { diff --git a/packages/junior/src/handlers/turn-resume.ts b/packages/junior/src/handlers/turn-resume.ts index f6721820..3a005a1a 100644 --- a/packages/junior/src/handlers/turn-resume.ts +++ b/packages/junior/src/handlers/turn-resume.ts @@ -40,7 +40,7 @@ import { } from "@/chat/services/timeout-resume"; import { parseSlackThreadId } from "@/chat/slack/context"; import type { AssistantReply } from "@/chat/respond"; -import { deliverAuthPauseReply } from "@/chat/runtime/auth-pause-reply"; +import { persistAuthPauseTurnState } from "@/chat/runtime/auth-pause-state"; import { applyPendingAuthUpdate, clearPendingAuth, @@ -84,6 +84,9 @@ async function persistCompletedReplyState(args: { replied: true, }, }); + if (args.reply.piMessages) { + conversation.piMessages = args.reply.piMessages; + } markTurnCompleted({ conversation, nowMs: Date.now(), @@ -190,6 +193,7 @@ async function resumeTimedOutTurn( pendingAuth: conversation.processing.pendingAuth, conversationContext, channelConfiguration, + piMessages: conversation.piMessages, sandbox, threadParticipants: buildThreadParticipants(conversation.messages), onAuthPending: async (nextPendingAuth) => { @@ -228,19 +232,18 @@ async function resumeTimedOutTurn( { "app.ai.conversation_id": payload.conversationId, "app.ai.session_id": payload.sessionId, + ...(isRetryableTurnError(error) + ? { "app.turn.retryable_reason": error.reason } + : {}), }, "Failed to resume timed-out turn", ); await persistFailedReplyState(checkpoint); }, - onAuthPause: async (error) => { - await deliverAuthPauseReply({ - channelId: thread.channelId, - conversationId: payload.conversationId, - error, + onAuthPause: async () => { + await persistAuthPauseTurnState({ sessionId: payload.sessionId, threadStateId: payload.conversationId, - threadTs: thread.threadTs, }); logWarn( "timeout_resume_reparked_for_auth", diff --git a/packages/junior/tests/integration/mcp-auth-runtime-slack.test.ts b/packages/junior/tests/integration/mcp-auth-runtime-slack.test.ts index 40183dd7..bf2075f0 100644 --- a/packages/junior/tests/integration/mcp-auth-runtime-slack.test.ts +++ b/packages/junior/tests/integration/mcp-auth-runtime-slack.test.ts @@ -70,21 +70,6 @@ function hasPriorBudgetContext(messages: unknown[]): boolean { ); } -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - - return String(value); -} - vi.mock("@/chat/services/turn-thinking-level", async () => { const actual = await vi.importActual< typeof import("@/chat/services/turn-thinking-level") @@ -355,10 +340,7 @@ describe("mcp auth runtime slack integration", () => { }), }), ]); - expect(thread.posts).toHaveLength(1); - expect(toPostedText(thread.posts[0])).toContain( - "I need your Eval-auth access to continue. I sent you a private link.", - ); + expect(thread.posts).toHaveLength(0); expect(getCapturedSlackApiCalls("chat.postMessage")).toHaveLength(0); const pendingAuthSession = @@ -480,10 +462,6 @@ describe("mcp auth runtime slack integration", () => { replied: true, }), }), - expect.objectContaining({ - role: "assistant", - text: "I need your Eval-auth access to continue. I sent you a private link.", - }), expect.objectContaining({ role: "assistant", text: assistantReplyWithContext, @@ -563,10 +541,7 @@ describe("mcp auth runtime slack integration", () => { expect(agentProbe.promptCallCount).toBe(1); expect(agentProbe.continueCallCount).toBe(0); - expect(thread.posts).toHaveLength(1); - expect(toPostedText(thread.posts[0])).toContain( - "I need your Eval-auth access to continue. I sent you a private link.", - ); + expect(thread.posts).toHaveLength(0); const pendingCheckpoint = await turnSessionStoreModule.getAgentTurnSessionCheckpoint( diff --git a/packages/junior/tests/integration/slack/bot-handlers.test.ts b/packages/junior/tests/integration/slack/bot-handlers.test.ts index 9eacf849..4085a7f4 100644 --- a/packages/junior/tests/integration/slack/bot-handlers.test.ts +++ b/packages/junior/tests/integration/slack/bot-handlers.test.ts @@ -30,19 +30,6 @@ function createRuntime( }); } -function toPostedText(value: unknown): string { - if (typeof value === "string") { - return value; - } - if (value && typeof value === "object") { - const markdown = (value as { markdown?: unknown }).markdown; - if (typeof markdown === "string") { - return markdown; - } - } - return String(value); -} - // ── Tests ──────────────────────────────────────────────────────────── describe("bot handlers (integration)", () => { @@ -453,23 +440,39 @@ describe("bot handlers (integration)", () => { ), ).resolves.toBeUndefined(); - expect(thread.posts).toHaveLength(1); - expect(toPostedText(thread.posts[0])).toContain( - "I need your Notion access to continue. I sent you a private link.", - ); + expect(thread.posts).toHaveLength(0); const state = thread.getState(); const conversation = ( state as { conversation?: { processing?: { activeTurnId?: string }; - messages?: Array<{ role?: string; text?: string }>; + messages?: Array<{ + id?: string; + meta?: { replied?: boolean; skippedReason?: string }; + role?: string; + text?: string; + }>; }; } ).conversation; expect(conversation?.processing?.activeTurnId).toBeUndefined(); - expect(conversation?.messages?.at(-1)).toMatchObject({ - role: "assistant", - text: "I need your Notion access to continue. I sent you a private link.", + expect(conversation?.messages).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + role: "assistant", + text: expect.stringContaining("private link"), + }), + ]), + ); + expect( + conversation?.messages?.find( + (message) => message.id === "msg-auth-pause", + ), + ).toMatchObject({ + meta: { + replied: true, + skippedReason: undefined, + }, }); }); @@ -507,23 +510,39 @@ describe("bot handlers (integration)", () => { ), ).resolves.toBeUndefined(); - expect(thread.posts).toHaveLength(1); - expect(toPostedText(thread.posts[0])).toContain( - "I need your Github access to continue. I sent you a private link.", - ); + expect(thread.posts).toHaveLength(0); const state = thread.getState(); const conversation = ( state as { conversation?: { processing?: { activeTurnId?: string }; - messages?: Array<{ role?: string; text?: string }>; + messages?: Array<{ + id?: string; + meta?: { replied?: boolean; skippedReason?: string }; + role?: string; + text?: string; + }>; }; } ).conversation; expect(conversation?.processing?.activeTurnId).toBeUndefined(); - expect(conversation?.messages?.at(-1)).toMatchObject({ - role: "assistant", - text: "I need your Github access to continue. I sent you a private link.", + expect(conversation?.messages).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + role: "assistant", + text: expect.stringContaining("private link"), + }), + ]), + ); + expect( + conversation?.messages?.find( + (message) => message.id === "msg-plugin-auth-pause", + ), + ).toMatchObject({ + meta: { + replied: true, + skippedReason: undefined, + }, }); }); diff --git a/packages/junior/tests/integration/slack/message-content-behavior.test.ts b/packages/junior/tests/integration/slack/message-content-behavior.test.ts index 27e8487b..b5a038f3 100644 --- a/packages/junior/tests/integration/slack/message-content-behavior.test.ts +++ b/packages/junior/tests/integration/slack/message-content-behavior.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from "vitest"; +import type { PiMessage } from "@/chat/pi/messages"; import { createTestChatRuntime } from "../../fixtures/chat-runtime"; import { createTestMessage, @@ -7,6 +8,7 @@ import { interface CapturedCall { contextConversation?: string; + piMessages?: PiMessage[]; prompt: string; } @@ -149,8 +151,20 @@ describe("Slack behavior: message content", () => { expect(thread.posts).toHaveLength(0); }); - it("carries prior turn context into the next turn", async () => { + it("passes durable Pi history into the next turn", async () => { const calls: CapturedCall[] = []; + const firstTurnHistory: PiMessage[] = [ + { + role: "user", + content: [{ type: "text", text: "I need the budget by Friday" }], + timestamp: 1, + }, + { + role: "assistant", + content: [{ type: "text", text: "First response." }], + timestamp: 2, + }, + ] as PiMessage[]; const { slackRuntime } = createTestChatRuntime({ services: { @@ -171,9 +185,11 @@ describe("Slack behavior: message content", () => { calls.push({ prompt, contextConversation: context?.conversationContext, + piMessages: context?.piMessages, }); return { text: calls.length === 1 ? "First response." : "Second response.", + piMessages: calls.length === 1 ? firstTurnHistory : undefined, diagnostics: { assistantMessageCount: 1, modelId: "fake-agent-model", @@ -210,5 +226,6 @@ describe("Slack behavior: message content", () => { expect(calls).toHaveLength(2); expect(calls[1]?.contextConversation ?? "").toContain("budget by Friday"); + expect(calls[1]?.piMessages).toEqual(firstTurnHistory); }); }); diff --git a/packages/junior/tests/integration/turn-resume-slack.test.ts b/packages/junior/tests/integration/turn-resume-slack.test.ts index a685f6a2..b8a30940 100644 --- a/packages/junior/tests/integration/turn-resume-slack.test.ts +++ b/packages/junior/tests/integration/turn-resume-slack.test.ts @@ -131,6 +131,7 @@ describe("turn resume slack integration", () => { schemaVersion: 1, backfill: {}, compactions: [], + piMessages: [], messages: [ { id: "msg.1", @@ -279,6 +280,7 @@ describe("turn resume slack integration", () => { schemaVersion: 1, backfill: {}, compactions: [], + piMessages: [], messages: [ { id: "msg.2", @@ -392,6 +394,7 @@ describe("turn resume slack integration", () => { schemaVersion: 1, backfill: {}, compactions: [], + piMessages: [], messages: [ { id: "msg.3", diff --git a/packages/junior/tests/unit/handlers/turn-resume.test.ts b/packages/junior/tests/unit/handlers/turn-resume.test.ts index be6cd587..1fa09220 100644 --- a/packages/junior/tests/unit/handlers/turn-resume.test.ts +++ b/packages/junior/tests/unit/handlers/turn-resume.test.ts @@ -127,6 +127,7 @@ describe("turn resume handler", () => { schemaVersion: 1, backfill: {}, compactions: [], + piMessages: [], messages: [ { id: "msg.1", @@ -216,6 +217,7 @@ describe("turn resume handler", () => { schemaVersion: 1, backfill: {}, compactions: [], + piMessages: [], messages: [ { id: "msg.1", @@ -330,6 +332,7 @@ describe("turn resume handler", () => { schemaVersion: 1, backfill: {}, compactions: [], + piMessages: [], messages: [ { id: "msg.1", diff --git a/packages/junior/tests/unit/prompt.test.ts b/packages/junior/tests/unit/prompt.test.ts new file mode 100644 index 00000000..5938a229 --- /dev/null +++ b/packages/junior/tests/unit/prompt.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, it } from "vitest"; +import { buildSystemPrompt, buildTurnContextPrompt } from "@/chat/prompt"; + +describe("prompt builders", () => { + it("keeps system instructions independent from per-turn context", () => { + const firstSystemPrompt = buildSystemPrompt(); + + const firstTurnContext = buildTurnContextPrompt({ + availableSkills: [ + { + name: "alpha", + description: "Alpha workflow", + skillPath: "/tmp/skills/alpha", + }, + ], + activeSkills: [], + activeMcpCatalogs: [], + invocation: null, + requester: { userId: "U_ALPHA" }, + runtime: { + channelId: "C_ALPHA", + modelId: "model-alpha", + thinkingLevel: "medium", + }, + turnState: "fresh", + }); + + const secondTurnContext = buildTurnContextPrompt({ + availableSkills: [ + { + name: "beta", + description: "Beta workflow", + skillPath: "/tmp/skills/beta", + }, + ], + activeSkills: [], + activeMcpCatalogs: [ + { provider: "beta-provider", available_tool_count: 2 }, + ], + invocation: null, + requester: { userId: "U_BETA" }, + runtime: { + channelId: "C_BETA", + modelId: "model-beta", + thinkingLevel: "high", + }, + turnState: "resumed", + }); + + expect(buildSystemPrompt.length).toBe(0); + expect(buildSystemPrompt()).toBe(firstSystemPrompt); + expect(firstTurnContext).not.toBe(secondTurnContext); + expect(buildSystemPrompt()).toBe(firstSystemPrompt); + }); +}); diff --git a/packages/junior/tests/unit/runtime/respond-mcp-progressive-loading.test.ts b/packages/junior/tests/unit/runtime/respond-mcp-progressive-loading.test.ts index 0dbdb47c..981f041b 100644 --- a/packages/junior/tests/unit/runtime/respond-mcp-progressive-loading.test.ts +++ b/packages/junior/tests/unit/runtime/respond-mcp-progressive-loading.test.ts @@ -1,8 +1,9 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { PiMessage } from "@/chat/pi/messages"; const { DEMO_SKILL, + agentInitialSystemPrompts, agentInitialToolNames, callToolMock, clientOptions, @@ -17,9 +18,11 @@ const { omitFinalAssistantAfterTool, pushPreToolAssistantMessage, promptCallCount, + promptSeedMessages, recordToolResultMessage, + resumeTurnContextCounts, searchMcpToolNames, - systemPromptInputs, + turnContextInputs, } = vi.hoisted(() => ({ DEMO_SKILL: { name: "demo-skill", @@ -27,6 +30,7 @@ const { skillPath: "/tmp/skills/demo-skill", pluginProvider: "demo", } as const, + agentInitialSystemPrompts: [] as string[], agentInitialToolNames: [] as string[][], callToolMock: vi.fn(), clientOptions: [] as Array>, @@ -40,10 +44,12 @@ const { loadSkillsByNameMock: vi.fn(), omitFinalAssistantAfterTool: { value: false }, promptCallCount: { value: 0 }, + promptSeedMessages: [] as unknown[][], pushPreToolAssistantMessage: { value: false }, recordToolResultMessage: { value: false }, + resumeTurnContextCounts: [] as number[], searchMcpToolNames: [] as string[][], - systemPromptInputs: [] as Array<{ + turnContextInputs: [] as Array<{ activeMcpCatalogs?: Array<{ provider: string; available_tool_count: number; @@ -123,6 +129,7 @@ vi.mock("@mariozechner/pi-agent-core", () => { systemPrompt: input.initialState.systemPrompt, tools: input.initialState.tools, }; + agentInitialSystemPrompts.push(input.initialState.systemPrompt); agentInitialToolNames.push( input.initialState.tools.map((tool) => tool.name), ); @@ -146,6 +153,7 @@ vi.mock("@mariozechner/pi-agent-core", () => { async prompt(message: unknown) { promptCallCount.value += 1; this.aborted = false; + promptSeedMessages.push([...this.state.messages]); this.state.messages.push(message); const loadSkillTool = this.state.tools.find( @@ -257,6 +265,22 @@ vi.mock("@mariozechner/pi-agent-core", () => { async continue() { continueCallCount.value += 1; + resumeTurnContextCounts.push( + this.state.messages.filter((message) => { + const candidate = message as { role?: unknown; content?: unknown }; + return ( + candidate.role === "user" && + Array.isArray(candidate.content) && + candidate.content.some( + (part) => + part && + typeof part === "object" && + (part as { type?: unknown }).type === "text" && + (part as { text?: unknown }).text === "Turn context", + ) + ); + }).length, + ); const lastMessage = this.state.messages[ this.state.messages.length - 1 ] as { role?: unknown } | undefined; @@ -373,14 +397,15 @@ vi.mock("@/chat/prompt", async (importOriginal) => { const actual = await importOriginal(); return { ...actual, - buildSystemPrompt: (input: { + buildSystemPrompt: () => "System prompt", + buildTurnContextPrompt: (input: { activeMcpCatalogs?: Array<{ provider: string; available_tool_count: number; }>; }) => { - systemPromptInputs.push(input); - return "System prompt"; + turnContextInputs.push(input); + return "Turn context"; }, }; }); @@ -541,6 +566,7 @@ import { isRetryableTurnError } from "@/chat/runtime/turn"; describe("generateAssistantReply progressive MCP loading", () => { beforeEach(async () => { agentInitialToolNames.length = 0; + agentInitialSystemPrompts.length = 0; callToolMock.mockReset(); clientOptions.length = 0; completeEmptyAssistantOnAbort.value = false; @@ -554,9 +580,11 @@ describe("generateAssistantReply progressive MCP loading", () => { loadSkillsByNameMock.mockReset(); omitFinalAssistantAfterTool.value = false; promptCallCount.value = 0; + promptSeedMessages.length = 0; pushPreToolAssistantMessage.value = false; recordToolResultMessage.value = false; - systemPromptInputs.length = 0; + resumeTurnContextCounts.length = 0; + turnContextInputs.length = 0; process.env.JUNIOR_STATE_ADAPTER = "memory"; process.env.JUNIOR_BASE_URL = "https://junior.example.com"; @@ -651,7 +679,12 @@ describe("generateAssistantReply progressive MCP loading", () => { expect(agentInitialToolNames[1]).toContain("callMcpTool"); expect(agentInitialToolNames[1]).not.toContain("searchTools"); expect(agentInitialToolNames[1]).not.toContain("mcp__demo__ping"); - expect(systemPromptInputs[1]?.activeMcpCatalogs).toEqual([ + expect(agentInitialSystemPrompts).toEqual([ + "System prompt", + "System prompt", + ]); + expect(resumeTurnContextCounts).toEqual([1]); + expect(turnContextInputs[1]?.activeMcpCatalogs).toEqual([ { provider: "demo", available_tool_count: 1 }, ]); expect(searchMcpToolNames).toEqual([]); @@ -694,7 +727,8 @@ describe("generateAssistantReply progressive MCP loading", () => { expect(agentInitialToolNames[0]).toContain("callMcpTool"); expect(agentInitialToolNames[0]).not.toContain("searchTools"); expect(agentInitialToolNames[0]).not.toContain("mcp__demo__ping"); - expect(systemPromptInputs[0]?.activeMcpCatalogs).toEqual([]); + expect(agentInitialSystemPrompts).toEqual(["System prompt"]); + expect(turnContextInputs[0]?.activeMcpCatalogs).toEqual([]); expect(searchMcpToolNames).toEqual([["mcp__demo__ping"]]); expect(callToolMock).toHaveBeenCalledWith( expect.objectContaining({ @@ -714,6 +748,41 @@ describe("generateAssistantReply progressive MCP loading", () => { }); }); + it("seeds normal turns from persisted Pi history without storing turn context", async () => { + listToolsMock.mockReset(); + listToolsMock.mockResolvedValue(makeDemoMcpTools()); + const priorMessages: PiMessage[] = [ + { + role: "user", + content: [{ type: "text", text: "prior question" }], + timestamp: 1, + }, + { + role: "assistant", + content: [{ type: "text", text: "prior answer" }], + timestamp: 2, + }, + ] as PiMessage[]; + + const reply = await generateAssistantReply("help me", { + ...makeReplyContext({ + conversationId: "conversation-history", + threadTs: "1712345.0003", + turnId: "turn-history", + }), + conversationContext: "duplicated prior transcript", + piMessages: priorMessages, + }); + + expect(promptSeedMessages[0]).toEqual(priorMessages); + expect(reply.piMessages?.slice(0, 2)).toEqual(priorMessages); + expect(JSON.stringify(reply.piMessages)).not.toContain("Turn context"); + expect(JSON.stringify(reply.piMessages)).not.toContain( + "duplicated prior transcript", + ); + expect(JSON.stringify(reply.piMessages)).toContain("help me"); + }); + it("parks for auth when MCP auth is requested during a tool call", async () => { listToolsMock.mockReset(); listToolsMock.mockImplementation( @@ -833,7 +902,7 @@ describe("generateAssistantReply progressive MCP loading", () => { ignoreReplaceMessages.value = true; continueStopsOnAbort.value = true; - const priorMessages: AgentMessage[] = [ + const priorMessages: PiMessage[] = [ { role: "user", content: [{ type: "text", text: "help me" }], diff --git a/packages/junior/tests/unit/services/turn-checkpoint.test.ts b/packages/junior/tests/unit/services/turn-checkpoint.test.ts index 53754b0d..268fc253 100644 --- a/packages/junior/tests/unit/services/turn-checkpoint.test.ts +++ b/packages/junior/tests/unit/services/turn-checkpoint.test.ts @@ -1,5 +1,5 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { PiMessage } from "@/chat/pi/messages"; const ORIGINAL_ENV = { ...process.env }; @@ -27,7 +27,7 @@ describe("persistAuthPauseCheckpoint", () => { const { getAgentTurnSessionCheckpoint, upsertAgentTurnSessionCheckpoint } = await import("@/chat/state/turn-session-store"); - const priorMessages: AgentMessage[] = [ + const priorMessages: PiMessage[] = [ { role: "user", content: [{ type: "text", text: "help me" }], diff --git a/packages/junior/tests/unit/state/conversation-state.test.ts b/packages/junior/tests/unit/state/conversation-state.test.ts index bcb9590e..8093f9a4 100644 --- a/packages/junior/tests/unit/state/conversation-state.test.ts +++ b/packages/junior/tests/unit/state/conversation-state.test.ts @@ -84,4 +84,30 @@ describe("conversation state", () => { "staff engineer", ); }); + + it("keeps durable Pi message history in conversation state", () => { + const conversation = coerceThreadConversationState({ + conversation: { + messages: [], + piMessages: [ + { + role: "user", + content: [{ type: "text", text: "prior request" }], + timestamp: 1, + }, + ], + }, + }); + + expect(conversation.piMessages).toEqual([ + { + role: "user", + content: [{ type: "text", text: "prior request" }], + timestamp: 1, + }, + ]); + expect( + buildConversationStatePatch(conversation).conversation.piMessages, + ).toHaveLength(1); + }); }); diff --git a/specs/agent-prompt-spec.md b/specs/agent-prompt-spec.md index 67f80334..038f696b 100644 --- a/specs/agent-prompt-spec.md +++ b/specs/agent-prompt-spec.md @@ -3,12 +3,13 @@ ## Metadata - Created: 2026-04-28 -- Last Edited: 2026-04-30 +- Last Edited: 2026-05-06 ## Changelog - 2026-04-28: Initial spec defining ownership, structure, and bloat controls for the core agent prompt. - 2026-04-30: Reworked the core prompt contract around fixed operating sections, source hierarchy, explicit completion gates, OpenClaw-style tool-call/safety boundaries, and stable-before-volatile ordering. +- 2026-05-06: Required the initial system prompt to be byte-stable across conversations and turns, with volatile runtime context moved into per-turn user-message context. ## Status @@ -20,7 +21,8 @@ Define the canonical contract for Junior's platform-owned agent prompt so prompt ## Scope -- `buildSystemPrompt(...)` in `packages/junior/src/chat/prompt.ts`. +- `buildSystemPrompt()` in `packages/junior/src/chat/prompt.ts`. +- `buildTurnContextPrompt(...)` in `packages/junior/src/chat/prompt.ts`. - Platform-owned behavior, capability, context, and Slack output instructions. - Boundaries between the core harness prompt, deployment personality files, and skill instructions. @@ -41,7 +43,11 @@ Define the canonical contract for Junior's platform-owned agent prompt so prompt ### Section boundaries -`buildSystemPrompt(...)` must keep these concerns distinct: +`buildSystemPrompt()` must be static: no parameters, no requester/thread/session/runtime/model/provider/catalog data, and no content that can vary between conversations or turns. This is required for provider prompt-prefix caching and for consistent multi-turn behavior. + +`buildTurnContextPrompt(...)` owns volatile prompt context. It is attached to the current user turn, including resumed-turn context, and may vary by conversation or turn. Completed turns must strip this context before storing durable Pi message history so prior turns are not replayed with stale runtime facts. + +The combined prompt surface must keep these concerns distinct: 1. Identity/personality. 2. Core operating rules. @@ -51,7 +57,7 @@ Define the canonical contract for Junior's platform-owned agent prompt so prompt Context blocks describe facts. Behavior and output blocks carry instructions. -Prompt order is part of the contract. Stable, high-priority operating rules must appear before volatile thread/session context so behavior has salience and provider prompt-prefix caching remains predictable. Do not move requester, artifacts, active catalogs, or configuration defaults above the behavior/output contract. +Prompt order is part of the contract. Stable, high-priority operating rules live in the system prompt. Volatile requester, artifacts, active catalogs, configuration defaults, runtime metadata, and resume state must stay out of the system prompt and live in per-turn context. The core operating rules must be split into fixed sections: @@ -107,7 +113,7 @@ Mutable facts need live checks. Examples include files, repos, versions, issues, The tool policy must make sandbox workspace ownership explicit: sandbox-backed file and shell tools inspect the isolated sandbox workspace, not arbitrary host files. If sandbox execution is unavailable, the model should report that blocker instead of implying local inspection succeeded. -Runtime facts should live in a compact runtime block after volatile context. Include only facts that help the model choose valid behavior, such as runtime version, model ids, selected thinking level, channel capabilities, and sandbox workspace root. Do not mix requester, artifacts, or configuration defaults into that runtime block. +Runtime facts should live in a compact runtime block inside per-turn context. Include only facts that help the model choose valid behavior, such as runtime version, model ids, selected thinking level, channel capabilities, and sandbox workspace root. Do not mix requester, artifacts, or configuration defaults into that runtime block. The safety section must stay generic and runtime-level: remain within the user's request, respect stop/pause/audit/approval boundaries, avoid access expansion, and avoid administrative prompt/tool/security/config changes unless explicitly requested and supported by an available tool. diff --git a/specs/agent-session-resumability-spec.md b/specs/agent-session-resumability-spec.md index 6f193915..e6f52463 100644 --- a/specs/agent-session-resumability-spec.md +++ b/specs/agent-session-resumability-spec.md @@ -3,7 +3,7 @@ ## Metadata - Created: 2026-03-05 -- Last Edited: 2026-04-16 +- Last Edited: 2026-05-06 ## Changelog @@ -13,6 +13,7 @@ - 2026-04-13: Aligned the spec with the current implementation: signed internal timeout-resume callbacks, eager thread-state persistence for sandbox/artifact state, and no automatic resume after visible assistant output has started. - 2026-04-16: Clarified that Slack delivery now waits for finalized replies, so timeout continuation remains eligible until final visible reply posting begins. - 2026-04-22: Added `superseded` checkpoint state and clarified that auth checkpoints do not keep `activeTurnId` alive; thread-local pending-auth state decides whether an auth-blocked request is still resumable. +- 2026-05-06: Removed the public Slack auth-pause note; auth pauses complete the live turn after private auth-link delivery. ## Status @@ -195,7 +196,7 @@ The callback must: 1. User message starts a new `session_id` under `conversation_id`. 2. Slice `1` runs and eagerly persists sandbox/artifact state as those values change. 3. If the turn finishes, commit `completed` and persist final thread state/output. -4. If MCP auth pauses at a safe boundary, commit `awaiting_resume` with `resume_reason=auth`; the live Slack turn still ends with a visible "private link sent" note, and the OAuth callback later consults thread-local pending-auth state before resuming. +4. If MCP auth pauses at a safe boundary, commit `awaiting_resume` with `resume_reason=auth`; the live Slack turn ends after private auth-link delivery without a second public thread note, and the OAuth callback later consults thread-local pending-auth state before resuming. 5. If timeout is reached before any assistant text is visible, commit `awaiting_resume` with `resume_reason=timeout` and schedule the signed internal timeout-resume callback. 6. The timeout-resume handler validates `expected_checkpoint_version`, rebuilds durable runtime state, restores Pi messages, and calls `continue()`. 7. If timeout happens after visible assistant output begins, keep the timeout checkpoint but do not auto-schedule continuation. diff --git a/specs/harness-agent-spec.md b/specs/harness-agent-spec.md index 1f0c5088..1955ffb8 100644 --- a/specs/harness-agent-spec.md +++ b/specs/harness-agent-spec.md @@ -3,7 +3,7 @@ ## Metadata - Created: 2026-02-24 -- Last Edited: 2026-04-30 +- Last Edited: 2026-05-06 ## Changelog @@ -12,6 +12,7 @@ - 2026-04-06: Switched stop-reason observability to `gen_ai.response.finish_reasons`. - 2026-04-13: Clarified timeout behavior when turn-session checkpoints are available for resumable slices. - 2026-04-30: Added the thinking-level routing contract and normal-effort default. +- 2026-05-06: Clarified that normal turns seed Pi from durable conversation message history instead of flattening prior turns into the current prompt. ## Status @@ -37,6 +38,9 @@ Define the canonical runtime contract for assistant-turn execution and user-visi ### Loop model - Use `Agent` from `@mariozechner/pi-agent-core` for reply generation. +- For normal turns, instantiate a fresh Pi agent with the static system prompt, restore durable conversation-level Pi message history, then prompt only the current turn. +- Persist updated conversation-level Pi message history only after the final visible reply is delivered by the runtime. +- Per-turn runtime context may be included in the current user prompt for generation, but it must not be stored in durable conversation-level Pi history after completion. - Use bounded execution with `AGENT_TURN_TIMEOUT_MS` and explicit `agent.abort()` on timeout. - Completion is based on assistant text output; there is no classifier-driven continuation loop. diff --git a/specs/oauth-flows-spec.md b/specs/oauth-flows-spec.md index 578e0106..e38cd0f6 100644 --- a/specs/oauth-flows-spec.md +++ b/specs/oauth-flows-spec.md @@ -3,7 +3,7 @@ ## Metadata - Created: 2026-03-03 -- Last Edited: 2026-04-17 +- Last Edited: 2026-05-06 ## Changelog @@ -12,6 +12,7 @@ - 2026-03-13: Documented MCP challenge-driven OAuth, MCP callback routing, and auth-driven turn resume. - 2026-04-17: Removed explicit model-facing auth commands and documented implicit runtime-owned OAuth initiation for plugin-backed commands. - 2026-04-22: Reframed auth-blocked work as completed Slack turns plus persisted thread-local `pendingAuth` state, documented deduped re-prompts, and limited auto-resume to the latest still-relevant blocked request. +- 2026-05-06: Removed the public thread-visible auth-pause note; the private auth-link delivery is the only immediate auth handoff. ## Status @@ -68,7 +69,6 @@ Agent: loads the matching plugin skill and runs the real provider command │ • runtime privately delivers the authorization link │ • runtime checkpoints the turn as awaiting auth resume │ • runtime records thread-local `pendingAuth` - │ • runtime posts a short thread-visible note that a private link was sent └─ Current turn ends cleanly; it is not kept as the active in-flight turn │ ▼ @@ -100,7 +100,6 @@ Agent: calls an MCP tool from the same plugin ├─ Runtime privately delivers the authorization link to the requesting user ├─ Turn checkpoint is written as awaiting auth resume ├─ Runtime records thread-local `pendingAuth` - ├─ Runtime posts a short thread-visible note that a private link was sent └─ Current turn ends cleanly; it is not kept as the active in-flight turn │ ▼ @@ -197,7 +196,7 @@ Providers define OAuth through plugin manifests: ## Security invariants - Authorization links are delivered privately to the requesting user only. -- The thread-visible auth note may mention that a private link was sent, but must not expose the link itself. +- The runtime must not post the authorization URL into the public thread or add a second public thread note just to announce that a private link was sent. - Authorization URLs are never returned to the model. - Tokens are stored server-side and never appear in sandbox files or model-visible tool arguments. - Leases are requester-bound and turn-scoped. diff --git a/specs/providers/catalog-spec.md b/specs/providers/catalog-spec.md index fa7a06c2..85f59538 100644 --- a/specs/providers/catalog-spec.md +++ b/specs/providers/catalog-spec.md @@ -3,12 +3,13 @@ ## Metadata - Created: 2026-02-27 -- Last Edited: 2026-04-30 +- Last Edited: 2026-05-06 ## Changelog - 2026-03-03: Standardized metadata headers and reconciled spec references/structure. - 2026-04-30: Added `github.org` to GitHub provider configKeys. +- 2026-05-06: Clarified that provider catalog prompt disclosure belongs in per-turn context, not the static system prompt. ## Status @@ -102,7 +103,7 @@ target: ## Prompt Contracts -- System prompt should include provider catalog summary so natural language requests can map to valid config/capability tokens. +- Per-turn prompt context should include provider catalog summary so natural language requests can map to valid config/capability tokens without changing the static system prompt. - Prompt guidance must remain generic and provider-extensible. ## Observability diff --git a/specs/slack-agent-delivery-spec.md b/specs/slack-agent-delivery-spec.md index bd5c69a6..d2c36823 100644 --- a/specs/slack-agent-delivery-spec.md +++ b/specs/slack-agent-delivery-spec.md @@ -3,7 +3,7 @@ ## Metadata - Created: 2026-04-15 -- Last Edited: 2026-04-22 +- Last Edited: 2026-05-06 ## Changelog @@ -20,6 +20,7 @@ - 2026-04-22: Updated finalized reply footer metadata examples to reflect the displayed thinking-level bucket instead of the active trace ID. - 2026-04-22: Required explicit progress messages to be written as proper sentence fragments (capitalized first letter, present-participle verb). - 2026-04-22: Reframed auth-blocked requests as completed thread replies plus thread-local pending-auth state, and removed the public OAuth "connected, continuing..." preamble from automatic resumes. +- 2026-05-06: Removed the public thread-visible auth-pause note; private auth-link delivery is the only immediate user-facing auth handoff before callback resume. ## Status @@ -218,7 +219,7 @@ Current rules: 3. Resume success is defined by final visible Slack delivery, not only by successful assistant generation. 4. Persisted thread state is updated only after the final reply has been delivered to Slack. 5. Because live turns do not publish provisional assistant text, timeout continuation remains eligible until final reply delivery starts. -6. When a turn blocks on OAuth/MCP auth, Junior must end that live turn with a short thread-visible note such as "I sent you a private link" and clear `activeTurnId`; the resumable auth state lives separately in persisted thread state. +6. When a turn blocks on OAuth/MCP auth, Junior must end that live turn after privately delivering the auth link, clear `activeTurnId`, and persist thread-local pending-auth state. Do not post a second public thread reply just to say a private link was sent. 7. Automatic auth resumes must not post a separate public "account connected, continuing..." banner before the real resumed answer. The resumed answer itself is the visible continuation. 8. If auth completes after a newer thread message already superseded the blocked request, Junior stores the credentials but does not post a stale resumed answer.