diff --git a/.gitignore b/.gitignore index 61e4325e..72e1974e 100644 --- a/.gitignore +++ b/.gitignore @@ -55,4 +55,7 @@ harness/config.yaml .worktrees/ CLAUDE.md data/ -docs/superpowers/ \ No newline at end of file +docs/superpowers/ + +# Local scratch (untracked, not part of any branch) +harness-node/ \ No newline at end of file diff --git a/acp/README.md b/acp/README.md index d1ec7972..dbc0128f 100644 --- a/acp/README.md +++ b/acp/README.md @@ -281,7 +281,7 @@ connection at startup and translates each event: |---|---| | `message_update { llm_event: text_delta }` | `agent_message_chunk` | | `message_update { llm_event: thinking_delta }` | `agent_thought_chunk` | -| `message_end` (assistant role, full text) | `agent_message_chunk` (one shot) | +| `message_complete` (assistant role, full text) | `agent_message_chunk` (one shot) | | `tool_execution_start` | `tool_call` (status: `in_progress`) | | `tool_execution_end` | `tool_call_update` (status: `completed`/`failed`) | | other | dropped | diff --git a/acp/src/handler.rs b/acp/src/handler.rs index 0f9f73b9..d34db11c 100644 --- a/acp/src/handler.rs +++ b/acp/src/handler.rs @@ -800,14 +800,11 @@ fn translate_agent_event(event: &Value) -> Option> { "content": { "type": "text", "text": delta }, })]) } - // turn-orchestrator (current head) does not emit message_update - // text deltas — provider-router consumes the streaming response - // internally and returns the fully-assembled assistant message, - // surfaced as a single message_end event. Translate those to one - // agent_message_chunk per text content block so Zed renders the - // full reply. message_start and tool-result message_end variants - // are dropped to avoid duplication. - "message_end" => { + // Batch/non-delta clients receive the fully-assembled assistant + // message as a single message_complete event. Translate those to + // one agent_message_chunk per text content block so Zed renders the + // full reply. + "message_complete" => { let message = event.get("message")?; if message.get("role").and_then(|v| v.as_str()) != Some("assistant") { return None; @@ -1042,14 +1039,14 @@ mod tests { #[test] fn translate_unknown_event_drops_silently() { - assert!(translate_agent_event(&json!({ "type": "agent_start" })).is_none()); - assert!(translate_agent_event(&json!({ "type": "turn_start" })).is_none()); + assert!(translate_agent_event(&json!({ "type": "not_a_real_event" })).is_none()); + assert!(translate_agent_event(&json!({ "type": "message_start" })).is_none()); } #[test] - fn translate_message_end_assistant_emits_chunk() { + fn translate_message_complete_assistant_emits_chunk() { let ev = json!({ - "type": "message_end", + "type": "message_complete", "message": { "role": "assistant", "content": [{ "type": "text", "text": "hi there" }], @@ -1064,9 +1061,9 @@ mod tests { } #[test] - fn translate_message_end_user_dropped() { + fn translate_message_complete_user_dropped() { let ev = json!({ - "type": "message_end", + "type": "message_complete", "message": { "role": "user", "content": [{ "type": "text", "text": "x" }], diff --git a/console/web/src/lib/backend/real.ts b/console/web/src/lib/backend/real.ts index 51ed2c32..02164d7a 100644 --- a/console/web/src/lib/backend/real.ts +++ b/console/web/src/lib/backend/real.ts @@ -12,7 +12,7 @@ import type { AgentMessage, SessionEventEnvelope, } from '@/types/iii-agent-event' -import { createTurnStateTranslator, translateAgentEvent } from './translate' +import { createAgentEventTranslator } from './translate' import type { ChatBackend, ChatStreamOptions, @@ -75,7 +75,7 @@ async function* realStream( }) subscribed = true - const turnStateTranslator = createTurnStateTranslator() + const { translate } = createAgentEventTranslator() client .call | null>('turn::get_state', { @@ -101,7 +101,6 @@ async function* realStream( let kickoffError: Error | null = null client .call('harness::trigger', { - function_id: 'run::start', session_id: sessionId, message_id: messageId, payload: { @@ -123,7 +122,7 @@ async function* realStream( .catch((err) => { kickoffError = err instanceof Error ? err : new Error(String(err)) if (import.meta.env.DEV) { - console.warn('[real-backend] harness::trigger run::start failed', err) + console.warn('[real-backend] harness::trigger failed', err) } wake() }) @@ -134,7 +133,7 @@ async function* realStream( const err = kickoffError as Error yield { kind: 'assistant-token', - token: `harness::trigger run::start failed — ${err.message}`, + token: `harness::trigger failed — ${err.message}`, } yield { kind: 'assistant-end' } return @@ -148,10 +147,7 @@ async function* realStream( if (kickoffError) continue const event = queue.shift() if (!event) continue - const streamEvents = - event.type === 'turn_state_changed' - ? turnStateTranslator(event, sessionId) - : translateAgentEvent(event, sessionId) + const streamEvents = translate(event, sessionId) for (const streamEvent of streamEvents) { yield streamEvent } diff --git a/console/web/src/lib/backend/translate.test.ts b/console/web/src/lib/backend/translate.test.ts index d22832fa..82f2d8c4 100644 --- a/console/web/src/lib/backend/translate.test.ts +++ b/console/web/src/lib/backend/translate.test.ts @@ -1,8 +1,10 @@ import { describe, expect, it } from 'vitest' import type { AgentEvent } from '@/types/iii-agent-event' -import { createTurnStateTranslator, translateAgentEvent } from './translate' +import { createAgentEventTranslator } from './translate' + +describe('createAgentEventTranslator — message_complete', () => { + const { translate } = createAgentEventTranslator() -describe('translateAgentEvent — message_end stop_reason surfacing', () => { const baseAssistant = { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'partial reply…' }], @@ -11,34 +13,54 @@ describe('translateAgentEvent — message_end stop_reason surfacing', () => { timestamp: 0, } - it('emits ONLY assistant-end for a clean stop_reason="end"', () => { + it('emits ONLY assistant-end for a clean stop_reason="end" when body was streamed', () => { const event: AgentEvent = { - type: 'message_end', + type: 'message_complete', message: { ...baseAssistant, stop_reason: 'end' }, + body_streamed: true, } - expect(translateAgentEvent(event)).toEqual([{ kind: 'assistant-end' }]) + expect(translate(event)).toEqual([{ kind: 'assistant-end' }]) + }) + + it('emits assistant-token blocks and assistant-end for a non-streamed batch message', () => { + const event: AgentEvent = { + type: 'message_complete', + message: { + ...baseAssistant, + stop_reason: 'end', + content: [{ type: 'text', text: 'hello batch' }], + }, + body_streamed: false, + } + expect(translate(event)).toEqual([ + { kind: 'assistant-token', token: 'hello batch' }, + { kind: 'assistant-end' }, + ]) }) it('emits assistant-end + stop-reason notice when the turn hit max_tokens (stop_reason="length")', () => { const event: AgentEvent = { - type: 'message_end', + type: 'message_complete', message: { ...baseAssistant, stop_reason: 'length' }, + body_streamed: true, } - const out = translateAgentEvent(event) + const out = translate(event) expect(out[0]).toEqual({ kind: 'assistant-end' }) expect(out[1]).toMatchObject({ kind: 'stop-reason', reason: 'length' }) }) it('emits assistant-end + stop-reason notice carrying error_message when stop_reason="error"', () => { const event: AgentEvent = { - type: 'message_end', + type: 'message_complete', message: { ...baseAssistant, stop_reason: 'error', - error_message: 'lmstudio stream closed mid-response after ~3214 output tokens', + error_message: + 'lmstudio stream closed mid-response after ~3214 output tokens', }, + body_streamed: true, } - const out = translateAgentEvent(event) + const out = translate(event) expect(out[0]).toEqual({ kind: 'assistant-end' }) expect(out[1]).toEqual({ kind: 'stop-reason', @@ -49,45 +71,50 @@ describe('translateAgentEvent — message_end stop_reason surfacing', () => { it('emits assistant-end + stop-reason on abort', () => { const event: AgentEvent = { - type: 'message_end', + type: 'message_complete', message: { ...baseAssistant, stop_reason: 'aborted' }, + body_streamed: true, } - const out = translateAgentEvent(event) + const out = translate(event) expect(out).toHaveLength(2) expect((out[1] as { kind: string; reason: string }).reason).toBe('aborted') }) it('does NOT emit a stop-reason notice for function_call (turn will continue)', () => { const event: AgentEvent = { - type: 'message_end', + type: 'message_complete', message: { ...baseAssistant, stop_reason: 'function_call' }, + body_streamed: true, } - expect(translateAgentEvent(event)).toEqual([{ kind: 'assistant-end' }]) + expect(translate(event)).toEqual([{ kind: 'assistant-end' }]) }) - it('returns [] for non-assistant message_end (user/function_result messages)', () => { + it('returns [] for non-assistant message_complete (user/function_result messages)', () => { const event: AgentEvent = { - type: 'message_end', + type: 'message_complete', message: { role: 'user', content: [{ type: 'text', text: 'hi' }], timestamp: 0, }, } - expect(translateAgentEvent(event)).toEqual([]) + expect(translate(event)).toEqual([]) }) it('omits the error_message field when none was provided', () => { const event: AgentEvent = { - type: 'message_end', + type: 'message_complete', message: { ...baseAssistant, stop_reason: 'length' }, + body_streamed: true, } - const out = translateAgentEvent(event) + const out = translate(event) expect(out[1]).toEqual({ kind: 'stop-reason', reason: 'length' }) }) }) -describe('translateAgentEvent — compaction_done', () => { +describe('createAgentEventTranslator — compaction_done', () => { + const { translate } = createAgentEventTranslator() + it('translates compaction_done to a single compaction StreamEvent carrying the summary + tokens_before', () => { const event: AgentEvent = { type: 'compaction_done', @@ -97,7 +124,7 @@ describe('translateAgentEvent — compaction_done', () => { compaction_entry_id: 'entry-c-1', tail_start_id: 'entry-t-1', } - expect(translateAgentEvent(event, 'sess-1')).toEqual([ + expect(translate(event, 'sess-1')).toEqual([ { kind: 'compaction', mode: 'async', @@ -118,7 +145,7 @@ describe('translateAgentEvent — compaction_done', () => { compaction_entry_id: 'e', tail_start_id: null, } - const out = translateAgentEvent(event, 'sess-x') + const out = translate(event, 'sess-x') expect(out).toHaveLength(1) expect( (out[0] as { kind: 'compaction'; mode: 'sync' | 'async' }).mode, @@ -134,16 +161,17 @@ describe('translateAgentEvent — compaction_done', () => { compaction_entry_id: 'e', tail_start_id: null, } - const out = translateAgentEvent(event, 'sess-y') + const out = translate(event, 'sess-y') expect( - (out[0] as { kind: 'compaction'; tailStartId: string | null }).tailStartId, + (out[0] as { kind: 'compaction'; tailStartId: string | null }) + .tailStartId, ).toBeNull() }) }) -describe('createTurnStateTranslator', () => { +describe('createAgentEventTranslator — turn_state_changed', () => { it('emits fcall-start { pendingApproval: true } when a new entry appears', () => { - const translate = createTurnStateTranslator() + const { translate } = createAgentEventTranslator() const event: AgentEvent = { type: 'turn_state_changed', event_type: 'state:updated', @@ -175,7 +203,7 @@ describe('createTurnStateTranslator', () => { }) it('emits nothing when the awaiting_approval list is unchanged', () => { - const translate = createTurnStateTranslator() + const { translate } = createAgentEventTranslator() const same = { state: 'function_awaiting_approval', awaiting_approval: [ @@ -204,7 +232,7 @@ describe('createTurnStateTranslator', () => { }) it('emits nothing when state leaves function_awaiting_approval (the orchestrator emits the matching function_execution_end)', () => { - const translate = createTurnStateTranslator() + const { translate } = createAgentEventTranslator() translate( { type: 'turn_state_changed', @@ -227,7 +255,11 @@ describe('createTurnStateTranslator', () => { old_value: { state: 'function_awaiting_approval', awaiting_approval: [ - { function_call_id: 'fc-1', function_id: 'shell::shell', args: {} }, + { + function_call_id: 'fc-1', + function_id: 'shell::shell', + args: {}, + }, ], }, }, @@ -237,7 +269,7 @@ describe('createTurnStateTranslator', () => { }) it('partitions mirrors by sessionId so two chats do not interfere', () => { - const translate = createTurnStateTranslator() + const { translate } = createAgentEventTranslator() const pending = { state: 'function_awaiting_approval', awaiting_approval: [ @@ -245,12 +277,20 @@ describe('createTurnStateTranslator', () => { ], } translate( - { type: 'turn_state_changed', event_type: 'state:created', new_value: pending }, + { + type: 'turn_state_changed', + event_type: 'state:created', + new_value: pending, + }, 'sess-a', ) expect( translate( - { type: 'turn_state_changed', event_type: 'state:created', new_value: pending }, + { + type: 'turn_state_changed', + event_type: 'state:created', + new_value: pending, + }, 'sess-b', ), ).toHaveLength(1) diff --git a/console/web/src/lib/backend/translate.ts b/console/web/src/lib/backend/translate.ts index 541ccf62..54130ee9 100644 --- a/console/web/src/lib/backend/translate.ts +++ b/console/web/src/lib/backend/translate.ts @@ -3,13 +3,9 @@ * `agent::events`) to console/web's `StreamEvent` contract documented in * `PLAYGROUND.md`. * - * Phase 2.A: `turn-orchestrator` now emits `MessageUpdate` events with a - * provider `AssistantMessageEvent` payload for every non-terminal frame, - * so token-by-token streaming flows through the `message_update` branch - * below. The terminal `MessageStart`/`MessageEnd` for the assistant - * message are still emitted, but `translateMessageStart` no longer - * re-emits the body (the deltas already populated the renderer); it - * just emits any function-call blocks that ride on the same message. + * Use `createAgentEventTranslator()` for a stateful translator that handles + * the full event surface, including `turn_state_changed` pending-approval + * mirroring. * * Wire mapping: * - `text_delta` → `assistant-token { token: delta }` @@ -23,96 +19,103 @@ * - `function_execution_start` → `fcall-start` (with args). * - `function_execution_end` → `fcall-end` (with result). * - `agent_end` → `assistant-end`. - * - `agent_start` / `turn_start` / `turn_end` / `message_end` / - * `function_execution_update` → noop. - * - `turn_state_changed` → noop (routed through `createTurnStateTranslator`). + * - `turn_state_changed` → pending-approval `fcall-start` (stateful). + * - `turn_end` → not translated (async compaction listens on raw wire). */ import type { AgentEvent, AgentMessage, - AssistantMessage, AssistantMessageEvent, ContentBlock, FunctionResult, - TurnStateChangedEvent, } from '@/types/iii-agent-event' import { diffPending, type PendingApproval } from './pending-approvals-store' import { pendingApprovalsFromTurnState } from './turn-state-mirror' import type { StreamEvent } from './types' -export function translateAgentEvent(event: AgentEvent, sessionId?: string): StreamEvent[] { - switch (event.type) { - case 'agent_start': - case 'turn_start': - case 'turn_end': - case 'function_execution_update': - return [] - - case 'turn_state_changed': - return [] - - case 'message_end': - if (event.message.role !== 'assistant') return [] - return translateAssistantMessageEnd(event.message) - - case 'message_update': - return translateMessageUpdate(event.llm_event) - - case 'message_start': - return translateMessageStart(event.message) - - case 'function_execution_start': - return [ - { - kind: 'fcall-start', - functionId: event.function_id, - input: event.args, - functionCallId: event.function_call_id, - sessionId, - }, - ] - - case 'function_execution_end': - return [ - { - kind: 'fcall-end', - /* Soft failures are surfaced in the UI via the canonical - `{ error: { kind, message, ... } }` shape (see PLAYGROUND.md - "Error semantics"). The harness sends a raw FunctionResult and a - sibling `is_error: true` flag; the canonical shape is a UI - concern, so the wrap lives here rather than on the orchestrator - side. Keeps the wire format provider-agnostic. */ - output: event.is_error ? wrapErrorOutput(event.result) : event.result, - durationMs: event.duration_ms, - }, - ] +export function createAgentEventTranslator(): { + translate(event: AgentEvent, sessionId?: string): StreamEvent[] +} { + const mirrors = new Map() - case 'agent_end': - return [{ kind: 'assistant-end' }] + function translateTurnStateChanged( + event: Extract, + sessionId: string, + ): StreamEvent[] { + const prev = mirrors.get(sessionId) ?? [] + const next = pendingApprovalsFromTurnState(event.new_value) + mirrors.set(sessionId, next) + const { added } = diffPending(prev, next) + return added.map((entry) => ({ + kind: 'fcall-start' as const, + functionId: entry.function_id, + input: entry.args, + pendingApproval: true, + functionCallId: entry.function_call_id, + sessionId, + })) + } - case 'compaction_done': - return [ - { - kind: 'compaction', - mode: event.mode, - summaryText: event.summary_text, - tokensBefore: event.tokens_before, - compactionEntryId: event.compaction_entry_id, - tailStartId: event.tail_start_id, - }, - ] + function translate(event: AgentEvent, sessionId?: string): StreamEvent[] { + switch (event.type) { + case 'turn_state_changed': + return sessionId ? translateTurnStateChanged(event, sessionId) : [] + + case 'message_complete': + return translateMessageComplete( + event.message, + event.body_streamed === true, + ) + + case 'message_update': + return translateMessageUpdate(event.llm_event) + + case 'function_execution_start': + return [ + { + kind: 'fcall-start', + functionId: event.function_id, + input: event.args, + functionCallId: event.function_call_id, + sessionId, + }, + ] + + case 'function_execution_end': + return [ + { + kind: 'fcall-end', + output: event.is_error + ? wrapErrorOutput(event.result) + : event.result, + durationMs: event.duration_ms, + }, + ] + + case 'agent_end': + return [{ kind: 'assistant-end' }] + + case 'turn_end': + return [] + + case 'compaction_done': + return [ + { + kind: 'compaction', + mode: event.mode, + summaryText: event.summary_text, + tokensBefore: event.tokens_before, + compactionEntryId: event.compaction_entry_id, + tailStartId: event.tail_start_id, + }, + ] + } } + + return { translate } } -/** - * Phase 2.A: translate a provider `AssistantMessageEvent` (carried inside - * `AgentEvent.MessageUpdate.llm_event`) into the StreamEvent contract. - * Non-terminal text and thinking deltas drive the renderer; everything - * else is silently dropped — the terminal `Done`/`Error` event is - * mirrored by a `MessageEnd` (and ultimately by `agent_end` → - * `assistant-end`), so we don't need to surface them here. - */ function translateMessageUpdate(llm: AssistantMessageEvent): StreamEvent[] { switch (llm.type) { case 'text_delta': @@ -130,31 +133,20 @@ function translateMessageUpdate(llm: AssistantMessageEvent): StreamEvent[] { } } -function translateMessageStart(message: AgentMessage): StreamEvent[] { +function translateMessageComplete( + message: AgentMessage, + bodyStreamed: boolean, +): StreamEvent[] { if (message.role !== 'assistant') { return [] } - const hasStreamableContent = message.content.some((b) => b.type === 'text' || b.type === 'thinking') - - if (hasStreamableContent) { - // The provider streamed; nothing to re-emit. - return [] - } const out: StreamEvent[] = [] - for (const block of message.content) { - appendBlock(block, out) + if (!bodyStreamed) { + for (const block of message.content) { + appendBlock(block, out) + } } - return out -} - -/** - * Emits `assistant-end` plus, when the turn terminated abnormally, a - * `stop-reason` notice so the UI can render a system message with the - * cause. Pre-fix this branch dropped `stop_reason` and `error_message` - * on the floor — the user saw a truncated reply with no diagnostic. - */ -function translateAssistantMessageEnd(message: AssistantMessage): StreamEvent[] { - const out: StreamEvent[] = [{ kind: 'assistant-end' }] + out.push({ kind: 'assistant-end' }) const stop = message.stop_reason as | 'end' | 'length' @@ -162,14 +154,13 @@ function translateAssistantMessageEnd(message: AssistantMessage): StreamEvent[] | 'aborted' | 'function_call' | undefined - // Clean ends and tool-call hops don't need a notice — the next turn - // will visibly continue. We only surface terminal anomalies. if (stop === 'length' || stop === 'error' || stop === 'aborted') { out.push({ kind: 'stop-reason', reason: stop, message: - typeof message.error_message === 'string' && message.error_message.length > 0 + typeof message.error_message === 'string' && + message.error_message.length > 0 ? message.error_message : undefined, }) @@ -198,48 +189,13 @@ function appendBlock(block: ContentBlock, out: StreamEvent[]): void { } } -/** - * Stateful translator for `turn_state_changed` events. Holds a per-session - * mirror of the previous `awaiting_approval` list so it can emit a - * `fcall-start { pendingApproval: true }` exactly once per new pending - * call, and suppress duplicates when the same record is re-broadcast - * (the backend emits on every turn_state write, not just transitions - * into the parking state). - * - * No `fcall-end` is emitted when the list shrinks — the orchestrator - * already fires `function_execution_end` for resolved/denied calls - * through its existing path. Removing the entry from our mirror is - * bookkeeping only. - */ -export function createTurnStateTranslator(): (event: TurnStateChangedEvent, sessionId: string) => StreamEvent[] { - const mirrors = new Map() - return (event, sessionId) => { - const prev = mirrors.get(sessionId) ?? [] - const next = pendingApprovalsFromTurnState(event.new_value) - mirrors.set(sessionId, next) - const { added } = diffPending(prev, next) - return added.map((entry) => ({ - kind: 'fcall-start' as const, - functionId: entry.function_id, - input: entry.args, - pendingApproval: true, - functionCallId: entry.function_call_id, - sessionId, - })) - } -} - -/* - * Wrap a soft function-execution failure into the canonical - * `{ error: { kind, message, details, content } }` shape consumed by the - * group accordion's failed counter and the embedded fcall's error view. - * `details` / `content` carry the raw payload so the expanded response - * pane still has everything to render. `kind: 'function_error'` matches - * the rest of the translator's deny path (`approval_resolved` uses - * `kind: 'denied'`). - */ function wrapErrorOutput(result: FunctionResult): { - error: { kind: string; message: string; details: unknown; content: ContentBlock[] } + error: { + kind: string + message: string + details: unknown + content: ContentBlock[] + } } { return { error: { @@ -251,16 +207,9 @@ function wrapErrorOutput(result: FunctionResult): { } } -/** - * Pull a one-line message out of a FunctionResult's content blocks for the - * canonical `error.message`. First non-empty text block wins; otherwise we - * fall back to a generic string so the UI always has something to render. - */ function deriveErrorMessage(content: ContentBlock[]): string { for (const block of content) { if (block.type === 'text' && block.text.length > 0) { - // Collapse to a single line — the message field is the header; the - // full multi-line payload remains under error.content / details. return block.text.replace(/\s+/g, ' ').trim() } } diff --git a/console/web/src/types/iii-agent-event.ts b/console/web/src/types/iii-agent-event.ts index cbfb6cff..bba2839c 100644 --- a/console/web/src/types/iii-agent-event.ts +++ b/console/web/src/types/iii-agent-event.ts @@ -92,7 +92,11 @@ export interface CustomMessage { timestamp: number } -export type AgentMessage = UserMessage | AssistantMessage | FunctionResultMessage | CustomMessage +export type AgentMessage = + | UserMessage + | AssistantMessage + | FunctionResultMessage + | CustomMessage /** Function call request emitted by an assistant message. */ export interface FunctionCall { @@ -114,8 +118,8 @@ export interface FunctionResult { * * The orchestrator forwards each event verbatim inside an * `AgentEvent::MessageUpdate` so the frontend can render token-by-token - * (Phase 2.A). `Done`/`Error` are terminal — the orchestrator emits a - * separate `MessageStart`/`MessageEnd` pair after them. + * (Phase 2.A). `Done`/`Error` are terminal — the orchestrator emits + * `message_complete` after them. */ export type AssistantMessageEvent = | { type: 'start'; partial: AssistantMessage } @@ -140,45 +144,42 @@ export type AssistantMessageEvent = type: 'stop' stop_reason: 'end' | 'length' | 'function_call' | 'aborted' | 'error' error_message?: string - error_kind?: 'auth_expired' | 'rate_limited' | 'context_overflow' | 'transient' | 'permanent' + error_kind?: + | 'auth_expired' + | 'rate_limited' + | 'context_overflow' + | 'transient' + | 'permanent' } | { type: 'done'; message: AssistantMessage } | { type: 'error'; error: AssistantMessage } /** * Discriminated `AgentEvent` matching the wire shape on `agent::events`. - * `MessageUpdate` and `FunctionExecutionUpdate` are present in the enum but - * not emitted by today's turn-orchestrator — the translator stubs them out - * but accepts them so a Phase 2 backend lands without a frontend change. */ export type AgentEvent = - | { type: 'agent_start' } | { type: 'agent_end'; messages: AgentMessage[] } - | { type: 'turn_start' } | { type: 'turn_end' message: AgentMessage function_results: FunctionResultMessage[] } - | { type: 'message_start'; message: AgentMessage } | { type: 'message_update' message: AgentMessage llm_event: AssistantMessageEvent } - | { type: 'message_end'; message: AgentMessage } | { - type: 'function_execution_start' - function_call_id: string - function_id: string - args: unknown + type: 'message_complete' + message: AgentMessage + /** When true, text/thinking were already delivered via message_update. */ + body_streamed?: boolean } | { - type: 'function_execution_update' + type: 'function_execution_start' function_call_id: string function_id: string args: unknown - partial_result: unknown } | { type: 'function_execution_end' diff --git a/harness/README.md b/harness/README.md index fd7e3050..3a8bfcd6 100644 --- a/harness/README.md +++ b/harness/README.md @@ -14,7 +14,7 @@ alongside `harness` over the iii bus. |---|---|---| | `src/harness/` | `ui::subscribe`/`unsubscribe`, `harness::fs::read_inline`, `policy::check_permissions` | Meta-worker; loads `iii-permissions.yaml`; spins up `ui::*` fanout pumps. | | `src/approval-gate/` | `approval::resolve` | Routes operator decisions to per-call `turn::approval_resume` fns (registered by turn-orchestrator). | -| `src/turn-orchestrator/` | `run::start`, `run::start_and_wait`, `agent::trigger`, `turn::step` | Durable FSM driving each agent turn; chokepoint dispatcher. | +| `src/turn-orchestrator/` | `run::start`, `turn::{state}`, `turn::get_state` | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | | `src/session/` | `session-tree::*` (11 fns), `session-inbox::*` (3 fns) | Branching session storage + per-session inbox queues. | | `src/llm-budget/` | `budget::*` (14 fns) | Workspace + agent LLM spend caps. | | `src/hook-fanout/` | `hook-fanout::publish_collect` | Generic publish-and-collect over a stream topic. | diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 4cd21f79..3cbd10da 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -19,7 +19,7 @@ workers. | Worker | Folder | Role | Doc | |---|---|---|---| | harness | [src/harness/](harness/src/harness/) | Meta-worker; loads `iii-permissions.yaml`, exposes `harness::trigger` (WS ingestion bridge — see [Telemetry & trace correlation](#telemetry--trace-correlation)) / `policy::check_permissions` / `ui::*`, spins up `agent::events` fan-out. | [workers/harness.md](harness/docs/workers/harness.md) | -| turn-orchestrator | [src/turn-orchestrator/](harness/src/turn-orchestrator/) | Durable FSM driving each agent turn; chokepoint dispatcher for `agent::trigger`. | [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md) | +| turn-orchestrator | [src/turn-orchestrator/](harness/src/turn-orchestrator/) | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md) | | approval-gate | [src/approval-gate/](harness/src/approval-gate/) | Registers `approval::resolve` and shared approval wire schemas; routes decisions to per-call `turn::approval_resume` fns owned by the turn-orchestrator. | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | | session | [src/session/](harness/src/session/) | Branching session storage (`session-tree::*`) plus per-session inbox queues (`session-inbox::*`). | [workers/session.md](harness/docs/workers/session.md) | | llm-budget | [src/llm-budget/](harness/src/llm-budget/) | Workspace + agent LLM spend caps with alerts, forecast, period rollover. | [workers/llm-budget.md](harness/docs/workers/llm-budget.md) | @@ -69,7 +69,7 @@ flowchart LR turnOrch -- "provider::*::stream" --> provKimi turnOrch -- "provider::*::stream" --> provLms turnOrch -- "consultBefore: policy::check_permissions" --> harness - turnOrch -- "agent::trigger → hook-fanout::publish_collect (after-hook)" --> hook + turnOrch -- "publishAfter → hook-fanout::publish_collect (after-hook)" --> hook turnOrch -- "session-tree::* mirror" --> session turnOrch -- "state::* persistence" --> state @@ -174,7 +174,7 @@ Deny shorthands (`!function_id` in the YAML): `approval::resolve`, `policy::check_permissions`, `hook-fanout::publish_collect`, `state::set`, `state::update`, `state::delete`, `stream::set`, `iii::durable::publish`, `auth::set_token`, `auth::delete_token`, `oauth::anthropic::login`, -`oauth::openai-codex::login`, `run::start`, `run::start_and_wait`, +`oauth::openai-codex::login`, `run::start`, `router::stream_assistant`, `router::abort`. Bare-string allow rules: `state::get`, `state::list`, @@ -281,13 +281,13 @@ to write to stderr unchanged. **`harness::trigger` as the WS ingestion bridge.** Browser-originated requests hit `harness::trigger` (see [src/harness/trigger.ts](harness/src/harness/trigger.ts)), NOT -`run::start` directly. The wrapping `instrumentHandler` reads -`session_id`/`message_id` from the outer body and seeds baggage; the -handler then forwards to `iii.trigger` with the inner `function_id` / -`payload`. This is the symmetric counterpart of the Rust harness bridge -(`workers/harness/src/lib.rs:103-159`; legacy bus id `harness::call`) and -means the span tree looks the same regardless of whether the request -landed on a Rust or Node deployment. +`run::start` directly. The request body is `{session_id?, message_id?, +payload}` with a flat `run::start` payload; the wrapping +`instrumentHandler` reads `session_id`/`message_id` from the outer body and +seeds baggage, then the handler forwards `payload` to `run::start` (the +target function id is fixed, not client-supplied). Going through this hop +seeds the baggage before the nested `run::start` span opens, so the span +tree carries the session/message ids end-to-end. ```mermaid sequenceDiagram @@ -297,7 +297,7 @@ sequenceDiagram participant Inner as run::start (turn-orchestrator) participant Trace as engine traces UI - Web->>Bridge: {function_id:"run::start", session_id, message_id, payload} + Web->>Bridge: {session_id, message_id, payload} Wrap->>Wrap: open span "harness.harness::trigger", stamp ids, push baggage Bridge->>Inner: iii.trigger(run::start, payload) -- baggage propagated Wrap->>Wrap: open span "harness.run::start", inherit ids from baggage diff --git a/harness/docs/workers/harness.md b/harness/docs/workers/harness.md index cd3b7fca..0e21f0b6 100644 --- a/harness/docs/workers/harness.md +++ b/harness/docs/workers/harness.md @@ -18,7 +18,7 @@ that drive transitions; its fan-out trigger is a passive stream subscriber. ## Registered functions -- `harness::trigger` — Forward `{function_id, session_id?, message_id?, payload}` to `iii.trigger` and return the result wrapped in an HTTP-style `{status_code, headers, body}` envelope. Used by console/web so the harness span wrapper can seed `iii.session.id` / `iii.message.id` baggage from the outer body (see [architecture.md § Telemetry & trace correlation](harness/docs/architecture.md#telemetry--trace-correlation)). Port of `workers/harness/src/lib.rs:103-159`. +- `harness::trigger` — Browser kickoff for a chat turn: take `{session_id?, message_id?, payload}` (where `payload` is a flat `run::start` payload), forward `payload` to `run::start`, and return the result wrapped in an HTTP-style `{status_code, headers, body}` envelope. The target function id is always `run::start` — clients don't choose it. Routing through this hop (instead of calling `run::start` directly) lets the harness span wrapper seed `iii.session.id` / `iii.message.id` baggage from the outer body (see [architecture.md § Telemetry & trace correlation](harness/docs/architecture.md#telemetry--trace-correlation)). - `ui::subscribe` — Register a browser's interest in a session (or all sessions if session_id is null). - `ui::unsubscribe` — Remove a browser's subscription to a session (or its all-sessions sub if session_id is null). - `harness::fs::read_inline` — Read a host file via shell::fs::read, drain its channel, and return a `{content:[{text}], details:{size, truncated, bytes_read}}` envelope (max 256 KiB inline by default). @@ -74,7 +74,7 @@ From [src/harness/iii.worker.yaml](harness/src/harness/iii.worker.yaml): | [src/harness/main.ts](harness/src/harness/main.ts) | Binary entry point (`iii-harness`). | | [src/harness/register.ts](harness/src/harness/register.ts) | Composes the worker's bus surface; called by both `main.ts` and the composite [src/index.ts](harness/src/index.ts). | | [src/harness/config.ts](harness/src/harness/config.ts) | Loads `engine_url` + `permissions_path` from `config.yaml`. | -| [src/harness/trigger.ts](harness/src/harness/trigger.ts) | `harness::trigger` handler — WS ingestion bridge for browser-originated requests. Forwards `{function_id, payload}` to `iii.trigger`; the wrapping `instrumentHandler` (see `runtime/otel.ts`) reads `session_id`/`message_id` from the outer body and seeds baggage. Port of `workers/harness/src/lib.rs:103-159`. | +| [src/harness/trigger.ts](harness/src/harness/trigger.ts) | `harness::trigger` handler — WS ingestion bridge for browser-originated chat turns. Forwards the flat `payload` to `run::start` (target function id hard-coded, not client-supplied); the wrapping `instrumentHandler` (see `runtime/otel.ts`) reads `session_id`/`message_id` from the outer body and seeds baggage. | | [src/harness/ui-subscribe.ts](harness/src/harness/ui-subscribe.ts) | In-memory `FanoutState` plus `ui::subscribe` / `ui::unsubscribe`. | | [src/harness/fs.ts](harness/src/harness/fs.ts) | `harness::fs::read_inline` — wraps `shell::fs::read` and inlines the channel into the legacy `{content, details}` envelope. | | [src/harness/policy/check-permissions.ts](harness/src/harness/policy/check-permissions.ts) | `registerPolicy` — registers `policy::check_permissions` and maps a `Decision` to the wire reply (`allow` / `deny` / `needs_approval`). | diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index ef7dfaa7..4619cbd3 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -10,12 +10,12 @@ immediately; the rest of the work happens inside the durable `turn::step` state machine, woken once per state transition by a publish to the `turn::step_requested` topic. The FSM provisions the sandbox, streams the assistant turn from a provider, executes any returned function calls -through the `agent::trigger` chokepoint, emits `agent::events` for the +through `dispatchWithHook`, emits `agent::events` for the harness fanout, and persists everything to iii state so the run survives restarts. -`agent::trigger` is the single dispatcher every agent-issued tool call passes -through. It runs `consultBefore` before forwarding to the target function +`dispatchWithHook` in [agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) is the single +dispatcher every agent-issued tool call passes through. It runs `consultBefore` before forwarding to the target function id. `consultBefore` triggers `policy::check_permissions` directly (5 s timeout) and maps the reply to allow / deny / pending. Fail-closed: policy unreachable → deny with a `gate_unavailable` `DenialEnvelope`. @@ -23,14 +23,10 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. ## Registered functions - `run::start` — Start a durable agent session and return immediately. -- `run::start_and_wait` — Start a durable agent session and block until terminal (test/dev convenience). - `turn::step` — Run one durable state machine transition for a session. - `turn::get_state` — Read the current `TurnStateRecord` for a session (or null for unknown sessions). UI clients use this on reload to recover any in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. -- `agent::trigger` — LLM-facing dispatcher: dispatches an iii function and returns a FunctionResult. - `turn::is_abort_signal_set` — Condition function bound to the agent-scope state trigger; matches `state:created`/`state:updated` writes that set `session//abort_signal` to `true`. - `turn::on_abort_signal` — State trigger adapter: publishes `turn::step_requested` when the abort signal is set so the FSM advances on the next safe boundary. -- `turn::is_terminal_state_write` — Condition function bound to the terminal state trigger; matches writes to `session//turn_state` whose `new_value.state === 'stopped'`. -- `turn::on_terminal_state` — State trigger adapter: resolves the in-process waiter installed by `run::start_and_wait` for that session id. - `turn::is_stepable_record_write` — Condition function bound to the record-written state trigger; matches `turn_state` writes whose `new_value.state` is non-terminal and non-parking (i.e. excludes `stopped` and `function_awaiting_approval`). - `turn::on_record_written` — State trigger adapter: directly triggers `turn::step` for the affected session, so saving the record is itself the wake-up event. - `turn::is_turn_state_write` — Condition function bound to the turn-state-changed trigger; matches every `state:created` / `state:updated` write to `session//turn_state` regardless of FSM state. @@ -40,7 +36,6 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. - **Durable subscriber** on `turn::step_requested` → `turn::step`. Registered in [src/turn-orchestrator/subscriber.ts](harness/src/turn-orchestrator/subscriber.ts). Each `step` loads the `TurnStateRecord`, runs one transition, saves it back, and re-publishes `turn::step_requested` unless the run is terminal **or** paused on approvals (`function_awaiting_approval`). Paused turns are woken when `approval::resolve` or abort triggers a per-call `turn::approval_resume` function (see [workers/approval-gate.md](workers/approval-gate.md)). - **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_abort_signal_set` → `turn::on_abort_signal`. Registered in [src/turn-orchestrator/on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts). Publishes `turn::step_requested` the moment `session//abort_signal` is set to `true`, so the FSM advances to `steering_check` (and observes the abort) on the next safe boundary without waiting for the current step to time out. -- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_terminal_state_write` → `turn::on_terminal_state`. Registered in [src/turn-orchestrator/on-terminal.ts](harness/src/turn-orchestrator/on-terminal.ts). Fires on the `session//turn_state` write that lands `stopped`; the handler resolves the per-session waiter installed by `run::start_and_wait` so the sync wrapper returns without polling. - **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_stepable_record_write` → `turn::on_record_written`. Registered in [src/turn-orchestrator/on-record-written.ts](harness/src/turn-orchestrator/on-record-written.ts). Directly triggers `turn::step` for the affected session on every non-terminal, non-parking `session//turn_state` write. Replaces the imperative `publishStep` self-publish — saving the record is now the wake. - **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_turn_state_write` → `turn::on_turn_state_changed`. Registered in [src/turn-orchestrator/on-turn-state-changed.ts](harness/src/turn-orchestrator/on-turn-state-changed.ts). Fires on every `session//turn_state` write (created or updated) and emits a `turn_state_changed` event to `agent::events` carrying the full new (and prior) record so the UI can derive pending approvals from state rather than from a signal event. @@ -55,10 +50,10 @@ The 11 states from |---|---|---| | `provisioning` | [states/provisioning.ts](harness/src/turn-orchestrator/states/provisioning.ts) | Boot the sandbox, prime the system prompt, fetch function schemas. | | `awaiting_assistant` | [states/assistant.ts](harness/src/turn-orchestrator/states/assistant.ts) | Request an assistant turn via `provider::::stream`. | -| `assistant_streaming` | same | Drain the channel; relay events. | +| `assistant_streaming` | same | Drain the provider channel; relay `message_update` (token/thinking deltas) on `agent::events`. Tool args appear at `function_execution_start` when execute runs — no `turn_start` or streaming `function_execution_update` events. | | `assistant_finished` | same | Persist the final `AssistantMessage`; pick next state. | | `function_prepare` | [states/functions.ts](harness/src/turn-orchestrator/states/functions.ts) | Snapshot the pending function calls. | -| `function_execute` | same | Run each call via `dispatchWithHook` → `agent::trigger`. If the gate returns `pending`, append the call to `awaiting_approval` and transition to `function_awaiting_approval` (the rest of the batch is left for the resumed step). Each call is bracketed by a `function_execution_start` / `function_execution_end` pair; the `end` event carries `duration_ms` (wall-clock between the matching start and end), persisted on `ExecutedEntry` so resumed runs replay the original timing instead of the ~0ms it takes to re-emit. Approval wait time is naturally excluded — pending calls return without an end emit, and the resumed step re-emits a fresh start that resets the timer. | +| `function_execute` | same | Run each call via `dispatchWithHook` (pre-approved resume calls use `triggerFunctionCall` and skip the gate). If the gate returns `pending`, append the call to `awaiting_approval` and transition to `function_awaiting_approval` (the rest of the batch is left for the resumed step). Each call is bracketed by a `function_execution_start` / `function_execution_end` pair; the `end` event carries `duration_ms` (wall-clock between the matching start and end), persisted on `ExecutedEntry` so resumed runs replay the original timing instead of the ~0ms it takes to re-emit. Approval wait time is naturally excluded — pending calls return without an end emit, and the resumed step re-emits a fresh start that resets the timer. | | `function_awaiting_approval` | same (`handleAwaitingApproval`) | Read `approvals//` for every entry in `awaiting_approval`. While any decision is still missing, return without stepping (the next `turn::approval_resume` invoke will wake `turn::step`). When all decisions are present, fold them into the prepared snapshot — `allow` → `pre_approved: true`, `deny`/`aborted` → `blocked` with a denial result — clear `awaiting_approval`, and transition back to `function_execute`. | | `function_finalize` | same | Persist results; emit `function_call_end` + `turn_end` events. | | `steering_check` | [states/steering.ts](harness/src/turn-orchestrator/states/steering.ts) | Decide whether to continue, stop, or hit `max_turns`. | @@ -79,9 +74,7 @@ All keys live under iii state scope `agent`. From |---|---| | `session//turn_state` | Serialised `TurnStateRecord`. | | `session//messages` | Active path `AgentMessage[]`; mirrored into `session-tree::*` on every save. | -| `session//run_request` | The original `run::start` payload (provider, model, system_prompt, mode, image, idle_timeout_secs, cwd, cwd_hash). | -| `session//cwd` | Working directory for the sandbox. | -| `harness/cwd//last_session_id` | Reverse index from `cwd_hash` to the last session that ran there. | +| `session//run_request` | The original `run::start` payload (provider, model, system_prompt, mode, image, idle_timeout_secs). | | `session//sandbox_id` | Active sandbox handle. | | `session//function_schemas` | Cached tool schemas exposed to the model. | | `session//tool_schemas` | Legacy alias of `function_schemas`. | @@ -104,9 +97,6 @@ decisions back into the prepared snapshot. From the top-level `turn-orchestrator` section of [config.yaml](harness/config.yaml): -- `sync_default_timeout_ms` (default `120000`) — wall-clock cap on a - `run::start_and_wait` call; if the terminal state trigger doesn't - resolve the waiter within this many ms, the wrapper throws. - `system_default_skills` (default `["iii://iii-directory/index"]`) — skills the bootstrap step downloads into the session's system prompt context. @@ -123,10 +113,9 @@ From | File | Purpose | |---|---| | [src/turn-orchestrator/main.ts](harness/src/turn-orchestrator/main.ts) | Binary entry point. | -| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes `run::start*`, `agent::trigger`, `turn::step`, the abort-signal and terminal-state state triggers, and kicks off the bootstrap. | -| [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` + `run::start_and_wait` handlers and the `publishStep` helper. `executeSync` installs a terminal-state waiter, kicks the run, then races the waiter against `sync_default_timeout_ms` — no polling. | +| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes `run::start`, per-state `turn::{state}` handlers, abort-signal trigger, and kicks off the bootstrap. | +| [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state`, and wakes the FSM via the record-written state trigger. | | [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader that returns the current `TurnStateRecord` for a session. UI clients call this on reload to recover in-progress modals; the orchestrator owns the state schema/key layout so clients never read iii state directly. | -| [src/turn-orchestrator/on-terminal.ts](harness/src/turn-orchestrator/on-terminal.ts) | State trigger adapter — `turn::is_terminal_state_write` (condition) + `turn::on_terminal_state` (handler) — plus the in-process `installTerminalWaiter` / `clearTerminalWaiter` API used by `executeSync` to await a terminal `turn_state` write reactively. | | [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | The dispatcher chokepoint; `dispatchWithHook` runs `consultBefore` before triggering the function and returns `result` / `deny` / `pending`. | | [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — calls `policy::check_permissions` directly (5 s timeout) and maps the reply via `parsePolicyReply` (`approval-gate/schemas.ts`) to `allow` / `pending` / `deny`; fails closed with a `gate_unavailable` envelope. `publishAfter` still routes through `hook-fanout::publish_collect` for the after-hook fanout path. | | [src/turn-orchestrator/approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) | Per-call `turn::approval_resume` registration, handler (persist + `turn::step`), and startup recovery for parked sessions. | diff --git a/harness/engine.config.yaml b/harness/engine.config.yaml new file mode 100644 index 00000000..002fdf1a --- /dev/null +++ b/harness/engine.config.yaml @@ -0,0 +1,19 @@ +# iii engine configuration for the harness local/dev. +# Pass to the engine via `iii -c engine.config.yaml`. + +workers: + - name: iii-queue + config: + adapter: + name: builtin + queue_configs: + turn-step: + type: fifo + message_group_field: session_id + max_retries: 5 + concurrency: 1 + + - name: iii-state + config: + adapter: + name: file_based diff --git a/harness/src/harness/register.ts b/harness/src/harness/register.ts index c502c338..0d1a09d5 100644 --- a/harness/src/harness/register.ts +++ b/harness/src/harness/register.ts @@ -9,13 +9,16 @@ import { loadAndWatch } from './policy/handle.js'; import { FanoutState, registerSubscriptions } from './ui-subscribe.js'; export async function register(iii: ISdk, ctx: { configPath: string; url: string }): Promise { + const fanoutState = new FanoutState(); + const cfg = await loadConfig(ctx.configPath); const harness = loadHarnessConfig(cfg); + registerTrigger(iii); - const fanoutState = new FanoutState(); registerSubscriptions(iii, fanoutState); spawnPumps(iii, fanoutState); registerFs(iii, ctx.url); + const handle = await loadAndWatch(harness.permissions_path); registerPolicy(iii, handle); } diff --git a/harness/src/harness/trigger.ts b/harness/src/harness/trigger.ts index a0096252..92534365 100644 --- a/harness/src/harness/trigger.ts +++ b/harness/src/harness/trigger.ts @@ -1,50 +1,61 @@ /** - * `harness::trigger` — browser → bus bridge. + * `harness::trigger` — browser kickoff for `run::start`. * - * Accepts `{ function_id, session_id?, message_id?, payload }` (or the same - * fields at the top level over WS), calls `iii.trigger` for the inner - * function, and returns `{ status_code, headers, body }`. - * - * console/web routes chat turns through this function instead of calling - * `run::start` directly so `instrumentHandler` can read `session_id` and - * `message_id` from the outer request and stamp OTel baggage before the - * nested trigger runs. That keeps "Group by session" / "Group by message" - * working in the traces UI (`engine::traces::group_by`). + * console/web sends `{ session_id?, message_id?, payload }`; this handler + * stamps OTel baggage from the outer ids, then forwards `payload` to + * `run::start` on the turn-orchestrator worker. Routing chat turns through + * this function (instead of calling `run::start` directly) lets + * `instrumentHandler` read `session_id` / `message_id` from the outer + * request and stamp OTel baggage before the nested trigger runs, which + * keeps "Group by session" / "Group by message" working in the traces UI + * (`engine::traces::group_by`). */ +import type { RemoteFunctionHandler } from 'iii-sdk'; +import { z } from 'zod'; import type { ISdk } from '../runtime/iii.js'; -import { unwrapBody } from '../runtime/handler.js'; +import { + RunStartPayloadSchema, + type RunStartPayload, + type RunStartResult, +} from '../turn-orchestrator/schemas.js'; + +const HarnessTriggerInputSchema = z.object({ + session_id: z.string().optional(), + message_id: z.string().optional(), + payload: RunStartPayloadSchema, +}); + +export type HarnessTriggerInput = z.infer; -/** Upper bound for a single harness::trigger. Mirrors the Rust constant. */ +export interface HarnessTriggerResponse { + status_code: number; + headers: Record; + body: RunStartResult; +} + +/** Upper bound for a single `harness::trigger`. */ const BRIDGE_TIMEOUT_MS = 600_000; export function register(iii: ISdk): void { - iii.registerFunction( - 'harness::trigger', - async (input: unknown) => { - const body = unwrapBody(input); - const functionId = body.function_id; - if (typeof functionId !== 'string' || functionId.length === 0) { - throw new Error('harness::trigger: missing function_id'); - } - const inner = - body.payload && typeof body.payload === 'object' - ? (body.payload as Record) - : {}; - const result = await iii.trigger({ - function_id: functionId, - payload: inner, - timeoutMs: BRIDGE_TIMEOUT_MS, - }); - return { - status_code: 200, - headers: { 'content-type': 'application/json' }, - body: result, - }; - }, - { - description: - 'Forward {function_id, payload} to iii.trigger and return the result. Used by console/web to reach the bus over the iii-browser-sdk.', - }, - ); + const handler: RemoteFunctionHandler = async ( + input, + ) => { + const body = HarnessTriggerInputSchema.parse(input); + const result = await iii.trigger({ + function_id: 'run::start', + payload: body.payload, + timeoutMs: BRIDGE_TIMEOUT_MS, + }); + return { + status_code: 200, + headers: { 'content-type': 'application/json' }, + body: result, + }; + }; + + iii.registerFunction('harness::trigger', handler, { + description: + 'Browser kickoff: forward payload to run::start. Used by console/web over the iii-browser-sdk.', + }); } diff --git a/harness/src/runtime/iii.ts b/harness/src/runtime/iii.ts index a0b03e09..220da8b3 100644 --- a/harness/src/runtime/iii.ts +++ b/harness/src/runtime/iii.ts @@ -4,7 +4,7 @@ * mock the SDK in tests. */ -export { registerWorker } from 'iii-sdk'; +export { registerWorker, TriggerAction } from 'iii-sdk'; export type { ISdk, Channel, diff --git a/harness/src/turn-orchestrator/agent-trigger.ts b/harness/src/turn-orchestrator/agent-trigger.ts index ca01cab7..76f3a545 100644 --- a/harness/src/turn-orchestrator/agent-trigger.ts +++ b/harness/src/turn-orchestrator/agent-trigger.ts @@ -1,27 +1,40 @@ /** - * `agent::trigger` dispatcher + chokepoint. Mirrors - * `turn-orchestrator/src/agent_call.rs`. + * Agent tool-call dispatcher + approval chokepoint. * - * `dispatchWithHook` is the single chokepoint: every agent-issued tool - * call goes through `consultBefore` before reaching the inner trigger. - * Fail-closed: a hook timeout / error / missing subscriber denies the - * call with a `gate_unavailable` envelope (Phase 2.B §F). + * `dispatchWithHook` is the single chokepoint for FSM-issued calls: every + * agent tool call goes through `consultBefore` before reaching the inner + * trigger. `triggerFunctionCall` is the shared trigger/decode/error path + * used by both the hook gate and pre-approved resume execution. */ -import { uuidLike } from '../runtime/ids.js'; import type { ISdk } from '../runtime/iii.js'; +import { z } from 'zod'; import type { ContentBlock } from '../types/content.js'; import type { FunctionCall, FunctionResult } from '../types/function.js'; import { type DenialEnvelope, consultBefore, gateUnavailableEnvelope } from './hook.js'; export const TOOL_NAME = 'agent_trigger'; -export const FUNCTION_ID = 'agent::trigger'; export type DispatchResult = | { kind: 'result'; result: FunctionResult } | { kind: 'deny'; result: FunctionResult } | { kind: 'pending' }; +export function missingFunctionResult(): FunctionResult { + return errorResult({ + error: 'missing_function', + message: 'agent_trigger requires a non-empty `function` string field', + }); +} + +export function unwrapAgentTrigger(fc: FunctionCall): FunctionCall { + if (fc.function_id !== TOOL_NAME) return fc; + const args = (fc.arguments ?? {}) as Record; + const fn = typeof args.function === 'string' ? args.function : ''; + const payload = args.payload ?? {}; + return { id: fc.id, function_id: fn, arguments: payload }; +} + export function agentTriggerTool(): unknown { return { name: TOOL_NAME, @@ -60,7 +73,7 @@ function denialResult(denial: DenialEnvelope): FunctionResult { }; } -function decodeOrPassthrough(value: unknown): FunctionResult { +export function decodeOrPassthrough(value: unknown): FunctionResult { if ( value && typeof value === 'object' && @@ -91,26 +104,6 @@ function isFunctionNotFound(err: unknown): boolean { return false; } -/** - * Build the `hint` field on a `function_not_found` result. Models - * regularly confuse the SKILL id (`sandbox/skills/sandbox/create`, the - * on-disk path returned by `directory::skills::list`) with the FUNCTION - * id (`sandbox::create`, what `agent_trigger` actually expects) and - * then retry the same wrong id 3+ times before recovering. When the - * caller's `function_id` contains a `/` we can usually reconstruct the - * canonical worker::fn form and surface it as a "did you mean" — that - * collapses the typical 4-turn recovery to a 2-turn recovery. - * - * Cases recognised: - * - `/skills//` → `::` (the canonical skill-id - * shape produced by `directory::skills::list` for a how-to that - * declares `function_id:` in its frontmatter). - * - `/` → `::` (weaker guess, but - * matches what models often hallucinate as a shorthand). - * - * Anything else (no `/`, or shapes we can't confidently rewrite) gets - * the generic hint pointing at the skills surface. - */ export function functionNotFoundHint(badFunctionId: string): string { if (!badFunctionId.includes('/')) { return 'load the relevant skill via directory::skills::get, or check the function id'; @@ -123,131 +116,59 @@ export function functionNotFoundHint(badFunctionId: string): string { const segments = badFunctionId.split('/').filter((s) => s.length > 0); let suggestion: string | null = null; if (segments.length >= 4 && segments[1] === 'skills' && segments[0] === segments[2]) { - // sandbox/skills/sandbox/create → sandbox::create - // worker-a/skills/worker-a/nested/fn → worker-a::nested::fn suggestion = `${segments[0]}::${segments.slice(3).join('::')}`; } else if (segments.length === 2 && segments[1] !== 'index') { - // sandbox/create → sandbox::create (also catches accidental - // `/` shorthand the model invented from the skill path) suggestion = `${segments[0]}::${segments[1]}`; } return suggestion ? `Did you mean \`${suggestion}\`? ${generic}` : generic; } -function isTimeout(err: unknown): boolean { - if (!err || typeof err !== 'object') return false; - const obj = err as Record; - if (obj.code === 'timeout') return true; - if (typeof obj.message === 'string' && /^Timeout|timed out/.test(obj.message)) return true; - return false; -} - -export async function dispatchWithHook( +/** Trigger a function call and normalize success/error into a FunctionResult. */ +export async function triggerFunctionCall( iii: ISdk, function_call: FunctionCall, - session_id: string | undefined, -): Promise { - if (!function_call.function_id || function_call.function_id.length === 0) { - return { - kind: 'result', - result: errorResult({ - error: 'missing_function', - message: 'agent_trigger requires a non-empty `function` string field', - }), - }; - } - const outcome = await consultBefore(iii, function_call); - if (outcome.kind === 'deny') return { kind: 'deny', result: denialResult(outcome.denial) }; - if (outcome.kind === 'pending') { - return { kind: 'pending' }; - } - +): Promise { try { const value = await iii.trigger({ function_id: function_call.function_id, payload: function_call.arguments ?? {}, }); - return { kind: 'result', result: decodeOrPassthrough(value) }; + return decodeOrPassthrough(value); } catch (err) { if (isFunctionNotFound(err)) { - return { - kind: 'result', - result: errorResult({ - error: 'function_not_found', - function: function_call.function_id, - hint: functionNotFoundHint(function_call.function_id), - }), - }; + return errorResult({ + error: 'function_not_found', + function: function_call.function_id, + hint: functionNotFoundHint(function_call.function_id), + }); } - if (isTimeout(err)) { - return { - kind: 'result', - result: errorResult({ - error: 'timeout', - function: function_call.function_id, - message: String(err), - }), - }; - } - return { - kind: 'deny', - result: denialResult( - gateUnavailableEnvelope(function_call.function_id, `trigger_failed: ${String(err)}`), - ), - }; + return denialResult( + gateUnavailableEnvelope(function_call.function_id, `trigger_failed: ${String(err)}`), + ); } } -export async function dispatch( +export async function dispatchWithHook( iii: ISdk, - session_id: string, - fn: unknown, - payload: unknown, -): Promise { - if (typeof fn !== 'string' || fn.length === 0) { - return errorResult({ - error: 'missing_function', - message: 'agent_trigger requires a non-empty `function` string field', - }); + function_call: FunctionCall, +): Promise { + const outcome = await consultBefore(iii, function_call); + if (outcome.kind === 'deny') { + return { kind: 'deny', result: denialResult(outcome.denial) }; } - const fc: FunctionCall = { - id: `agent_trigger-${uuidLike()}`, - function_id: fn, - arguments: payload ?? {}, - }; - const out = await dispatchWithHook(iii, fc, session_id); - if (out.kind === 'pending') { - return errorResult({ - error: 'awaiting_approval', - function: fc.function_id, - message: 'This call requires human approval. Approve via the console and retry.', - }); + if (outcome.kind === 'pending') { + return { kind: 'pending' }; } - return out.result; -} -export function register(iii: ISdk): void { - iii.registerFunction( - FUNCTION_ID, - async (payload: unknown) => { - const obj = (payload ?? {}) as Record; - const session_id = typeof obj.session_id === 'string' ? obj.session_id : ''; - const fn = obj.function; - const inner = obj.payload ?? {}; - return await dispatch(iii, session_id, fn, inner); - }, - { - description: - 'LLM-facing dispatcher: dispatches an iii function and returns a FunctionResult.', - }, - ); + const result = await triggerFunctionCall(iii, function_call); + return { kind: 'result', result }; } +const errorResultDetailsSchema = z.union([ + z.object({ error: z.string() }), + z.object({ status: z.literal('denied') }), +]); + export function isErrorResult(result: FunctionResult): boolean { - const details = result.details; - if (!details || typeof details !== 'object') return false; - const obj = details as Record; - if (typeof obj.error === 'string') return true; - if (obj.status === 'denied') return true; - return false; + return errorResultDetailsSchema.safeParse(result.details).success; } diff --git a/harness/src/turn-orchestrator/approval-resume.ts b/harness/src/turn-orchestrator/approval-resume.ts index 18435a1c..93acf7b7 100644 --- a/harness/src/turn-orchestrator/approval-resume.ts +++ b/harness/src/turn-orchestrator/approval-resume.ts @@ -1,7 +1,7 @@ /** * Per-call resume functions for parked approvals. Registered when a call * enters `function_awaiting_approval`; invoked by `approval::resolve` or - * abort. Persists to scope `approvals` and wakes `turn::step`. + * abort. Persists to scope `approvals` and enqueues `turn::{state}` via wakeFromRecord. */ import { @@ -19,7 +19,7 @@ import { stateSet, } from '../runtime/state.js'; import type { TurnStateRecord } from './state.js'; -import { STEP_FN_ID } from './subscriber.js'; +import { wakeFromRecord } from './wake.js'; const resumeRefs = new Map(); const TURN_STATE_KEY_RE = /^session\/[^/]+\/turn_state$/; @@ -90,9 +90,9 @@ async function handleApprovalResume( } try { - await iii.trigger({ function_id: STEP_FN_ID, payload: { session_id } }); + await wakeFromRecord(iii, session_id); } catch (err) { - logger.warn('approval resume: turn::step invoke failed', { session_id, err: String(err) }); + logger.warn('approval resume: turn step wake failed', { session_id, err: String(err) }); } unregisterApprovalResume(fnId); @@ -112,7 +112,7 @@ export function registerApprovalResume( async (payload: unknown) => handleApprovalResume(iii, session_id, function_call_id, payload), { description: - 'Resume a parked approval: persist decision to approvals scope and wake turn::step.', + 'Resume a parked approval: persist decision to approvals scope and enqueue turn::{state}.', }, ); resumeRefs.set(fnId, ref); diff --git a/harness/src/turn-orchestrator/config.ts b/harness/src/turn-orchestrator/config.ts index f994fba2..d066bee9 100644 --- a/harness/src/turn-orchestrator/config.ts +++ b/harness/src/turn-orchestrator/config.ts @@ -1,13 +1,11 @@ -import { getNumber, getStringArray } from '../runtime/config.js'; +import { getStringArray } from '../runtime/config.js'; export type TurnOrchestratorConfig = { - sync_default_timeout_ms: number; system_default_skills: string[]; }; export function loadOrchestratorConfig(cfg: Record): TurnOrchestratorConfig { return { - sync_default_timeout_ms: getNumber(cfg, 'sync_default_timeout_ms', 120_000), system_default_skills: getStringArray(cfg, 'system_default_skills', [ 'iii://iii-directory/index', ]), diff --git a/harness/src/turn-orchestrator/get-state.ts b/harness/src/turn-orchestrator/get-state.ts index 1a4cac56..927c6c80 100644 --- a/harness/src/turn-orchestrator/get-state.ts +++ b/harness/src/turn-orchestrator/get-state.ts @@ -1,28 +1,25 @@ /** - * `turn::get_state` — one-shot reader for a session's current - * turn_state record. Exists so UI clients can recover pending modals - * after a page reload without reaching into iii state from the - * browser. The orchestrator owns the turn_state schema and key - * layout; clients call this and get back the record (or null for - * an unknown session). + * `turn::get_state` — one-shot reader for a session's current turn_state record. + * + * **Incoming**: flat `{ session_id }` from `console/web` real backend (page reload recovery) + * **Outgoing**: `TurnStateRecord | null` — null when the session is unknown */ -import { requireString } from '../runtime/handler.js'; import type { ISdk } from '../runtime/iii.js'; import * as persistence from './persistence.js'; -import type { TurnStateRecord } from './state.js'; +import { GetStatePayloadSchema, type GetStatePayload, type GetStateResult } from './schemas.js'; -export const FUNCTION_ID = 'turn::get_state'; - -export async function execute(iii: ISdk, payload: unknown): Promise { - const obj = (payload ?? {}) as Record; - const session_id = requireString(obj, 'session_id'); - return persistence.loadRecord(iii, session_id); +export async function execute(iii: ISdk, payload: GetStatePayload): Promise { + return persistence.loadRecord(iii, payload.session_id); } export function register(iii: ISdk): void { - iii.registerFunction(FUNCTION_ID, async (payload: unknown) => execute(iii, payload), { - description: - 'Read the current turn_state record for a session. Returns null if the session is unknown. UI clients use this on page reload to recover any in-progress modals (e.g. function_awaiting_approval) without reading iii state directly.', - }); + iii.registerFunction( + 'turn::get_state', + async (payload: GetStatePayload) => execute(iii, GetStatePayloadSchema.parse(payload)), + { + description: + 'Read the current turn_state record for a session. Returns null if the session is unknown. UI clients use this on page reload to recover any in-progress modals (e.g. function_awaiting_approval) without reading iii state directly.', + }, + ); } diff --git a/harness/src/turn-orchestrator/on-abort-signal.ts b/harness/src/turn-orchestrator/on-abort-signal.ts index 34a82091..a5d2788b 100644 --- a/harness/src/turn-orchestrator/on-abort-signal.ts +++ b/harness/src/turn-orchestrator/on-abort-signal.ts @@ -2,7 +2,7 @@ * Reactive abort wake. A `state` trigger on `scope: 'agent'` filtered by * the abort_signal key shape (`session//abort_signal`) and a * `new_value === true` write fires this adapter, which publishes - * `turn::step_requested` so the orchestrator's FSM advances to + * `turn::{state}` on the durable FIFO queue so the orchestrator's FSM advances to * `steering_check` and observes the abort flag promptly. * * Without this wake, a session mid-streaming would only check @@ -12,50 +12,70 @@ * soon as the current one finishes — which is the earliest moment we * can safely react. * - * Mirror of the canonical pattern in - * `harness/src/harness/fanout/sessions-poll.ts`. + * **Incoming**: agent-scope `state:created` / `state:updated` on + * `session//abort_signal` with `new_value === true` (from `state::set` via + * `performAbortSideEffects` / `router::abort`). Same envelope the engine passes + * to state trigger adapters. + * + * **Outgoing**: `wakeFromRecord` enqueues `{ session_id }` on the `turn-step` queue. */ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; +import { AbortSignalWriteEventSchema, type ParsedAbortSignalWrite } from './schemas.js'; +import { wakeFromRecord } from './wake.js'; -export const STEP_TOPIC = 'turn::step_requested'; -export const HANDLER_FN_ID = 'turn::on_abort_signal'; -export const CONDITION_FN_ID = 'turn::is_abort_signal_set'; -const ABORT_SIGNAL_KEY_RE = /^session\/([^/]+)\/abort_signal$/; - -export function isAbortSignalWrite(event: unknown): boolean { - if (!event || typeof event !== 'object') return false; - const obj = event as Record; - if (obj.event_type !== 'state:created' && obj.event_type !== 'state:updated') return false; - if (obj.new_value !== true) return false; - const key = obj.key; - if (typeof key !== 'string') return false; - return ABORT_SIGNAL_KEY_RE.test(key); +export function parseAbortSignalWrite(event: unknown): ParsedAbortSignalWrite | null { + const result = AbortSignalWriteEventSchema.safeParse(event); + return result.success ? result.data : null; } -function extractSessionId(key: string): string | null { - const m = ABORT_SIGNAL_KEY_RE.exec(key); - return m ? (m[1] ?? null) : null; +export function isAbortSignalWrite(event: unknown): boolean { + return parseAbortSignalWrite(event) !== null; } -export async function handleAbortSignalWrite(iii: ISdk, event: unknown): Promise { - if (!event || typeof event !== 'object') return; - const obj = event as Record; - const key = obj.key; - if (typeof key !== 'string') return; - const session_id = extractSessionId(key); - if (!session_id) return; - +export async function execute(iii: ISdk, write: ParsedAbortSignalWrite): Promise { try { - await iii.trigger({ - function_id: 'iii::durable::publish', - payload: { topic: STEP_TOPIC, data: { session_id } }, - }); + await wakeFromRecord(iii, write.session_id); } catch (err) { - logger.warn('turn::on_abort_signal: publish failed', { - session_id, + logger.warn('turn::on_abort_signal: wake failed', { + session_id: write.session_id, err: String(err), }); } } + +export async function handleAbortSignalWrite(iii: ISdk, event: unknown): Promise { + const write = parseAbortSignalWrite(event); + if (!write) return; + await execute(iii, write); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::is_abort_signal_set', + async (event: unknown) => isAbortSignalWrite(event), + { + description: + 'Condition: state event sets session//abort_signal = true (state:created or state:updated).', + }, + ); + + iii.registerFunction( + 'turn::on_abort_signal', + async (event: unknown) => handleAbortSignalWrite(iii, event), + { + description: + 'State trigger adapter on scope=agent for abort_signal writes; enqueues turn::{state} so the orchestrator picks up the abort promptly.', + }, + ); + + iii.registerTrigger({ + type: 'state', + function_id: 'turn::on_abort_signal', + config: { + scope: 'agent', + condition_function_id: 'turn::is_abort_signal_set', + }, + }); +} diff --git a/harness/src/turn-orchestrator/on-record-written.ts b/harness/src/turn-orchestrator/on-record-written.ts deleted file mode 100644 index 28c48f08..00000000 --- a/harness/src/turn-orchestrator/on-record-written.ts +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Self-loop wake: a state trigger on `scope: 'agent'` filtered by the - * turn_state key shape and a stepable state TRANSITION (new state differs - * from old, non-terminal, non-awaiting) invokes `turn::step`. Saving the - * record on a real transition is the wake — replaces the durable - * `turn::step_requested` self-publish that used to live in `subscriber.ts`. - * - * Same-state writes (e.g. `handlePrepare` calling `saveRecord` while still - * in `function_prepare` to persist normalized calls) MUST NOT wake step, - * otherwise the orchestrator races itself: a duplicate `turn::step` runs - * the same handler again, re-emitting events and re-persisting prepared - * calls. We filter those out by requiring `new_value.state !== old_value.state`. - */ - -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import type { TurnState } from './state.js'; -import { STEP_FN_ID, STEP_TOPIC } from './subscriber.js'; - -export { STEP_FN_ID }; -export const HANDLER_FN_ID = 'turn::on_record_written'; -export const CONDITION_FN_ID = 'turn::is_stepable_record_write'; - -const TURN_STATE_KEY_RE = /^session\/(?[^/]+)\/turn_state$/; - -const NON_STEPABLE_STATES: ReadonlySet = new Set([ - 'stopped', - 'function_awaiting_approval', -]); - -type StepableWrite = { - session_id: string; - state: TurnState; -}; - -/** - * One source of truth for "is this a stepable turn_state write?". Returns the - * extracted session_id + state on match, null otherwise. Both the condition - * (boolean check) and the handler (needs the session_id) route through this, - * so they can't drift — and the handler can't fire on a parking-state write - * even if the condition was bypassed. - */ -function parseStepableWrite(event: unknown): StepableWrite | null { - if (!event || typeof event !== 'object') return null; - const obj = event as Record; - - if (obj.event_type !== 'state:created' && obj.event_type !== 'state:updated') return null; - - const key = obj.key; - if (typeof key !== 'string') return null; - const session_id = TURN_STATE_KEY_RE.exec(key)?.groups?.session_id; - if (!session_id) return null; - - const nv = obj.new_value; - if (!nv || typeof nv !== 'object') return null; - const state = (nv as Record).state; - if (typeof state !== 'string') return null; - if (NON_STEPABLE_STATES.has(state as TurnState)) return null; - - if (obj.event_type === 'state:updated') { - const ov = obj.old_value; - const old_state = - ov && typeof ov === 'object' ? (ov as Record).state : undefined; - if (typeof old_state === 'string' && old_state === state) return null; - } - - return { session_id, state: state as TurnState }; -} - -export function isStepableRecordWrite(event: unknown): boolean { - return parseStepableWrite(event) !== null; -} - -export async function handleStepableRecordWrite(iii: ISdk, event: unknown): Promise { - const parsed = parseStepableWrite(event); - if (!parsed) return; - - try { - await iii.trigger({ - function_id: STEP_FN_ID, - payload: { session_id: parsed.session_id }, - }); - return; - } catch (err) { - // Direct invoke failed (timeout, transient throw, etc). The triggering - // state write already landed, so without a durable retry the session - // would sit stuck in this state forever. Fall back to publishing - // `turn::step_requested` so the durable subscriber on `subscriber.ts` - // buffers + retries. - logger.warn( - 'turn::on_record_written: direct turn::step failed; falling back to durable publish', - { session_id: parsed.session_id, err: String(err) }, - ); - try { - await iii.trigger({ - function_id: 'iii::durable::publish', - payload: { topic: STEP_TOPIC, data: { session_id: parsed.session_id } }, - }); - } catch (publishErr) { - logger.error( - 'turn::on_record_written: durable publish fallback also failed; session may be stuck', - { session_id: parsed.session_id, err: String(publishErr) }, - ); - } - } -} diff --git a/harness/src/turn-orchestrator/on-terminal.ts b/harness/src/turn-orchestrator/on-terminal.ts deleted file mode 100644 index 1f6ac9b4..00000000 --- a/harness/src/turn-orchestrator/on-terminal.ts +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Reactive wake for `run::start_and_wait`. A `state` trigger on - * `scope: 'agent'` filtered to `session//turn_state` writes whose - * `new_value.state === 'stopped'` fires this handler, which resolves the - * in-process waiter installed by `executeSync`. Replaces the previous - * - * Mirror of the canonical pattern in `on-abort-signal.ts` and the - * per-call approval resume functions in `approval-resume.ts`. - */ - -export const HANDLER_FN_ID = 'turn::on_terminal_state'; -export const CONDITION_FN_ID = 'turn::is_terminal_state_write'; - -const TURN_STATE_KEY_RE = /^session\/([^/]+)\/turn_state$/; - -const pending = new Map void>(); - -export const __pendingForTest = pending; - -export function isTerminalStateWrite(event: unknown): boolean { - if (!event || typeof event !== 'object') return false; - const obj = event as Record; - if (obj.event_type !== 'state:created' && obj.event_type !== 'state:updated') return false; - const key = obj.key; - if (typeof key !== 'string' || !TURN_STATE_KEY_RE.test(key)) return false; - const nv = obj.new_value; - if (!nv || typeof nv !== 'object') return false; - return (nv as Record).state === 'stopped'; -} - -function extractSessionId(key: string): string | null { - const m = TURN_STATE_KEY_RE.exec(key); - return m ? (m[1] ?? null) : null; -} - -export function installTerminalWaiter(session_id: string): Promise { - return new Promise((resolve) => { - pending.set(session_id, resolve); - }); -} - -export function clearTerminalWaiter(session_id: string): void { - pending.delete(session_id); -} - -export function handleTerminalStateWrite(event: unknown): void { - if (!event || typeof event !== 'object') return; - const obj = event as Record; - const key = obj.key; - if (typeof key !== 'string') return; - const session_id = extractSessionId(key); - if (!session_id) return; - const resolver = pending.get(session_id); - if (!resolver) return; - pending.delete(session_id); - resolver(); -} diff --git a/harness/src/turn-orchestrator/on-turn-state-changed.ts b/harness/src/turn-orchestrator/on-turn-state-changed.ts deleted file mode 100644 index a1e18942..00000000 --- a/harness/src/turn-orchestrator/on-turn-state-changed.ts +++ /dev/null @@ -1,64 +0,0 @@ -/** - * State-trigger adapter that mirrors `on-record-written` but emits a - * `turn_state_changed` agent event instead of triggering `turn::step`. - * Gives the frontend a live signal carrying the new turn_state record - * so it can derive pending approvals from state directly. - */ - -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import { emit } from './events.js'; - -export const HANDLER_FN_ID = 'turn::on_turn_state_changed'; -export const CONDITION_FN_ID = 'turn::is_turn_state_write'; - -const TURN_STATE_KEY_RE = /^session\/(?[^/]+)\/turn_state$/; - -type ParsedWrite = { - session_id: string; - event_type: 'state:created' | 'state:updated'; - new_value: Record; - old_value?: Record; -}; - -function parseWrite(event: unknown): ParsedWrite | null { - if (!event || typeof event !== 'object') return null; - const obj = event as Record; - if (obj.event_type !== 'state:created' && obj.event_type !== 'state:updated') return null; - const key = obj.key; - if (typeof key !== 'string') return null; - const session_id = TURN_STATE_KEY_RE.exec(key)?.groups?.session_id; - if (!session_id) return null; - const nv = obj.new_value; - if (!nv || typeof nv !== 'object') return null; - const ov = obj.old_value; - return { - session_id, - event_type: obj.event_type, - new_value: nv as Record, - old_value: ov && typeof ov === 'object' ? (ov as Record) : undefined, - }; -} - -export function isTurnStateWrite(event: unknown): boolean { - return parseWrite(event) !== null; -} - -export async function handleTurnStateWrite(iii: ISdk, event: unknown): Promise { - const parsed = parseWrite(event); - if (!parsed) return; - - try { - await emit(iii, parsed.session_id, { - type: 'turn_state_changed', - event_type: parsed.event_type, - new_value: parsed.new_value, - ...(parsed.old_value !== undefined && { old_value: parsed.old_value }), - }); - } catch (err) { - logger.warn('turn::on_turn_state_changed: emit failed', { - session_id: parsed.session_id, - err: String(err), - }); - } -} diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts index 05fc40a2..9a1c785e 100644 --- a/harness/src/turn-orchestrator/persistence.ts +++ b/harness/src/turn-orchestrator/persistence.ts @@ -6,18 +6,17 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentMessage } from '../types/agent-message.js'; import type { FunctionCall, FunctionResult } from '../types/function.js'; +import { type RunRequest, parseRunRequest } from './run-request.js'; import { type TurnStateRecord, - cwdIndexKey, - cwdKey, functionSchemasKey, lastSessionTreeLenKey, messagesKey, runRequestKey, - sandboxIdKey, - toolSchemasKey, turnStateKey, } from './state.js'; +import { emitTurnStateChanged } from './turn-state-write.js'; +import { shouldWakeStep, wakeState } from './wake.js'; const SCOPE = 'agent'; @@ -51,8 +50,41 @@ export async function loadRecord(iii: ISdk, session_id: string): Promise { +/** + * Persist turn_state and emit UI event — no FSM wake (mid-handler saves). + * Pass `previous` (the pre-write record) to skip the `state::get` that would + * otherwise re-read it; omit it and the prior value is loaded here. + */ +export async function persistRecord( + iii: ISdk, + rec: TurnStateRecord, + previous?: TurnStateRecord | null, +): Promise { + const prev = previous !== undefined ? previous : await loadRecord(iii, rec.session_id); + const eventType = prev === null ? 'state:created' : 'state:updated'; + await stateSet(iii, turnStateKey(rec.session_id), rec); + + await emitTurnStateChanged( + iii, + rec.session_id, + eventType, + rec as unknown as Record, + prev !== null ? (prev as unknown as Record) : undefined, + ); +} + +export async function saveRecord( + iii: ISdk, + rec: TurnStateRecord, + previous?: TurnStateRecord | null, +): Promise { + const prev = previous !== undefined ? previous : await loadRecord(iii, rec.session_id); + await persistRecord(iii, rec, prev); + + if (shouldWakeStep(prev?.state ?? null, rec.state)) { + await wakeState(iii, rec.session_id, rec.state); + } } export async function loadMessages(iii: ISdk, session_id: string): Promise { @@ -135,25 +167,9 @@ export async function saveRunRequest( await stateSet(iii, runRequestKey(session_id), request); } -export async function loadRunRequest( - iii: ISdk, - session_id: string, -): Promise> { +export async function loadRunRequest(iii: ISdk, session_id: string): Promise { const v = await stateGet(iii, runRequestKey(session_id)); - return v && typeof v === 'object' ? (v as Record) : {}; -} - -export async function saveCwd(iii: ISdk, session_id: string, cwd: string): Promise { - await stateSet(iii, cwdKey(session_id), cwd); -} - -export async function saveCwdIndex(iii: ISdk, cwd_hash: string, session_id: string): Promise { - await stateSet(iii, cwdIndexKey(cwd_hash), session_id); -} - -export async function loadSandboxId(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, sandboxIdKey(session_id)); - return typeof v === 'string' ? v : null; + return parseRunRequest(v && typeof v === 'object' ? (v as Record) : {}); } export async function saveFunctionSchemas( @@ -166,29 +182,17 @@ export async function saveFunctionSchemas( export async function loadFunctionSchemas(iii: ISdk, session_id: string): Promise { const v = await stateGet(iii, functionSchemasKey(session_id)); - if (Array.isArray(v)) return v; - const legacy = await stateGet(iii, toolSchemasKey(session_id)); - if (Array.isArray(legacy)) return legacy; - return []; + return Array.isArray(v) ? v : []; } const PREPARED_KEY = 'function_prepared'; const EXECUTED_KEY = 'function_executed'; -const LEGACY_PREPARED_KEY = 'tool_prepared'; -const LEGACY_EXECUTED_KEY = 'tool_executed'; const stagingKey = (sid: string, suffix: string) => `session/${sid}/${suffix}`; -async function stagingGetWithLegacy( - iii: ISdk, - session_id: string, - newSuffix: string, - legacySuffix: string, -): Promise { - const v = await stateGet(iii, stagingKey(session_id, newSuffix)); - if (Array.isArray(v)) return v; - const legacy = await stateGet(iii, stagingKey(session_id, legacySuffix)); - return Array.isArray(legacy) ? legacy : []; +async function stagingGet(iii: ISdk, session_id: string, suffix: string): Promise { + const v = await stateGet(iii, stagingKey(session_id, suffix)); + return Array.isArray(v) ? v : []; } export type PreparedEntry = { @@ -222,12 +226,12 @@ export async function savePreparedCalls( } export async function loadPreparedCalls(iii: ISdk, session_id: string): Promise { - const items = await stagingGetWithLegacy(iii, session_id, PREPARED_KEY, LEGACY_PREPARED_KEY); + const items = await stagingGet(iii, session_id, PREPARED_KEY); const out: PreparedEntry[] = []; for (const it of items) { if (!it || typeof it !== 'object') continue; const obj = it as Record; - const fc = (obj.function_call ?? obj.tool_call) as FunctionCall | undefined; + const fc = obj.function_call as FunctionCall | undefined; if (!fc) continue; const blocked = (obj.blocked as FunctionResult | null) ?? null; const pre_approved = obj.pre_approved === true; @@ -245,12 +249,12 @@ export async function saveExecutedCalls( } export async function loadExecutedCalls(iii: ISdk, session_id: string): Promise { - const items = await stagingGetWithLegacy(iii, session_id, EXECUTED_KEY, LEGACY_EXECUTED_KEY); + const items = await stagingGet(iii, session_id, EXECUTED_KEY); const out: ExecutedEntry[] = []; for (const it of items) { if (!it || typeof it !== 'object') continue; const obj = it as Record; - const fc = (obj.function_call ?? obj.tool_call) as FunctionCall | undefined; + const fc = obj.function_call as FunctionCall | undefined; const result = obj.result as FunctionResult | undefined; if (!fc || !result) continue; out.push({ diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 7ea54662..de49c286 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -1,150 +1,35 @@ import { loadConfig } from '../runtime/config.js'; import type { ISdk } from '../runtime/iii.js'; -import { register as registerAgentTrigger } from './agent-trigger.js'; import * as bootstrap from './bootstrap.js'; import { loadOrchestratorConfig } from './config.js'; import { register as registerGetState } from './get-state.js'; -import { - CONDITION_FN_ID as ABORT_CONDITION_FN, - HANDLER_FN_ID as ABORT_HANDLER_FN, - handleAbortSignalWrite, - isAbortSignalWrite, -} from './on-abort-signal.js'; -import { - CONDITION_FN_ID as RECORD_CONDITION_FN, - HANDLER_FN_ID as RECORD_HANDLER_FN, - handleStepableRecordWrite, - isStepableRecordWrite, -} from './on-record-written.js'; -import { - CONDITION_FN_ID as TURN_STATE_CHANGED_CONDITION_FN, - HANDLER_FN_ID as TURN_STATE_CHANGED_HANDLER_FN, - handleTurnStateWrite, - isTurnStateWrite, -} from './on-turn-state-changed.js'; -import { - CONDITION_FN_ID as TERMINAL_CONDITION_FN, - HANDLER_FN_ID as TERMINAL_HANDLER_FN, - handleTerminalStateWrite, - isTerminalStateWrite, -} from './on-terminal.js'; +import { register as registerOnAbortSignal } from './on-abort-signal.js'; import { register as registerRunStart } from './run-start.js'; import { recoverPendingApprovals } from './approval-resume.js'; -import { register as registerSubscriber } from './subscriber.js'; +import { + registerAssistantFinished, + registerAssistantStreaming, + registerFunctionAwaitingApproval, + registerFunctionExecute, + registerProvisioning, + registerSteeringCheck, + registerTearingDown, +} from './states/index.js'; export async function register(iii: ISdk, ctx: { configPath: string }): Promise { const cfg = await loadConfig(ctx.configPath); const orchestratorCfg = loadOrchestratorConfig(cfg); - registerRunStart(iii, orchestratorCfg); - registerAgentTrigger(iii); - registerSubscriber(iii, orchestratorCfg); + registerRunStart(iii); + registerProvisioning(iii, orchestratorCfg); + registerAssistantStreaming(iii); + registerAssistantFinished(iii); + registerFunctionExecute(iii); + registerFunctionAwaitingApproval(iii); + registerSteeringCheck(iii); + registerTearingDown(iii); await recoverPendingApprovals(iii); registerGetState(iii); + registerOnAbortSignal(iii); - iii.registerFunction(ABORT_CONDITION_FN, async (event: unknown) => isAbortSignalWrite(event), { - description: - 'Condition: state event sets session//abort_signal = true (state:created or state:updated).', - }); - - iii.registerFunction( - ABORT_HANDLER_FN, - async (event: unknown) => handleAbortSignalWrite(iii, event), - { - description: - 'State trigger adapter on scope=agent for abort_signal writes; publishes turn::step_requested so the orchestrator picks up the abort promptly.', - }, - ); - - iii.registerTrigger({ - type: 'state', - function_id: ABORT_HANDLER_FN, - config: { - scope: 'agent', - condition_function_id: ABORT_CONDITION_FN, - }, - }); - - iii.registerFunction( - TERMINAL_CONDITION_FN, - async (event: unknown) => isTerminalStateWrite(event), - { - description: 'Condition: state event sets session//turn_state to state="stopped".', - }, - ); - - iii.registerFunction( - TERMINAL_HANDLER_FN, - async (event: unknown) => { - handleTerminalStateWrite(event); - }, - { - description: - 'State trigger adapter on scope=agent for terminal turn_state writes; resolves the run::start_and_wait waiter for that session.', - }, - ); - - iii.registerTrigger({ - type: 'state', - function_id: TERMINAL_HANDLER_FN, - config: { - scope: 'agent', - condition_function_id: TERMINAL_CONDITION_FN, - }, - }); - - iii.registerFunction( - RECORD_CONDITION_FN, - async (event: unknown) => isStepableRecordWrite(event), - { - description: - 'Condition: state event sets session//turn_state to a stepable state (excludes stopped + function_awaiting_approval).', - }, - ); - - iii.registerFunction( - RECORD_HANDLER_FN, - async (event: unknown) => handleStepableRecordWrite(iii, event), - { - description: - 'State trigger adapter on scope=agent for stepable turn_state writes; invokes turn::step. Replaces the imperative publishStep self-publish.', - }, - ); - - iii.registerTrigger({ - type: 'state', - function_id: RECORD_HANDLER_FN, - config: { - scope: 'agent', - condition_function_id: RECORD_CONDITION_FN, - }, - }); - - iii.registerFunction( - TURN_STATE_CHANGED_CONDITION_FN, - async (event: unknown) => isTurnStateWrite(event), - { - description: 'Condition: state event is a write to session//turn_state.', - }, - ); - - iii.registerFunction( - TURN_STATE_CHANGED_HANDLER_FN, - async (event: unknown) => handleTurnStateWrite(iii, event), - { - description: - 'State trigger adapter on scope=agent for turn_state writes; emits turn_state_changed on agent::events for the subscribed UI.', - }, - ); - - iii.registerTrigger({ - type: 'state', - function_id: TURN_STATE_CHANGED_HANDLER_FN, - config: { - scope: 'agent', - condition_function_id: TURN_STATE_CHANGED_CONDITION_FN, - }, - }); - - // Bootstrap best-effort skill download in the background. void bootstrap.run(iii, orchestratorCfg); } diff --git a/harness/src/turn-orchestrator/run-request.ts b/harness/src/turn-orchestrator/run-request.ts new file mode 100644 index 00000000..23d8ca5a --- /dev/null +++ b/harness/src/turn-orchestrator/run-request.ts @@ -0,0 +1,28 @@ +/** + * The persisted run request and its single typed parser. `loadRunRequest` + * (persistence) parses the raw `session//run_request` value through + * `parseRunRequest` once, so every consumer reads a fully-typed `RunRequest` + * instead of re-guarding `unknown` fields. + */ + +import type { Mode } from './system-prompt.js'; + +export type RunRequest = { + provider: string; + model: string; + mode: Mode | null; + system_prompt: string; +}; + +function parseMode(value: unknown): Mode | null { + return value === 'plan' || value === 'ask' || value === 'agent' ? value : null; +} + +export function parseRunRequest(raw: Record): RunRequest { + return { + provider: typeof raw.provider === 'string' ? raw.provider : '', + model: typeof raw.model === 'string' ? raw.model : '', + mode: parseMode(raw.mode), + system_prompt: typeof raw.system_prompt === 'string' ? raw.system_prompt : '', + }; +} diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index d8524a64..d6112a95 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -1,115 +1,39 @@ /** - * `run::start` and `run::start_and_wait`. Mirrors - * `turn-orchestrator/src/run_start.rs`. + * `run::start`. Mirrors `turn-orchestrator/src/run_start.rs`. + * + * **Incoming**: flat run request from `harness::trigger` (`body.payload` after + * `HarnessTriggerInputSchema` parse); console/web sends + * `{ session_id, message_id?, provider, model, mode?, messages }` and omits + * `system_prompt`, `max_turns` (schema defaults). + * **Outgoing**: `{ session_id }` — persists run config, messages, and seeds + * `turn_state` to provisioning via `saveRecord`. */ -import { requireString } from '../runtime/handler.js'; import type { ISdk } from '../runtime/iii.js'; -import type { AgentEvent } from '../types/agent-event.js'; -import type { AgentMessage } from '../types/agent-message.js'; -import type { TurnOrchestratorConfig } from './config.js'; -import { emit } from './events.js'; -import { clearTerminalWaiter, installTerminalWaiter } from './on-terminal.js'; import * as persistence from './persistence.js'; +import { RunStartPayloadSchema, type RunStartPayload, type RunStartResult } from './schemas.js'; import { newRecord } from './state.js'; -export const FUNCTION_ID = 'run::start'; -export const SYNC_FUNCTION_ID = 'run::start_and_wait'; +export async function execute(iii: ISdk, payload: RunStartPayload): Promise { + const { session_id, messages, max_turns, message_id: _message_id, ...run } = payload; -function buildRunRequest(payload: Record): Record { - return { - provider: payload.provider ?? '', - model: payload.model ?? '', - system_prompt: payload.system_prompt ?? '', - mode: payload.mode ?? null, - image: payload.image ?? 'python', - idle_timeout_secs: payload.idle_timeout_secs ?? 300, - cwd: payload.cwd ?? null, - cwd_hash: payload.cwd_hash ?? null, - }; -} - -function buildInitialEventPlan(messages: AgentMessage[]): AgentEvent[] { - const plan: AgentEvent[] = [{ type: 'agent_start' }]; - for (const m of messages) { - plan.push({ type: 'message_start', message: m }); - plan.push({ type: 'message_end', message: m }); - } - return plan; -} - -export async function execute(iii: ISdk, payload: unknown): Promise<{ session_id: string }> { - const obj = (payload ?? {}) as Record; - const session_id = requireString(obj, 'session_id'); - const max_turns = typeof obj.max_turns === 'number' ? obj.max_turns : undefined; - const request = buildRunRequest(obj); - const initial_messages = Array.isArray(obj.messages) ? (obj.messages as AgentMessage[]) : []; - - await persistence.saveRunRequest(iii, session_id, request); - await persistence.saveMessages(iii, session_id, initial_messages); - - if (typeof request.cwd === 'string') { - await persistence.saveCwd(iii, session_id, request.cwd as string); - if (typeof request.cwd_hash === 'string') { - await persistence.saveCwdIndex(iii, request.cwd_hash as string, session_id); - } - } - - for (const evt of buildInitialEventPlan(initial_messages)) { - await emit(iii, session_id, evt); - } + await persistence.saveRunRequest(iii, session_id, { + ...run, + mode: run.mode ?? null, + }); + await persistence.saveMessages(iii, session_id, messages); const record = newRecord(session_id, max_turns); await persistence.saveRecord(iii, record); return { session_id }; } -export async function executeSync( - iii: ISdk, - cfg: TurnOrchestratorConfig, - payload: unknown, -): Promise<{ session_id: string; messages: AgentMessage[]; turn_count: number }> { - const obj = (payload ?? {}) as Record; - const session_id = requireString(obj, 'session_id'); - const timeout_ms = - typeof obj.timeout_ms === 'number' ? obj.timeout_ms : cfg.sync_default_timeout_ms; - - // Install the waiter BEFORE kicking the run so the terminal turn_state - // write — which fires the `turn::on_terminal_state` state trigger — is - // guaranteed to find an entry to resolve. - const terminal = installTerminalWaiter(session_id); - try { - await execute(iii, payload); - - const winner = await new Promise<'terminal' | 'timeout'>((resolve) => { - const timer = setTimeout(() => resolve('timeout'), timeout_ms); - terminal.then(() => { - clearTimeout(timer); - resolve('terminal'); - }); - }); - - if (winner === 'timeout') { - throw new Error(`run::start_and_wait timed out after ${timeout_ms} ms`); - } - - const rec = await persistence.loadRecord(iii, session_id); - const messages = await persistence.loadMessages(iii, session_id); - return { session_id, messages, turn_count: rec?.turn_count ?? 0 }; - } finally { - clearTerminalWaiter(session_id); - } -} - -export function register(iii: ISdk, cfg: TurnOrchestratorConfig): void { - iii.registerFunction(FUNCTION_ID, async (payload: unknown) => execute(iii, payload), { - description: 'Start a durable agent session and return immediately.', - }); +export function register(iii: ISdk): void { iii.registerFunction( - SYNC_FUNCTION_ID, - async (payload: unknown) => executeSync(iii, cfg, payload), + 'run::start', + async (payload: RunStartPayload) => execute(iii, RunStartPayloadSchema.parse(payload)), { - description: 'Start a durable agent session and block until terminal (test/dev convenience).', + description: 'Start a durable agent session and return immediately.', }, ); } diff --git a/harness/src/turn-orchestrator/run-transition.ts b/harness/src/turn-orchestrator/run-transition.ts new file mode 100644 index 00000000..0f142761 --- /dev/null +++ b/harness/src/turn-orchestrator/run-transition.ts @@ -0,0 +1,53 @@ +/** + * Shared FSM transition runner. Every `turn::{state}` function performs the + * same load → null-check → stale-skip → handle → save sequence; this owns it so + * each per-state file only contributes its handler. + * + * The record loaded here is snapshotted before the handler mutates it and + * threaded into `saveRecord`, so the save path needs no extra `state::get` to + * compute the wake decision or the UI event's `old_value` — one read per + * transition instead of three. + */ + +import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import * as persistence from './persistence.js'; +import { type TurnStepPayload, type TurnStepResult } from './schemas.js'; +import { type TurnState, type TurnStateRecord, cloneRecord } from './state.js'; + +export type TransitionHandler = (iii: ISdk, rec: TurnStateRecord) => Promise; + +/** Returns a stale skip result when the queue message no longer matches persisted state. */ +function staleSkipResult(expectedState: TurnState, rec: TurnStateRecord): TurnStepResult | null { + if (rec.state === expectedState) return null; + logger.warn(`turn::${expectedState} skipped: stale queue message`, { + session_id: rec.session_id, + expected: expectedState, + actual: rec.state, + }); + return { ok: true, skipped: true, reason: 'stale' }; +} + +export async function runTransition( + iii: ISdk, + state: TurnState, + handle: TransitionHandler, + payload: TurnStepPayload, +): Promise { + const rec = await persistence.loadRecord(iii, payload.session_id); + if (!rec) { + throw new Error(`turn::${state} invariant: missing session ${payload.session_id}`); + } + const skipped = staleSkipResult(state, rec); + if (skipped) return skipped; + + const previous = cloneRecord(rec); + const from_state = rec.state; + try { + await handle(iii, rec); + } catch (err) { + throw new Error(`transition from ${from_state} failed: ${String(err)}`); + } + await persistence.saveRecord(iii, rec, previous); + return { ok: true, from_state, to_state: rec.state }; +} diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts new file mode 100644 index 00000000..4ece3fdc --- /dev/null +++ b/harness/src/turn-orchestrator/schemas.ts @@ -0,0 +1,57 @@ +/** + * Registered-function I/O contracts for turn-orchestrator. Every payload schema + * and payload/result type for the worker's `iii.registerFunction` handlers lives + * here, so the contract surface is readable in one place. Handlers import the + * schema (to `.parse` at the boundary) and the inferred types from this file. + */ + +import { z } from 'zod'; +import type { AgentMessage } from '../types/agent-message.js'; +import type { TurnState, TurnStateRecord } from './state.js'; +import type { Mode } from './system-prompt.js'; + +/** Shared `{ session_id }` payload — `turn::{state}` steps and `turn::get_state`. */ +export const SessionIdPayloadSchema = z.object({ + session_id: z.string().min(1), +}); + +// --- run::start --- +export const RunStartPayloadSchema = SessionIdPayloadSchema.extend({ + message_id: z.string().optional(), + provider: z.string(), + model: z.string(), + mode: z.enum(['plan', 'ask', 'agent'] satisfies [Mode, Mode, Mode]).optional(), + messages: z.custom((v) => Array.isArray(v)).default([]), + max_turns: z.number().optional(), + system_prompt: z.string().default(''), +}); +export type RunStartPayload = z.infer; +export type RunStartResult = { session_id: string }; + +// --- turn::{state} durable step --- +export const TurnStepPayloadSchema = SessionIdPayloadSchema; +export type TurnStepPayload = z.infer; +export type TurnStepResult = + | { ok: true; from_state: TurnState; to_state: TurnState } + | { ok: true; skipped: true; reason: 'stale' }; + +// --- turn::get_state --- +export const GetStatePayloadSchema = SessionIdPayloadSchema; +export type GetStatePayload = z.infer; +export type GetStateResult = TurnStateRecord | null; + +// --- turn::is_abort_signal_set / turn::on_abort_signal (agent-scope state event) --- +const AgentAbortSignalWriteEventSchema = z.object({ + type: z.literal('state').optional(), + scope: z.literal('agent').optional(), + event_type: z.enum(['state:created', 'state:updated']), + key: z.string().regex(/^session\/[^/]+\/abort_signal$/), + new_value: z.literal(true), + old_value: z.union([z.literal(true), z.literal(false), z.null()]).optional(), +}); + +export const AbortSignalWriteEventSchema = AgentAbortSignalWriteEventSchema.transform((data) => { + const session_id = data.key.slice('session/'.length, -'/abort_signal'.length); + return { session_id }; +}); +export type ParsedAbortSignalWrite = z.infer; diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index f4b43a9a..5d080693 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -8,13 +8,10 @@ import type { FunctionCall } from '../types/function.js'; export type TurnState = | 'provisioning' - | 'awaiting_assistant' | 'assistant_streaming' | 'assistant_finished' - | 'function_prepare' | 'function_execute' | 'function_awaiting_approval' - | 'function_finalize' | 'steering_check' | 'tearing_down' | 'stopped'; @@ -37,6 +34,8 @@ export type TurnStateRecord = { started_at_ms: number; updated_at_ms: number; awaiting_approval?: AwaitingApprovalEntry[]; + /** Set during assistant_streaming when message_update deltas were emitted. */ + assistant_body_streamed?: boolean; }; export function newRecord(session_id: string, max_turns?: number): TurnStateRecord { @@ -60,21 +59,28 @@ export function transitionTo(rec: TurnStateRecord, next: TurnState): void { rec.updated_at_ms = Date.now(); } +/** + * Deep copy of a record via JSON round-trip — faithful to a `state::get` + * reload (the record is persisted as JSON), so the runner can snapshot the + * pre-mutation record and thread it into `saveRecord` instead of paying a + * second `state::get` to recover the previous state. + */ +export function cloneRecord(rec: TurnStateRecord): TurnStateRecord { + return JSON.parse(JSON.stringify(rec)) as TurnStateRecord; +} + export function isTerminal(rec: TurnStateRecord): boolean { return rec.state === 'stopped'; } +export function turnFnId(state: TurnState): string { + return `turn::${state}`; +} + export const messagesKey = (sid: string) => `session/${sid}/messages`; export const turnStateKey = (sid: string) => `session/${sid}/turn_state`; export const runRequestKey = (sid: string) => `session/${sid}/run_request`; -export const cwdKey = (sid: string) => `session/${sid}/cwd`; -export const cwdIndexKey = (hash: string) => `harness/cwd/${hash}/last_session_id`; -export const sandboxIdKey = (sid: string) => `session/${sid}/sandbox_id`; export const functionSchemasKey = (sid: string) => `session/${sid}/function_schemas`; -export const toolSchemasKey = (sid: string) => `session/${sid}/tool_schemas`; export const lastSessionTreeLenKey = (sid: string) => `session/${sid}/session_tree_mirror_len`; -export const lastCompactionAtKey = (sid: string) => `session/${sid}/last_compaction_at`; -export const lastCompactionConsumedAtKey = (sid: string) => - `session/${sid}/last_compaction_consumed_at`; export const eventCounterKey = (sid: string) => `session/${sid}/event_counter`; export const abortSignalKey = (sid: string) => `session/${sid}/abort_signal`; diff --git a/harness/src/turn-orchestrator/states/assistant-finished.ts b/harness/src/turn-orchestrator/states/assistant-finished.ts new file mode 100644 index 00000000..d3b59f5a --- /dev/null +++ b/harness/src/turn-orchestrator/states/assistant-finished.ts @@ -0,0 +1,122 @@ +/** + * `turn::assistant_finished`. Persist assistant message and route to steering or function execute. + * + * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. + * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. + */ + +import type { ISdk } from '../../runtime/iii.js'; +import { logger } from '../../runtime/otel.js'; +import type { AgentEvent } from '../../types/agent-event.js'; +import type { AssistantMessage } from '../../types/agent-message.js'; +import type { FunctionCall } from '../../types/function.js'; +import { missingFunctionResult, unwrapAgentTrigger } from '../agent-trigger.js'; +import { emit } from '../events.js'; +import type { PreparedEntry } from '../persistence.js'; +import * as persistence from '../persistence.js'; +import { runTransition } from '../run-transition.js'; +import { type TurnStateRecord, transitionTo } from '../state.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; + +function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { + const out: FunctionCall[] = []; + for (const b of msg.content) { + if (b.type === 'function_call') { + out.push({ id: b.id, function_id: b.function_id, arguments: b.arguments }); + } + } + return out; +} + +function assistantMessageComplete(asst: AssistantMessage, body_streamed: boolean): AgentEvent { + return { type: 'message_complete', message: asst, body_streamed }; +} + +export async function handleFinished(iii: ISdk, rec: TurnStateRecord): Promise { + const asst = rec.last_assistant; + if (!asst) { + throw new Error('assistant_finished without last_assistant'); + } + await emit( + iii, + rec.session_id, + assistantMessageComplete(asst, rec.assistant_body_streamed === true), + ); + const isErrorOrAborted = asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; + // Error/aborted assistant messages (e.g. provider auth failures, + // network blips, user aborts) are surfaced to the UI via the + // message_complete emitted above, but we deliberately + // keep them out of the session's persisted message history so the + // LLM's next-turn context doesn't accumulate transient infra noise. + if (!isErrorOrAborted) { + const messages = await persistence.loadMessages(iii, rec.session_id); + // Idempotency guard: handleFinished can re-enter (durable trigger + // retry, crash before transitionTo persists). Without this guard a + // second run pushes the SAME assistant message again. If that + // assistant has tool_calls, Anthropic rejects the next request with: + // "each tool_use must have a unique id". + // Detect by comparing timestamp + content shape against the last + // assistant message in flat-state; skip the push when they match. + const last = messages[messages.length - 1]; + const alreadyPersisted = + last && + last.role === 'assistant' && + last.timestamp === asst.timestamp && + last.model === asst.model && + last.provider === asst.provider; + if (alreadyPersisted) { + logger.warn('handleFinished: skipping duplicate assistant push (re-entry detected)', { + session_id: rec.session_id, + timestamp: asst.timestamp, + }); + } else { + messages.push(asst); + await persistence.saveMessages(iii, rec.session_id, messages); + } + } + + if (isErrorOrAborted) { + await emit(iii, rec.session_id, { + type: 'turn_end', + message: asst, + function_results: [], + }); + rec.turn_end_emitted = true; + transitionTo(rec, 'tearing_down'); + return; + } + const calls = extractFunctionCalls(asst); + if (calls.length === 0) { + transitionTo(rec, 'steering_check'); + return; + } + + rec.function_results = []; + rec.pending_function_calls = calls.map(unwrapAgentTrigger); + + const prepared: PreparedEntry[] = calls.map((raw) => { + const function_call = unwrapAgentTrigger(raw); + if (!function_call.function_id) { + return { function_call, blocked: missingFunctionResult() }; + } + return { function_call, blocked: null }; + }); + + await persistence.saveExecutedCalls(iii, rec.session_id, []); + await persistence.savePreparedCalls(iii, rec.session_id, prepared); + transitionTo(rec, 'function_execute'); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::assistant_finished', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'assistant_finished', handleFinished, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state assistant_finished: finalize assistant and route onward.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts new file mode 100644 index 00000000..35f3b912 --- /dev/null +++ b/harness/src/turn-orchestrator/states/assistant-streaming.ts @@ -0,0 +1,240 @@ +/** + * `turn::assistant_streaming`. Start turn, stream provider response, advance to finished. + * + * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. + * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. + */ + +import type { ISdk, StreamChannelRef } from '../../runtime/iii.js'; +import { logger } from '../../runtime/otel.js'; +import type { AssistantMessage } from '../../types/agent-message.js'; +import type { AgentFunction } from '../../types/function.js'; +import type { ProviderStreamInput } from '../../types/provider.js'; +import type { AssistantMessageEvent } from '../../types/stream-event.js'; +import { emit } from '../events.js'; +import * as persistence from '../persistence.js'; +import { runPreflight } from '../preflight.js'; +import { buildInput, decide, targetFunctionId } from '../provider-router.js'; +import { runTransition } from '../run-transition.js'; +import { type TurnStateRecord, transitionTo } from '../state.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; + +function eventPartial(ev: AssistantMessageEvent): AssistantMessage | null { + if ('partial' in ev) return ev.partial; + if (ev.type === 'done') return ev.message; + if (ev.type === 'error') return ev.error; + return null; +} + +function syntheticErrorAssistant( + provider: string, + model: string, + reason: string, +): AssistantMessage { + return { + role: 'assistant', + content: [{ type: 'text', text: reason }], + stop_reason: 'error', + error_message: reason, + error_kind: 'transient', + usage: null, + model, + provider, + timestamp: Date.now(), + }; +} + +function formatProviderError(err: unknown): string { + const raw = err instanceof Error ? err.message : String(err); + return raw + .replace(/^IIIInvocationError:\s*/i, '') + .replace(/^invocation_failed:\s*/i, '') + .trim(); +} + +export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { + if (rec.max_turns !== undefined && rec.turn_count >= rec.max_turns) { + const cap = rec.max_turns ?? 0; + const exhausted: AssistantMessage = { + role: 'assistant', + content: [{ type: 'text', text: `loop stopped: max_turns (${cap}) reached` }], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: '', + provider: '', + timestamp: Date.now(), + }; + await emit(iii, rec.session_id, { + type: 'message_complete', + message: exhausted, + body_streamed: false, + }); + await emit(iii, rec.session_id, { + type: 'turn_end', + message: exhausted, + function_results: [], + }); + rec.turn_end_emitted = true; + rec.last_assistant = exhausted; + const messages = await persistence.loadMessages(iii, rec.session_id); + messages.push(exhausted); + await persistence.saveMessages(iii, rec.session_id, messages); + transitionTo(rec, 'tearing_down'); + return; + } + rec.turn_count++; + rec.turn_end_emitted = false; + rec.assistant_body_streamed = false; + + const request = await persistence.loadRunRequest(iii, rec.session_id); + let messages = await persistence.loadMessages(iii, rec.session_id); + const schemas = await persistence.loadFunctionSchemas(iii, rec.session_id); + + const { provider, model, system_prompt } = request; + const tools = (Array.isArray(schemas) ? schemas : []) as AgentFunction[]; + + const decision = decide({ provider, model }); + const targetFn = targetFunctionId(decision); + + const preflightResult = await runPreflight( + iii, + rec.session_id, + messages, + decision.provider, + model, + ); + if (preflightResult === 'compacted') { + messages = await persistence.loadMessages(iii, rec.session_id); + } + + let channel: Awaited>; + try { + channel = await iii.createChannel(); + } catch (err) { + logger.warn('createChannel failed; falling back to synthetic error', { + err: String(err), + }); + rec.last_assistant = syntheticErrorAssistant( + decision.provider, + decision.model, + `create_channel failed: ${String(err)}`, + ); + transitionTo(rec, 'assistant_finished'); + return; + } + + const messageQueue: string[] = []; + let done = false; + let resolveNext: (() => void) | null = null; + channel.reader.onMessage((msg: string) => { + messageQueue.push(msg); + if (resolveNext) { + const fn = resolveNext; + resolveNext = null; + fn(); + } + }); + channel.reader.stream.resume(); + + const input: ProviderStreamInput = buildInput( + decision, + channel.writerRef as StreamChannelRef, + system_prompt, + messages, + tools, + ); + + let triggerError: string | null = null; + const triggerPromise = iii + .trigger({ + function_id: targetFn, + payload: input, + timeoutMs: 300_000, + }) + .catch((err) => { + logger.warn('provider stream trigger failed', { targetFn, err: String(err) }); + triggerError = formatProviderError(err); + done = true; + if (resolveNext) { + const fn = resolveNext; + resolveNext = null; + fn(); + } + return null; + }); + + const readPromise = (async (): Promise => { + let final: AssistantMessage | null = null; + while (!done) { + while (messageQueue.length > 0) { + const text = messageQueue.shift(); + if (text === undefined) break; + let event: AssistantMessageEvent | null = null; + try { + event = JSON.parse(text) as AssistantMessageEvent; + } catch (err) { + logger.warn('decode AssistantMessageEvent failed', { + session_id: rec.session_id, + err: String(err), + }); + continue; + } + const partial = eventPartial(event); + if (partial) final = partial; + if (event.type !== 'done' && event.type !== 'error') { + if (partial) { + await emit(iii, rec.session_id, { + type: 'message_update', + message: partial, + llm_event: event, + }); + if (event.type === 'text_delta' || event.type === 'thinking_delta') { + rec.assistant_body_streamed = true; + } + } + continue; + } + if (event.type === 'done') final = event.message; + else final = event.error; + done = true; + break; + } + if (done) break; + await new Promise((r) => { + resolveNext = r; + }); + } + return final; + })(); + + const [, finalMsg] = await Promise.all([triggerPromise, readPromise]); + if (finalMsg) { + rec.last_assistant = finalMsg; + } else { + const errorText = triggerError ?? 'provider channel closed without final'; + const synthetic = syntheticErrorAssistant(decision.provider, decision.model, errorText); + await emit(iii, rec.session_id, { + type: 'message_update', + message: synthetic, + llm_event: { type: 'text_delta', partial: synthetic, delta: errorText }, + }); + rec.last_assistant = synthetic; + } + transitionTo(rec, 'assistant_finished'); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::assistant_streaming', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'assistant_streaming', handleStreaming, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state assistant_streaming: start turn and stream provider response.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/states/assistant.ts b/harness/src/turn-orchestrator/states/assistant.ts deleted file mode 100644 index f168f1be..00000000 --- a/harness/src/turn-orchestrator/states/assistant.ts +++ /dev/null @@ -1,351 +0,0 @@ -/** - * `awaiting_assistant`, `assistant_streaming`, `assistant_finished`. Phase 2.A - * channel-based streaming lives in `handleStreaming`. - */ - -import type { ISdk, StreamChannelRef } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import type { AgentEvent } from '../../types/agent-event.js'; -import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; -import type { ContentBlock } from '../../types/content.js'; -import type { AgentFunction, FunctionCall } from '../../types/function.js'; -import type { ProviderStreamInput } from '../../types/provider.js'; -import type { AssistantMessageEvent, StopReason } from '../../types/stream-event.js'; -import { emit } from '../events.js'; -import * as persistence from '../persistence.js'; -import { buildInput, decide, targetFunctionId } from '../provider-router.js'; -import { runPreflight } from '../preflight.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; - -export async function handleAwaiting(iii: ISdk, rec: TurnStateRecord): Promise { - if (rec.max_turns !== undefined && rec.turn_count >= rec.max_turns) { - const cap = rec.max_turns ?? 0; - const exhausted: AssistantMessage = { - role: 'assistant', - content: [{ type: 'text', text: `loop stopped: max_turns (${cap}) reached` }], - stop_reason: 'end', - error_message: null, - error_kind: null, - usage: null, - model: '', - provider: '', - timestamp: Date.now(), - }; - await emit(iii, rec.session_id, { type: 'message_start', message: exhausted }); - await emit(iii, rec.session_id, { type: 'message_end', message: exhausted }); - await emit(iii, rec.session_id, { - type: 'turn_end', - message: exhausted, - function_results: [], - }); - rec.turn_end_emitted = true; - rec.last_assistant = exhausted; - const messages = await persistence.loadMessages(iii, rec.session_id); - messages.push(exhausted); - await persistence.saveMessages(iii, rec.session_id, messages); - transitionTo(rec, 'tearing_down'); - return; - } - rec.turn_count++; - rec.turn_end_emitted = false; - await emit(iii, rec.session_id, { type: 'turn_start' }); - transitionTo(rec, 'assistant_streaming'); -} - -function eventPartial(ev: AssistantMessageEvent): AssistantMessage | null { - if ('partial' in ev) return ev.partial; - if (ev.type === 'done') return ev.message; - if (ev.type === 'error') return ev.error; - return null; -} - -function latestFunctionCall( - msg: AssistantMessage, -): { id: string; function_id: string; args: unknown } | null { - for (let i = msg.content.length - 1; i >= 0; i--) { - const b = msg.content[i]; - if (b?.type === 'function_call') { - return { id: b.id, function_id: b.function_id, args: b.arguments }; - } - } - return null; -} - -function syntheticErrorAssistant( - provider: string, - model: string, - reason: string, -): AssistantMessage { - return { - role: 'assistant', - content: [{ type: 'text', text: reason }], - stop_reason: 'error', - error_message: reason, - error_kind: 'transient', - usage: null, - model, - provider, - timestamp: Date.now(), - }; -} - -/** - * Strip iii-sdk's `IIIInvocationError: invocation_failed: ` framing from - * a thrown trigger error so the user-visible message is just the - * underlying cause (e.g. "auth::get_token returned no credential for - * provider=openai"). - */ -function formatProviderError(err: unknown): string { - const raw = err instanceof Error ? err.message : String(err); - return raw - .replace(/^IIIInvocationError:\s*/i, '') - .replace(/^invocation_failed:\s*/i, '') - .trim(); -} - -export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { - const request = await persistence.loadRunRequest(iii, rec.session_id); - let messages = await persistence.loadMessages(iii, rec.session_id); - const schemas = await persistence.loadFunctionSchemas(iii, rec.session_id); - - const provider = typeof request.provider === 'string' ? (request.provider as string) : ''; - const model = typeof request.model === 'string' ? (request.model as string) : ''; - const system_prompt = - typeof request.system_prompt === 'string' ? (request.system_prompt as string) : null; - const tools = (Array.isArray(schemas) ? schemas : []) as AgentFunction[]; - - const decision = decide({ provider, model }); - const targetFn = targetFunctionId(decision); - - // Pre-flight: if projected token usage would overflow the model's context - // window, trigger compact_now synchronously before opening the provider - // channel. On compaction, reload messages so the provider sees the trimmed - // history. ContextOverflowError / CompactionBusyError propagate up and are - // handled as a transient error by the step loop (the session will retry on - // the next wake or surface the error to the caller). - const preflightResult = await runPreflight( - iii, - rec.session_id, - messages, - decision.provider, - model, - ); - if (preflightResult === 'compacted') { - messages = await persistence.loadMessages(iii, rec.session_id); - } - - // Open a channel; provider writes AssistantMessageEvent JSON into it. - let channel: Awaited>; - try { - channel = await iii.createChannel(); - } catch (err) { - logger.warn('createChannel failed; falling back to synthetic error', { - err: String(err), - }); - rec.last_assistant = syntheticErrorAssistant( - decision.provider, - decision.model, - `create_channel failed: ${String(err)}`, - ); - transitionTo(rec, 'assistant_finished'); - return; - } - - const messageQueue: string[] = []; - let done = false; - let resolveNext: (() => void) | null = null; - channel.reader.onMessage((msg: string) => { - messageQueue.push(msg); - if (resolveNext) { - const fn = resolveNext; - resolveNext = null; - fn(); - } - }); - // iii-sdk@0.12.0's ChannelReader.onMessage doesn't open the read-side - // WebSocket — only stream.read / readAll do. Without this resume(), the - // provider's writes are dropped engine-side and the queue stays empty. - channel.reader.stream.resume(); - - const input: ProviderStreamInput = buildInput( - decision, - channel.writerRef as StreamChannelRef, - system_prompt, - messages, - tools, - ); - - // Capture the trigger error (if any) so the synthetic assistant message - // below carries the *actual* cause (e.g. "no credential for - // provider=openai") instead of a generic "channel closed without final". - let triggerError: string | null = null; - const triggerPromise = iii - .trigger({ - function_id: targetFn, - payload: input, - timeoutMs: 300_000, - }) - .catch((err) => { - logger.warn('provider stream trigger failed', { targetFn, err: String(err) }); - triggerError = formatProviderError(err); - done = true; - if (resolveNext) { - const fn = resolveNext; - resolveNext = null; - fn(); - } - return null; - }); - - const readPromise = (async (): Promise => { - let final: AssistantMessage | null = null; - while (!done) { - while (messageQueue.length > 0) { - const text = messageQueue.shift(); - if (text === undefined) break; - let event: AssistantMessageEvent | null = null; - try { - event = JSON.parse(text) as AssistantMessageEvent; - } catch (err) { - logger.warn('decode AssistantMessageEvent failed', { - session_id: rec.session_id, - err: String(err), - }); - continue; - } - const partial = eventPartial(event); - if (partial) final = partial; - if (event.type !== 'done' && event.type !== 'error') { - if (partial) { - await emit(iii, rec.session_id, { - type: 'message_update', - message: partial, - llm_event: event, - }); - if (event.type === 'functioncall_start' || event.type === 'functioncall_delta') { - const fc = latestFunctionCall(partial); - if (fc) { - await emit(iii, rec.session_id, { - type: 'function_execution_update', - function_call_id: fc.id, - function_id: fc.function_id, - args: fc.args, - partial_result: null, - }); - } - } - } - continue; - } - // Terminal event: capture final message and break out. - if (event.type === 'done') final = event.message; - else final = event.error; - done = true; - break; - } - if (done) break; - await new Promise((r) => { - resolveNext = r; - }); - } - return final; - })(); - - const [, finalMsg] = await Promise.all([triggerPromise, readPromise]); - if (finalMsg) { - rec.last_assistant = finalMsg; - } else { - // Trigger failed or channel closed without a terminal frame. The - // provider didn't get to stream any text_delta events, so the UI - // never populated its renderer. Emit a synthetic message_update - // carrying the error as a text_delta so the existing UI translator - // (which assumes deltas drive the chat text) shows the error. - const errorText = triggerError ?? 'provider channel closed without final'; - const synthetic = syntheticErrorAssistant(decision.provider, decision.model, errorText); - await emit(iii, rec.session_id, { - type: 'message_update', - message: synthetic, - llm_event: { type: 'text_delta', partial: synthetic, delta: errorText }, - }); - rec.last_assistant = synthetic; - } - transitionTo(rec, 'assistant_finished'); -} - -function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { - const out: FunctionCall[] = []; - for (const b of msg.content) { - if (b.type === 'function_call') { - out.push({ id: b.id, function_id: b.function_id, arguments: b.arguments }); - } - } - return out; -} - -export function assistantLifecycleEvents(asst: AssistantMessage): AgentEvent[] { - return [ - { type: 'message_start', message: asst }, - { type: 'message_end', message: asst }, - ]; -} - -export async function handleFinished(iii: ISdk, rec: TurnStateRecord): Promise { - const asst = rec.last_assistant; - if (!asst) { - throw new Error('assistant_finished without last_assistant'); - } - for (const evt of assistantLifecycleEvents(asst)) { - await emit(iii, rec.session_id, evt); - } - const isErrorOrAborted = asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; - // Error/aborted assistant messages (e.g. provider auth failures, - // network blips, user aborts) are surfaced to the UI via the - // MessageStart/MessageEnd events emitted above, but we deliberately - // keep them out of the session's persisted message history so the - // LLM's next-turn context doesn't accumulate transient infra noise. - if (!isErrorOrAborted) { - const messages = await persistence.loadMessages(iii, rec.session_id); - // Idempotency guard: handleFinished can re-enter (durable trigger - // retry, crash before transitionTo persists). Without this guard a - // second run pushes the SAME assistant message again. If that - // assistant has tool_calls, Anthropic rejects the next request with: - // "each tool_use must have a unique id". - // Detect by comparing timestamp + content shape against the last - // assistant message in flat-state; skip the push when they match. - const last = messages[messages.length - 1]; - const alreadyPersisted = - last && - last.role === 'assistant' && - last.timestamp === asst.timestamp && - last.model === asst.model && - last.provider === asst.provider; - if (alreadyPersisted) { - logger.warn('handleFinished: skipping duplicate assistant push (re-entry detected)', { - session_id: rec.session_id, - timestamp: asst.timestamp, - }); - } else { - messages.push(asst); - await persistence.saveMessages(iii, rec.session_id, messages); - } - } - - if (isErrorOrAborted) { - await emit(iii, rec.session_id, { - type: 'turn_end', - message: asst, - function_results: [], - }); - rec.turn_end_emitted = true; - transitionTo(rec, 'tearing_down'); - return; - } - const calls = extractFunctionCalls(asst); - if (calls.length === 0) { - transitionTo(rec, 'steering_check'); - } else { - rec.pending_function_calls = calls; - transitionTo(rec, 'function_prepare'); - } -} -// reload 1779112003 diff --git a/harness/src/turn-orchestrator/states/function-awaiting-approval.ts b/harness/src/turn-orchestrator/states/function-awaiting-approval.ts new file mode 100644 index 00000000..2ddcc936 --- /dev/null +++ b/harness/src/turn-orchestrator/states/function-awaiting-approval.ts @@ -0,0 +1,112 @@ +/** + * `turn::function_awaiting_approval`. Read approval decisions and resume execute. + * + * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. + * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. + */ + +import { ApprovalResumePayloadSchema, STATE_SCOPE } from '../../approval-gate/schemas.js'; +import type { z } from 'zod'; +import type { ISdk } from '../../runtime/iii.js'; +import type { FunctionResult } from '../../types/function.js'; +import { text } from '../../types/content.js'; +import * as persistence from '../persistence.js'; +import { runTransition } from '../run-transition.js'; +import { type TurnStateRecord, transitionTo } from '../state.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; + +export type ApprovalDecision = z.infer; + +/** Decode stored approval decision from `state::get` (scope `approvals`). */ +export function parseApprovalDecision(value: unknown): ApprovalDecision | null { + const parsed = ApprovalResumePayloadSchema.safeParse(value); + return parsed.success ? parsed.data : null; +} + +async function readDecision( + iii: ISdk, + session_id: string, + function_call_id: string, +): Promise { + const key = `${session_id}/${function_call_id}`; + const raw = await iii.trigger({ + function_id: 'state::get', + payload: { scope: STATE_SCOPE, key }, + }); + return parseApprovalDecision(raw); +} + +function denialResultFromDecision(decision: ApprovalDecision): FunctionResult { + const reason = + decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); + const message = + decision.decision === 'aborted' + ? `Function call aborted: ${reason}` + : `Permission denied by user: ${reason}`; + return { + content: [text(message)], + details: { + approval_denied: true, + decision: decision.decision, + reason, + }, + terminate: false, + }; +} + +export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { + const awaiting = rec.awaiting_approval ?? []; + if (awaiting.length === 0) { + transitionTo(rec, 'function_execute'); + return; + } + + const decisions = await Promise.all( + awaiting.map((entry) => readDecision(iii, rec.session_id, entry.function_call_id)), + ); + + if (decisions.some((decision) => decision === null)) { + return; + } + + const prepared = await persistence.loadPreparedCalls(iii, rec.session_id); + for (let i = 0; i < awaiting.length; i++) { + const entry = awaiting[i]; + const decision = decisions[i]; + if (!entry || !decision) continue; + const idx = prepared.findIndex( + (preparedEntry) => preparedEntry.function_call.id === entry.function_call_id, + ); + if (idx < 0) continue; + const current = prepared[idx]; + if (!current) continue; + if (decision.decision === 'allow') { + prepared[idx] = { ...current, pre_approved: true, blocked: null }; + } else { + prepared[idx] = { + ...current, + pre_approved: false, + blocked: denialResultFromDecision(decision), + }; + } + } + + await persistence.savePreparedCalls(iii, rec.session_id, prepared); + + rec.awaiting_approval = []; + transitionTo(rec, 'function_execute'); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::function_awaiting_approval', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'function_awaiting_approval', handleAwaitingApproval, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state function_awaiting_approval: read approval decisions and resume.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts new file mode 100644 index 00000000..87fe86aa --- /dev/null +++ b/harness/src/turn-orchestrator/states/function-execute.ts @@ -0,0 +1,267 @@ +/** + * `turn::function_execute`. Run prepared function calls, finalize results, route onward. + * + * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. + * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. + */ + +import type { ISdk } from '../../runtime/iii.js'; +import { logger } from '../../runtime/otel.js'; +import type { AgentEvent } from '../../types/agent-event.js'; +import type { + AgentMessage, + AssistantMessage, + FunctionResultMessage, +} from '../../types/agent-message.js'; +import type { FunctionCall, FunctionResult } from '../../types/function.js'; +import { dispatchWithHook, isErrorResult, triggerFunctionCall } from '../agent-trigger.js'; +import { registerApprovalResume } from '../approval-resume.js'; +import { emit } from '../events.js'; +import { publishAfter } from '../hook.js'; +import * as persistence from '../persistence.js'; +import type { ExecutedEntry } from '../persistence.js'; +import { runTransition } from '../run-transition.js'; +import { type TurnStateRecord, transitionTo } from '../state.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; + +function buildFunctionExecutionEnd( + fc: FunctionCall, + result: FunctionResult, + is_error: boolean, + duration_ms: number, +): AgentEvent { + return { + type: 'function_execution_end', + function_call_id: fc.id, + function_id: fc.function_id, + result, + is_error, + duration_ms, + }; +} + +function augmentFunctionCall(fc: FunctionCall, session_id: string): FunctionCall { + let augmented_args: unknown; + if (fc.arguments && typeof fc.arguments === 'object' && !Array.isArray(fc.arguments)) { + augmented_args = { ...(fc.arguments as Record) }; + } else { + augmented_args = { arguments: fc.arguments }; + } + if (typeof augmented_args === 'object' && augmented_args !== null) { + const obj = augmented_args as Record; + obj.session_id = session_id; + obj.function_call_id = fc.id; + obj.function_id = fc.function_id; + obj.function_call = { + id: fc.id, + function_id: fc.function_id, + arguments: fc.arguments, + }; + } + return { id: fc.id, function_id: fc.function_id, arguments: augmented_args }; +} + +async function commitExecutedCall( + iii: ISdk, + rec: TurnStateRecord, + results: ExecutedEntry[], + fc: FunctionCall, + result: FunctionResult, + startedAt: number, + is_error?: boolean, +): Promise { + const duration_ms = Date.now() - startedAt; + const error = is_error ?? isErrorResult(result); + persistence.upsertExecutedCall(results, { + function_call: fc, + result, + is_error: error, + duration_ms, + }); + await persistence.saveExecutedCalls(iii, rec.session_id, results); + await emit(iii, rec.session_id, buildFunctionExecutionEnd(fc, result, error, duration_ms)); +} + +function buildFinalizeLifecycle( + asst: AssistantMessage, + results: FunctionResultMessage[], +): AgentEvent[] { + const out: AgentEvent[] = [{ type: 'turn_end', message: asst, function_results: results }]; + return out; +} + +async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { + const executed = await persistence.loadExecutedCalls(iii, rec.session_id); + const function_results: FunctionResultMessage[] = []; + let all_terminate = executed.length > 0; + for (const e of executed) { + let result = e.result; + const merged = await publishAfter(iii, e.function_call, result); + if ( + merged && + typeof merged === 'object' && + Array.isArray((merged as Record).content) + ) { + result = merged as FunctionResult; + } + if (!result.terminate) all_terminate = false; + function_results.push({ + role: 'function_result', + function_call_id: e.function_call.id, + function_id: e.function_call.function_id, + content: result.content, + details: result.details, + is_error: e.is_error, + timestamp: Date.now(), + }); + } + const messages = await persistence.loadMessages(iii, rec.session_id); + // Idempotency guard: handleFinalize can re-enter (durable trigger retry, + // step-fanout race, crash mid-finalize before transitionTo persists). + // executedCalls is only cleared at the start of the NEXT handlePrepare, + // so a second run reads the SAME results and would push duplicates into + // flat-state. Skip any function_result whose function_call_id is already + // present. Anthropic rejects duplicate `tool_result` blocks with id: + // "each tool_use must have a single result. Found multiple tool_result + // blocks with id: toolu_..." + // and any provider's wire-messages flush would produce them otherwise. + // Only the most-recent function_result block matters for dedup — + // duplicates only appear when the re-entry runs against a slice + // we already wrote in this same finalize, so walking from the tail + // and stopping once we pass the boundary of pre-existing results + // is sufficient. Pre-fix this scanned every message from the head + // on every finalize, which grew O(history) per turn for a guard + // that only ever protects against ~10 entries. + const incomingIds = new Set(); + for (const r of function_results) incomingIds.add(r.function_call_id); + const existingResultIds = new Set(); + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i]; + if (!m) continue; + if (m.role === 'function_result') { + existingResultIds.add(m.function_call_id); + continue; + } + if (m.role === 'assistant') { + // Once we cross an assistant boundary BEFORE seeing any + // pending incoming id we've passed the turn this finalize + // is writing for — earlier function_result blocks can't be + // duplicates of `function_results`. + let unseen = false; + for (const id of incomingIds) { + if (!existingResultIds.has(id)) { + unseen = true; + break; + } + } + if (!unseen) break; + } + } + let appended = 0; + for (const r of function_results) { + if (existingResultIds.has(r.function_call_id)) continue; + messages.push(r as AgentMessage); + existingResultIds.add(r.function_call_id); + appended++; + } + if (appended < function_results.length) { + logger.warn('handleFinalize: skipped duplicate function_results (re-entry detected)', { + session_id: rec.session_id, + total: function_results.length, + appended, + skipped: function_results.length - appended, + }); + } + await persistence.saveMessages(iii, rec.session_id, messages); + + const asst = rec.last_assistant; + rec.function_results = function_results; + rec.pending_function_calls = []; + // Clear persisted executedCalls now so a re-entry into handleFinalize + // (durable retry, crash before transitionTo) finds an empty set and + // produces zero new function_results to push. Belt+suspenders alongside + // the idempotency guard above. handlePrepare also clears at the start + // of the NEXT turn, but that's too late if re-entry happens before then. + await persistence.saveExecutedCalls(iii, rec.session_id, []); + + if (asst) { + for (const evt of buildFinalizeLifecycle(asst, function_results)) { + await emit(iii, rec.session_id, evt); + } + rec.turn_end_emitted = true; + } + transitionTo(rec, all_terminate ? 'tearing_down' : 'steering_check'); +} + +export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { + const prepared = await persistence.loadPreparedCalls(iii, rec.session_id); + const results = await persistence.loadExecutedCalls(iii, rec.session_id); + + for (const entry of prepared) { + const fc = entry.function_call; + await emit(iii, rec.session_id, { + type: 'function_execution_start', + function_call_id: fc.id, + function_id: fc.function_id, + args: fc.arguments, + }); + const startedAt = Date.now(); + + const existing = persistence.findExecutedCall(results, fc.id); + if (existing) { + await emit( + iii, + rec.session_id, + buildFunctionExecutionEnd(fc, existing.result, existing.is_error, existing.duration_ms), + ); + continue; + } + + if (entry.pre_approved === true) { + await commitExecutedCall( + iii, + rec, + results, + fc, + await triggerFunctionCall(iii, fc), + startedAt, + ); + continue; + } + + if (entry.blocked) { + await commitExecutedCall(iii, rec, results, fc, entry.blocked, startedAt, true); + continue; + } + + const out = await dispatchWithHook(iii, augmentFunctionCall(fc, rec.session_id)); + if (out.kind === 'pending') { + rec.awaiting_approval = rec.awaiting_approval ?? []; + rec.awaiting_approval.push({ + function_call_id: fc.id, + function_id: fc.function_id, + args: fc.arguments, + }); + registerApprovalResume(iii, rec.session_id, fc.id); + transitionTo(rec, 'function_awaiting_approval'); + return; + } + + await commitExecutedCall(iii, rec, results, fc, out.result, startedAt); + } + await finalizeExecutedCalls(iii, rec); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::function_execute', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'function_execute', handleExecute, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state function_execute: dispatch prepared calls and finalize results.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/states/functions.ts b/harness/src/turn-orchestrator/states/functions.ts deleted file mode 100644 index 48e50d48..00000000 --- a/harness/src/turn-orchestrator/states/functions.ts +++ /dev/null @@ -1,449 +0,0 @@ -/** - * `function_prepare`, `function_execute`, `function_finalize`. Mirrors - * `turn-orchestrator/src/states/functions.rs`. - */ - -import { STATE_SCOPE } from '../../approval-gate/schemas.js'; -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import type { AgentEvent } from '../../types/agent-event.js'; -import type { - AgentMessage, - AssistantMessage, - FunctionResultMessage, -} from '../../types/agent-message.js'; -import { text } from '../../types/content.js'; -import type { FunctionCall, FunctionResult } from '../../types/function.js'; -import { TOOL_NAME, dispatchWithHook, isErrorResult } from '../agent-trigger.js'; -import { registerApprovalResume } from '../approval-resume.js'; -import type { TurnOrchestratorConfig } from '../config.js'; -import { emit } from '../events.js'; -import { publishAfter } from '../hook.js'; -import type { PreparedEntry } from '../persistence.js'; -import * as persistence from '../persistence.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; - -type ApprovalDecisionRecord = { - decision: 'allow' | 'deny' | 'aborted'; - reason: string | null; -}; - -function unwrapAgentTrigger(fc: FunctionCall): FunctionCall { - if (fc.function_id !== TOOL_NAME) return fc; - const args = (fc.arguments ?? {}) as Record; - const fn = typeof args.function === 'string' ? args.function : ''; - const payload = args.payload ?? {}; - return { id: fc.id, function_id: fn, arguments: payload }; -} - -export async function handlePrepare(iii: ISdk, rec: TurnStateRecord): Promise { - rec.function_results = []; - const raw = rec.pending_function_calls; - rec.pending_function_calls = raw.map(unwrapAgentTrigger); - - const prepared: PreparedEntry[] = rec.pending_function_calls.map((fc) => ({ - function_call: fc, - blocked: null, - })); - - await persistence.saveRecord(iii, rec); - await persistence.saveExecutedCalls(iii, rec.session_id, []); - await persistence.savePreparedCalls(iii, rec.session_id, prepared); - - transitionTo(rec, 'function_execute'); -} - -export async function handleExecute( - iii: ISdk, - cfg: TurnOrchestratorConfig, - rec: TurnStateRecord, -): Promise { - const prepared = await persistence.loadPreparedCalls(iii, rec.session_id); - const results = await persistence.loadExecutedCalls(iii, rec.session_id); - - for (const entry of prepared) { - const fc = entry.function_call; - await emit(iii, rec.session_id, { - type: 'function_execution_start', - function_call_id: fc.id, - function_id: fc.function_id, - args: fc.arguments, - }); - /* `startedAt` is captured right after the start emit so the measured - window matches what the consumer sees on the wire. Each non-replay - branch computes its own delta; `existing` reuses the persisted one. */ - const startedAt = Date.now(); - - const existing = persistence.findExecutedCall(results, fc.id); - if (existing) { - /* Replay: reuse the persisted duration so a resumed run shows the - original timing, not the ~0ms it takes to re-emit. */ - await emit( - iii, - rec.session_id, - buildFunctionExecutionEnd(fc, existing.result, existing.is_error, existing.duration_ms), - ); - continue; - } - - if (entry.pre_approved === true) { - let result: FunctionResult; - let is_error: boolean; - let duration_ms: number; - try { - const value = await iii.trigger({ - function_id: fc.function_id, - payload: fc.arguments ?? {}, - }); - result = decodeOrPassthroughResult(value); - is_error = isErrorResult(result); - } catch (err) { - result = triggerErrorResult(fc.function_id, err); - is_error = true; - } - - duration_ms = Date.now() - startedAt; - persistence.upsertExecutedCall(results, { - function_call: fc, - result, - is_error, - duration_ms, - }); - - await persistence.saveExecutedCalls(iii, rec.session_id, results); - await emit(iii, rec.session_id, buildFunctionExecutionEnd(fc, result, is_error, duration_ms)); - continue; - } - - if (entry.blocked) { - const result = entry.blocked; - const is_error = true; - /* Denial is effectively instant — local delta captures whatever - time the persist + emit roundtrip takes, which is honest. */ - const duration_ms = Date.now() - startedAt; - persistence.upsertExecutedCall(results, { - function_call: fc, - result, - is_error, - duration_ms, - }); - await persistence.saveExecutedCalls(iii, rec.session_id, results); - await emit(iii, rec.session_id, buildFunctionExecutionEnd(fc, result, is_error, duration_ms)); - continue; - } - - // Augment the per-call args with session/fc context — same as Rust. - let augmented_args: unknown; - if (fc.arguments && typeof fc.arguments === 'object' && !Array.isArray(fc.arguments)) { - augmented_args = { ...(fc.arguments as Record) }; - } else { - augmented_args = { arguments: fc.arguments }; - } - if (typeof augmented_args === 'object' && augmented_args !== null) { - const obj = augmented_args as Record; - obj.session_id = rec.session_id; - obj.function_call_id = fc.id; - obj.function_id = fc.function_id; - obj.function_call = { - id: fc.id, - function_id: fc.function_id, - arguments: fc.arguments, - }; - } - const augmentedFc: FunctionCall = { - id: fc.id, - function_id: fc.function_id, - arguments: augmented_args, - }; - const out = await dispatchWithHook(iii, augmentedFc, rec.session_id); - - if (out.kind === 'pending') { - /* No end emit; `startedAt` is discarded. On resume, the loop re-enters - and a fresh `function_execution_start` resets the timer — approval - wait time is naturally excluded from the eventual duration. */ - rec.awaiting_approval = rec.awaiting_approval ?? []; - rec.awaiting_approval.push({ - function_call_id: fc.id, - function_id: fc.function_id, - args: fc.arguments, - }); - registerApprovalResume(iii, rec.session_id, fc.id); - transitionTo(rec, 'function_awaiting_approval'); - return; - } - - const result = out.result; - const is_error = out.kind === 'deny' || isErrorResult(result); - const duration_ms = Date.now() - startedAt; - - persistence.upsertExecutedCall(results, { - function_call: fc, - result, - is_error, - duration_ms, - }); - - // Kick off persistence in parallel with the user-facing emit so the UI's - // fcall-end lands ~one trigger round-trip sooner. We still await both - // before the next iteration so ordering and durability are preserved. - const savePromise = persistence.saveExecutedCalls(iii, rec.session_id, results); - await emit(iii, rec.session_id, buildFunctionExecutionEnd(fc, result, is_error, duration_ms)); - await savePromise; - } - transitionTo(rec, 'function_finalize'); -} - -function triggerErrorResult(function_id: string, err: unknown): FunctionResult { - const message = - err && typeof err === 'object' && typeof (err as Record).message === 'string' - ? ((err as Record).message as string) - : String(err); - const details = { - error: 'trigger_failed', - function: function_id, - message, - }; - return { - content: [text(JSON.stringify(details))], - details, - terminate: false, - }; -} - -function decodeOrPassthroughResult(value: unknown): FunctionResult { - if ( - value && - typeof value === 'object' && - Array.isArray((value as Record).content) - ) { - const obj = value as Record; - return { - content: obj.content as FunctionResult['content'], - details: obj.details ?? {}, - terminate: typeof obj.terminate === 'boolean' ? obj.terminate : false, - }; - } - const textBody = typeof value === 'string' ? value : JSON.stringify(value); - return { - content: [text(textBody)], - details: value, - terminate: false, - }; -} - -async function readDecision( - iii: ISdk, - session_id: string, - function_call_id: string, -): Promise { - const key = `${session_id}/${function_call_id}`; - const raw = await iii.trigger({ - function_id: 'state::get', - payload: { scope: STATE_SCOPE, key }, - }); - if (!raw || typeof raw !== 'object') return null; - const obj = raw as Record; - const decision = obj.decision; - if (decision !== 'allow' && decision !== 'deny' && decision !== 'aborted') return null; - return { - decision, - reason: typeof obj.reason === 'string' ? obj.reason : null, - }; -} - -function denialResultFromDecision(decision: ApprovalDecisionRecord): FunctionResult { - const reason = - decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); - const message = - decision.decision === 'aborted' - ? `Function call aborted: ${reason}` - : `Permission denied by user: ${reason}`; - return { - content: [text(message)], - details: { - approval_denied: true, - decision: decision.decision, - reason, - }, - terminate: false, - }; -} - -export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { - const awaiting = rec.awaiting_approval ?? []; - if (awaiting.length === 0) { - transitionTo(rec, 'function_execute'); - return; - } - - const decisions = await Promise.all( - awaiting.map((entry) => readDecision(iii, rec.session_id, entry.function_call_id)), - ); - - if (decisions.some((decision) => decision === null)) { - return; - } - - const prepared = await persistence.loadPreparedCalls(iii, rec.session_id); - for (let i = 0; i < awaiting.length; i++) { - const entry = awaiting[i]; - const decision = decisions[i]; - if (!entry || !decision) continue; - const idx = prepared.findIndex( - (preparedEntry) => preparedEntry.function_call.id === entry.function_call_id, - ); - if (idx < 0) continue; - const current = prepared[idx]; - if (!current) continue; - if (decision.decision === 'allow') { - prepared[idx] = { ...current, pre_approved: true, blocked: null }; - } else { - prepared[idx] = { - ...current, - pre_approved: false, - blocked: denialResultFromDecision(decision), - }; - } - } - - await persistence.savePreparedCalls(iii, rec.session_id, prepared); - - rec.awaiting_approval = []; - transitionTo(rec, 'function_execute'); -} - -function buildFunctionExecutionEnd( - fc: FunctionCall, - result: FunctionResult, - is_error: boolean, - duration_ms: number, -): AgentEvent { - return { - type: 'function_execution_end', - function_call_id: fc.id, - function_id: fc.function_id, - result, - is_error, - duration_ms, - }; -} - -function buildFinalizeLifecycle( - asst: AssistantMessage, - results: FunctionResultMessage[], -): AgentEvent[] { - const out: AgentEvent[] = []; - for (const r of results) { - out.push({ type: 'message_start', message: r }); - out.push({ type: 'message_end', message: r }); - } - out.push({ type: 'turn_end', message: asst, function_results: results }); - return out; -} - -export async function handleFinalize(iii: ISdk, rec: TurnStateRecord): Promise { - const executed = await persistence.loadExecutedCalls(iii, rec.session_id); - const function_results: FunctionResultMessage[] = []; - let all_terminate = executed.length > 0; - for (const e of executed) { - let result = e.result; - const merged = await publishAfter(iii, e.function_call, result); - if ( - merged && - typeof merged === 'object' && - Array.isArray((merged as Record).content) - ) { - result = merged as FunctionResult; - } - if (!result.terminate) all_terminate = false; - function_results.push({ - role: 'function_result', - function_call_id: e.function_call.id, - function_id: e.function_call.function_id, - content: result.content, - details: result.details, - is_error: e.is_error, - timestamp: Date.now(), - }); - } - const messages = await persistence.loadMessages(iii, rec.session_id); - // Idempotency guard: handleFinalize can re-enter (durable trigger retry, - // step-fanout race, crash mid-finalize before transitionTo persists). - // executedCalls is only cleared at the start of the NEXT handlePrepare, - // so a second run reads the SAME results and would push duplicates into - // flat-state. Skip any function_result whose function_call_id is already - // present. Anthropic rejects duplicate `tool_result` blocks with id: - // "each tool_use must have a single result. Found multiple tool_result - // blocks with id: toolu_..." - // and any provider's wire-messages flush would produce them otherwise. - // Only the most-recent function_result block matters for dedup — - // duplicates only appear when the re-entry runs against a slice - // we already wrote in this same finalize, so walking from the tail - // and stopping once we pass the boundary of pre-existing results - // is sufficient. Pre-fix this scanned every message from the head - // on every finalize, which grew O(history) per turn for a guard - // that only ever protects against ~10 entries. - const incomingIds = new Set(); - for (const r of function_results) incomingIds.add(r.function_call_id); - const existingResultIds = new Set(); - for (let i = messages.length - 1; i >= 0; i--) { - const m = messages[i]; - if (!m) continue; - if (m.role === 'function_result') { - existingResultIds.add(m.function_call_id); - continue; - } - if (m.role === 'assistant') { - // Once we cross an assistant boundary BEFORE seeing any - // pending incoming id we've passed the turn this finalize - // is writing for — earlier function_result blocks can't be - // duplicates of `function_results`. - let unseen = false; - for (const id of incomingIds) { - if (!existingResultIds.has(id)) { - unseen = true; - break; - } - } - if (!unseen) break; - } - } - let appended = 0; - for (const r of function_results) { - if (existingResultIds.has(r.function_call_id)) continue; - messages.push(r as AgentMessage); - existingResultIds.add(r.function_call_id); - appended++; - } - if (appended < function_results.length) { - logger.warn('handleFinalize: skipped duplicate function_results (re-entry detected)', { - session_id: rec.session_id, - total: function_results.length, - appended, - skipped: function_results.length - appended, - }); - } - await persistence.saveMessages(iii, rec.session_id, messages); - - const asst = rec.last_assistant; - if (!asst) { - rec.function_results = function_results; - rec.pending_function_calls = []; - await persistence.saveExecutedCalls(iii, rec.session_id, []); - transitionTo(rec, all_terminate ? 'tearing_down' : 'steering_check'); - return; - } - for (const evt of buildFinalizeLifecycle(asst, function_results)) { - await emit(iii, rec.session_id, evt); - } - rec.turn_end_emitted = true; - rec.function_results = function_results; - rec.pending_function_calls = []; - // Clear persisted executedCalls now so a re-entry into handleFinalize - // (durable retry, crash before transitionTo) finds an empty set and - // produces zero new function_results to push. Belt+suspenders alongside - // the idempotency guard above. handlePrepare also clears at the start - // of the NEXT turn, but that's too late if re-entry happens before then. - await persistence.saveExecutedCalls(iii, rec.session_id, []); - transitionTo(rec, all_terminate ? 'tearing_down' : 'steering_check'); -} diff --git a/harness/src/turn-orchestrator/states/index.ts b/harness/src/turn-orchestrator/states/index.ts new file mode 100644 index 00000000..e7865709 --- /dev/null +++ b/harness/src/turn-orchestrator/states/index.ts @@ -0,0 +1,11 @@ +/** + * Re-export per-state register functions. Each `turn::{state}` lives in its own file. + */ + +export { register as registerProvisioning } from './provisioning.js'; +export { register as registerAssistantStreaming } from './assistant-streaming.js'; +export { register as registerAssistantFinished } from './assistant-finished.js'; +export { register as registerFunctionExecute } from './function-execute.js'; +export { register as registerFunctionAwaitingApproval } from './function-awaiting-approval.js'; +export { register as registerSteeringCheck } from './steering-check.js'; +export { register as registerTearingDown } from './tearing-down.js'; diff --git a/harness/src/turn-orchestrator/states/provisioning.ts b/harness/src/turn-orchestrator/states/provisioning.ts index 67d7cd9a..8ca88284 100644 --- a/harness/src/turn-orchestrator/states/provisioning.ts +++ b/harness/src/turn-orchestrator/states/provisioning.ts @@ -1,22 +1,33 @@ +/** + * `turn::provisioning`. First FSM step after `run::start`: materialize tool schemas, + * assemble the system prompt, persist the enriched run request, then advance. + * + * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. + * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. + */ + import type { ISdk } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; import { agentTriggerTool } from '../agent-trigger.js'; import type { TurnOrchestratorConfig } from '../config.js'; import * as persistence from '../persistence.js'; +import { type RunRequest } from '../run-request.js'; +import { runTransition } from '../run-transition.js'; import { type TurnStateRecord, transitionTo } from '../state.js'; -import { - type DefaultSkillBody, - type Mode, - buildSystemPrompt, - defaultSkillBody, -} from '../system-prompt.js'; - -function asMode(value: unknown): Mode | null { - return value === 'plan' || value === 'ask' || value === 'agent' ? value : null; -} +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; +import { type DefaultSkillBody, buildSystemPrompt, defaultSkillBody } from '../system-prompt.js'; const FETCH_TIMEOUT_MS = 10_000; +export function parseDirectoryBody(resp: unknown): string | null { + if (typeof resp === 'string') return resp; + if (resp && typeof resp === 'object') { + const body = (resp as { body?: unknown }).body; + if (typeof body === 'string') return body; + } + return null; +} + async function fetchSkill(iii: ISdk, id: string): Promise { try { const resp = await iii.trigger({ @@ -24,12 +35,7 @@ async function fetchSkill(iii: ISdk, id: string): Promise { payload: { id }, timeoutMs: FETCH_TIMEOUT_MS, }); - if (typeof resp === 'string') return resp; - if (resp && typeof resp === 'object') { - const body = (resp as Record).body; - if (typeof body === 'string') return body; - } - return null; + return parseDirectoryBody(resp); } catch (err) { logger.warn('directory::skills::get failed', { id, err: String(err) }); return null; @@ -53,11 +59,8 @@ async function fetchSkillsIndex(iii: ISdk): Promise { payload: {}, timeoutMs: FETCH_TIMEOUT_MS, }); - if (resp && typeof resp === 'object') { - const body = (resp as Record).body; - if (typeof body === 'string' && body.length > 0) return body; - } - return null; + const body = parseDirectoryBody(resp); + return body && body.length > 0 ? body : null; } catch (err) { logger.warn('directory::skills::index failed', { err: String(err) }); return null; @@ -71,22 +74,37 @@ export async function handleProvisioning( ): Promise { const request = await persistence.loadRunRequest(iii, rec.session_id); - // The single tool LLMs see is `agent_trigger`. await persistence.saveFunctionSchemas(iii, rec.session_id, [agentTriggerTool()]); - const overrideRaw = request.system_prompt; - const override = typeof overrideRaw === 'string' && overrideRaw.length > 0 ? overrideRaw : null; - const cwd = typeof request.cwd === 'string' ? (request.cwd as string) : null; - const mode = asMode(request.mode); + const override = request.system_prompt.length > 0 ? request.system_prompt : null; const [skillsIndex, bodies] = await Promise.all([ fetchSkillsIndex(iii), fetchDefaultSkills(iii, cfg.system_default_skills), ]); - const prompt = buildSystemPrompt(bodies, cwd, override, mode, skillsIndex); + const prompt = buildSystemPrompt(bodies, null, override, request.mode, skillsIndex); - const updated = { ...request, system_prompt: prompt }; + const updated: RunRequest = { ...request, system_prompt: prompt }; await persistence.saveRunRequest(iii, rec.session_id, updated); - transitionTo(rec, 'awaiting_assistant'); + transitionTo(rec, 'assistant_streaming'); +} + +export function register(iii: ISdk, cfg: TurnOrchestratorConfig): void { + iii.registerFunction( + 'turn::provisioning', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition( + iii, + 'provisioning', + (i, rec) => handleProvisioning(i, cfg, rec), + parsed, + ); + }, + { + description: + 'Run one durable FSM transition for session in state provisioning: materialize tool schemas, build system prompt, advance to assistant_streaming.', + }, + ); } diff --git a/harness/src/turn-orchestrator/states/steering.ts b/harness/src/turn-orchestrator/states/steering-check.ts similarity index 77% rename from harness/src/turn-orchestrator/states/steering.ts rename to harness/src/turn-orchestrator/states/steering-check.ts index f59b3208..77a0f6a5 100644 --- a/harness/src/turn-orchestrator/states/steering.ts +++ b/harness/src/turn-orchestrator/states/steering-check.ts @@ -1,14 +1,17 @@ /** - * `steering_check`. Drains the steering / followup inbox queues and the - * abort flag, then routes onward. Mirrors - * `turn-orchestrator/src/states/steering.rs`. + * `turn::steering_check`. Drains steering / followup inboxes and the abort flag, then routes onward. + * + * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. + * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. */ import type { ISdk } from '../../runtime/iii.js'; import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; import { emit } from '../events.js'; import * as persistence from '../persistence.js'; +import { runTransition } from '../run-transition.js'; import { type TurnStateRecord, abortSignalKey, transitionTo } from '../state.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; export type SteeringRoute = | 'abort' @@ -17,6 +20,7 @@ export type SteeringRoute = | 'continue_after_function' | 'end_turn'; +/** Pure priority router — no I/O. */ export function route( abort: boolean, has_steering: boolean, @@ -122,28 +126,20 @@ export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'steering_check', handleSteering, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state steering_check: drain inboxes and route onward.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/states/tearing-down.ts b/harness/src/turn-orchestrator/states/tearing-down.ts index abdafc63..d9af5420 100644 --- a/harness/src/turn-orchestrator/states/tearing-down.ts +++ b/harness/src/turn-orchestrator/states/tearing-down.ts @@ -1,27 +1,34 @@ +/** + * `turn::tearing_down`. Emit `agent_end` and transition to `stopped`. + * + * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. + * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. + */ + import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; +import type { AgentMessage } from '../../types/agent-message.js'; import { emit } from '../events.js'; import * as persistence from '../persistence.js'; +import { runTransition } from '../run-transition.js'; import { type TurnStateRecord, transitionTo } from '../state.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; export async function handleTearingDown(iii: ISdk, rec: TurnStateRecord): Promise { - const sandbox_id = await persistence.loadSandboxId(iii, rec.session_id); - if (sandbox_id) { - try { - await iii.trigger({ - function_id: 'sandbox::stop', - payload: { sandbox_id, wait: true }, - timeoutMs: 60_000, - }); - } catch (err) { - logger.warn('sandbox::stop failed during teardown', { - sandbox_id, - err: String(err), - }); - } - } - const messages = await persistence.loadMessages(iii, rec.session_id); + const messages: AgentMessage[] = await persistence.loadMessages(iii, rec.session_id); await emit(iii, rec.session_id, { type: 'agent_end', messages }); transitionTo(rec, 'stopped'); } -// reload 1779112003 + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::tearing_down', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'tearing_down', handleTearingDown, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state tearing_down: emit agent_end and mark stopped.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/subscriber.ts b/harness/src/turn-orchestrator/subscriber.ts deleted file mode 100644 index 99d36623..00000000 --- a/harness/src/turn-orchestrator/subscriber.ts +++ /dev/null @@ -1,63 +0,0 @@ -/** - * `turn::step` durable subscriber. - */ - -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import type { TurnOrchestratorConfig } from './config.js'; -import * as persistence from './persistence.js'; -import { isTerminal } from './state.js'; -import { step } from './transitions.js'; - -export const STEP_FN_ID = 'turn::step'; -export const STEP_TOPIC = 'turn::step_requested'; - -function extractSessionId(payload: unknown): string | null { - if (!payload || typeof payload !== 'object') return null; - const obj = payload as Record; - const inner = - obj.payload && typeof obj.payload === 'object' - ? (obj.payload as Record) - : obj.data && typeof obj.data === 'object' - ? (obj.data as Record) - : obj; - return typeof inner.session_id === 'string' ? inner.session_id : null; -} - -export async function execute( - iii: ISdk, - cfg: TurnOrchestratorConfig, - payload: unknown, -): Promise { - const session_id = extractSessionId(payload); - if (!session_id) { - throw new Error('turn::step_requested payload missing session_id'); - } - const rec = await persistence.loadRecord(iii, session_id); - if (!rec) { - logger.warn('turn::step_requested for unknown session', { session_id }); - return { ok: false, reason: 'unknown_session' }; - } - if (isTerminal(rec)) { - return { ok: true, terminal: true }; - } - const from_state = rec.state; - try { - await step(iii, cfg, rec); - } catch (err) { - throw new Error(`transition from ${from_state} failed: ${String(err)}`); - } - await persistence.saveRecord(iii, rec); - return { ok: true, from_state, to_state: rec.state }; -} - -export function register(iii: ISdk, cfg: TurnOrchestratorConfig): void { - iii.registerFunction(STEP_FN_ID, async (payload: unknown) => execute(iii, cfg, payload), { - description: 'Run one durable state machine transition for a session.', - }); - iii.registerTrigger({ - type: 'durable:subscriber', - function_id: STEP_FN_ID, - config: { topic: STEP_TOPIC }, - }); -} diff --git a/harness/src/turn-orchestrator/transitions.ts b/harness/src/turn-orchestrator/transitions.ts deleted file mode 100644 index 6ccc07d0..00000000 --- a/harness/src/turn-orchestrator/transitions.ts +++ /dev/null @@ -1,44 +0,0 @@ -import type { ISdk } from '../runtime/iii.js'; -import type { TurnOrchestratorConfig } from './config.js'; -import type { TurnStateRecord } from './state.js'; -import { handleAwaiting, handleFinished, handleStreaming } from './states/assistant.js'; -import { - handleAwaitingApproval, - handleExecute, - handleFinalize, - handlePrepare, -} from './states/functions.js'; -import { handleProvisioning } from './states/provisioning.js'; -import { handleSteering } from './states/steering.js'; -import { handleTearingDown } from './states/tearing-down.js'; - -export async function step( - iii: ISdk, - cfg: TurnOrchestratorConfig, - rec: TurnStateRecord, -): Promise { - switch (rec.state) { - case 'provisioning': - return handleProvisioning(iii, cfg, rec); - case 'awaiting_assistant': - return handleAwaiting(iii, rec); - case 'assistant_streaming': - return handleStreaming(iii, rec); - case 'assistant_finished': - return handleFinished(iii, rec); - case 'function_prepare': - return handlePrepare(iii, rec); - case 'function_execute': - return handleExecute(iii, cfg, rec); - case 'function_awaiting_approval': - return handleAwaitingApproval(iii, rec); - case 'function_finalize': - return handleFinalize(iii, rec); - case 'steering_check': - return handleSteering(iii, rec); - case 'tearing_down': - return handleTearingDown(iii, rec); - case 'stopped': - return; // idempotent terminal - } -} diff --git a/harness/src/turn-orchestrator/turn-state-write.ts b/harness/src/turn-orchestrator/turn-state-write.ts new file mode 100644 index 00000000..296f4a7e --- /dev/null +++ b/harness/src/turn-orchestrator/turn-state-write.ts @@ -0,0 +1,30 @@ +/** + * UI notification when agent-scope turn_state is persisted via `saveRecord` / + * `persistRecord`. + */ + +import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import { emit } from './events.js'; + +export async function emitTurnStateChanged( + iii: ISdk, + session_id: string, + event_type: 'state:created' | 'state:updated', + new_value: Record, + old_value?: Record, +): Promise { + try { + await emit(iii, session_id, { + type: 'turn_state_changed', + event_type, + new_value, + ...(old_value !== undefined && { old_value }), + }); + } catch (err) { + logger.warn('emitTurnStateChanged failed', { + session_id, + err: String(err), + }); + } +} diff --git a/harness/src/turn-orchestrator/wake.ts b/harness/src/turn-orchestrator/wake.ts new file mode 100644 index 00000000..ec57e29b --- /dev/null +++ b/harness/src/turn-orchestrator/wake.ts @@ -0,0 +1,45 @@ +/** + * Durable FSM wake via iii-queue FIFO `turn-step`. Enqueues `turn::{state}` per + * persisted turn_state, not a generic dispatcher. + */ + +import { TriggerAction, type ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import * as persistence from './persistence.js'; +import { turnFnId, type TurnState, type TurnStateRecord } from './state.js'; + +export const TURN_STEP_QUEUE = 'turn-step'; + +const NON_STEPABLE_STATES = new Set(['stopped', 'function_awaiting_approval']); + +/** True when a persisted turn_state transition should enqueue `turn::{newState}`. */ +export function shouldWakeStep(previousState: TurnState | null, newState: TurnState): boolean { + if (NON_STEPABLE_STATES.has(newState)) return false; + if (previousState !== null && previousState === newState) return false; + return true; +} + +/** Guard before enqueueing from approval/abort — skip terminal sessions. */ +export function shouldRunStep(rec: TurnStateRecord | null): boolean { + if (!rec) return false; + return rec.state !== 'stopped'; +} + +export async function wakeState(iii: ISdk, session_id: string, state: TurnState): Promise { + try { + await iii.trigger({ + function_id: turnFnId(state), + payload: { session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); + } catch (err) { + logger.warn('wakeState failed', { session_id, state, err: String(err) }); + } +} + +/** Enqueue the handler for the session's current persisted state (approval/abort). */ +export async function wakeFromRecord(iii: ISdk, session_id: string): Promise { + const rec = await persistence.loadRecord(iii, session_id); + if (!rec || !shouldRunStep(rec)) return; + await wakeState(iii, session_id, rec.state); +} diff --git a/harness/src/types/agent-event.ts b/harness/src/types/agent-event.ts index 7aca3413..83076cf8 100644 --- a/harness/src/types/agent-event.ts +++ b/harness/src/types/agent-event.ts @@ -11,33 +11,28 @@ import type { FunctionResult } from './function.js'; import type { AssistantMessageEvent } from './stream-event.js'; export type AgentEvent = - | { type: 'agent_start' } | { type: 'agent_end'; messages: AgentMessage[] } - | { type: 'turn_start' } | { type: 'turn_end'; message: AgentMessage; function_results: FunctionResultMessage[]; } - | { type: 'message_start'; message: AgentMessage } | { type: 'message_update'; message: AgentMessage; llm_event: AssistantMessageEvent; } - | { type: 'message_end'; message: AgentMessage } | { - type: 'function_execution_start'; - function_call_id: string; - function_id: string; - args: unknown; + type: 'message_complete'; + message: AgentMessage; + /** When true, text/thinking were already delivered via message_update. */ + body_streamed?: boolean; } | { - type: 'function_execution_update'; + type: 'function_execution_start'; function_call_id: string; function_id: string; args: unknown; - partial_result: unknown; } | { type: 'function_execution_end'; diff --git a/harness/tests/harness/policy.test.ts b/harness/tests/harness/policy.test.ts index e75c4a04..96dc4ce4 100644 --- a/harness/tests/harness/policy.test.ts +++ b/harness/tests/harness/policy.test.ts @@ -412,7 +412,6 @@ describe('shipped iii-permissions.yaml', () => { 'auth::set_token', 'auth::delete_token', 'run::start', - 'run::start_and_wait', 'router::stream_assistant', 'router::abort', ]; diff --git a/harness/tests/harness/trigger.test.ts b/harness/tests/harness/trigger.test.ts index 561fad6d..53cdcd40 100644 --- a/harness/tests/harness/trigger.test.ts +++ b/harness/tests/harness/trigger.test.ts @@ -1,11 +1,7 @@ /** * Contract test for `harness::trigger`. * - * Mirrors the Rust harness bridge (see `workers/harness/src/lib.rs:103-159`) - * by forwarding `{ function_id, payload }` to `iii.trigger` and wrapping the - * result in an HTTP-style envelope. This ensures console/web can route - * browser-originated chat turns through a single instrumented bus function — - * the same pattern `workers/harness/web/src/App.tsx` uses over HTTP. + * console/web forwards chat kickoff via a single flat payload over WS. */ import { describe, expect, it, vi } from 'vitest'; @@ -28,6 +24,19 @@ function makeFakeSdk(triggerResult: unknown = { ok: true }) { return { sdk, registered, trigger }; } +const runStartPayload = { + session_id: 'sess-1', + provider: 'anthropic', + model: 'claude-sonnet-4-6', + messages: [ + { + role: 'user' as const, + content: [{ type: 'text' as const, text: 'hi' }], + timestamp: Date.now(), + }, + ], +}; + describe('harness::trigger', () => { it('registers a handler under id "harness::trigger"', () => { const { sdk, registered } = makeFakeSdk(); @@ -35,74 +44,41 @@ describe('harness::trigger', () => { expect(registered.has('harness::trigger')).toBe(true); }); - it('forwards body.function_id and body.payload to iii.trigger', async () => { + it('forwards payload to run::start', async () => { const { sdk, registered, trigger } = makeFakeSdk({ session_id: 'sess' }); register(sdk); const handler = registered.get('harness::trigger')?.handler; if (!handler) throw new Error('handler not registered'); const result = (await handler({ - body: { - function_id: 'run::start', - session_id: 'sess-1', - message_id: 'msg-1', - payload: { session_id: 'sess-1', provider: 'anthropic', model: 'claude-sonnet-4-6' }, - }, + session_id: 'sess-1', + message_id: 'msg-1', + payload: runStartPayload, })) as Record; expect(trigger).toHaveBeenCalledTimes(1); const triggerArg = trigger.mock.calls[0]?.[0] as Record; expect(triggerArg.function_id).toBe('run::start'); - expect(triggerArg.payload).toEqual({ - session_id: 'sess-1', - provider: 'anthropic', - model: 'claude-sonnet-4-6', + expect(triggerArg.payload).toMatchObject(runStartPayload); + expect(triggerArg.payload).toMatchObject({ + system_prompt: '', }); expect(result.status_code).toBe(200); expect(result.body).toEqual({ session_id: 'sess' }); }); - it('falls back to top-level when body envelope is absent (WS shape)', async () => { - const { sdk, registered, trigger } = makeFakeSdk(); - register(sdk); - const handler = registered.get('harness::trigger')?.handler; - if (!handler) throw new Error('handler not registered'); - - await handler({ - function_id: 'run::start', - session_id: 'sess-2', - message_id: 'msg-2', - payload: { session_id: 'sess-2', provider: 'openai', model: 'gpt-4o' }, - }); - - const triggerArg = trigger.mock.calls[0]?.[0] as Record; - expect(triggerArg.function_id).toBe('run::start'); - expect(triggerArg.payload).toEqual({ - session_id: 'sess-2', - provider: 'openai', - model: 'gpt-4o', - }); - }); - - it('defaults payload to an empty object when omitted', async () => { - const { sdk, registered, trigger } = makeFakeSdk(); - register(sdk); - const handler = registered.get('harness::trigger')?.handler; - if (!handler) throw new Error('handler not registered'); - - await handler({ body: { function_id: 'state::get' } }); - - const triggerArg = trigger.mock.calls[0]?.[0] as Record; - expect(triggerArg.payload).toEqual({}); - }); - - it('throws when function_id is missing', async () => { + it('rejects invalid run::start payload', async () => { const { sdk, registered } = makeFakeSdk(); register(sdk); const handler = registered.get('harness::trigger')?.handler; if (!handler) throw new Error('handler not registered'); - await expect(handler({ body: { payload: {} } })).rejects.toThrow(/missing function_id/); + await expect( + handler({ + session_id: 'sess-1', + payload: { provider: 'openai' }, + }), + ).rejects.toThrow(); }); it('surfaces trigger errors (no swallowing)', async () => { @@ -120,7 +96,9 @@ describe('harness::trigger', () => { if (!triggerHandler) throw new Error('handler not registered'); await expect( // biome-ignore lint/style/noNonNullAssertion: defined above - triggerHandler!({ body: { function_id: 'run::start', payload: {} } }), + triggerHandler!({ + payload: runStartPayload, + }), ).rejects.toThrow(/boom/); }); }); diff --git a/harness/tests/integration/approval-resume.e2e.test.ts b/harness/tests/integration/approval-resume.e2e.test.ts index 1b2b6736..77d20819 100644 --- a/harness/tests/integration/approval-resume.e2e.test.ts +++ b/harness/tests/integration/approval-resume.e2e.test.ts @@ -9,10 +9,20 @@ import { isAbortSignalWrite, } from '../../src/turn-orchestrator/on-abort-signal.js'; import type { ISdk } from '../../src/runtime/iii.js'; +import { newRecord, turnStateKey } from '../../src/turn-orchestrator/state.js'; -function fakeIii(): { iii: ISdk; stepTriggers: Array<{ session_id: string }> } { +async function flushMicrotasks(): Promise { + await Promise.resolve(); + await Promise.resolve(); +} + +function fakeIii(): { + iii: ISdk; + wakeTriggers: Array<{ session_id: string; function_id: string }>; + stateStore: Map; +} { const stateStore = new Map(); - const stepTriggers: Array<{ session_id: string }> = []; + const wakeTriggers: Array<{ session_id: string; function_id: string }> = []; const handlers = new Map Promise>(); const iii = { @@ -20,59 +30,62 @@ function fakeIii(): { iii: ISdk; stepTriggers: Array<{ session_id: string }> } { handlers.set(fnId, handler); return { unregister: vi.fn() }; }), - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - if (function_id === 'state::set') { - const p = payload as { scope: string; key: string; value: unknown }; - const fullKey = `${p.scope}/${p.key}`; - const old_value = stateStore.get(fullKey) ?? null; - stateStore.set(fullKey, p.value); - if (p.scope === 'agent') { - const event = { - event_type: old_value == null ? 'state:created' : 'state:updated', - scope: p.scope, - key: p.key, - old_value, - new_value: p.value, - message_type: 'state', - }; - if (isAbortSignalWrite(event)) { - queueMicrotask(() => { - void handleAbortSignalWrite(iii as unknown as ISdk, event); - }); + trigger: vi.fn( + async ({ + function_id, + payload, + action, + }: { + function_id: string; + payload: unknown; + action?: unknown; + }) => { + if (function_id === 'state::set') { + const p = payload as { scope: string; key: string; value: unknown }; + const fullKey = `${p.scope}/${p.key}`; + const old_value = stateStore.get(fullKey) ?? null; + stateStore.set(fullKey, p.value); + if (p.scope === 'agent') { + const event = { + event_type: old_value == null ? 'state:created' : 'state:updated', + scope: p.scope, + key: p.key, + old_value, + new_value: p.value, + message_type: 'state', + }; + if (isAbortSignalWrite(event)) { + queueMicrotask(() => { + void handleAbortSignalWrite(iii as unknown as ISdk, event); + }); + } } + return null; } - return null; - } - if (function_id === 'state::get') { - const p = payload as { scope: string; key: string }; - return stateStore.get(`${p.scope}/${p.key}`) ?? null; - } - - if (function_id === 'turn::step') { - stepTriggers.push(payload as { session_id: string }); - return null; - } + if (function_id === 'state::get') { + const p = payload as { scope: string; key: string }; + return stateStore.get(`${p.scope}/${p.key}`) ?? null; + } - const handler = handlers.get(function_id); - if (handler) { - await handler(payload); - return null; - } + if (function_id.startsWith('turn::') && action != null) { + const p = payload as { session_id: string }; + wakeTriggers.push({ session_id: p.session_id, function_id }); + return null; + } - if (function_id === 'iii::durable::publish') { - const p = payload as { topic: string; data: { session_id: string } }; - if (p.topic === 'turn::step_requested') { - stepTriggers.push({ session_id: p.data.session_id }); + const handler = handlers.get(function_id); + if (handler) { + await handler(payload); + return null; } - return null; - } - return null; - }), + return null; + }, + ), }; - return { iii: iii as unknown as ISdk, stepTriggers }; + return { iii: iii as unknown as ISdk, wakeTriggers, stateStore }; } describe('approval resume reactive trigger', () => { @@ -80,8 +93,11 @@ describe('approval resume reactive trigger', () => { clearApprovalResumeRegistry(); }); - it('approval::resolve via resume fn automatically triggers turn::step', async () => { - const { iii, stepTriggers } = fakeIii(); + it('approval::resolve via resume fn automatically enqueues turn::{state}', async () => { + const { iii, wakeTriggers, stateStore } = fakeIii(); + const rec = newRecord('sess-x'); + rec.state = 'function_awaiting_approval'; + stateStore.set(`agent/${turnStateKey('sess-x')}`, rec); registerApprovalResume(iii, 'sess-x', 'fc-1'); const out = await handleResolveRequest(iii, { @@ -91,14 +107,20 @@ describe('approval resume reactive trigger', () => { }); expect(out).toEqual({ ok: true }); - await Promise.resolve(); + await flushMicrotasks(); - expect(stepTriggers).toHaveLength(1); - expect(stepTriggers[0]).toMatchObject({ session_id: 'sess-x' }); + expect(wakeTriggers).toHaveLength(1); + expect(wakeTriggers[0]).toMatchObject({ + session_id: 'sess-x', + function_id: 'turn::function_awaiting_approval', + }); }); - it('writing session//abort_signal=true wakes turn::step (via durable publish)', async () => { - const { iii, stepTriggers } = fakeIii(); + it('writing session//abort_signal=true enqueues turn::{state}', async () => { + const { iii, wakeTriggers, stateStore } = fakeIii(); + const rec = newRecord('sess-abort'); + rec.state = 'assistant_streaming'; + stateStore.set(`agent/${turnStateKey('sess-abort')}`, rec); await iii.trigger({ function_id: 'state::set', @@ -109,33 +131,39 @@ describe('approval resume reactive trigger', () => { }, }); - await Promise.resolve(); + await flushMicrotasks(); - expect(stepTriggers).toHaveLength(1); - expect(stepTriggers[0]).toMatchObject({ session_id: 'sess-abort' }); + expect(wakeTriggers).toHaveLength(1); + expect(wakeTriggers[0]).toMatchObject({ + session_id: 'sess-abort', + function_id: 'turn::assistant_streaming', + }); }); it('writing session//abort_signal=false does NOT trigger (condition rejects clears)', async () => { - const { iii, stepTriggers } = fakeIii(); + const { iii, wakeTriggers, stateStore } = fakeIii(); + const rec = newRecord('sess-clear'); + rec.state = 'function_execute'; + stateStore.set(`agent/${turnStateKey('sess-clear')}`, rec); await iii.trigger({ function_id: 'state::set', payload: { scope: 'agent', key: 'session/sess-clear/abort_signal', value: true }, }); - await Promise.resolve(); - stepTriggers.length = 0; + await flushMicrotasks(); + wakeTriggers.length = 0; await iii.trigger({ function_id: 'state::set', payload: { scope: 'agent', key: 'session/sess-clear/abort_signal', value: false }, }); - await Promise.resolve(); + await flushMicrotasks(); - expect(stepTriggers).toHaveLength(0); + expect(wakeTriggers).toHaveLength(0); }); it('writing an unrelated agent-scope key does NOT trigger', async () => { - const { iii, stepTriggers } = fakeIii(); + const { iii, wakeTriggers } = fakeIii(); await iii.trigger({ function_id: 'state::set', @@ -147,6 +175,6 @@ describe('approval resume reactive trigger', () => { }); await Promise.resolve(); - expect(stepTriggers).toHaveLength(0); + expect(wakeTriggers).toHaveLength(0); }); }); diff --git a/harness/tests/integration/on-record-written.e2e.test.ts b/harness/tests/integration/on-record-written.e2e.test.ts index c8e677c2..bac27500 100644 --- a/harness/tests/integration/on-record-written.e2e.test.ts +++ b/harness/tests/integration/on-record-written.e2e.test.ts @@ -1,140 +1,158 @@ import { describe, expect, it, vi } from 'vitest'; +import { TriggerAction } from '../../src/runtime/iii.js'; import type { ISdk } from '../../src/runtime/iii.js'; -import { - STEP_FN_ID, - handleStepableRecordWrite, - isStepableRecordWrite, -} from '../../src/turn-orchestrator/on-record-written.js'; - -function fakeIii(): { iii: ISdk; stepInvocations: Array<{ session_id: string }> } { +import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { newRecord, turnStateKey } from '../../src/turn-orchestrator/state.js'; + +function fakeIii(): { + iii: ISdk; + wakeInvocations: Array<{ session_id: string; function_id: string; action?: unknown }>; + stateStore: Map; +} { const stateStore = new Map(); - const stepInvocations: Array<{ session_id: string }> = []; + const wakeInvocations: Array<{ session_id: string; function_id: string; action?: unknown }> = []; const iii = { - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - if (function_id === 'state::set') { - const p = payload as { scope: string; key: string; value: unknown }; - const fullKey = `${p.scope}/${p.key}`; - const old_value = stateStore.get(fullKey) ?? null; - stateStore.set(fullKey, p.value); - if (p.scope === 'agent') { - const event = { - event_type: old_value == null ? 'state:created' : 'state:updated', - scope: p.scope, - key: p.key, - old_value, - new_value: p.value, - message_type: 'state', - }; - if (isStepableRecordWrite(event)) { - queueMicrotask(() => { - void handleStepableRecordWrite(iii as unknown as ISdk, event); - }); - } + trigger: vi.fn( + async ({ + function_id, + payload, + action, + }: { + function_id: string; + payload: unknown; + action?: unknown; + }) => { + if (function_id === 'state::get') { + const p = payload as { scope: string; key: string }; + const v = stateStore.get(`${p.scope}/${p.key}`); + return v === undefined ? null : structuredClone(v); } - return null; - } - if (function_id === STEP_FN_ID) { - stepInvocations.push(payload as { session_id: string }); - return null; - } + if (function_id === 'state::set') { + const p = payload as { scope: string; key: string; value: unknown }; + stateStore.set(`${p.scope}/${p.key}`, structuredClone(p.value)); + return null; + } + + if (function_id === 'state::update') { + return { old_value: 0 }; + } + + if (function_id.startsWith('turn::')) { + const p = payload as { session_id: string }; + wakeInvocations.push({ session_id: p.session_id, function_id, action }); + return null; + } - return null; - }), + return null; + }, + ), }; - return { iii: iii as unknown as ISdk, stepInvocations }; + return { iii: iii as unknown as ISdk, wakeInvocations, stateStore }; } -describe('turn-step reactive wake', () => { - it('writing session//turn_state with a stepable state invokes turn::step', async () => { - const { iii, stepInvocations } = fakeIii(); - - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-a/turn_state', - value: { state: 'provisioning' }, - }, - }); +describe('saveRecord wake integration', () => { + it('writing a new stepable turn_state enqueues turn::provisioning', async () => { + const { iii, wakeInvocations } = fakeIii(); + const rec = newRecord('sess-a'); + rec.state = 'provisioning'; - await Promise.resolve(); + await persistence.saveRecord(iii, rec); - expect(stepInvocations).toEqual([{ session_id: 'sess-a' }]); + expect(wakeInvocations).toEqual([ + { + session_id: 'sess-a', + function_id: 'turn::provisioning', + action: TriggerAction.Enqueue({ queue: 'turn-step' }), + }, + ]); }); - it('subsequent transitions also wake turn::step', async () => { - const { iii, stepInvocations } = fakeIii(); + it('subsequent transitions enqueue turn::{newState}', async () => { + const { iii, wakeInvocations } = fakeIii(); + const rec = newRecord('sess-b'); + rec.state = 'provisioning'; + await persistence.saveRecord(iii, rec); + + rec.state = 'assistant_streaming'; + await persistence.saveRecord(iii, rec); - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-b/turn_state', - value: { state: 'provisioning' }, + expect(wakeInvocations).toEqual([ + { + session_id: 'sess-b', + function_id: 'turn::provisioning', + action: TriggerAction.Enqueue({ queue: 'turn-step' }), }, - }); - await Promise.resolve(); - - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-b/turn_state', - value: { state: 'awaiting_assistant' }, + { + session_id: 'sess-b', + function_id: 'turn::assistant_streaming', + action: TriggerAction.Enqueue({ queue: 'turn-step' }), }, - }); - await Promise.resolve(); - - expect(stepInvocations).toEqual([{ session_id: 'sess-b' }, { session_id: 'sess-b' }]); + ]); }); it('parking in function_awaiting_approval does NOT wake', async () => { - const { iii, stepInvocations } = fakeIii(); - - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-c/turn_state', - value: { state: 'function_awaiting_approval' }, - }, - }); - await Promise.resolve(); + const { iii, wakeInvocations } = fakeIii(); + const rec = newRecord('sess-c'); + rec.state = 'function_awaiting_approval'; - expect(stepInvocations).toEqual([]); + await persistence.saveRecord(iii, rec); + + expect(wakeInvocations).toEqual([]); }); it('terminal stopped state does NOT wake', async () => { - const { iii, stepInvocations } = fakeIii(); - - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-d/turn_state', - value: { state: 'stopped' }, - }, - }); - await Promise.resolve(); + const { iii, wakeInvocations } = fakeIii(); + const rec = newRecord('sess-d'); + rec.state = 'stopped'; - expect(stepInvocations).toEqual([]); + await persistence.saveRecord(iii, rec); + + expect(wakeInvocations).toEqual([]); }); - it('non-turn_state agent keys do NOT wake (no leakage from abort_signal etc.)', async () => { - const { iii, stepInvocations } = fakeIii(); + it('same-state re-save does NOT wake', async () => { + const { iii, wakeInvocations } = fakeIii(); + const rec = newRecord('sess-e'); + rec.state = 'function_execute'; + await persistence.saveRecord(iii, rec); + wakeInvocations.length = 0; - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-e/abort_signal', - value: true, - }, - }); - await Promise.resolve(); + await persistence.saveRecord(iii, rec); + + expect(wakeInvocations).toEqual([]); + }); +}); + +function turnStateGets(iii: ISdk, session_id: string): number { + const trigger = iii.trigger as unknown as { + mock: { calls: Array<[{ function_id: string; payload?: { key?: string } }]> }; + }; + return trigger.mock.calls.filter( + ([arg]) => arg.function_id === 'state::get' && arg.payload?.key === turnStateKey(session_id), + ).length; +} + +describe('saveRecord read elimination (#5)', () => { + it('2-arg saveRecord reads turn_state exactly once (no double load)', async () => { + const { iii } = fakeIii(); + const rec = newRecord('sess-r1'); + rec.state = 'provisioning'; + + await persistence.saveRecord(iii, rec); + + expect(turnStateGets(iii, 'sess-r1')).toBe(1); + }); + + it('saveRecord with a threaded previous reads turn_state zero times', async () => { + const { iii } = fakeIii(); + const previous = newRecord('sess-r2'); + previous.state = 'provisioning'; + const next = { ...previous, state: 'assistant_streaming' as const }; + + await persistence.saveRecord(iii, next, previous); - expect(stepInvocations).toEqual([]); + expect(turnStateGets(iii, 'sess-r2')).toBe(0); }); }); diff --git a/harness/tests/integration/wire-parity.test.ts b/harness/tests/integration/wire-parity.test.ts index e3dd094e..5b285a56 100644 --- a/harness/tests/integration/wire-parity.test.ts +++ b/harness/tests/integration/wire-parity.test.ts @@ -11,11 +11,6 @@ import type { AssistantMessage } from '../../src/types/agent-message.js'; import { formatFunctionResultContent } from '../../src/types/wire.js'; describe('AgentEvent wire shape', () => { - it('agent_start serialises to {"type":"agent_start"}', () => { - const evt: AgentEvent = { type: 'agent_start' }; - expect(JSON.parse(JSON.stringify(evt))).toEqual({ type: 'agent_start' }); - }); - it('turn_end carries message + function_results', () => { const asst: AssistantMessage = { role: 'assistant', diff --git a/harness/tests/turn-orchestrator/agent-trigger.test.ts b/harness/tests/turn-orchestrator/agent-trigger.test.ts index 39c84266..f5053dd6 100644 --- a/harness/tests/turn-orchestrator/agent-trigger.test.ts +++ b/harness/tests/turn-orchestrator/agent-trigger.test.ts @@ -2,12 +2,12 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import type { DispatchResult } from '../../src/turn-orchestrator/agent-trigger.js'; import { - FUNCTION_ID, TOOL_NAME, agentTriggerTool, dispatchWithHook, functionNotFoundHint, isErrorResult, + triggerFunctionCall, } from '../../src/turn-orchestrator/agent-trigger.js'; import * as hookModule from '../../src/turn-orchestrator/hook.js'; @@ -24,9 +24,8 @@ describe('agent_trigger tool schema', () => { expect(params.required).toEqual(['function']); }); - it('TOOL_NAME and FUNCTION_ID are stable', () => { + it('TOOL_NAME is stable', () => { expect(TOOL_NAME).toBe('agent_trigger'); - expect(FUNCTION_ID).toBe('agent::trigger'); }); }); @@ -79,15 +78,34 @@ describe('isErrorResult', () => { }); }); +describe('triggerFunctionCall', () => { + it('returns gate_unavailable denial on trigger failure', async () => { + const iii = { + trigger: vi.fn().mockRejectedValue(new Error('handler error')), + } as unknown as ISdk; + const result = await triggerFunctionCall(iii, { + id: 'fc-1', + function_id: 'shell::fs::write', + arguments: {}, + }); + expect(isErrorResult(result)).toBe(true); + expect(result.details).toMatchObject({ + status: 'denied', + denied_by: 'gate_unavailable', + function_id: 'shell::fs::write', + }); + }); +}); + describe('dispatchWithHook returns DispatchResult', () => { it('returns kind:pending when consultBefore returns pending', async () => { vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'pending' }); const iii = { trigger: vi.fn() } as unknown as ISdk; - const out = await dispatchWithHook( - iii, - { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, - 's1', - ); + const out = await dispatchWithHook(iii, { + id: 'fc-1', + function_id: 'shell::run', + arguments: { command: 'ls' }, + }); expect(out.kind).toBe('pending'); }); @@ -103,11 +121,11 @@ describe('dispatchWithHook returns DispatchResult', () => { }, }); const iii = { trigger: vi.fn() } as unknown as ISdk; - const out = await dispatchWithHook( - iii, - { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - 's1', - ); + const out = await dispatchWithHook(iii, { + id: 'fc-1', + function_id: 'shell::run', + arguments: {}, + }); expect(out.kind).toBe('deny'); if (out.kind === 'deny') { expect(out.result.details).toMatchObject({ status: 'denied' }); @@ -119,11 +137,11 @@ describe('dispatchWithHook returns DispatchResult', () => { const iii = { trigger: vi.fn().mockResolvedValue({ ok: true }), } as unknown as ISdk; - const out = await dispatchWithHook( - iii, - { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - 's1', - ); + const out = await dispatchWithHook(iii, { + id: 'fc-1', + function_id: 'shell::run', + arguments: {}, + }); expect(out.kind).toBe('result'); }); @@ -137,15 +155,11 @@ describe('dispatchWithHook returns DispatchResult', () => { const iii = { trigger: vi.fn().mockRejectedValue({ code: 'function_not_found' }), } as unknown as ISdk; - const out = await dispatchWithHook( - iii, - { - id: 'fc-1', - function_id: 'sandbox/skills/sandbox/create', - arguments: { image: 'node' }, - }, - 's1', - ); + const out = await dispatchWithHook(iii, { + id: 'fc-1', + function_id: 'sandbox/skills/sandbox/create', + arguments: { image: 'node' }, + }); expect(out.kind).toBe('result'); if (out.kind !== 'result') return; const details = out.result.details as Record; @@ -160,11 +174,11 @@ describe('dispatchWithHook returns DispatchResult', () => { const iii = { trigger: vi.fn().mockRejectedValue({ code: 'function_not_found' }), } as unknown as ISdk; - const out = await dispatchWithHook( - iii, - { id: 'fc-1', function_id: 'some/odd/three-segment/id', arguments: {} }, - 's1', - ); + const out = await dispatchWithHook(iii, { + id: 'fc-1', + function_id: 'some/odd/three-segment/id', + arguments: {}, + }); if (out.kind !== 'result') throw new Error('expected result kind'); const details = out.result.details as Record; // No "Did you mean" — three-segment ids don't match the @@ -179,11 +193,11 @@ describe('dispatchWithHook returns DispatchResult', () => { const iii = { trigger: vi.fn().mockRejectedValue({ code: 'function_not_found' }), } as unknown as ISdk; - const out = await dispatchWithHook( - iii, - { id: 'fc-1', function_id: 'misspelled', arguments: {} }, - 's1', - ); + const out = await dispatchWithHook(iii, { + id: 'fc-1', + function_id: 'misspelled', + arguments: {}, + }); if (out.kind !== 'result') throw new Error('expected result kind'); const details = out.result.details as Record; expect(details.hint).toBe( diff --git a/harness/tests/turn-orchestrator/approval-resume.test.ts b/harness/tests/turn-orchestrator/approval-resume.test.ts index 19bced43..25001fb3 100644 --- a/harness/tests/turn-orchestrator/approval-resume.test.ts +++ b/harness/tests/turn-orchestrator/approval-resume.test.ts @@ -1,5 +1,5 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; +import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; import { approvalResumeFnId } from '../../src/approval-gate/schemas.js'; import { clearApprovalResumeRegistry, @@ -13,14 +13,18 @@ type RegisteredFn = { unregister: ReturnType; }; -import type { TurnStateRecord } from '../../src/turn-orchestrator/state.js'; +import { + newRecord, + turnStateKey, + type TurnStateRecord, +} from '../../src/turn-orchestrator/state.js'; function makeIiiWithRegistry( stateStore = new Map(), agentTurnStates: TurnStateRecord[] = [], ) { const registered = new Map(); - const stepCalls: Array<{ session_id: string }> = []; + const wakeCalls: Array<{ session_id: string; action?: unknown; function_id?: string }> = []; const iii = { registerFunction: vi.fn((fnId: string, handler: (payload: unknown) => Promise) => { @@ -32,28 +36,43 @@ function makeIiiWithRegistry( registered.set(fnId, entry); return { unregister: entry.unregister }; }), - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - if (function_id === 'state::get') { - const p = payload as { scope: string; key: string }; - return stateStore.get(`${p.scope}/${p.key}`) ?? null; - } - if (function_id === 'state::set') { - const p = payload as { scope: string; key: string; value: unknown }; - stateStore.set(`${p.scope}/${p.key}`, p.value); + trigger: vi.fn( + async ({ + function_id, + payload, + action, + }: { + function_id: string; + payload: unknown; + action?: unknown; + }) => { + if (function_id === 'state::get') { + const p = payload as { scope: string; key: string }; + return stateStore.get(`${p.scope}/${p.key}`) ?? null; + } + if (function_id === 'state::set') { + const p = payload as { scope: string; key: string; value: unknown }; + stateStore.set(`${p.scope}/${p.key}`, p.value); + return null; + } + if (function_id === 'state::list') { + return agentTurnStates; + } + if (function_id.startsWith('turn::') && function_id !== 'turn::on_abort_signal') { + const p = payload as { session_id: string }; + wakeCalls.push({ + session_id: p.session_id, + action, + function_id, + }); + return null; + } return null; - } - if (function_id === 'state::list') { - return agentTurnStates; - } - if (function_id === 'turn::step') { - stepCalls.push(payload as { session_id: string }); - return null; - } - return null; - }), + }, + ), } as unknown as ISdk; - return { iii, registered, stepCalls, stateStore }; + return { iii, registered, wakeCalls, stateStore }; } afterEach(() => { @@ -85,16 +104,25 @@ describe('registerApprovalResume', () => { }); describe('approval resume handler', () => { - it('persists decision, triggers turn::step, and unregisters', async () => { - const { iii, registered, stepCalls, stateStore } = makeIiiWithRegistry(); + it('persists decision, enqueues turn::{state}, and unregisters', async () => { + const { iii, registered, wakeCalls, stateStore } = makeIiiWithRegistry(); + const rec = newRecord('s1'); + rec.state = 'function_awaiting_approval'; + stateStore.set(`agent/${turnStateKey('s1')}`, rec); registerApprovalResume(iii, 's1', 'fc-1'); const entry = registered.get('turn::approval_resume::s1/fc-1'); if (!entry) throw new Error('handler not registered'); await entry.handler({ decision: 'allow', reason: null }); expect(stateStore.get('approvals/s1/fc-1')).toEqual({ decision: 'allow', reason: null }); - expect(stepCalls).toEqual([{ session_id: 's1' }]); - expect(entry.unregister).toHaveBeenCalled(); + expect(wakeCalls).toEqual([ + { + session_id: 's1', + function_id: 'turn::function_awaiting_approval', + action: TriggerAction.Enqueue({ queue: 'turn-step' }), + }, + ]); + expect(entry!.unregister).toHaveBeenCalled(); }); it('does not overwrite an existing decision (idempotent persist)', async () => { @@ -113,16 +141,16 @@ describe('approval resume handler', () => { }); }); - it('does not trigger turn::step again after unregister on second invoke', async () => { - const { iii, registered, stepCalls } = makeIiiWithRegistry(); + it('does not enqueue turn::{state} again after unregister on second invoke', async () => { + const { iii, registered, wakeCalls } = makeIiiWithRegistry(); registerApprovalResume(iii, 's1', 'fc-1'); const entry = registered.get('turn::approval_resume::s1/fc-1'); if (!entry) throw new Error('handler not registered'); await entry.handler({ decision: 'deny', reason: 'nope' }); - stepCalls.length = 0; + wakeCalls.length = 0; await entry.handler({ decision: 'allow', reason: null }); - expect(stepCalls).toHaveLength(0); + expect(wakeCalls).toHaveLength(0); }); }); diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index 95ab8055..b218fd6d 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -1,32 +1,364 @@ -import { describe, expect, it } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; +import { TOOL_NAME } from '../../src/turn-orchestrator/agent-trigger.js'; +import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import * as preflightModule from '../../src/turn-orchestrator/preflight.js'; import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; -import { handleAwaiting } from '../../src/turn-orchestrator/states/assistant.js'; +import { handleFinished } from '../../src/turn-orchestrator/states/assistant-finished.js'; +import { handleStreaming } from '../../src/turn-orchestrator/states/assistant-streaming.js'; -type TriggerCall = { function_id: string; payload: unknown }; +type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; -function fakeIii(): { iii: ISdk; calls: TriggerCall[] } { +function fakeIii(overrides?: Partial): { iii: ISdk; calls: TriggerCall[] } { const calls: TriggerCall[] = []; const iii = { - trigger: async (req: { function_id: string; payload: T }): Promise => { - calls.push({ function_id: req.function_id, payload: req.payload }); + trigger: async (req: { + function_id: string; + payload: T; + timeoutMs?: number; + }): Promise => { + calls.push({ + function_id: req.function_id, + payload: req.payload, + timeoutMs: req.timeoutMs, + }); return null as R; }, + ...overrides, } as unknown as ISdk; return { iii, calls }; } -describe('handleAwaiting', () => { +function assistant(overrides: Partial = {}): AssistantMessage { + return { + role: 'assistant', + content: [{ type: 'text', text: 'hello' }], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: 'gpt-4o', + provider: 'openai', + timestamp: 1, + ...overrides, + }; +} + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('handleStreaming turn start', () => { it('starts a normal assistant turn without approval::consume resurrection', async () => { - const rec: TurnStateRecord = { ...newRecord('s1'), state: 'awaiting_assistant' }; - const { iii, calls } = fakeIii(); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii, calls } = fakeIii({ + createChannel: async () => { + throw new Error('channel unavailable'); + }, + }); + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + }); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); - await handleAwaiting(iii, rec); + await handleStreaming(iii, rec); - expect(rec.state).toBe('assistant_streaming'); expect(rec.turn_count).toBe(1); expect(rec.turn_end_emitted).toBe(false); expect(calls.some((c) => c.function_id === 'approval::consume')).toBe(false); + expect(calls.some((c) => c.function_id === 'stream::set')).toBe(false); + }); + + it('exhausts max_turns and transitions to tearing_down', async () => { + const rec: TurnStateRecord = { + ...newRecord('s1', 2), + state: 'assistant_streaming', + turn_count: 2, + }; + const { iii, calls } = fakeIii(); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + + await handleStreaming(iii, rec); + + expect(rec.state).toBe('tearing_down'); + expect(rec.turn_end_emitted).toBe(true); + expect(rec.last_assistant?.content[0]).toEqual({ + type: 'text', + text: 'loop stopped: max_turns (2) reached', + }); + expect(saveSpy).toHaveBeenCalledOnce(); expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); }); }); + +describe('handleStreaming', () => { + it('transitions to assistant_finished with synthetic error when createChannel fails', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii } = fakeIii({ + createChannel: async () => { + throw new Error('channel unavailable'); + }, + }); + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + }); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + + await handleStreaming(iii, rec); + + expect(rec.state).toBe('assistant_finished'); + expect(rec.last_assistant?.stop_reason).toBe('error'); + expect(rec.last_assistant?.error_message).toContain('create_channel failed'); + }); + + it('captures provider done frame and transitions to assistant_finished', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); + let deliver: ((msg: string) => void) | null = null; + + const { iii } = fakeIii({ + createChannel: async () => ({ + writerRef: {}, + reader: { + onMessage: (cb: (msg: string) => void) => { + deliver = cb; + }, + stream: { + resume: () => { + deliver?.( + JSON.stringify({ + type: 'done', + message: finalMsg, + }), + ); + }, + }, + }, + }), + }); + + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + }); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + + await handleStreaming(iii, rec); + + expect(rec.state).toBe('assistant_finished'); + expect(rec.last_assistant).toEqual(finalMsg); + }); +}); + +describe('handleFinished', () => { + it('throws when last_assistant is missing', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_finished' }; + const { iii } = fakeIii(); + + await expect(handleFinished(iii, rec)).rejects.toThrow( + 'assistant_finished without last_assistant', + ); + }); + + it('routes error assistant to tearing_down without persisting transcript', async () => { + const rec: TurnStateRecord = { + ...newRecord('s1'), + state: 'assistant_finished', + last_assistant: assistant({ stop_reason: 'error', error_message: 'auth failed' }), + }; + const { iii } = fakeIii(); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + + await handleFinished(iii, rec); + + expect(rec.state).toBe('tearing_down'); + expect(rec.turn_end_emitted).toBe(true); + expect(saveSpy).not.toHaveBeenCalled(); + }); + + it('routes text-only assistant to steering_check and persists message', async () => { + const rec: TurnStateRecord = { + ...newRecord('s1'), + state: 'assistant_finished', + last_assistant: assistant(), + }; + const { iii } = fakeIii(); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + + await handleFinished(iii, rec); + + expect(rec.state).toBe('steering_check'); + expect(rec.pending_function_calls).toEqual([]); + expect(saveSpy).toHaveBeenCalledOnce(); + }); + + it('prepares function calls and transitions to function_execute', async () => { + const rec: TurnStateRecord = { + ...newRecord('s1'), + state: 'assistant_finished', + last_assistant: assistant({ + content: [ + { + type: 'function_call', + id: 'fc-1', + function_id: 'shell::run', + arguments: { command: 'ls' }, + }, + ], + }), + }; + const { iii } = fakeIii(); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const saveExecutedSpy = vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); + + await handleFinished(iii, rec); + + expect(rec.state).toBe('function_execute'); + expect(rec.function_results).toEqual([]); + expect(rec.pending_function_calls).toEqual([ + { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, + ]); + expect(saveExecutedSpy).toHaveBeenCalledWith(iii, 's1', []); + expect(savePreparedSpy).toHaveBeenCalledWith(iii, 's1', [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, + blocked: null, + }, + ]); + }); + + it('does NOT duplicate the assistant message when handleFinished re-enters', async () => { + // Idempotency guard: a durable retry / crash-before-transitionTo can + // replay handleFinished with the same last_assistant. Re-pushing a + // tool-call assistant makes Anthropic reject the next request with + // "each tool_use must have a unique id". + const rec: TurnStateRecord = { + ...newRecord('s1'), + state: 'assistant_finished', + last_assistant: assistant({ + content: [ + { + type: 'function_call', + id: 'toolu_42', + function_id: 'shell::run', + arguments: { command: 'pwd' }, + }, + ], + }), + }; + const { iii } = fakeIii(); + let storedMessages: unknown[] = []; + vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); + vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { + storedMessages = msgs as never; + }); + vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); + + await handleFinished(iii, rec); + // Re-entry: same record before the transition was durably observed. + rec.state = 'assistant_finished'; + await handleFinished(iii, rec); + + const asstMsgs = (storedMessages as Array<{ role?: string }>).filter( + (m) => m.role === 'assistant', + ); + expect(asstMsgs).toHaveLength(1); + }); + + it('unwraps agent_trigger wrappers when preparing function calls', async () => { + const rec: TurnStateRecord = { + ...newRecord('s1'), + state: 'assistant_finished', + last_assistant: assistant({ + content: [ + { + type: 'function_call', + id: 'fc-wrap', + function_id: TOOL_NAME, + arguments: { function: 'shell::run', payload: { command: 'ls' } }, + }, + { + type: 'function_call', + id: 'fc-direct', + function_id: 'shell::echo', + arguments: { text: 'hi' }, + }, + ], + }), + }; + const { iii } = fakeIii(); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); + + await handleFinished(iii, rec); + + expect(rec.state).toBe('function_execute'); + const prepared = savePreparedSpy.mock.calls[0]?.[2]; + expect(prepared).toEqual([ + { + function_call: { id: 'fc-wrap', function_id: 'shell::run', arguments: { command: 'ls' } }, + blocked: null, + }, + { + function_call: { id: 'fc-direct', function_id: 'shell::echo', arguments: { text: 'hi' } }, + blocked: null, + }, + ]); + }); + + it('blocks agent_trigger calls with missing or empty function at prepare time', async () => { + const rec: TurnStateRecord = { + ...newRecord('s1'), + state: 'assistant_finished', + last_assistant: assistant({ + content: [ + { + type: 'function_call', + id: 'fc-bad', + function_id: TOOL_NAME, + arguments: { payload: { command: 'ls' } }, + }, + ], + }), + }; + const { iii } = fakeIii(); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); + + await handleFinished(iii, rec); + + expect(rec.state).toBe('function_execute'); + const prepared = savePreparedSpy.mock.calls[0]?.[2]; + expect(prepared?.[0]?.function_call).toEqual({ + id: 'fc-bad', + function_id: '', + arguments: { command: 'ls' }, + }); + expect(prepared?.[0]?.blocked?.details).toMatchObject({ error: 'missing_function' }); + }); +}); diff --git a/harness/tests/turn-orchestrator/awaiting-approval.test.ts b/harness/tests/turn-orchestrator/awaiting-approval.test.ts index 9c1e4705..8153d4d8 100644 --- a/harness/tests/turn-orchestrator/awaiting-approval.test.ts +++ b/harness/tests/turn-orchestrator/awaiting-approval.test.ts @@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import * as persistence from '../../src/turn-orchestrator/persistence.js'; import type { TurnStateRecord } from '../../src/turn-orchestrator/state.js'; -import { handleAwaitingApproval } from '../../src/turn-orchestrator/states/functions.js'; +import { handleAwaitingApproval } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; function fakeIii(stateGetImpl: (scope: string, key: string) => unknown): ISdk { return { diff --git a/harness/tests/turn-orchestrator/config.test.ts b/harness/tests/turn-orchestrator/config.test.ts index de967d60..7948a32b 100644 --- a/harness/tests/turn-orchestrator/config.test.ts +++ b/harness/tests/turn-orchestrator/config.test.ts @@ -2,18 +2,15 @@ import { describe, expect, it } from 'vitest'; import { loadOrchestratorConfig } from '../../src/turn-orchestrator/config.js'; describe('loadOrchestratorConfig', () => { - it('applies defaults for sync timeout and system skills', () => { + it('applies defaults for system skills', () => { const cfg = loadOrchestratorConfig({}); - expect(cfg.sync_default_timeout_ms).toBe(120_000); expect(cfg.system_default_skills).toEqual(['iii://iii-directory/index']); }); - it('reads sync_default_timeout_ms and system_default_skills from config', () => { + it('reads system_default_skills from config', () => { const cfg = loadOrchestratorConfig({ - sync_default_timeout_ms: 60_000, system_default_skills: ['skill-a'], }); - expect(cfg.sync_default_timeout_ms).toBe(60_000); expect(cfg.system_default_skills).toEqual(['skill-a']); }); }); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 69ce28ea..f2d76b44 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -1,23 +1,58 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; -import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.js'; -import type { TurnOrchestratorConfig } from '../../src/turn-orchestrator/config.js'; +import * as events from '../../src/turn-orchestrator/events.js'; import * as hookModule from '../../src/turn-orchestrator/hook.js'; import * as persistence from '../../src/turn-orchestrator/persistence.js'; import type { TurnStateRecord } from '../../src/turn-orchestrator/state.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; +import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.js'; import * as approvalResumeModule from '../../src/turn-orchestrator/approval-resume.js'; -import { handleExecute } from '../../src/turn-orchestrator/states/functions.js'; - -const cfg: TurnOrchestratorConfig = { - sync_default_timeout_ms: 120_000, - system_default_skills: [], -}; +import { parseApprovalDecision } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; +import { handleExecute } from '../../src/turn-orchestrator/states/function-execute.js'; afterEach(() => { vi.restoreAllMocks(); }); +function mockFinalizePersistence(): void { + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); +} + +describe('parseApprovalDecision', () => { + it('accepts allow/deny/aborted with nullable reason (stored approval shape)', () => { + expect(parseApprovalDecision({ decision: 'allow', reason: null })).toEqual({ + decision: 'allow', + reason: null, + }); + expect(parseApprovalDecision({ decision: 'deny', reason: 'policy' })).toEqual({ + decision: 'deny', + reason: 'policy', + }); + expect(parseApprovalDecision({ decision: 'aborted', reason: 'session_aborted' })).toEqual({ + decision: 'aborted', + reason: 'session_aborted', + }); + }); + + it('rejects speculative wrapper envelopes no caller stores', () => { + expect(parseApprovalDecision({ data: { decision: 'allow', reason: null } })).toBeNull(); + expect(parseApprovalDecision({ payload: { decision: 'allow', reason: null } })).toBeNull(); + }); + + it.each([ + ['null', null], + ['undefined', undefined], + ['missing decision', { reason: null }], + ['empty decision', { decision: '', reason: null }], + ['unknown decision', { decision: 'needs_approval', reason: null }], + ['numeric reason', { decision: 'allow', reason: 7 }], + ] as const)('rejects bad shape: %s', (_label, value) => { + expect(parseApprovalDecision(value)).toBeNull(); + }); +}); + describe('handleExecute new flow', () => { it('pushes the call onto awaiting_approval and transitions to function_awaiting_approval on pending', async () => { const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); @@ -42,7 +77,7 @@ describe('handleExecute new flow', () => { ]); vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - await handleExecute(iii, cfg, rec); + await handleExecute(iii, rec); expect(rec.state).toBe('function_awaiting_approval'); expect(rec.awaiting_approval).toHaveLength(1); @@ -50,7 +85,7 @@ describe('handleExecute new flow', () => { expect(registerResumeSpy).toHaveBeenCalledWith(iii, 's1', 'fc-1'); }); - it('skips dispatchWithHook on pre_approved entries and calls iii.trigger directly', async () => { + it('skips consultBefore on pre_approved entries and uses triggerFunctionCall', async () => { const triggerSpy = vi.fn().mockResolvedValue({ ok: true }); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); @@ -71,7 +106,7 @@ describe('handleExecute new flow', () => { vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); const consultBeforeSpy = vi.spyOn(hookModule, 'consultBefore'); - await handleExecute(iii, cfg, rec); + await handleExecute(iii, rec); expect(consultBeforeSpy).not.toHaveBeenCalled(); const triggerCalls = triggerSpy.mock.calls.map( @@ -80,7 +115,7 @@ describe('handleExecute new flow', () => { expect(triggerCalls).toContain('shell::run'); }); - it('synthesizes an error result when a pre_approved trigger rejects (does not throw out of handleExecute)', async () => { + it('synthesizes a gate_unavailable denial when a pre_approved trigger rejects', async () => { const triggerSpy = vi.fn(async (req: { function_id: string }) => { if (req.function_id === 'shell::fs::write') { throw new Error('handler error: {"code":"S210","message":"bad write payload"}'); @@ -104,20 +139,25 @@ describe('handleExecute new flow', () => { ]); vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); const saveSpy = vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + mockFinalizePersistence(); - await expect(handleExecute(iii, cfg, rec)).resolves.toBeUndefined(); + await expect(handleExecute(iii, rec)).resolves.toBeUndefined(); - expect(rec.state).toBe('function_finalize'); + expect(rec.state).toBe('steering_check'); expect(saveSpy).toHaveBeenCalled(); - const lastSave = saveSpy.mock.calls.at(-1)?.[2] as Array<{ - is_error: boolean; - result: { details: unknown }; - }>; - expect(lastSave?.[0]?.is_error).toBe(true); - const details = lastSave?.[0]?.result.details as Record; - expect(details?.error).toBe('trigger_failed'); - expect(details?.function).toBe('shell::fs::write'); - expect(String(details?.message)).toContain('S210'); + // saveExecutedCalls is invoked twice: once with the synthesized error + // result, then once with `[]` as the idempotency guard clears executed + // calls at the end of finalize. Inspect the persisted-results call, not + // the trailing clear. + const savedResults = saveSpy.mock.calls + .map((c) => c[2] as Array<{ is_error: boolean; result: { details: unknown } }>) + .find((arr) => Array.isArray(arr) && arr.length > 0); + expect(savedResults?.[0]?.is_error).toBe(true); + const details = savedResults?.[0]?.result.details as Record; + expect(details?.status).toBe('denied'); + expect(details?.denied_by).toBe('gate_unavailable'); + expect(details?.function_id).toBe('shell::fs::write'); + expect(String(details?.reason)).toContain('S210'); }); it('emits denial result without dispatching when blocked is set', async () => { @@ -140,184 +180,193 @@ describe('handleExecute new flow', () => { ]); vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - await handleExecute(iii, cfg, rec); + mockFinalizePersistence(); + await handleExecute(iii, rec); const shellCalls = triggerSpy.mock.calls.filter( (call) => (call[0] as { function_id: string }).function_id === 'shell::run', ); expect(shellCalls).toHaveLength(0); - expect(rec.state).toBe('function_finalize'); + expect(rec.state).toBe('steering_check'); }); -}); - -describe('handleFinalize idempotency', () => { - // Production failure: Anthropic rejected with "each tool_use must have a - // single result. Found multiple tool_result blocks with id: toolu_...". - // Root cause: handleFinalize re-entered with the same executedCalls in - // state and pushed the same function_result AgentMessages onto flat-state - // a second time. This test pins the idempotent behavior. - it('does NOT duplicate function_results when handleFinalize runs twice with the same executedCalls', async () => { - const { handleFinalize } = await import('../../src/turn-orchestrator/states/functions.js'); + it('replays persisted executed calls without re-dispatching', async () => { + const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); + const triggerSpy = vi.fn().mockResolvedValue(null); + const iii = { trigger: triggerSpy } as unknown as ISdk; + const rec = newRecord('s1'); + rec.state = 'function_execute'; - const executedCalls = [ + const existingResult = { + content: [{ type: 'text' as const, text: 'cached' }], + details: {}, + terminate: false, + }; + vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ { - function_call: { - id: 'toolu_01', - function_id: 'shell::run', - arguments: { command: 'ls' }, - }, - result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, - is_error: false, - duration_ms: 5, + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + blocked: null, }, + ]); + vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ { - function_call: { - id: 'toolu_02', - function_id: 'fs::read', - arguments: { path: '/etc/hosts' }, - }, - result: { content: [{ type: 'text' as const, text: '127.0.0.1' }], details: {} }, + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: existingResult, is_error: false, - duration_ms: 3, + duration_ms: 42, }, - ]; - - // executedCalls is still present across both calls — the production - // failure mode where state wasn't cleared between handleFinalize entries. - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue(executedCalls); + ]); vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + mockFinalizePersistence(); - let storedMessages: unknown[] = []; - vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); - vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { - storedMessages = msgs as never; - }); - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(null as never); + await handleExecute(iii, rec); - const iii = { - trigger: vi.fn().mockResolvedValue({ old_value: 0 }), - } as unknown as ISdk; + expect(dispatchSpy).not.toHaveBeenCalled(); + expect(rec.state).toBe('steering_check'); + }); - const rec: TurnStateRecord = newRecord('sess-finalize-idem'); - rec.state = 'function_finalize'; - rec.last_assistant = { - role: 'assistant', - content: [], - stop_reason: 'function_call', - error_message: null, - error_kind: null, - usage: { input: 1, output: 1, cache_read: 0, cache_write: 0 }, - model: 'claude', - provider: 'anthropic', - timestamp: 0, - }; + it('transitions to steering_check after a successful hook dispatch', async () => { + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ + kind: 'result', + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, + }); + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec = newRecord('s1'); + rec.state = 'function_execute'; - await handleFinalize(iii, rec); - // Critical: simulate re-entry — handleFinalize fires again with the - // SAME executedCalls (the production failure mode). - rec.state = 'function_finalize'; - rec.turn_end_emitted = false; - await handleFinalize(iii, rec); + vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + blocked: null, + }, + ]); + vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); + vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + mockFinalizePersistence(); - const fnResults = ( - storedMessages as Array<{ role?: string; function_call_id?: string }> - ).filter((m) => m.role === 'function_result'); - expect(fnResults).toHaveLength(2); - expect(fnResults.map((m) => m.function_call_id).sort()).toEqual(['toolu_01', 'toolu_02']); + await handleExecute(iii, rec); + + expect(rec.state).toBe('steering_check'); }); - it('clears executedCalls at the end so a future re-entry produces zero new results', async () => { - const { handleFinalize } = await import('../../src/turn-orchestrator/states/functions.js'); + it('transitions to steering_check when last_assistant is missing after execute', async () => { + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec = newRecord('s1'); + rec.state = 'function_execute'; + rec.last_assistant = null; + + vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([]); vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ { - function_call: { id: 'toolu_x', function_id: 'f', arguments: {} }, - result: { content: [], details: {} }, + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, is_error: false, duration_ms: 1, }, ]); - const saveExecutedSpy = vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(null as never); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - const iii = { - trigger: vi.fn().mockResolvedValue({ old_value: 0 }), - } as unknown as ISdk; - const rec: TurnStateRecord = newRecord('sess-clear'); - rec.state = 'function_finalize'; + await handleExecute(iii, rec); + + expect(rec.state).toBe('steering_check'); + expect(rec.pending_function_calls).toEqual([]); + expect(rec.function_results).toHaveLength(1); + expect(emitSpy).not.toHaveBeenCalled(); + }); + + it('emits turn lifecycle and sets turn_end_emitted when last_assistant is present', async () => { + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec = newRecord('s1'); + rec.state = 'function_execute'; rec.last_assistant = { role: 'assistant', - content: [], - stop_reason: 'function_call', + content: [{ type: 'text', text: 'done' }], + stop_reason: 'end', error_message: null, error_kind: null, - usage: { input: 0, output: 0, cache_read: 0, cache_write: 0 }, - model: 'claude', - provider: 'anthropic', - timestamp: 0, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, }; - await handleFinalize(iii, rec); - - // saveExecutedCalls(..., []) must have been called at the END. - const clearCalls = saveExecutedSpy.mock.calls.filter( - ([, , calls]) => Array.isArray(calls) && (calls as unknown[]).length === 0, - ); - expect(clearCalls.length).toBeGreaterThan(0); - }); -}); + vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([]); + vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, + is_error: false, + duration_ms: 1, + }, + ]); + vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); -describe('handleFinished idempotency', () => { - // Sibling hazard to handleFinalize: re-entry pushes the same assistant - // message twice. If that assistant has tool_calls, Anthropic rejects - // with "each tool_use must have a unique id". + await handleExecute(iii, rec); - it('does NOT duplicate the assistant message when handleFinished runs twice', async () => { - const { handleFinished } = await import('../../src/turn-orchestrator/states/assistant.js'); + expect(rec.state).toBe('steering_check'); + expect(rec.turn_end_emitted).toBe(true); + expect(emitSpy.mock.calls.some((call) => call[2]?.type === 'turn_end')).toBe(true); + }); - const assistantMsg = { - role: 'assistant' as const, - content: [ - { - type: 'function_call' as const, - id: 'toolu_42', - function_id: 'shell::run', - arguments: { command: 'pwd' }, - }, - ], - stop_reason: 'function_call' as const, - error_message: null, - error_kind: null, - usage: { input: 10, output: 5, cache_read: 0, cache_write: 0 }, - model: 'claude', - provider: 'anthropic', - timestamp: 1700000000, - }; + it('does NOT duplicate function_results in flat-state when handleExecute re-enters', async () => { + // Idempotency guard: a durable retry / step-fanout race can replay the + // finalize path with the same persisted executedCalls. Re-pushing the + // same function_result blocks makes Anthropic reject with "each tool_use + // must have a single result. Found multiple tool_result blocks with id". + const executed = [ + { + function_call: { id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, terminate: false }, + is_error: false, + duration_ms: 5, + }, + ]; + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec = newRecord('s1'); + rec.state = 'function_execute'; + vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ + { function_call: executed[0].function_call, blocked: null }, + ]); + vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue(executed); + vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); let storedMessages: unknown[] = []; vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { storedMessages = msgs as never; }); + vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); - const iii = { - trigger: vi.fn().mockResolvedValue({ old_value: 0 }), - } as unknown as ISdk; - - const rec: TurnStateRecord = newRecord('sess-finished-idem'); - rec.state = 'assistant_finished'; - rec.last_assistant = assistantMsg; - - await handleFinished(iii, rec); - rec.state = 'assistant_finished'; + await handleExecute(iii, rec); + // Re-entry: same persisted executedCalls, before the transition was + // durably observed. + rec.state = 'function_execute'; rec.turn_end_emitted = false; - await handleFinished(iii, rec); + await handleExecute(iii, rec); - const asstMsgs = (storedMessages as Array<{ role?: string }>).filter( - (m) => m.role === 'assistant', - ); - expect(asstMsgs).toHaveLength(1); + const fnResults = ( + storedMessages as Array<{ role?: string; function_call_id?: string }> + ).filter((m) => m.role === 'function_result'); + expect(fnResults).toHaveLength(1); + expect(fnResults[0]?.function_call_id).toBe('toolu_01'); }); }); diff --git a/harness/tests/turn-orchestrator/get-state.test.ts b/harness/tests/turn-orchestrator/get-state.test.ts index 7d6cacfb..5c76220f 100644 --- a/harness/tests/turn-orchestrator/get-state.test.ts +++ b/harness/tests/turn-orchestrator/get-state.test.ts @@ -1,13 +1,47 @@ import { describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; -import { FUNCTION_ID, execute } from '../../src/turn-orchestrator/get-state.js'; +import { execute } from '../../src/turn-orchestrator/get-state.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; +import { GetStatePayloadSchema } from '../../src/turn-orchestrator/schemas.js'; -describe('turn::get_state', () => { - it('exposes the canonical function id', () => { - expect(FUNCTION_ID).toBe('turn::get_state'); +describe('GetStatePayloadSchema', () => { + it('accepts the flat shape the real backend sends', () => { + expect(GetStatePayloadSchema.parse({ session_id: 'sess-abc' })).toEqual({ + session_id: 'sess-abc', + }); }); + it('strips extra keys (engine may add metadata later)', () => { + expect(GetStatePayloadSchema.parse({ session_id: 's1', trace_id: 't1' })).toEqual({ + session_id: 's1', + }); + }); + + it('rejects publish envelope shapes — no in-repo caller wraps get_state', () => { + expect(() => + GetStatePayloadSchema.parse({ + topic: 'turn::step_requested', + data: { session_id: 's1' }, + }), + ).toThrow(); + }); + + it('rejects nested payload wrappers (no in-repo caller uses them)', () => { + expect(() => GetStatePayloadSchema.parse({ data: { session_id: 's1' } })).toThrow(); + expect(() => GetStatePayloadSchema.parse({ payload: { session_id: 's1' } })).toThrow(); + }); + + it('rejects missing, empty, or non-string session_id', () => { + expect(() => GetStatePayloadSchema.parse({})).toThrow(); + expect(() => GetStatePayloadSchema.parse({ session_id: '' })).toThrow(); + expect(() => GetStatePayloadSchema.parse({ session_id: 42 })).toThrow(); + expect(() => GetStatePayloadSchema.parse({ session_id: null })).toThrow(); + expect(() => GetStatePayloadSchema.parse(null)).toThrow(); + expect(() => GetStatePayloadSchema.parse(undefined)).toThrow(); + }); +}); + +describe('turn::get_state execute', () => { it('returns the turn_state record for a known session via persistence.loadRecord', async () => { const rec = newRecord('sess-abc'); rec.state = 'function_awaiting_approval'; @@ -34,9 +68,4 @@ describe('turn::get_state', () => { const out = await execute(iii, { session_id: 'unknown' }); expect(out).toBeNull(); }); - - it('throws on missing session_id', async () => { - const iii = { trigger: vi.fn() } as unknown as ISdk; - await expect(execute(iii, {})).rejects.toThrow(/session_id/); - }); }); diff --git a/harness/tests/turn-orchestrator/on-abort-signal.test.ts b/harness/tests/turn-orchestrator/on-abort-signal.test.ts index 2355e23b..e185cea3 100644 --- a/harness/tests/turn-orchestrator/on-abort-signal.test.ts +++ b/harness/tests/turn-orchestrator/on-abort-signal.test.ts @@ -1,41 +1,89 @@ import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; +import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; import { - STEP_TOPIC, + execute, handleAbortSignalWrite, isAbortSignalWrite, + parseAbortSignalWrite, } from '../../src/turn-orchestrator/on-abort-signal.js'; +import { AbortSignalWriteEventSchema } from '../../src/turn-orchestrator/schemas.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; -describe('isAbortSignalWrite condition', () => { - it('matches session//abort_signal with new_value === true', () => { - expect( - isAbortSignalWrite({ - event_type: 'state:created', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: null, - new_value: true, - message_type: 'state', +const matchingEvent = { + event_type: 'state:created' as const, + scope: 'agent' as const, + key: 'session/sess-abc/abort_signal', + old_value: null, + new_value: true as const, + message_type: 'state', +}; + +describe('AbortSignalWriteEventSchema', () => { + it('accepts the agent state write shape from state::set / engine triggers', () => { + expect(AbortSignalWriteEventSchema.parse(matchingEvent)).toEqual({ + session_id: 'sess-abc', + }); + }); + + it('rejects durable publish envelope shapes (not a state trigger event)', () => { + expect(() => + AbortSignalWriteEventSchema.parse({ + topic: 'turn::step_requested', + data: { session_id: 's1' }, }), - ).toBe(true); + ).toThrow(); }); - it('matches state:updated transitioning to true', () => { - expect( - isAbortSignalWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: false, - new_value: true, - message_type: 'state', + it('rejects nested payload wrappers', () => { + expect(() => AbortSignalWriteEventSchema.parse({ payload: matchingEvent })).toThrow(); + expect(() => AbortSignalWriteEventSchema.parse({ data: matchingEvent })).toThrow(); + }); + + it('rejects missing key, wrong new_value, or non-abort_signal keys', () => { + expect(() => AbortSignalWriteEventSchema.parse({})).toThrow(); + expect(() => + AbortSignalWriteEventSchema.parse({ + ...matchingEvent, + key: 'session/sess-abc/turn_state', + }), + ).toThrow(); + expect(() => + AbortSignalWriteEventSchema.parse({ + ...matchingEvent, + new_value: false, + }), + ).toThrow(); + expect(() => + AbortSignalWriteEventSchema.parse({ + ...matchingEvent, + event_type: 'state:deleted', }), - ).toBe(true); + ).toThrow(); + expect(() => AbortSignalWriteEventSchema.parse(null)).toThrow(); + }); +}); + +describe('parseAbortSignalWrite condition', () => { + it('matches session//abort_signal with new_value === true', () => { + expect(parseAbortSignalWrite(matchingEvent)).toEqual({ session_id: 'sess-abc' }); + expect(isAbortSignalWrite(matchingEvent)).toBe(true); + }); + + it('matches state:updated transitioning to true', () => { + const event = { + event_type: 'state:updated' as const, + scope: 'agent' as const, + key: 'session/sess-abc/abort_signal', + old_value: false, + new_value: true as const, + message_type: 'state', + }; + expect(parseAbortSignalWrite(event)).toEqual({ session_id: 'sess-abc' }); }); it('skips state:deleted', () => { expect( - isAbortSignalWrite({ + parseAbortSignalWrite({ event_type: 'state:deleted', scope: 'agent', key: 'session/sess-abc/abort_signal', @@ -43,12 +91,12 @@ describe('isAbortSignalWrite condition', () => { new_value: null, message_type: 'state', }), - ).toBe(false); + ).toBeNull(); }); it('skips writes that set the signal to false (idempotent clears)', () => { expect( - isAbortSignalWrite({ + parseAbortSignalWrite({ event_type: 'state:updated', scope: 'agent', key: 'session/sess-abc/abort_signal', @@ -56,12 +104,12 @@ describe('isAbortSignalWrite condition', () => { new_value: false, message_type: 'state', }), - ).toBe(false); + ).toBeNull(); }); it('skips non-abort_signal keys in the agent scope', () => { expect( - isAbortSignalWrite({ + parseAbortSignalWrite({ event_type: 'state:updated', scope: 'agent', key: 'session/sess-abc/turn_state', @@ -69,48 +117,78 @@ describe('isAbortSignalWrite condition', () => { new_value: { state: 'function_execute' }, message_type: 'state', }), - ).toBe(false); + ).toBeNull(); }); it('skips top-level non-session keys', () => { expect( - isAbortSignalWrite({ + parseAbortSignalWrite({ event_type: 'state:updated', scope: 'agent', - key: 'harness/cwd/abc/last_session_id', + key: 'harness/index/abc/last_session_id', old_value: null, new_value: 'sess-1', message_type: 'state', }), - ).toBe(false); + ).toBeNull(); }); }); -describe('handleAbortSignalWrite', () => { - it('extracts session_id and publishes turn::step_requested', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; +function mockIiiWithTurnState(rec: ReturnType): { + iii: ISdk; + triggers: Array<{ function_id: string; payload: unknown; action?: unknown }>; +} { + const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { + if (req.function_id === 'state::get') return rec; + triggers.push(req); + return null; + }), + } as unknown as ISdk; + return { iii, triggers }; +} + +describe('execute', () => { + it('enqueues turn::{state} on the turn-step FIFO queue', async () => { + const rec = newRecord('sess-abc'); + rec.state = 'assistant_streaming'; + const { iii, triggers } = mockIiiWithTurnState(rec); + + await execute(iii, { session_id: 'sess-abc' }); + + expect(triggers).toHaveLength(1); + expect(triggers[0]?.function_id).toBe('turn::assistant_streaming'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); + expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); + }); + + it('swallows enqueue failures (logs only, never rethrows)', async () => { + const rec = newRecord('sess-abc'); + rec.state = 'provisioning'; const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - return null; + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') return rec; + throw new Error('durable down'); }), } as unknown as ISdk; - await handleAbortSignalWrite(iii, { - event_type: 'state:created', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: null, - new_value: true, - message_type: 'state', - }); + await expect(execute(iii, { session_id: 'sess-abc' })).resolves.toBeUndefined(); + }); +}); + +describe('handleAbortSignalWrite', () => { + it('extracts session_id and enqueues turn::{state}', async () => { + const rec = newRecord('sess-abc'); + rec.state = 'function_execute'; + const { iii, triggers } = mockIiiWithTurnState(rec); + + await handleAbortSignalWrite(iii, matchingEvent); expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('iii::durable::publish'); - expect(triggers[0]?.payload).toMatchObject({ - topic: STEP_TOPIC, - data: { session_id: 'sess-abc' }, - }); + expect(triggers[0]?.function_id).toBe('turn::function_execute'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); + expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); }); it('no-ops when key does not match the abort_signal pattern', async () => { @@ -125,4 +203,17 @@ describe('handleAbortSignalWrite', () => { }); expect(iii.trigger).not.toHaveBeenCalled(); }); + + it('no-ops when new_value is not true (direct invoke bypasses engine condition)', async () => { + const iii = { trigger: vi.fn() } as unknown as ISdk; + await handleAbortSignalWrite(iii, { + event_type: 'state:updated', + scope: 'agent', + key: 'session/sess-abc/abort_signal', + old_value: true, + new_value: false, + message_type: 'state', + }); + expect(iii.trigger).not.toHaveBeenCalled(); + }); }); diff --git a/harness/tests/turn-orchestrator/on-record-written.test.ts b/harness/tests/turn-orchestrator/on-record-written.test.ts deleted file mode 100644 index 49c4f6bb..00000000 --- a/harness/tests/turn-orchestrator/on-record-written.test.ts +++ /dev/null @@ -1,221 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { - STEP_FN_ID, - handleStepableRecordWrite, - isStepableRecordWrite, -} from '../../src/turn-orchestrator/on-record-written.js'; - -describe('isStepableRecordWrite condition', () => { - it('matches turn_state writes with a non-terminal, non-awaiting state', () => { - expect( - isStepableRecordWrite({ - event_type: 'state:created', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: { state: 'provisioning' }, - message_type: 'state', - }), - ).toBe(true); - - expect( - isStepableRecordWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'provisioning' }, - new_value: { state: 'awaiting_assistant' }, - message_type: 'state', - }), - ).toBe(true); - }); - - it('rejects terminal state (stopped)', () => { - expect( - isStepableRecordWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'tearing_down' }, - new_value: { state: 'stopped' }, - message_type: 'state', - }), - ).toBe(false); - }); - - it('rejects function_awaiting_approval (orchestrator parks here)', () => { - expect( - isStepableRecordWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'function_prepare' }, - new_value: { state: 'function_awaiting_approval' }, - message_type: 'state', - }), - ).toBe(false); - }); - - it('rejects state:deleted', () => { - expect( - isStepableRecordWrite({ - event_type: 'state:deleted', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'provisioning' }, - new_value: null, - message_type: 'state', - }), - ).toBe(false); - }); - - it('rejects non-turn_state keys in the agent scope', () => { - expect( - isStepableRecordWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: null, - new_value: true, - message_type: 'state', - }), - ).toBe(false); - }); - - it('rejects same-state writes (old_value.state === new_value.state)', () => { - expect( - isStepableRecordWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'function_prepare' }, - new_value: { state: 'function_prepare' }, - message_type: 'state', - }), - ).toBe(false); - - expect( - isStepableRecordWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'function_prepare' }, - new_value: { state: 'function_execute' }, - message_type: 'state', - }), - ).toBe(true); - }); - - it('rejects writes whose new_value lacks a string state', () => { - expect( - isStepableRecordWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: { not_state: 'provisioning' }, - message_type: 'state', - }), - ).toBe(false); - - expect( - isStepableRecordWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: null, - message_type: 'state', - }), - ).toBe(false); - }); -}); - -describe('handleStepableRecordWrite', () => { - it('extracts session_id and invokes turn::step directly', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - - await handleStepableRecordWrite(iii, { - event_type: 'state:created', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: { state: 'provisioning' }, - message_type: 'state', - }); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe(STEP_FN_ID); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - }); - - it('no-ops when key does not match the turn_state pattern', async () => { - const iii = { trigger: vi.fn() } as unknown as ISdk; - await handleStepableRecordWrite(iii, { - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: null, - new_value: true, - message_type: 'state', - }); - expect(iii.trigger).not.toHaveBeenCalled(); - }); - - it('falls back to durable publish when the direct turn::step invoke fails', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - // Fail the direct turn::step invoke; let the durable publish succeed. - if (req.function_id === STEP_FN_ID) { - throw new Error('engine down'); - } - return null; - }), - } as unknown as ISdk; - - await handleStepableRecordWrite(iii, { - event_type: 'state:created', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: { state: 'provisioning' }, - message_type: 'state', - }); - - expect(triggers).toHaveLength(2); - expect(triggers[0]?.function_id).toBe(STEP_FN_ID); - expect(triggers[1]?.function_id).toBe('iii::durable::publish'); - expect(triggers[1]?.payload).toEqual({ - topic: 'turn::step_requested', - data: { session_id: 'sess-abc' }, - }); - }); - - it('swallows when BOTH the direct invoke and durable publish fallback fail', async () => { - const iii = { - trigger: vi.fn(async () => { - throw new Error('engine down'); - }), - } as unknown as ISdk; - - await expect( - handleStepableRecordWrite(iii, { - event_type: 'state:created', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: { state: 'provisioning' }, - message_type: 'state', - }), - ).resolves.toBeUndefined(); - }); -}); diff --git a/harness/tests/turn-orchestrator/on-terminal.test.ts b/harness/tests/turn-orchestrator/on-terminal.test.ts deleted file mode 100644 index 96624767..00000000 --- a/harness/tests/turn-orchestrator/on-terminal.test.ts +++ /dev/null @@ -1,192 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import { - __pendingForTest as pending, - clearTerminalWaiter, - handleTerminalStateWrite, - installTerminalWaiter, - isTerminalStateWrite, -} from '../../src/turn-orchestrator/on-terminal.js'; - -afterEach(() => { - pending.clear(); -}); - -describe('isTerminalStateWrite condition', () => { - it('matches state:updated on turn_state with state === "stopped"', () => { - expect( - isTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'tearing_down' }, - new_value: { state: 'stopped' }, - message_type: 'state', - }), - ).toBe(true); - }); - - it('matches state:created with state === "stopped" (replay edge case)', () => { - expect( - isTerminalStateWrite({ - event_type: 'state:created', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: { state: 'stopped' }, - message_type: 'state', - }), - ).toBe(true); - }); - - it('skips non-terminal turn_state writes', () => { - expect( - isTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'awaiting_assistant' }, - new_value: { state: 'function_execute' }, - message_type: 'state', - }), - ).toBe(false); - }); - - it('skips state:deleted', () => { - expect( - isTerminalStateWrite({ - event_type: 'state:deleted', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'stopped' }, - new_value: null, - message_type: 'state', - }), - ).toBe(false); - }); - - it('skips writes to keys other than turn_state', () => { - expect( - isTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: false, - new_value: true, - message_type: 'state', - }), - ).toBe(false); - }); - - it('skips writes where new_value lacks a state field', () => { - expect( - isTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'tearing_down' }, - new_value: { turn_count: 3 }, - message_type: 'state', - }), - ).toBe(false); - }); -}); - -describe('installTerminalWaiter + handleTerminalStateWrite', () => { - it('resolves the waiter when a terminal write fires for that session', async () => { - const waiter = installTerminalWaiter('sess-abc'); - - handleTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'tearing_down' }, - new_value: { state: 'stopped' }, - message_type: 'state', - }); - - await expect(waiter).resolves.toBeUndefined(); - }); - - it('ignores writes for unrelated sessions', async () => { - const waiter = installTerminalWaiter('sess-abc'); - let resolved = false; - waiter.then(() => { - resolved = true; - }); - - handleTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-xyz/turn_state', - old_value: { state: 'tearing_down' }, - new_value: { state: 'stopped' }, - message_type: 'state', - }); - - await Promise.resolve(); - expect(resolved).toBe(false); - - clearTerminalWaiter('sess-abc'); - }); - - it('clearTerminalWaiter removes the waiter without resolving it', async () => { - const waiter = installTerminalWaiter('sess-abc'); - let settled = false; - waiter.then(() => { - settled = true; - }); - - clearTerminalWaiter('sess-abc'); - - handleTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'tearing_down' }, - new_value: { state: 'stopped' }, - message_type: 'state', - }); - - await Promise.resolve(); - expect(settled).toBe(false); - }); - - it('handleTerminalStateWrite is a no-op for malformed events', () => { - expect(() => handleTerminalStateWrite(null)).not.toThrow(); - expect(() => - handleTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'not/a/match', - new_value: { state: 'stopped' }, - message_type: 'state', - }), - ).not.toThrow(); - }); - - it('multiple terminal writes for the same session resolve the waiter exactly once', async () => { - const waiter = installTerminalWaiter('sess-abc'); - const resolver = vi.fn(); - waiter.then(resolver); - - handleTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'tearing_down' }, - new_value: { state: 'stopped' }, - message_type: 'state', - }); - handleTerminalStateWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: { state: 'stopped' }, - new_value: { state: 'stopped' }, - message_type: 'state', - }); - - await Promise.resolve(); - expect(resolver).toHaveBeenCalledTimes(1); - }); -}); diff --git a/harness/tests/turn-orchestrator/on-turn-state-changed.test.ts b/harness/tests/turn-orchestrator/on-turn-state-changed.test.ts deleted file mode 100644 index 881fd516..00000000 --- a/harness/tests/turn-orchestrator/on-turn-state-changed.test.ts +++ /dev/null @@ -1,129 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { - CONDITION_FN_ID, - handleTurnStateWrite, - isTurnStateWrite, -} from '../../src/turn-orchestrator/on-turn-state-changed.js'; - -function fakeIii(): { iii: ISdk; emits: Array<{ session_id: string; event: unknown }> } { - const emits: Array<{ session_id: string; event: unknown }> = []; - const iii = { - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - if (function_id === 'stream::set') { - const p = payload as { group_id: string; data: unknown }; - emits.push({ session_id: p.group_id, event: p.data }); - return null; - } - return null; - }), - } as unknown as ISdk; - return { iii, emits }; -} - -describe('CONDITION_FN_ID', () => { - it('is the stable string the trigger config will reference', () => { - expect(CONDITION_FN_ID).toBe('turn::is_turn_state_write'); - }); -}); - -describe('isTurnStateWrite', () => { - it('returns true for state:created on session//turn_state', () => { - expect( - isTurnStateWrite({ - event_type: 'state:created', - key: 'session/sess-a/turn_state', - new_value: { state: 'provisioning' }, - }), - ).toBe(true); - }); - - it('returns true for state:updated on session//turn_state', () => { - expect( - isTurnStateWrite({ - event_type: 'state:updated', - key: 'session/sess-a/turn_state', - new_value: { state: 'function_awaiting_approval' }, - old_value: { state: 'function_execute' }, - }), - ).toBe(true); - }); - - it('returns false for non-turn_state agent keys', () => { - expect( - isTurnStateWrite({ - event_type: 'state:created', - key: 'session/sess-a/abort_signal', - new_value: true, - }), - ).toBe(false); - }); - - it('returns false for state:deleted', () => { - expect( - isTurnStateWrite({ - event_type: 'state:deleted', - key: 'session/sess-a/turn_state', - }), - ).toBe(false); - }); -}); - -describe('handleTurnStateWrite', () => { - it('emits turn_state_changed on agent::events with group_id = session_id', async () => { - const { iii, emits } = fakeIii(); - await handleTurnStateWrite(iii, { - event_type: 'state:updated', - key: 'session/sess-a/turn_state', - new_value: { state: 'function_awaiting_approval', awaiting_approval: [] }, - old_value: { state: 'function_execute', awaiting_approval: null }, - }); - expect(emits).toHaveLength(1); - expect(emits[0]?.session_id).toBe('sess-a'); - expect(emits[0]?.event).toMatchObject({ - type: 'turn_state_changed', - event_type: 'state:updated', - new_value: { state: 'function_awaiting_approval' }, - old_value: { state: 'function_execute' }, - }); - }); - - it('is a no-op when the event does not match the condition', async () => { - const { iii, emits } = fakeIii(); - await handleTurnStateWrite(iii, { - event_type: 'state:created', - key: 'session/sess-a/abort_signal', - new_value: true, - }); - expect(emits).toEqual([]); - }); - - it('swallows emit failures (logs only, never rethrows)', async () => { - const iii = { - trigger: vi.fn(async () => { - throw new Error('stream::set down'); - }), - } as unknown as ISdk; - // Should NOT throw. - await expect( - handleTurnStateWrite(iii, { - event_type: 'state:created', - key: 'session/sess-a/turn_state', - new_value: { state: 'provisioning' }, - }), - ).resolves.toBeUndefined(); - }); - - it('omits old_value from the emitted event when state:created', async () => { - const { iii, emits } = fakeIii(); - await handleTurnStateWrite(iii, { - event_type: 'state:created', - key: 'session/sess-a/turn_state', - new_value: { state: 'provisioning' }, - }); - expect(emits).toHaveLength(1); - const event = emits[0]?.event as Record; - expect(event.type).toBe('turn_state_changed'); - expect('old_value' in event).toBe(false); - }); -}); diff --git a/harness/tests/turn-orchestrator/provisioning.test.ts b/harness/tests/turn-orchestrator/provisioning.test.ts new file mode 100644 index 00000000..251ec424 --- /dev/null +++ b/harness/tests/turn-orchestrator/provisioning.test.ts @@ -0,0 +1,209 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import type { TurnOrchestratorConfig } from '../../src/turn-orchestrator/config.js'; +import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; +import { TurnStepPayloadSchema } from '../../src/turn-orchestrator/schemas.js'; +import { + handleProvisioning, + parseDirectoryBody, + register, +} from '../../src/turn-orchestrator/states/provisioning.js'; + +type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; + +function fakeIii(responses: Record = {}): { iii: ISdk; calls: TriggerCall[] } { + const calls: TriggerCall[] = []; + const iii = { + trigger: async (req: { + function_id: string; + payload: T; + timeoutMs?: number; + }): Promise => { + calls.push({ + function_id: req.function_id, + payload: req.payload, + timeoutMs: req.timeoutMs, + }); + return (responses[req.function_id] ?? null) as R; + }, + } as unknown as ISdk; + return { iii, calls }; +} + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('parseDirectoryBody', () => { + it('accepts bare string and wrapped body responses', () => { + expect(parseDirectoryBody('raw')).toBe('raw'); + expect(parseDirectoryBody({ body: 'wrapped' })).toBe('wrapped'); + }); + + it('rejects empty wrapped body and non-string shapes', () => { + expect(parseDirectoryBody({ body: '' })).toBe(''); + expect(parseDirectoryBody({ body: 1 })).toBeNull(); + expect(parseDirectoryBody(null)).toBeNull(); + }); +}); + +describe('handleProvisioning', () => { + it('materializes schemas, persists built prompt, and advances to assistant_streaming', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; + const { iii, calls } = fakeIii({ + 'directory::skills::index': { body: 'INDEX' }, + 'directory::skills::get': { body: 'SKILL' }, + }); + const cfg = { system_default_skills: ['iii://iii-directory/index'] }; + + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4', + mode: 'agent', + system_prompt: '', + }); + const saveSchemas = vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); + const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + + await handleProvisioning(iii, cfg, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(saveSchemas).toHaveBeenCalledWith(iii, 's1', [ + expect.objectContaining({ name: 'agent_trigger' }), + ]); + expect(saveRunRequest).toHaveBeenCalledWith( + iii, + 's1', + expect.objectContaining({ + provider: 'openai', + model: 'gpt-4', + system_prompt: expect.stringContaining('operating in agent mode'), + }), + ); + expect(calls.some((c) => c.function_id === 'directory::skills::index')).toBe(true); + expect(calls.some((c) => c.function_id === 'directory::skills::get')).toBe(true); + }); + + it('preserves a non-empty caller override verbatim', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; + const { iii } = fakeIii(); + const cfg = { system_default_skills: [] as string[] }; + + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4', + mode: null, + system_prompt: 'custom override', + }); + vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); + const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + + await handleProvisioning(iii, cfg, rec); + + expect(saveRunRequest).toHaveBeenCalledWith( + iii, + 's1', + expect.objectContaining({ system_prompt: 'custom override' }), + ); + }); + + it('continues when directory fetches fail', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; + const { iii } = fakeIii(); + const cfg = { system_default_skills: ['iii://missing'] }; + + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: '', + model: '', + mode: null, + system_prompt: '', + }); + vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); + const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + + await handleProvisioning(iii, cfg, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(saveRunRequest).toHaveBeenCalledWith( + iii, + 's1', + expect.objectContaining({ + system_prompt: expect.stringContaining('You are an iii agent worker'), + }), + ); + }); +}); + +describe('TurnStepPayloadSchema', () => { + it('accepts the flat shape every in-repo caller uses', () => { + expect(TurnStepPayloadSchema.parse({ session_id: 's1' })).toEqual({ session_id: 's1' }); + }); +}); + +describe('register', () => { + const cfg: TurnOrchestratorConfig = { system_default_skills: [] }; + + type Handler = (payload: unknown) => Promise; + + function captureHandler(): { iii: ISdk; getHandler: () => Handler; getId: () => string } { + let handler: Handler | null = null; + let registeredId = ''; + const iii = { + registerFunction: (id: string, fn: Handler) => { + registeredId = id; + handler = fn; + return { unregister: () => {} }; + }, + trigger: async () => null, + } as unknown as ISdk; + return { + iii, + getHandler: () => { + if (!handler) throw new Error('handler not registered'); + return handler; + }, + getId: () => registeredId, + }; + } + + it('registers turn::provisioning, threads cfg into the runner, and returns metadata', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; + vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); + const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const loadRunRequest = vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: '', + model: '', + mode: null, + system_prompt: '', + }); + vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); + vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + + const { iii, getHandler, getId } = captureHandler(); + register(iii, cfg); + expect(getId()).toBe('turn::provisioning'); + + const result = await getHandler()({ session_id: 's1' }); + + // cfg flows through to handleProvisioning (which reads the run request), + // and the runner threads the pre-mutation snapshot into saveRecord. + expect(loadRunRequest).toHaveBeenCalledWith(iii, 's1'); + expect(saveRecord).toHaveBeenCalledWith( + iii, + rec, + expect.objectContaining({ state: 'provisioning' }), + ); + expect(result).toEqual({ + ok: true, + from_state: 'provisioning', + to_state: 'assistant_streaming', + }); + }); + + it('rejects payloads missing session_id', async () => { + const { iii, getHandler } = captureHandler(); + register(iii, cfg); + await expect(getHandler()({})).rejects.toThrow(); + }); +}); diff --git a/harness/tests/turn-orchestrator/run-request.test.ts b/harness/tests/turn-orchestrator/run-request.test.ts new file mode 100644 index 00000000..745557b8 --- /dev/null +++ b/harness/tests/turn-orchestrator/run-request.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from 'vitest'; +import { parseRunRequest } from '../../src/turn-orchestrator/run-request.js'; + +describe('parseRunRequest', () => { + it('maps persisted run::start fields with defaults for missing keys', () => { + expect(parseRunRequest({})).toEqual({ + provider: '', + model: '', + mode: null, + system_prompt: '', + }); + }); + + it('passes through provided string fields', () => { + expect(parseRunRequest({ provider: 'openai', model: 'gpt-4', system_prompt: 'hi' })).toEqual({ + provider: 'openai', + model: 'gpt-4', + mode: null, + system_prompt: 'hi', + }); + }); + + it('rejects invalid mode values and accepts valid ones', () => { + expect(parseRunRequest({ mode: 'invalid' }).mode).toBeNull(); + expect(parseRunRequest({ mode: 'plan' }).mode).toBe('plan'); + expect(parseRunRequest({ mode: 'ask' }).mode).toBe('ask'); + expect(parseRunRequest({ mode: 'agent' }).mode).toBe('agent'); + }); + + it('coerces non-string fields to defaults', () => { + expect(parseRunRequest({ provider: 123, model: null, system_prompt: {} })).toEqual({ + provider: '', + model: '', + mode: null, + system_prompt: '', + }); + }); +}); diff --git a/harness/tests/turn-orchestrator/run-start.test.ts b/harness/tests/turn-orchestrator/run-start.test.ts index c429cbc0..7e08a4ad 100644 --- a/harness/tests/turn-orchestrator/run-start.test.ts +++ b/harness/tests/turn-orchestrator/run-start.test.ts @@ -1,53 +1,167 @@ -import { describe, expect, it } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { FUNCTION_ID, SYNC_FUNCTION_ID, execute } from '../../src/turn-orchestrator/run-start.js'; +import { describe, expect, it, vi } from 'vitest'; +import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; +import { execute, register } from '../../src/turn-orchestrator/run-start.js'; +import { RunStartPayloadSchema } from '../../src/turn-orchestrator/schemas.js'; -type TriggerCall = { function_id: string; payload: unknown }; +type TriggerCall = { function_id: string; payload: unknown; action?: unknown }; function fakeIii(): { iii: ISdk; calls: TriggerCall[] } { const calls: TriggerCall[] = []; const iii = { - trigger: async (req: { function_id: string; payload: T }): Promise => { - calls.push({ function_id: req.function_id, payload: req.payload }); + trigger: async (req: { + function_id: string; + payload: T; + action?: unknown; + }): Promise => { + calls.push({ function_id: req.function_id, payload: req.payload, action: req.action }); return null as R; }, + registerFunction: vi.fn(), } as unknown as ISdk; return { iii, calls }; } -describe('run-start constants', () => { - it('exposes only start function ids', () => { - expect(FUNCTION_ID).toBe('run::start'); - expect(SYNC_FUNCTION_ID).toBe('run::start_and_wait'); +/** Shape console/web sends inside harness::trigger payload (real.ts). */ +const consoleRunStartPayload = { + session_id: 'sess-1', + message_id: 'msg-1', + provider: 'anthropic', + model: 'claude-sonnet-4-6', + mode: 'agent' as const, + messages: [ + { + role: 'user' as const, + content: [{ type: 'text' as const, text: 'hi' }], + timestamp: Date.now(), + }, + ], +}; + +/** Minimal shape harness/trigger.test.ts forwards to run::start. */ +const harnessRunStartPayload = { + session_id: 'sess-1', + provider: 'anthropic', + model: 'claude-sonnet-4-6', + messages: [ + { + role: 'user' as const, + content: [{ type: 'text' as const, text: 'hi' }], + timestamp: Date.now(), + }, + ], +}; + +describe('RunStartPayloadSchema', () => { + it('accepts the console/web payload shape', () => { + expect(RunStartPayloadSchema.parse(consoleRunStartPayload)).toMatchObject({ + session_id: 'sess-1', + message_id: 'msg-1', + provider: 'anthropic', + model: 'claude-sonnet-4-6', + mode: 'agent', + system_prompt: '', + messages: consoleRunStartPayload.messages, + }); + }); + + it('accepts the minimal harness::trigger test payload with defaults', () => { + expect(RunStartPayloadSchema.parse(harnessRunStartPayload)).toMatchObject({ + session_id: 'sess-1', + provider: 'anthropic', + model: 'claude-sonnet-4-6', + system_prompt: '', + messages: harnessRunStartPayload.messages, + }); + }); + + it('rejects harness::trigger envelope shapes — run::start receives payload only', () => { + expect(() => + RunStartPayloadSchema.parse({ + session_id: 'outer', + message_id: 'msg-1', + payload: harnessRunStartPayload, + }), + ).toThrow(); + }); + + it('rejects nested payload/data wrappers (no in-repo caller uses them)', () => { + expect(() => RunStartPayloadSchema.parse({ data: harnessRunStartPayload })).toThrow(); + expect(() => RunStartPayloadSchema.parse({ payload: harnessRunStartPayload })).toThrow(); + }); + + it('rejects missing or invalid required fields', () => { + expect(() => RunStartPayloadSchema.parse({})).toThrow(); + expect(() => RunStartPayloadSchema.parse({ session_id: '' })).toThrow(); + expect(() => RunStartPayloadSchema.parse({ session_id: 's1' })).toThrow(); + expect(() => RunStartPayloadSchema.parse({ session_id: 's1', provider: 'p' })).toThrow(); + expect(() => + RunStartPayloadSchema.parse({ session_id: 42, provider: 'p', model: 'm' }), + ).toThrow(); + expect(() => + RunStartPayloadSchema.parse({ session_id: 's1', provider: 'p', model: 'm', mode: 'invalid' }), + ).toThrow(); + expect(() => RunStartPayloadSchema.parse(null)).toThrow(); + expect(() => RunStartPayloadSchema.parse(undefined)).toThrow(); + }); +}); + +describe('register', () => { + it('registers run::start and parses payload at the unknown boundary', async () => { + const registered = new Map Promise>(); + const iii = { + registerFunction: (fnId: string, handler: (payload: unknown) => Promise) => { + registered.set(fnId, handler); + }, + trigger: vi.fn(async () => null), + } as unknown as ISdk; + + register(iii); + const handler = registered.get('run::start'); + expect(handler).toBeDefined(); + + const result = await handler!(harnessRunStartPayload); + expect(result).toEqual({ session_id: 'sess-1' }); + }); + + it('rejects invalid payloads at register boundary', async () => { + const registered = new Map Promise>(); + const iii = { + registerFunction: (fnId: string, handler: (payload: unknown) => Promise) => { + registered.set(fnId, handler); + }, + trigger: vi.fn(async () => null), + } as unknown as ISdk; + + register(iii); + const handler = registered.get('run::start'); + expect(handler).toBeDefined(); + + await expect(handler!({ provider: 'openai' })).rejects.toThrow(); }); }); describe('execute', () => { - it('saves initial session state to wake the reactive step trigger', async () => { + it('saves initial session state and enqueues turn::provisioning via saveRecord wake', async () => { const { iii, calls } = fakeIii(); - await execute(iii, { - session_id: 's1', - provider: 'openai', - model: 'gpt-test', - messages: [{ role: 'user', content: [{ type: 'text', text: 'hi' }], timestamp: 1 }], - }); + const result = await execute(iii, RunStartPayloadSchema.parse(harnessRunStartPayload)); + + expect(result).toEqual({ session_id: 'sess-1' }); - // The reactive wake is now state-driven: the turn_state write at - // state='provisioning' is what the on-record-written trigger picks up. - // run-start no longer self-publishes turn::step_requested. const turnStateSet = calls.find( (c) => c.function_id === 'state::set' && (c.payload as { scope?: string; key?: string }).scope === 'agent' && - (c.payload as { scope?: string; key?: string }).key === 'session/s1/turn_state', + (c.payload as { scope?: string; key?: string }).key === 'session/sess-1/turn_state', ); expect(turnStateSet).toBeDefined(); expect((turnStateSet?.payload as { value: { state: string } }).value.state).toBe( 'provisioning', ); - const publish = calls.find((c) => c.function_id === 'iii::durable::publish'); - expect(publish).toBeUndefined(); + const wake = calls.find((c) => c.function_id === 'turn::provisioning'); + expect(wake).toBeDefined(); + expect(wake?.payload).toEqual({ session_id: 'sess-1' }); + expect(wake?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); }); }); diff --git a/harness/tests/turn-orchestrator/run-transition.test.ts b/harness/tests/turn-orchestrator/run-transition.test.ts new file mode 100644 index 00000000..b25761fb --- /dev/null +++ b/harness/tests/turn-orchestrator/run-transition.test.ts @@ -0,0 +1,98 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; +import { + type TurnStateRecord, + newRecord, + transitionTo, +} from '../../src/turn-orchestrator/state.js'; + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('runTransition', () => { + it('throws when the session record is missing, without running the handler', async () => { + vi.spyOn(persistence, 'loadRecord').mockResolvedValue(null); + const handle = vi.fn(); + + await expect( + runTransition({} as ISdk, 'provisioning', handle, { session_id: 'missing' }), + ).rejects.toThrow('turn::provisioning invariant: missing session missing'); + expect(handle).not.toHaveBeenCalled(); + }); + + it('returns a stale skip without running the handler or saving', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); + const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const handle = vi.fn(); + + const result = await runTransition({} as ISdk, 'provisioning', handle, { session_id: 's1' }); + + expect(result).toEqual({ ok: true, skipped: true, reason: 'stale' }); + expect(handle).not.toHaveBeenCalled(); + expect(saveRecord).not.toHaveBeenCalled(); + }); + + it('runs the handler and threads the pre-mutation snapshot into saveRecord', async () => { + const iii = {} as ISdk; + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; + vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); + const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const handle = vi.fn(async (_iii: ISdk, r: TurnStateRecord) => { + transitionTo(r, 'assistant_streaming'); + }); + + const result = await runTransition(iii, 'provisioning', handle, { session_id: 's1' }); + + expect(handle).toHaveBeenCalledWith(iii, rec); + expect(saveRecord).toHaveBeenCalledWith( + iii, + rec, + expect.objectContaining({ state: 'provisioning' }), + ); + expect(result).toEqual({ + ok: true, + from_state: 'provisioning', + to_state: 'assistant_streaming', + }); + }); + + it('snapshots a deep copy so in-place handler mutation does not leak into previous', async () => { + const iii = {} as ISdk; + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'function_execute' }; + rec.awaiting_approval = []; + vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); + let captured: TurnStateRecord | null | undefined; + vi.spyOn(persistence, 'saveRecord').mockImplementation(async (_i, _r, previous) => { + captured = previous; + }); + const handle = vi.fn(async (_iii: ISdk, r: TurnStateRecord) => { + r.awaiting_approval?.push({ function_call_id: 'fc-1', function_id: 'f', args: {} }); + transitionTo(r, 'function_awaiting_approval'); + }); + + await runTransition(iii, 'function_execute', handle, { session_id: 's1' }); + + // The snapshot reflects state BEFORE the handler ran, even though the + // handler mutated rec.awaiting_approval in place. + expect(captured?.state).toBe('function_execute'); + expect(captured?.awaiting_approval).toEqual([]); + }); + + it('wraps handler failures as transition errors tagged with the from-state', async () => { + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'steering_check' }; + vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); + const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const handle = vi.fn(async () => { + throw new Error('boom'); + }); + + await expect( + runTransition({} as ISdk, 'steering_check', handle, { session_id: 's1' }), + ).rejects.toThrow('transition from steering_check failed: Error: boom'); + expect(saveRecord).not.toHaveBeenCalled(); + }); +}); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index 9b0b7b91..574b6d57 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -1,6 +1,5 @@ import { describe, expect, it } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; -import type { TurnOrchestratorConfig } from '../../src/turn-orchestrator/config.js'; import type { AwaitingApprovalEntry, TurnState, @@ -13,7 +12,7 @@ import { transitionTo, turnStateKey, } from '../../src/turn-orchestrator/state.js'; -import { step } from '../../src/turn-orchestrator/transitions.js'; +import { handleAwaitingApproval } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; describe('TurnStateRecord', () => { it('starts in provisioning', () => { @@ -64,14 +63,13 @@ describe('awaiting_approval field', () => { }); }); -describe('step dispatches function_awaiting_approval', () => { - it('runs the awaiting-approval handler for that state', async () => { - const cfg = {} as TurnOrchestratorConfig; +describe('handleAwaitingApproval with empty queue', () => { + it('advances to function_execute when awaiting_approval is empty', async () => { const rec = newRecord('s1'); transitionTo(rec, 'function_awaiting_approval'); rec.awaiting_approval = []; - await step({} as ISdk, cfg, rec); + await handleAwaitingApproval({} as ISdk, rec); expect(rec.state).toBe('function_execute'); }); diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index 5dcd090d..28477cb1 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -1,24 +1,216 @@ -import { describe, expect, it } from 'vitest'; -import { route } from '../../src/turn-orchestrator/states/steering.js'; +import { afterEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import type { AgentMessage } from '../../src/types/agent-message.js'; +import * as events from '../../src/turn-orchestrator/events.js'; +import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { + abortSignalKey, + newRecord, + type TurnStateRecord, +} from '../../src/turn-orchestrator/state.js'; +import { handleSteering, route } from '../../src/turn-orchestrator/states/steering-check.js'; + +afterEach(() => { + vi.restoreAllMocks(); +}); describe('steering route()', () => { - it('abort wins over everything', () => { - expect(route(true, true, true, true)).toBe('abort'); + it.each([ + [true, true, true, true, 'abort'], + [true, false, false, false, 'abort'], + [false, true, true, true, 'steering'], + [false, true, false, false, 'steering'], + [false, false, true, true, 'followup'], + [false, false, true, false, 'followup'], + [false, false, false, true, 'continue_after_function'], + [false, false, false, false, 'end_turn'], + ] as const)('route(%s, %s, %s, %s) -> %s', (abort, has_steering, has_followup, has_function_results, expected) => { + expect(route(abort, has_steering, has_followup, has_function_results)).toBe(expected); + }); +}); + +function userMessage(text: string): AgentMessage { + return { role: 'user', content: [{ type: 'text', text }] }; +} + +function makeIii( + opts: { abort?: boolean; steeringItems?: AgentMessage[]; followupItems?: AgentMessage[] } = {}, +) { + const { abort = false, steeringItems = [], followupItems = [] } = opts; + const drainCalls: Array<{ name: string; session_id: string }> = []; + + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + if (req.function_id === 'state::get') { + const p = req.payload as { key: string }; + if (p.key.endsWith('/abort_signal')) return abort ? true : null; + return null; + } + if (req.function_id === 'session-inbox::drain') { + const p = req.payload as { name: string; session_id: string }; + drainCalls.push(p); + if (p.name === 'steering') return { items: steeringItems }; + if (p.name === 'followup') return { items: followupItems }; + return { items: [] }; + } + if (req.function_id === 'state::update') return { old_value: 0 }; + if (req.function_id === 'stream::set') return null; + return null; + }), + } as unknown as ISdk; + + return { iii, drainCalls }; +} + +function steeringRec( + session_id: string, + overrides: Partial = {}, +): TurnStateRecord { + const rec = newRecord(session_id); + rec.state = 'steering_check'; + return { ...rec, ...overrides }; +} + +describe('handleSteering', () => { + it('abort: persists aborted assistant, emits turn_end, transitions to tearing_down', async () => { + const { iii } = makeIii({ abort: true }); + const rec = steeringRec('s1'); + const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('tearing_down'); + expect(rec.turn_end_emitted).toBe(true); + expect(rec.last_assistant?.stop_reason).toBe('aborted'); + expect(loadSpy).toHaveBeenCalledWith(iii, 's1'); + expect(saveSpy).toHaveBeenCalledWith( + iii, + 's1', + expect.arrayContaining([expect.objectContaining({ stop_reason: 'aborted' })]), + ); + expect(emitSpy).toHaveBeenCalledWith( + iii, + 's1', + expect.objectContaining({ + type: 'turn_end', + message: expect.objectContaining({ stop_reason: 'aborted' }), + }), + ); + }); + + it('abort: skips inbox drains', async () => { + const { iii, drainCalls } = makeIii({ + abort: true, + steeringItems: [userMessage('steer')], + followupItems: [userMessage('follow')], + }); + const rec = steeringRec('s1'); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(drainCalls).toHaveLength(0); }); - it('steering takes precedence over followup and function results', () => { - expect(route(false, true, true, true)).toBe('steering'); + it('steering: appends drained messages and transitions to assistant_streaming', async () => { + const steeringItems = [userMessage('steer-me')]; + const { iii } = makeIii({ steeringItems }); + const rec = steeringRec('s1', { + function_results: [{ role: 'function_result', content: [] }] as never, + }); + const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([userMessage('prior')]); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + expect(rec.turn_end_emitted).toBe(true); + expect(saveSpy).toHaveBeenCalledWith(iii, 's1', [userMessage('prior'), ...steeringItems]); + expect(loadSpy).toHaveBeenCalled(); + }); + + it('followup: drains followup when steering queue is empty', async () => { + const followupItems = [userMessage('follow-up')]; + const { iii, drainCalls } = makeIii({ followupItems }); + const rec = steeringRec('s1'); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(drainCalls.map((c) => c.name)).toEqual(['steering', 'followup']); + expect(saveSpy).toHaveBeenCalledWith(iii, 's1', followupItems); + }); + + it('followup: skipped when steering queue has items', async () => { + const { iii, drainCalls } = makeIii({ + steeringItems: [userMessage('steer')], + followupItems: [userMessage('follow')], + }); + const rec = steeringRec('s1'); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(drainCalls.map((c) => c.name)).toEqual(['steering']); + expect(rec.state).toBe('assistant_streaming'); }); - it('followup takes precedence over function results', () => { - expect(route(false, false, true, true)).toBe('followup'); + it('continue_after_function: clears function_results without reloading messages', async () => { + const { iii } = makeIii(); + const rec = steeringRec('s1', { + function_results: [{ role: 'function_result', content: [] }] as never, + turn_end_emitted: true, + }); + const loadSpy = vi.spyOn(persistence, 'loadMessages'); + const emitSpy = vi.spyOn(events, 'emit'); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + expect(loadSpy).not.toHaveBeenCalled(); + expect(emitSpy).not.toHaveBeenCalled(); }); - it('function results route to continue_after_function', () => { - expect(route(false, false, false, true)).toBe('continue_after_function'); + it('end_turn: emits turn_end once and transitions to tearing_down', async () => { + const { iii } = makeIii(); + const rec = steeringRec('s1'); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); + const loadSpy = vi.spyOn(persistence, 'loadMessages'); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('tearing_down'); + expect(rec.turn_end_emitted).toBe(true); + expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); + expect(loadSpy).not.toHaveBeenCalled(); }); - it('nothing pending -> end_turn', () => { - expect(route(false, false, false, false)).toBe('end_turn'); + it('reads abort via state::get on abort_signal key', async () => { + const { iii } = makeIii({ abort: true }); + const rec = steeringRec('s1'); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(iii.trigger).toHaveBeenCalledWith( + expect.objectContaining({ + function_id: 'state::get', + payload: { scope: 'agent', key: abortSignalKey('s1') }, + }), + ); }); }); diff --git a/harness/tests/turn-orchestrator/tearing-down.test.ts b/harness/tests/turn-orchestrator/tearing-down.test.ts index 763fb420..cc3ed045 100644 --- a/harness/tests/turn-orchestrator/tearing-down.test.ts +++ b/harness/tests/turn-orchestrator/tearing-down.test.ts @@ -1,5 +1,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; +import type { AgentMessage } from '../../src/types/agent-message.js'; +import * as events from '../../src/turn-orchestrator/events.js'; import * as persistence from '../../src/turn-orchestrator/persistence.js'; import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; import { handleTearingDown } from '../../src/turn-orchestrator/states/tearing-down.js'; @@ -30,30 +32,16 @@ afterEach(() => { }); describe('handleTearingDown', () => { - it('proceeds with normal teardown without approval::consume resurrection', async () => { + it('transitions to stopped and emits agent_end with session messages', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'tearing_down' }; - const { iii, calls } = fakeIii(); - vi.spyOn(persistence, 'loadSandboxId').mockResolvedValue(null); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + const messages: AgentMessage[] = [{ role: 'user', content: 'hi' }]; + const { iii } = fakeIii(); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue(messages); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleTearingDown(iii, rec); expect(rec.state).toBe('stopped'); - expect(calls.some((c) => c.function_id === 'approval::consume')).toBe(false); - expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); - }); - - it('stops the sandbox before ending the agent when a sandbox id exists', async () => { - const rec: TurnStateRecord = { ...newRecord('s1'), state: 'tearing_down' }; - const { iii, calls } = fakeIii(); - vi.spyOn(persistence, 'loadSandboxId').mockResolvedValue('sandbox-1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - - await handleTearingDown(iii, rec); - - const sandboxCall = calls.find((c) => c.function_id === 'sandbox::stop'); - expect(sandboxCall?.payload).toEqual({ sandbox_id: 'sandbox-1', wait: true }); - expect(sandboxCall?.timeoutMs).toBe(60_000); - expect(rec.state).toBe('stopped'); + expect(emitSpy).toHaveBeenCalledWith(iii, 's1', { type: 'agent_end', messages }); }); }); diff --git a/harness/tests/turn-orchestrator/turn-state-write.test.ts b/harness/tests/turn-orchestrator/turn-state-write.test.ts new file mode 100644 index 00000000..468500c1 --- /dev/null +++ b/harness/tests/turn-orchestrator/turn-state-write.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { emitTurnStateChanged } from '../../src/turn-orchestrator/turn-state-write.js'; + +function fakeIii(): { iii: ISdk; emits: Array<{ session_id: string; event: unknown }> } { + const emits: Array<{ session_id: string; event: unknown }> = []; + const iii = { + trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { + if (function_id === 'stream::set') { + const p = payload as { group_id: string; data: unknown }; + emits.push({ session_id: p.group_id, event: p.data }); + return null; + } + if (function_id === 'state::update') { + return { old_value: 0 }; + } + return null; + }), + } as unknown as ISdk; + return { iii, emits }; +} + +describe('emitTurnStateChanged', () => { + it('emits turn_state_changed on agent::events with group_id = session_id', async () => { + const { iii, emits } = fakeIii(); + await emitTurnStateChanged( + iii, + 'sess-a', + 'state:updated', + { state: 'function_awaiting_approval', awaiting_approval: [] }, + { state: 'function_execute', awaiting_approval: null }, + ); + expect(emits).toHaveLength(1); + expect(emits[0]?.session_id).toBe('sess-a'); + expect(emits[0]?.event).toMatchObject({ + type: 'turn_state_changed', + event_type: 'state:updated', + new_value: { state: 'function_awaiting_approval' }, + old_value: { state: 'function_execute' }, + }); + }); + + it('swallows emit failures (logs only, never rethrows)', async () => { + const iii = { + trigger: vi.fn(async () => { + throw new Error('stream::set down'); + }), + } as unknown as ISdk; + await expect( + emitTurnStateChanged(iii, 'sess-a', 'state:created', { state: 'provisioning' }), + ).resolves.toBeUndefined(); + }); + + it('omits old_value from the emitted event when state:created', async () => { + const { iii, emits } = fakeIii(); + await emitTurnStateChanged(iii, 'sess-a', 'state:created', { state: 'provisioning' }); + expect(emits).toHaveLength(1); + const event = emits[0]?.event as Record; + expect(event.type).toBe('turn_state_changed'); + expect('old_value' in event).toBe(false); + }); +}); diff --git a/harness/tests/turn-orchestrator/wake.test.ts b/harness/tests/turn-orchestrator/wake.test.ts new file mode 100644 index 00000000..115fa729 --- /dev/null +++ b/harness/tests/turn-orchestrator/wake.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it, vi } from 'vitest'; +import { TriggerAction } from '../../src/runtime/iii.js'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; +import { shouldWakeStep, wakeFromRecord, wakeState } from '../../src/turn-orchestrator/wake.js'; + +describe('shouldWakeStep', () => { + it('accepts first write to a stepable state', () => { + expect(shouldWakeStep(null, 'provisioning')).toBe(true); + }); + + it('accepts transitions to another stepable state', () => { + expect(shouldWakeStep('provisioning', 'assistant_streaming')).toBe(true); + expect(shouldWakeStep('assistant_finished', 'function_execute')).toBe(true); + }); + + it('rejects terminal state (stopped)', () => { + expect(shouldWakeStep('tearing_down', 'stopped')).toBe(false); + }); + + it('rejects function_awaiting_approval (orchestrator parks here)', () => { + expect(shouldWakeStep('function_execute', 'function_awaiting_approval')).toBe(false); + }); + + it('rejects same-state writes', () => { + expect(shouldWakeStep('function_execute', 'function_execute')).toBe(false); + }); +}); + +describe('wakeState', () => { + it('enqueues turn::{state} on the turn-step FIFO queue', async () => { + const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { + triggers.push(req); + return null; + }), + } as unknown as ISdk; + + await wakeState(iii, 'sess-abc', 'assistant_streaming'); + + expect(triggers).toHaveLength(1); + expect(triggers[0]?.function_id).toBe('turn::assistant_streaming'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); + expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); + }); + + it('swallows enqueue failures (logs only, never rethrows)', async () => { + const iii = { + trigger: vi.fn(async () => { + throw new Error('queue down'); + }), + } as unknown as ISdk; + + await expect(wakeState(iii, 'sess-abc', 'provisioning')).resolves.toBeUndefined(); + }); +}); + +describe('wakeFromRecord', () => { + it('enqueues turn::{currentState} from persisted record', async () => { + const rec = newRecord('sess-x'); + rec.state = 'function_awaiting_approval'; + const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { + if (req.function_id === 'state::get') { + return rec; + } + triggers.push(req); + return null; + }), + } as unknown as ISdk; + + await wakeFromRecord(iii, 'sess-x'); + + expect(triggers).toHaveLength(1); + expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-x' }); + }); + + it('no-ops when session is stopped', async () => { + const rec = newRecord('sess-y'); + rec.state = 'stopped'; + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') return rec; + return null; + }), + } as unknown as ISdk; + + await wakeFromRecord(iii, 'sess-y'); + expect(iii.trigger).toHaveBeenCalledTimes(1); + }); +}); diff --git a/iii-permissions.yaml b/iii-permissions.yaml index 0440daad..591be678 100644 --- a/iii-permissions.yaml +++ b/iii-permissions.yaml @@ -21,7 +21,6 @@ rules: - '!oauth::anthropic::login' - '!oauth::openai-codex::login' - '!run::start' - - '!run::start_and_wait' - '!router::stream_assistant' - '!router::abort'