From ea6e9b665ef1caf2eb1dc487fcc6a589837de026 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 16:55:44 -0300 Subject: [PATCH 01/41] refactor: remove isTerminal function and update state checks in tests - Eliminated the isTerminal function from the state module, simplifying the state management logic. - Updated tests to directly check the state property instead of using isTerminal, ensuring clarity and consistency in state validation. - Adjusted the function_id generation in wakeState to use a template string for better readability. --- .../src/turn-orchestrator/run-transition.ts | 5 +++-- harness/src/turn-orchestrator/state.ts | 18 ------------------ harness/src/turn-orchestrator/wake.ts | 4 ++-- harness/tests/turn-orchestrator/state.test.ts | 7 +++---- 4 files changed, 8 insertions(+), 26 deletions(-) diff --git a/harness/src/turn-orchestrator/run-transition.ts b/harness/src/turn-orchestrator/run-transition.ts index 0f142761..70a50a66 100644 --- a/harness/src/turn-orchestrator/run-transition.ts +++ b/harness/src/turn-orchestrator/run-transition.ts @@ -13,7 +13,7 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import * as persistence from './persistence.js'; import { type TurnStepPayload, type TurnStepResult } from './schemas.js'; -import { type TurnState, type TurnStateRecord, cloneRecord } from './state.js'; +import { type TurnState, type TurnStateRecord } from './state.js'; export type TransitionHandler = (iii: ISdk, rec: TurnStateRecord) => Promise; @@ -41,7 +41,8 @@ export async function runTransition( const skipped = staleSkipResult(state, rec); if (skipped) return skipped; - const previous = cloneRecord(rec); + // JSON round-trip matches a persisted reload — snapshot before handler mutates. 
+ const previous = JSON.parse(JSON.stringify(rec)) as TurnStateRecord; const from_state = rec.state; try { await handle(iii, rec); diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 5d080693..b9a585c6 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -59,24 +59,6 @@ export function transitionTo(rec: TurnStateRecord, next: TurnState): void { rec.updated_at_ms = Date.now(); } -/** - * Deep copy of a record via JSON round-trip — faithful to a `state::get` - * reload (the record is persisted as JSON), so the runner can snapshot the - * pre-mutation record and thread it into `saveRecord` instead of paying a - * second `state::get` to recover the previous state. - */ -export function cloneRecord(rec: TurnStateRecord): TurnStateRecord { - return JSON.parse(JSON.stringify(rec)) as TurnStateRecord; -} - -export function isTerminal(rec: TurnStateRecord): boolean { - return rec.state === 'stopped'; -} - -export function turnFnId(state: TurnState): string { - return `turn::${state}`; -} - export const messagesKey = (sid: string) => `session/${sid}/messages`; export const turnStateKey = (sid: string) => `session/${sid}/turn_state`; export const runRequestKey = (sid: string) => `session/${sid}/run_request`; diff --git a/harness/src/turn-orchestrator/wake.ts b/harness/src/turn-orchestrator/wake.ts index ec57e29b..0b850805 100644 --- a/harness/src/turn-orchestrator/wake.ts +++ b/harness/src/turn-orchestrator/wake.ts @@ -6,7 +6,7 @@ import { TriggerAction, type ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import * as persistence from './persistence.js'; -import { turnFnId, type TurnState, type TurnStateRecord } from './state.js'; +import { type TurnState, type TurnStateRecord } from './state.js'; export const TURN_STEP_QUEUE = 'turn-step'; @@ -28,7 +28,7 @@ export function shouldRunStep(rec: TurnStateRecord | null): boolean { export async function 
wakeState(iii: ISdk, session_id: string, state: TurnState): Promise { try { await iii.trigger({ - function_id: turnFnId(state), + function_id: `turn::${state}`, payload: { session_id }, action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), }); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index 574b6d57..bddb1be9 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -6,7 +6,6 @@ import type { TurnStateRecord, } from '../../src/turn-orchestrator/state.js'; import { - isTerminal, messagesKey, newRecord, transitionTo, @@ -20,13 +19,13 @@ describe('TurnStateRecord', () => { expect(r.state).toBe('provisioning'); expect(r.session_id).toBe('s1'); expect(r.max_turns).toBe(32); - expect(isTerminal(r)).toBe(false); + expect(r.state).not.toBe('stopped'); }); it('transitionTo stopped marks terminal', () => { const r = newRecord('s1'); transitionTo(r, 'stopped'); - expect(isTerminal(r)).toBe(true); + expect(r.state).toBe('stopped'); }); }); @@ -40,7 +39,7 @@ describe('function_awaiting_approval state', () => { it('is non-terminal', () => { const rec = newRecord('s1'); transitionTo(rec, 'function_awaiting_approval' as TurnState); - expect(isTerminal(rec)).toBe(false); + expect(rec.state).not.toBe('stopped'); }); }); From 7af8d6b6e0727c02bea8a9562eece3219037565c Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 17:03:49 -0300 Subject: [PATCH 02/41] refactor(turn-orchestrator): add failed state, work field; own entry types --- harness/src/turn-orchestrator/persistence.ts | 20 +++---------- harness/src/turn-orchestrator/state.ts | 29 +++++++++++++++++-- harness/src/turn-orchestrator/wake.ts | 2 +- harness/tests/turn-orchestrator/state.test.ts | 16 ++++++++++ 4 files changed, 48 insertions(+), 19 deletions(-) diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts index 9a1c785e..b6ebfdd1 
100644 --- a/harness/src/turn-orchestrator/persistence.ts +++ b/harness/src/turn-orchestrator/persistence.ts @@ -8,6 +8,8 @@ import type { AgentMessage } from '../types/agent-message.js'; import type { FunctionCall, FunctionResult } from '../types/function.js'; import { type RunRequest, parseRunRequest } from './run-request.js'; import { + type ExecutedEntry, + type PreparedEntry, type TurnStateRecord, functionSchemasKey, lastSessionTreeLenKey, @@ -18,6 +20,8 @@ import { import { emitTurnStateChanged } from './turn-state-write.js'; import { shouldWakeStep, wakeState } from './wake.js'; +export type { ExecutedEntry, PreparedEntry } from './state.js'; + const SCOPE = 'agent'; async function stateGet(iii: ISdk, key: string): Promise { @@ -195,22 +199,6 @@ async function stagingGet(iii: ISdk, session_id: string, suffix: string): Promis return Array.isArray(v) ? v : []; } -export type PreparedEntry = { - function_call: FunctionCall; - blocked: FunctionResult | null; - pre_approved?: boolean; -}; -export type ExecutedEntry = { - function_call: FunctionCall; - result: FunctionResult; - is_error: boolean; - /** Wall-clock ms between the matching function_execution_start and end. - * Persisted so resumed runs replay the original timing instead of the - * ~0ms it takes to re-emit the end event. Defaults to 0 in - * loadExecutedCalls so records persisted by an older binary survive - * the upgrade. 
*/ - duration_ms: number; -}; export async function savePreparedCalls( iii: ISdk, diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index b9a585c6..a1fd4487 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -4,7 +4,7 @@ */ import type { AssistantMessage, FunctionResultMessage } from '../types/agent-message.js'; -import type { FunctionCall } from '../types/function.js'; +import type { FunctionCall, FunctionResult } from '../types/function.js'; export type TurnState = | 'provisioning' @@ -14,7 +14,8 @@ export type TurnState = | 'function_awaiting_approval' | 'steering_check' | 'tearing_down' - | 'stopped'; + | 'stopped' + | 'failed'; export type AwaitingApprovalEntry = { function_call_id: string; @@ -22,6 +23,24 @@ export type AwaitingApprovalEntry = { args: unknown; }; +export type PreparedEntry = { + function_call: FunctionCall; + blocked: FunctionResult | null; + pre_approved?: boolean; +}; + +export type ExecutedEntry = { + function_call: FunctionCall; + result: FunctionResult; + is_error: boolean; + duration_ms: number; +}; + +export type TurnWork = { + batch: PreparedEntry[]; + results: ExecutedEntry[]; +}; + export type TurnStateRecord = { session_id: string; state: TurnState; @@ -36,6 +55,8 @@ export type TurnStateRecord = { awaiting_approval?: AwaitingApprovalEntry[]; /** Set during assistant_streaming when message_update deltas were emitted. 
*/ assistant_body_streamed?: boolean; + work?: TurnWork; + error?: { kind: string; message: string }; }; export function newRecord(session_id: string, max_turns?: number): TurnStateRecord { @@ -59,6 +80,10 @@ export function transitionTo(rec: TurnStateRecord, next: TurnState): void { rec.updated_at_ms = Date.now(); } +export function isTerminal(rec: TurnStateRecord): boolean { + return rec.state === 'stopped' || rec.state === 'failed'; +} + export const messagesKey = (sid: string) => `session/${sid}/messages`; export const turnStateKey = (sid: string) => `session/${sid}/turn_state`; export const runRequestKey = (sid: string) => `session/${sid}/run_request`; diff --git a/harness/src/turn-orchestrator/wake.ts b/harness/src/turn-orchestrator/wake.ts index 0b850805..fdc75fc4 100644 --- a/harness/src/turn-orchestrator/wake.ts +++ b/harness/src/turn-orchestrator/wake.ts @@ -10,7 +10,7 @@ import { type TurnState, type TurnStateRecord } from './state.js'; export const TURN_STEP_QUEUE = 'turn-step'; -const NON_STEPABLE_STATES = new Set(['stopped', 'function_awaiting_approval']); +const NON_STEPABLE_STATES = new Set(['stopped', 'failed', 'function_awaiting_approval']); /** True when a persisted turn_state transition should enqueue `turn::{newState}`. 
*/ export function shouldWakeStep(previousState: TurnState | null, newState: TurnState): boolean { diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index bddb1be9..e78dd510 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -6,6 +6,7 @@ import type { TurnStateRecord, } from '../../src/turn-orchestrator/state.js'; import { + isTerminal, messagesKey, newRecord, transitionTo, @@ -80,3 +81,18 @@ describe('state keys', () => { expect(messagesKey('abc')).toBe('session/abc/messages'); }); }); + +describe('state record', () => { + it('newRecord starts in provisioning, non-terminal, no work', () => { + const r = newRecord('s1', 5); + expect(r.state).toBe('provisioning'); + expect(isTerminal(r)).toBe(false); + expect(r.work).toBeUndefined(); + expect(r.max_turns).toBe(5); + }); + + it('failed is terminal', () => { + const r: TurnStateRecord = { ...newRecord('s1'), state: 'failed', error: { kind: 'bug', message: 'x' } }; + expect(isTerminal(r)).toBe(true); + }); +}); From 3a4ca2762ec5f4e71262bd6e3400f8cd69a15409 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 18:25:41 -0300 Subject: [PATCH 03/41] feat(turn-orchestrator): terminal-vs-transient error model with failed state --- harness/src/turn-orchestrator/errors.ts | 10 ++++ .../src/turn-orchestrator/run-transition.ts | 56 ++++++++++++++++-- .../turn-orchestrator/run-transition.test.ts | 57 +++++++++++++++++-- 3 files changed, 113 insertions(+), 10 deletions(-) diff --git a/harness/src/turn-orchestrator/errors.ts b/harness/src/turn-orchestrator/errors.ts index 90be19e0..c8361f72 100644 --- a/harness/src/turn-orchestrator/errors.ts +++ b/harness/src/turn-orchestrator/errors.ts @@ -16,3 +16,13 @@ export class CompactionBusyError extends Error { this.name = 'CompactionBusyError'; } } + +/** Thrown by a handler for a genuinely retryable failure. 
runTransition + * re-throws it so the turn-step queue applies backoff/retry/DLQ. Any other + * throw is treated as terminal and routes the session to `failed`. */ +export class TransientError extends Error { + constructor(message: string) { + super(message); + this.name = 'TransientError'; + } +} diff --git a/harness/src/turn-orchestrator/run-transition.ts b/harness/src/turn-orchestrator/run-transition.ts index 70a50a66..d1b1dd78 100644 --- a/harness/src/turn-orchestrator/run-transition.ts +++ b/harness/src/turn-orchestrator/run-transition.ts @@ -3,17 +3,20 @@ * same load → null-check → stale-skip → handle → save sequence; this owns it so * each per-state file only contributes its handler. * - * The record loaded here is snapshotted before the handler mutates it and - * threaded into `saveRecord`, so the save path needs no extra `state::get` to - * compute the wake decision or the UI event's `old_value` — one read per - * transition instead of three. + * On an unexpected handler throw the session is routed to the `failed` + * terminal (acked, so the durable queue stops retrying) and the failure is + * surfaced to the UI. A handler may throw `TransientError` to opt into the + * queue's retry/backoff/DLQ instead. 
*/ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; +import type { AssistantMessage } from '../types/agent-message.js'; +import { TransientError } from './errors.js'; +import { emit } from './events.js'; import * as persistence from './persistence.js'; import { type TurnStepPayload, type TurnStepResult } from './schemas.js'; -import { type TurnState, type TurnStateRecord } from './state.js'; +import { type TurnState, type TurnStateRecord, transitionTo } from './state.js'; export type TransitionHandler = (iii: ISdk, rec: TurnStateRecord) => Promise; @@ -28,6 +31,46 @@ function staleSkipResult(expectedState: TurnState, rec: TurnStateRecord): TurnSt return { ok: true, skipped: true, reason: 'stale' }; } +async function failTransition( + iii: ISdk, + rec: TurnStateRecord, + previous: TurnStateRecord, + from_state: TurnState, + err: unknown, +): Promise { + const message = err instanceof Error ? err.message : String(err); + rec.error = { kind: 'transition_error', message: `from ${from_state}: ${message}` }; + transitionTo(rec, 'failed'); + await persistence.saveRecord(iii, rec, previous); + + // Surface the failure to the live UI (mirrors the graceful error path): + // message_complete{stop_reason:'error'} → the translator emits a `stop-reason` + // event so the user sees WHY; a bare agent_end renders as a silent end. + // error_kind:'transient' matches syntheticErrorAssistant's union usage; + // the UI translator only reads stop_reason, not error_kind. 
+ const failed: AssistantMessage = { + role: 'assistant', + content: [{ type: 'text', text: rec.error.message }], + stop_reason: 'error', + error_message: rec.error.message, + error_kind: 'transient', + usage: null, + model: '', + provider: '', + timestamp: Date.now(), + }; + await emit(iii, rec.session_id, { type: 'message_complete', message: failed, body_streamed: false }); + + const messages = await persistence.loadMessages(iii, rec.session_id); + await emit(iii, rec.session_id, { type: 'agent_end', messages }); + logger.error('transition failed; session marked failed', { + session_id: rec.session_id, + from_state, + err: message, + }); + return { ok: true, from_state, to_state: 'failed' }; +} + export async function runTransition( iii: ISdk, state: TurnState, @@ -47,7 +90,8 @@ export async function runTransition( try { await handle(iii, rec); } catch (err) { - throw new Error(`transition from ${from_state} failed: ${String(err)}`); + if (err instanceof TransientError) throw err; + return failTransition(iii, rec, previous, from_state, err); } await persistence.saveRecord(iii, rec, previous); return { ok: true, from_state, to_state: rec.state }; diff --git a/harness/tests/turn-orchestrator/run-transition.test.ts b/harness/tests/turn-orchestrator/run-transition.test.ts index b25761fb..6f0ab85e 100644 --- a/harness/tests/turn-orchestrator/run-transition.test.ts +++ b/harness/tests/turn-orchestrator/run-transition.test.ts @@ -1,6 +1,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { TransientError } from '../../src/turn-orchestrator/errors.js'; import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; import { type TurnStateRecord, @@ -82,17 +83,65 @@ describe('runTransition', () => { expect(captured?.awaiting_approval).toEqual([]); }); - it('wraps handler failures as transition errors tagged 
with the from-state', async () => { + it('routes an unexpected handler throw to failed without re-throwing', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'steering_check' }; vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); const handle = vi.fn(async () => { throw new Error('boom'); }); + // Should NOT re-throw — returns { ok: true, to_state: 'failed' } + const result = await runTransition({} as ISdk, 'steering_check', handle, { session_id: 's1' }); + expect(result).toMatchObject({ ok: true, to_state: 'failed' }); + expect(saveRecord).toHaveBeenCalled(); + }); +}); + +function fakeIii(record: unknown) { + const writes: Array<{ function_id: string; payload: any }> = []; + const iii = { + trigger: vi.fn(async ({ function_id, payload }: any) => { + writes.push({ function_id, payload }); + if (function_id === 'state::get' && payload.key.endsWith('/turn_state')) return record; + return null; + }), + } as any; + return { iii, writes }; +} + +describe('runTransition error model', () => { + const base = { + session_id: 's1', state: 'function_execute', turn_count: 1, + pending_function_calls: [], function_results: [], turn_end_emitted: false, + started_at_ms: 1, updated_at_ms: 1, + }; + + it('routes an unexpected throw to failed and does not re-throw', async () => { + const { iii, writes } = fakeIii({ ...base }); + const res = await runTransition(iii, 'function_execute', async () => { + throw new Error('boom'); + }, { session_id: 's1' }); + expect(res).toMatchObject({ ok: true, to_state: 'failed' }); + const saved = writes.find((w) => w.function_id === 'state::set' && w.payload.key.endsWith('/turn_state')); + expect(saved?.payload.value.state).toBe('failed'); + expect(saved?.payload.value.error.message).toContain('boom'); + const surfaced = writes.some((w) => + w.function_id === 'stream::set' + && 
w.payload.data?.type === 'message_complete' + && w.payload.data?.message?.stop_reason === 'error'); + expect(surfaced).toBe(true); + const ended = writes.some((w) => w.function_id === 'stream::set' && w.payload.data?.type === 'agent_end'); + expect(ended).toBe(true); + }); + + it('re-throws TransientError so the queue retries', async () => { + const { iii } = fakeIii({ ...base }); await expect( - runTransition({} as ISdk, 'steering_check', handle, { session_id: 's1' }), - ).rejects.toThrow('transition from steering_check failed: Error: boom'); - expect(saveRecord).not.toHaveBeenCalled(); + runTransition(iii, 'function_execute', async () => { + throw new TransientError('retry me'); + }, { session_id: 's1' }), + ).rejects.toThrow('retry me'); }); }); From 1023e7b489ccd33750b65c74244af7ae9d40791f Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 18:31:51 -0300 Subject: [PATCH 04/41] refactor(turn-orchestrator): function_execute owns batch via record.work --- harness/src/turn-orchestrator/persistence.ts | 6 + .../states/function-awaiting-approval.ts | 15 +- .../states/function-execute.ts | 74 +++-- .../awaiting-approval.test.ts | 120 ++++--- .../tests/turn-orchestrator/functions.test.ts | 305 ++++++++++-------- .../turn-orchestrator/persistence.test.ts | 18 ++ 6 files changed, 332 insertions(+), 206 deletions(-) create mode 100644 harness/tests/turn-orchestrator/persistence.test.ts diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts index b6ebfdd1..5a82d6e9 100644 --- a/harness/src/turn-orchestrator/persistence.ts +++ b/harness/src/turn-orchestrator/persistence.ts @@ -54,6 +54,12 @@ export async function loadRecord(iii: ISdk, session_id: string): Promise { + await stateSet(iii, turnStateKey(rec.session_id), rec); +} + /** * Persist turn_state and emit UI event — no FSM wake (mid-handler saves). 
* Pass `previous` (the pre-write record) to skip the `state::get` that would diff --git a/harness/src/turn-orchestrator/states/function-awaiting-approval.ts b/harness/src/turn-orchestrator/states/function-awaiting-approval.ts index 2ddcc936..bdda1d40 100644 --- a/harness/src/turn-orchestrator/states/function-awaiting-approval.ts +++ b/harness/src/turn-orchestrator/states/function-awaiting-approval.ts @@ -10,7 +10,6 @@ import type { z } from 'zod'; import type { ISdk } from '../../runtime/iii.js'; import type { FunctionResult } from '../../types/function.js'; import { text } from '../../types/content.js'; -import * as persistence from '../persistence.js'; import { runTransition } from '../run-transition.js'; import { type TurnStateRecord, transitionTo } from '../state.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; @@ -69,21 +68,19 @@ export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): P return; } - const prepared = await persistence.loadPreparedCalls(iii, rec.session_id); + const batch = rec.work?.batch ?? 
[]; for (let i = 0; i < awaiting.length; i++) { const entry = awaiting[i]; const decision = decisions[i]; if (!entry || !decision) continue; - const idx = prepared.findIndex( - (preparedEntry) => preparedEntry.function_call.id === entry.function_call_id, - ); + const idx = batch.findIndex((pe) => pe.function_call.id === entry.function_call_id); if (idx < 0) continue; - const current = prepared[idx]; + const current = batch[idx]; if (!current) continue; if (decision.decision === 'allow') { - prepared[idx] = { ...current, pre_approved: true, blocked: null }; + batch[idx] = { ...current, pre_approved: true, blocked: null }; } else { - prepared[idx] = { + batch[idx] = { ...current, pre_approved: false, blocked: denialResultFromDecision(decision), @@ -91,8 +88,6 @@ export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): P } } - await persistence.savePreparedCalls(iii, rec.session_id, prepared); - rec.awaiting_approval = []; transitionTo(rec, 'function_execute'); } diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts index 87fe86aa..b02a910d 100644 --- a/harness/src/turn-orchestrator/states/function-execute.ts +++ b/harness/src/turn-orchestrator/states/function-execute.ts @@ -14,14 +14,14 @@ import type { FunctionResultMessage, } from '../../types/agent-message.js'; import type { FunctionCall, FunctionResult } from '../../types/function.js'; -import { dispatchWithHook, isErrorResult, triggerFunctionCall } from '../agent-trigger.js'; +import { dispatchWithHook, isErrorResult, missingFunctionResult, triggerFunctionCall, unwrapAgentTrigger } from '../agent-trigger.js'; import { registerApprovalResume } from '../approval-resume.js'; import { emit } from '../events.js'; import { publishAfter } from '../hook.js'; import * as persistence from '../persistence.js'; import type { ExecutedEntry } from '../persistence.js'; import { runTransition } from '../run-transition.js'; -import { 
type TurnStateRecord, transitionTo } from '../state.js'; +import { type PreparedEntry, type TurnWork, type TurnStateRecord, transitionTo } from '../state.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; function buildFunctionExecutionEnd( @@ -61,10 +61,39 @@ function augmentFunctionCall(fc: FunctionCall, session_id: string): FunctionCall return { id: fc.id, function_id: fc.function_id, arguments: augmented_args }; } +function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { + const out: FunctionCall[] = []; + for (const b of msg.content) { + if (b.type === 'function_call') { + out.push({ id: b.id, function_id: b.function_id, arguments: b.arguments }); + } + } + return out; +} + +function buildBatch(asst: AssistantMessage): PreparedEntry[] { + return extractFunctionCalls(asst).map((raw) => { + const function_call = unwrapAgentTrigger(raw); + if (!function_call.function_id) { + return { function_call, blocked: missingFunctionResult() }; + } + return { function_call, blocked: null }; + }); +} + +function ensureWork(rec: TurnStateRecord): TurnWork { + if (!rec.work) { + const asst = rec.last_assistant; + if (!asst) throw new Error('function_execute without last_assistant'); + rec.work = { batch: buildBatch(asst), results: [] }; + } + return rec.work; +} + async function commitExecutedCall( iii: ISdk, rec: TurnStateRecord, - results: ExecutedEntry[], + work: TurnWork, fc: FunctionCall, result: FunctionResult, startedAt: number, @@ -72,26 +101,19 @@ async function commitExecutedCall( ): Promise { const duration_ms = Date.now() - startedAt; const error = is_error ?? 
isErrorResult(result); - persistence.upsertExecutedCall(results, { + persistence.upsertExecutedCall(work.results, { function_call: fc, result, is_error: error, duration_ms, }); - await persistence.saveExecutedCalls(iii, rec.session_id, results); + await persistence.writeRecord(iii, rec); await emit(iii, rec.session_id, buildFunctionExecutionEnd(fc, result, error, duration_ms)); } -function buildFinalizeLifecycle( - asst: AssistantMessage, - results: FunctionResultMessage[], -): AgentEvent[] { - const out: AgentEvent[] = [{ type: 'turn_end', message: asst, function_results: results }]; - return out; -} - async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { - const executed = await persistence.loadExecutedCalls(iii, rec.session_id); + const work = rec.work ?? { batch: [], results: [] }; + const executed: ExecutedEntry[] = work.results; const function_results: FunctionResultMessage[] = []; let all_terminate = executed.length > 0; for (const e of executed) { @@ -177,27 +199,19 @@ async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { - const prepared = await persistence.loadPreparedCalls(iii, rec.session_id); - const results = await persistence.loadExecutedCalls(iii, rec.session_id); + const work = ensureWork(rec); - for (const entry of prepared) { + for (const entry of work.batch) { const fc = entry.function_call; await emit(iii, rec.session_id, { type: 'function_execution_start', @@ -207,7 +221,7 @@ export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise unknown): ISdk { @@ -19,6 +18,7 @@ function fakeIii(stateGetImpl: (scope: string, key: string) => unknown): ISdk { function recordWith( awaiting: { function_call_id: string; function_id: string; args: unknown }[], + work?: TurnWork, ): TurnStateRecord { return { session_id: 's1', @@ -32,67 +32,77 @@ function recordWith( started_at_ms: 0, updated_at_ms: 0, awaiting_approval: awaiting, + work, }; } +function workWith(batch: PreparedEntry[]): 
TurnWork { + return { batch, results: [] }; +} + describe('handleAwaitingApproval', () => { + it('transitions straight to function_execute when awaiting is empty', async () => { + const iii = fakeIii((_scope, _key) => null); + const rec = recordWith([]); + await handleAwaitingApproval(iii, rec); + expect(rec.state).toBe('function_execute'); + }); + it('no-ops when any decision is missing', async () => { const iii = fakeIii((_scope, _key) => null); - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }]); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, - ]); + const rec = recordWith( + [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], + workWith([ + { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + ]), + ); await handleAwaitingApproval(iii, rec); expect(rec.state).toBe('function_awaiting_approval'); expect(rec.awaiting_approval).toHaveLength(1); + // batch unchanged — no decision folded in + expect(rec.work?.batch[0]?.pre_approved).toBeUndefined(); }); - it('marks prepared entries pre_approved on allow and transitions to function_execute', async () => { + it('folds pre_approved into work.batch on allow and transitions to function_execute', async () => { const iii = fakeIii((_scope, key) => { if (key === 's1/fc-1') return { decision: 'allow', reason: null }; return null; }); - const rec = recordWith([ - { function_call_id: 'fc-1', function_id: 'shell::run', args: { command: 'ls' } }, - ]); - const savedPrepared = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, + const rec = recordWith( + [{ function_call_id: 'fc-1', function_id: 'shell::run', args: { command: 'ls' } }], + 
workWith([ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, + blocked: null, }, - blocked: null, - }, - ]); + ]), + ); await handleAwaitingApproval(iii, rec); expect(rec.state).toBe('function_execute'); expect(rec.awaiting_approval).toEqual([]); - const savedArg = savedPrepared.mock.calls[0][2]; - expect(savedArg[0].pre_approved).toBe(true); - expect(savedArg[0].blocked).toBeNull(); + expect(rec.work?.batch[0]?.pre_approved).toBe(true); + expect(rec.work?.batch[0]?.blocked).toBeNull(); }); - it('sets blocked denial result on deny and transitions to function_execute', async () => { + it('sets blocked denial result in work.batch on deny and transitions to function_execute', async () => { const iii = fakeIii((_scope, key) => { if (key === 's1/fc-1') return { decision: 'deny', reason: 'policy' }; return null; }); - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }]); - const savedPrepared = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, - ]); + const rec = recordWith( + [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], + workWith([ + { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + ]), + ); await handleAwaitingApproval(iii, rec); expect(rec.state).toBe('function_execute'); - const savedArg = savedPrepared.mock.calls[0][2]; - expect(savedArg[0].pre_approved).toBeFalsy(); - expect(savedArg[0].blocked).toMatchObject({ + expect(rec.work?.batch[0]?.pre_approved).toBeFalsy(); + expect(rec.work?.batch[0]?.blocked).toMatchObject({ details: expect.objectContaining({ approval_denied: true, decision: 'deny', @@ -101,21 +111,51 @@ describe('handleAwaitingApproval', () => { }); }); - it('handles aborted decision like deny', async () => { + it('handles 
aborted decision like deny (folded into work.batch)', async () => { const iii = fakeIii((_scope, key) => { if (key === 's1/fc-1') return { decision: 'aborted', reason: 'session_aborted' }; return null; }); - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }]); - const savedPrepared = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, - ]); + const rec = recordWith( + [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], + workWith([ + { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + ]), + ); + + await handleAwaitingApproval(iii, rec); + + expect(rec.state).toBe('function_execute'); + expect(rec.work?.batch[0]?.pre_approved).toBeFalsy(); + expect(rec.work?.batch[0]?.blocked?.details).toMatchObject({ decision: 'aborted' }); + }); + + it('folds independent decisions across a multi-call batch', async () => { + const iii = fakeIii((_scope, key) => { + if (key === 's1/fc-1') return { decision: 'allow', reason: null }; + if (key === 's1/fc-2') return { decision: 'deny', reason: 'policy' }; + return null; + }); + const rec = recordWith( + [ + { function_call_id: 'fc-1', function_id: 'shell::run', args: {} }, + { function_call_id: 'fc-2', function_id: 'shell::fs::write', args: {} }, + ], + workWith([ + { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + { + function_call: { id: 'fc-2', function_id: 'shell::fs::write', arguments: {} }, + blocked: null, + }, + ]), + ); await handleAwaitingApproval(iii, rec); expect(rec.state).toBe('function_execute'); - const savedArg = savedPrepared.mock.calls[0][2]; - expect(savedArg[0].blocked?.details).toMatchObject({ decision: 'aborted' }); + expect(rec.work?.batch[0]?.pre_approved).toBe(true); + 
expect(rec.work?.batch[0]?.blocked).toBeNull(); + expect(rec.work?.batch[1]?.pre_approved).toBeFalsy(); + expect(rec.work?.batch[1]?.blocked?.details).toMatchObject({ decision: 'deny' }); }); }); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index f2d76b44..58163f19 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -9,6 +9,7 @@ import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.j import * as approvalResumeModule from '../../src/turn-orchestrator/approval-resume.js'; import { parseApprovalDecision } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; import { handleExecute } from '../../src/turn-orchestrator/states/function-execute.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; afterEach(() => { vi.restoreAllMocks(); @@ -20,6 +21,28 @@ function mockFinalizePersistence(): void { vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); } +/** Build a minimal AssistantMessage with the given function_call content blocks. */ +function makeAssistant( + calls: Array<{ id: string; function_id: string; arguments?: unknown }>, +): AssistantMessage { + return { + role: 'assistant', + content: calls.map((c) => ({ + type: 'function_call' as const, + id: c.id, + function_id: c.function_id, + arguments: c.arguments ?? 
{}, + })), + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; +} + describe('parseApprovalDecision', () => { it('accepts allow/deny/aborted with nullable reason (stored approval shape)', () => { expect(parseApprovalDecision({ decision: 'allow', reason: null })).toEqual({ @@ -54,6 +77,32 @@ describe('parseApprovalDecision', () => { }); describe('handleExecute new flow', () => { + it('builds work.batch from last_assistant when work is absent', async () => { + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ + kind: 'result', + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, + }); + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec: TurnStateRecord = newRecord('s1'); + rec.state = 'function_execute'; + rec.last_assistant = makeAssistant([ + { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, + ]); + + mockFinalizePersistence(); + await handleExecute(iii, rec); + + // work should be cleared after finalize + expect(rec.work).toBeUndefined(); + expect(rec.state).toBe('steering_check'); + expect(rec.function_results).toHaveLength(1); + expect(rec.function_results[0]?.function_call_id).toBe('fc-1'); + }); + it('pushes the call onto awaiting_approval and transitions to function_awaiting_approval on pending', async () => { const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); dispatchSpy.mockResolvedValueOnce({ kind: 'pending' }); @@ -64,25 +113,18 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; - - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, - }, - blocked: null, - }, 
+ rec.last_assistant = makeAssistant([ + { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + await handleExecute(iii, rec); expect(rec.state).toBe('function_awaiting_approval'); expect(rec.awaiting_approval).toHaveLength(1); expect(rec.awaiting_approval?.[0]?.function_call_id).toBe('fc-1'); expect(registerResumeSpy).toHaveBeenCalledWith(iii, 's1', 'fc-1'); + // work.batch should still be populated (re-entry will continue from it) + expect(rec.work?.batch).toHaveLength(1); }); it('skips consultBefore on pre_approved entries and uses triggerFunctionCall', async () => { @@ -90,21 +132,23 @@ describe('handleExecute new flow', () => { const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; - - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, + // Supply via rec.work (simulates re-entry after approval was granted) + rec.work = { + batch: [ + { + function_call: { + id: 'fc-1', + function_id: 'shell::run', + arguments: { command: 'ls' }, + }, + blocked: null, + pre_approved: true, }, - blocked: null, - pre_approved: true, - }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + ], + results: [], + }; const consultBeforeSpy = vi.spyOn(hookModule, 'consultBefore'); + mockFinalizePersistence(); await handleExecute(iii, rec); @@ -125,35 +169,29 @@ describe('handleExecute new flow', () => { const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; - - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { - id: 'fc-1', - 
function_id: 'shell::fs::write', - arguments: { content: 'Tue May 19 08:17:10 -03 2026\n' }, + rec.work = { + batch: [ + { + function_call: { + id: 'fc-1', + function_id: 'shell::fs::write', + arguments: { content: 'Tue May 19 08:17:10 -03 2026\n' }, + }, + blocked: null, + pre_approved: true, }, - blocked: null, - pre_approved: true, - }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - const saveSpy = vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + ], + results: [], + }; mockFinalizePersistence(); await expect(handleExecute(iii, rec)).resolves.toBeUndefined(); expect(rec.state).toBe('steering_check'); - expect(saveSpy).toHaveBeenCalled(); - // saveExecutedCalls is invoked twice: once with the synthesized error - // result, then once with `[]` as the idempotency guard clears executed - // calls at the end of finalize. Inspect the persisted-results call, not - // the trailing clear. - const savedResults = saveSpy.mock.calls - .map((c) => c[2] as Array<{ is_error: boolean; result: { details: unknown } }>) - .find((arr) => Array.isArray(arr) && arr.length > 0); - expect(savedResults?.[0]?.is_error).toBe(true); - const details = savedResults?.[0]?.result.details as Record; + // The result should be an error with denied details + expect(rec.function_results).toHaveLength(1); + expect(rec.function_results[0]?.is_error).toBe(true); + const details = rec.function_results[0]?.details as Record; expect(details?.status).toBe('denied'); expect(details?.denied_by).toBe('gate_unavailable'); expect(details?.function_id).toBe('shell::fs::write'); @@ -171,15 +209,16 @@ describe('handleExecute new flow', () => { details: { approval_denied: true, decision: 'deny' as const }, terminate: false, }; - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: denial, - pre_approved: false, - }, - ]); - vi.spyOn(persistence, 
'loadExecutedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + rec.work = { + batch: [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + blocked: denial, + pre_approved: false, + }, + ], + results: [], + }; mockFinalizePersistence(); await handleExecute(iii, rec); @@ -190,7 +229,7 @@ describe('handleExecute new flow', () => { expect(rec.state).toBe('steering_check'); }); - it('replays persisted executed calls without re-dispatching', async () => { + it('replays persisted executed calls without re-dispatching (re-entry with pre-populated work.results)', async () => { const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); const triggerSpy = vi.fn().mockResolvedValue(null); const iii = { trigger: triggerSpy } as unknown as ISdk; @@ -202,21 +241,23 @@ describe('handleExecute new flow', () => { details: {}, terminate: false, }; - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - result: existingResult, - is_error: false, - duration_ms: 42, - }, - ]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); + // Pre-populate rec.work with batch + already-executed result + rec.work = { + batch: [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + blocked: null, + }, + ], + results: [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: existingResult, + is_error: false, + duration_ms: 42, + }, + ], + }; mockFinalizePersistence(); await handleExecute(iii, rec); @@ -237,41 +278,43 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = 
newRecord('s1'); rec.state = 'function_execute'; - - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - }, + rec.last_assistant = makeAssistant([ + { id: 'fc-1', function_id: 'shell::run', arguments: {} }, ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - mockFinalizePersistence(); + mockFinalizePersistence(); await handleExecute(iii, rec); expect(rec.state).toBe('steering_check'); }); - it('transitions to steering_check when last_assistant is missing after execute', async () => { + it('transitions to steering_check when last_assistant is missing after execute (with pre-populated work)', async () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; rec.last_assistant = null; - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - result: { - content: [{ type: 'text' as const, text: 'ok' }], - details: {}, - terminate: false, + // Supply pre-populated work so ensureWork doesn't throw on null last_assistant + rec.work = { + batch: [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + blocked: null, }, - is_error: false, - duration_ms: 1, - }, - ]); + ], + results: [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, + is_error: false, + duration_ms: 1, + }, + ], + }; vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); 
@@ -282,7 +325,8 @@ describe('handleExecute new flow', () => { expect(rec.state).toBe('steering_check'); expect(rec.pending_function_calls).toEqual([]); expect(rec.function_results).toHaveLength(1); - expect(emitSpy).not.toHaveBeenCalled(); + // No turn_end emitted when last_assistant is null + expect(emitSpy.mock.calls.some((call) => call[2]?.type === 'turn_end')).toBe(false); }); it('emits turn lifecycle and sets turn_end_emitted when last_assistant is present', async () => { @@ -301,19 +345,22 @@ describe('handleExecute new flow', () => { timestamp: 1, }; - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue([ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - result: { - content: [{ type: 'text' as const, text: 'ok' }], - details: {}, - terminate: false, + // Supply pre-populated work (last_assistant has no function_call blocks here) + rec.work = { + batch: [], + results: [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, + is_error: false, + duration_ms: 1, }, - is_error: false, - duration_ms: 1, - }, - ]); + ], + }; vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); @@ -328,26 +375,21 @@ describe('handleExecute new flow', () => { it('does NOT duplicate function_results in flat-state when handleExecute re-enters', async () => { // Idempotency guard: a durable retry / step-fanout race can replay the - // finalize path with the same persisted executedCalls. Re-pushing the - // same function_result blocks makes Anthropic reject with "each tool_use - // must have a single result. Found multiple tool_result blocks with id". 
- const executed = [ - { - function_call: { id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }, - result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, terminate: false }, - is_error: false, - duration_ms: 5, - }, - ]; + // finalize path with the same work. Re-pushing the same function_result + // blocks makes Anthropic reject with "each tool_use must have a single + // result. Found multiple tool_result blocks with id". + const existingResult = { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }; + const fc = { id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }; + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; + rec.last_assistant = makeAssistant([{ id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }]); - vi.spyOn(persistence, 'loadPreparedCalls').mockResolvedValue([ - { function_call: executed[0].function_call, blocked: null }, - ]); - vi.spyOn(persistence, 'loadExecutedCalls').mockResolvedValue(executed); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); let storedMessages: unknown[] = []; vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { @@ -355,12 +397,23 @@ describe('handleExecute new flow', () => { }); vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); vi.spyOn(events, 'emit').mockResolvedValue(undefined); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValue({ + kind: 'result', + result: existingResult, + }); await handleExecute(iii, rec); - // Re-entry: same persisted executedCalls, before the transition was - // durably observed. 
+ + // Re-entry: simulate state was reset before durable confirmation rec.state = 'function_execute'; rec.turn_end_emitted = false; + // work.results already has the executed call after first run cleared rec.work, + // so we need to re-populate work for re-entry simulation + rec.work = { + batch: [{ function_call: fc, blocked: null }], + results: [{ function_call: fc, result: existingResult, is_error: false, duration_ms: 5 }], + }; + await handleExecute(iii, rec); const fnResults = ( diff --git a/harness/tests/turn-orchestrator/persistence.test.ts b/harness/tests/turn-orchestrator/persistence.test.ts new file mode 100644 index 00000000..2d86c551 --- /dev/null +++ b/harness/tests/turn-orchestrator/persistence.test.ts @@ -0,0 +1,18 @@ +import { describe, it, expect, vi } from 'vitest'; +import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; + +describe('writeRecord', () => { + it('writes turn_state without emitting turn_state_changed', async () => { + const calls: string[] = []; + const iii = { + trigger: vi.fn(async ({ function_id }: any) => { + calls.push(function_id); + return null; + }), + } as any; + await persistence.writeRecord(iii, newRecord('s1')); + expect(calls).toContain('state::set'); + expect(calls).not.toContain('stream::set'); // no agent::events emit + }); +}); From 8691817372b01c02dedb6f57e83958ca7f6ff61c Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 18:42:12 -0300 Subject: [PATCH 05/41] refactor(turn-orchestrator): emit lean TurnStateView to UI --- harness/src/turn-orchestrator/get-state.ts | 10 ++++-- harness/src/turn-orchestrator/persistence.ts | 5 +-- harness/src/turn-orchestrator/schemas.ts | 27 ++++++++++++++-- .../tests/turn-orchestrator/get-state.test.ts | 31 ++++++++++++++++--- 4 files changed, 62 insertions(+), 11 deletions(-) diff --git a/harness/src/turn-orchestrator/get-state.ts b/harness/src/turn-orchestrator/get-state.ts index 
927c6c80..7aec7c03 100644 --- a/harness/src/turn-orchestrator/get-state.ts +++ b/harness/src/turn-orchestrator/get-state.ts @@ -7,10 +7,16 @@ import type { ISdk } from '../runtime/iii.js'; import * as persistence from './persistence.js'; -import { GetStatePayloadSchema, type GetStatePayload, type GetStateResult } from './schemas.js'; +import { + GetStatePayloadSchema, + type GetStatePayload, + type GetStateResult, + toView, +} from './schemas.js'; export async function execute(iii: ISdk, payload: GetStatePayload): Promise { - return persistence.loadRecord(iii, payload.session_id); + const rec = await persistence.loadRecord(iii, payload.session_id); + return rec ? toView(rec) : null; } export function register(iii: ISdk): void { diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts index 5a82d6e9..30863afb 100644 --- a/harness/src/turn-orchestrator/persistence.ts +++ b/harness/src/turn-orchestrator/persistence.ts @@ -17,6 +17,7 @@ import { runRequestKey, turnStateKey, } from './state.js'; +import { toView } from './schemas.js'; import { emitTurnStateChanged } from './turn-state-write.js'; import { shouldWakeStep, wakeState } from './wake.js'; @@ -79,8 +80,8 @@ export async function persistRecord( iii, rec.session_id, eventType, - rec as unknown as Record, - prev !== null ? (prev as unknown as Record) : undefined, + toView(rec) as unknown as Record, + prev !== null ? 
(toView(prev) as unknown as Record) : undefined, ); } diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index 4ece3fdc..79516932 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -7,7 +7,7 @@ import { z } from 'zod'; import type { AgentMessage } from '../types/agent-message.js'; -import type { TurnState, TurnStateRecord } from './state.js'; +import type { AwaitingApprovalEntry, TurnState, TurnStateRecord } from './state.js'; import type { Mode } from './system-prompt.js'; /** Shared `{ session_id }` payload — `turn::{state}` steps and `turn::get_state`. */ @@ -38,7 +38,30 @@ export type TurnStepResult = // --- turn::get_state --- export const GetStatePayloadSchema = SessionIdPayloadSchema; export type GetStatePayload = z.infer; -export type GetStateResult = TurnStateRecord | null; + +/** Lean projection of TurnStateRecord sent to the UI and returned by turn::get_state. + * Excludes heavy internal fields (work, last_assistant) not needed by consumers. 
*/ +export type TurnStateView = { + session_id: string; + state: TurnState; + turn_count: number; + max_turns?: number; + awaiting_approval?: AwaitingApprovalEntry[]; + error?: { kind: string; message: string }; +}; + +export function toView(rec: TurnStateRecord): TurnStateView { + return { + session_id: rec.session_id, + state: rec.state, + turn_count: rec.turn_count, + max_turns: rec.max_turns, + awaiting_approval: rec.awaiting_approval, + error: rec.error, + }; +} + +export type GetStateResult = TurnStateView | null; // --- turn::is_abort_signal_set / turn::on_abort_signal (agent-scope state event) --- const AgentAbortSignalWriteEventSchema = z.object({ diff --git a/harness/tests/turn-orchestrator/get-state.test.ts b/harness/tests/turn-orchestrator/get-state.test.ts index 5c76220f..f7c9f742 100644 --- a/harness/tests/turn-orchestrator/get-state.test.ts +++ b/harness/tests/turn-orchestrator/get-state.test.ts @@ -42,9 +42,24 @@ describe('GetStatePayloadSchema', () => { }); describe('turn::get_state execute', () => { - it('returns the turn_state record for a known session via persistence.loadRecord', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'function_awaiting_approval'; + it('returns a lean view for a known session (excludes work/last_assistant)', async () => { + const rec = { + ...newRecord('sess-abc', 5), + state: 'function_awaiting_approval' as const, + awaiting_approval: [{ function_call_id: 'c1', function_id: 'x::y', args: {} }], + last_assistant: { + role: 'assistant', + content: [], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }, + work: { batch: [], results: [] }, + }; const iii = { trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { if ( @@ -57,8 +72,14 @@ describe('turn::get_state execute', () => { }), } as unknown as ISdk; - const out = await execute(iii, { session_id: 'sess-abc' }); - expect(out).toEqual(rec); + const view: 
any = await execute(iii, { session_id: 'sess-abc' }); + expect(view.state).toBe('function_awaiting_approval'); + expect(view.awaiting_approval).toHaveLength(1); + expect(view.session_id).toBe('sess-abc'); + expect(view.turn_count).toBe(0); + expect(view.max_turns).toBe(5); + expect(view.work).toBeUndefined(); + expect(view.last_assistant).toBeUndefined(); }); it('returns null when no record exists for the session', async () => { From 1cd99f7576b85e0fcc0933e2a14dfc77db74d9d4 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 18:43:09 -0300 Subject: [PATCH 06/41] refactor(turn-orchestrator): merge assistant_finished into assistant_streaming --- harness/src/turn-orchestrator/register.ts | 2 - harness/src/turn-orchestrator/state.ts | 1 - .../states/assistant-finished.ts | 122 ------- .../states/assistant-streaming.ts | 59 +++- harness/src/turn-orchestrator/states/index.ts | 1 - .../tests/turn-orchestrator/assistant.test.ts | 311 ++++++++---------- harness/tests/turn-orchestrator/wake.test.ts | 2 +- 7 files changed, 185 insertions(+), 313 deletions(-) delete mode 100644 harness/src/turn-orchestrator/states/assistant-finished.ts diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index de49c286..5a906e98 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -7,7 +7,6 @@ import { register as registerOnAbortSignal } from './on-abort-signal.js'; import { register as registerRunStart } from './run-start.js'; import { recoverPendingApprovals } from './approval-resume.js'; import { - registerAssistantFinished, registerAssistantStreaming, registerFunctionAwaitingApproval, registerFunctionExecute, @@ -22,7 +21,6 @@ export async function register(iii: ISdk, ctx: { configPath: string }): Promise< registerRunStart(iii); registerProvisioning(iii, orchestratorCfg); registerAssistantStreaming(iii); - registerAssistantFinished(iii); registerFunctionExecute(iii); 
registerFunctionAwaitingApproval(iii); registerSteeringCheck(iii); diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index a1fd4487..8774bfc0 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -9,7 +9,6 @@ import type { FunctionCall, FunctionResult } from '../types/function.js'; export type TurnState = | 'provisioning' | 'assistant_streaming' - | 'assistant_finished' | 'function_execute' | 'function_awaiting_approval' | 'steering_check' diff --git a/harness/src/turn-orchestrator/states/assistant-finished.ts b/harness/src/turn-orchestrator/states/assistant-finished.ts deleted file mode 100644 index d3b59f5a..00000000 --- a/harness/src/turn-orchestrator/states/assistant-finished.ts +++ /dev/null @@ -1,122 +0,0 @@ -/** - * `turn::assistant_finished`. Persist assistant message and route to steering or function execute. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
- */ - -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import type { AgentEvent } from '../../types/agent-event.js'; -import type { AssistantMessage } from '../../types/agent-message.js'; -import type { FunctionCall } from '../../types/function.js'; -import { missingFunctionResult, unwrapAgentTrigger } from '../agent-trigger.js'; -import { emit } from '../events.js'; -import type { PreparedEntry } from '../persistence.js'; -import * as persistence from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { - const out: FunctionCall[] = []; - for (const b of msg.content) { - if (b.type === 'function_call') { - out.push({ id: b.id, function_id: b.function_id, arguments: b.arguments }); - } - } - return out; -} - -function assistantMessageComplete(asst: AssistantMessage, body_streamed: boolean): AgentEvent { - return { type: 'message_complete', message: asst, body_streamed }; -} - -export async function handleFinished(iii: ISdk, rec: TurnStateRecord): Promise { - const asst = rec.last_assistant; - if (!asst) { - throw new Error('assistant_finished without last_assistant'); - } - await emit( - iii, - rec.session_id, - assistantMessageComplete(asst, rec.assistant_body_streamed === true), - ); - const isErrorOrAborted = asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; - // Error/aborted assistant messages (e.g. provider auth failures, - // network blips, user aborts) are surfaced to the UI via the - // message_complete emitted above, but we deliberately - // keep them out of the session's persisted message history so the - // LLM's next-turn context doesn't accumulate transient infra noise. 
- if (!isErrorOrAborted) { - const messages = await persistence.loadMessages(iii, rec.session_id); - // Idempotency guard: handleFinished can re-enter (durable trigger - // retry, crash before transitionTo persists). Without this guard a - // second run pushes the SAME assistant message again. If that - // assistant has tool_calls, Anthropic rejects the next request with: - // "each tool_use must have a unique id". - // Detect by comparing timestamp + content shape against the last - // assistant message in flat-state; skip the push when they match. - const last = messages[messages.length - 1]; - const alreadyPersisted = - last && - last.role === 'assistant' && - last.timestamp === asst.timestamp && - last.model === asst.model && - last.provider === asst.provider; - if (alreadyPersisted) { - logger.warn('handleFinished: skipping duplicate assistant push (re-entry detected)', { - session_id: rec.session_id, - timestamp: asst.timestamp, - }); - } else { - messages.push(asst); - await persistence.saveMessages(iii, rec.session_id, messages); - } - } - - if (isErrorOrAborted) { - await emit(iii, rec.session_id, { - type: 'turn_end', - message: asst, - function_results: [], - }); - rec.turn_end_emitted = true; - transitionTo(rec, 'tearing_down'); - return; - } - const calls = extractFunctionCalls(asst); - if (calls.length === 0) { - transitionTo(rec, 'steering_check'); - return; - } - - rec.function_results = []; - rec.pending_function_calls = calls.map(unwrapAgentTrigger); - - const prepared: PreparedEntry[] = calls.map((raw) => { - const function_call = unwrapAgentTrigger(raw); - if (!function_call.function_id) { - return { function_call, blocked: missingFunctionResult() }; - } - return { function_call, blocked: null }; - }); - - await persistence.saveExecutedCalls(iii, rec.session_id, []); - await persistence.savePreparedCalls(iii, rec.session_id, prepared); - transitionTo(rec, 'function_execute'); -} - -export function register(iii: ISdk): void { - 
iii.registerFunction( - 'turn::assistant_finished', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'assistant_finished', handleFinished, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state assistant_finished: finalize assistant and route onward.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts index 35f3b912..0a38bcb3 100644 --- a/harness/src/turn-orchestrator/states/assistant-streaming.ts +++ b/harness/src/turn-orchestrator/states/assistant-streaming.ts @@ -1,5 +1,5 @@ /** - * `turn::assistant_streaming`. Start turn, stream provider response, advance to finished. + * `turn::assistant_streaming`. Start turn, stream provider response, finalize, and route onward. * * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
@@ -52,6 +52,57 @@ function formatProviderError(err: unknown): string { .trim(); } +function isErrorOrAborted(asst: AssistantMessage): boolean { + return asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; +} + +async function finalizeAssistant(iii: ISdk, rec: TurnStateRecord): Promise { + const asst = rec.last_assistant; + if (!asst) throw new Error('assistant_streaming finalize without last_assistant'); + + await emit(iii, rec.session_id, { + type: 'message_complete', + message: asst, + body_streamed: rec.assistant_body_streamed === true, + }); + + const errored = isErrorOrAborted(asst); + if (!errored) { + const messages = await persistence.loadMessages(iii, rec.session_id); + const last = messages[messages.length - 1]; + const dup = + last && + last.role === 'assistant' && + last.timestamp === asst.timestamp && + last.model === asst.model && + last.provider === asst.provider; + if (!dup) { + messages.push(asst); + await persistence.saveMessages(iii, rec.session_id, messages); + } else { + logger.warn('finalizeAssistant: skipping duplicate assistant push (re-entry detected)', { + session_id: rec.session_id, + timestamp: asst.timestamp, + }); + } + } + + if (errored) { + await emit(iii, rec.session_id, { type: 'turn_end', message: asst, function_results: [] }); + rec.turn_end_emitted = true; + transitionTo(rec, 'tearing_down'); + return; + } + const hasCalls = asst.content.some((b) => b.type === 'function_call'); + if (!hasCalls) { + transitionTo(rec, 'steering_check'); + return; + } + rec.function_results = []; + rec.work = undefined; // function_execute builds the batch from last_assistant + transitionTo(rec, 'function_execute'); +} + export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { if (rec.max_turns !== undefined && rec.turn_count >= rec.max_turns) { const cap = rec.max_turns ?? 
0; @@ -121,7 +172,7 @@ export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise< decision.model, `create_channel failed: ${String(err)}`, ); - transitionTo(rec, 'assistant_finished'); + await finalizeAssistant(iii, rec); return; } @@ -222,7 +273,7 @@ export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise< }); rec.last_assistant = synthetic; } - transitionTo(rec, 'assistant_finished'); + await finalizeAssistant(iii, rec); } export function register(iii: ISdk): void { @@ -234,7 +285,7 @@ export function register(iii: ISdk): void { }, { description: - 'Run one durable FSM transition for session in state assistant_streaming: start turn and stream provider response.', + 'Run one durable FSM transition for session in state assistant_streaming: start turn, stream provider response, finalize, and route onward.', }, ); } diff --git a/harness/src/turn-orchestrator/states/index.ts b/harness/src/turn-orchestrator/states/index.ts index e7865709..100b1743 100644 --- a/harness/src/turn-orchestrator/states/index.ts +++ b/harness/src/turn-orchestrator/states/index.ts @@ -4,7 +4,6 @@ export { register as registerProvisioning } from './provisioning.js'; export { register as registerAssistantStreaming } from './assistant-streaming.js'; -export { register as registerAssistantFinished } from './assistant-finished.js'; export { register as registerFunctionExecute } from './function-execute.js'; export { register as registerFunctionAwaitingApproval } from './function-awaiting-approval.js'; export { register as registerSteeringCheck } from './steering-check.js'; diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index b218fd6d..105f564f 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -1,11 +1,9 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from 
'../../src/runtime/iii.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; -import { TOOL_NAME } from '../../src/turn-orchestrator/agent-trigger.js'; import * as persistence from '../../src/turn-orchestrator/persistence.js'; import * as preflightModule from '../../src/turn-orchestrator/preflight.js'; import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; -import { handleFinished } from '../../src/turn-orchestrator/states/assistant-finished.js'; import { handleStreaming } from '../../src/turn-orchestrator/states/assistant-streaming.js'; type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; @@ -45,6 +43,28 @@ function assistant(overrides: Partial = {}): AssistantMessage }; } +/** Build a fake iii whose createChannel delivers a single done event synchronously on stream.resume(). */ +function fakeIiiWithDone(finalMsg: AssistantMessage): { iii: ISdk; calls: TriggerCall[] } { + return fakeIii({ + createChannel: async () => { + let deliver: ((msg: string) => void) | null = null; + return { + writerRef: {}, + reader: { + onMessage: (cb: (msg: string) => void) => { + deliver = cb; + }, + stream: { + resume: () => { + deliver?.(JSON.stringify({ type: 'done', message: finalMsg })); + }, + }, + }, + }; + }, + }); +} + afterEach(() => { vi.restoreAllMocks(); }); @@ -70,9 +90,11 @@ describe('handleStreaming turn start', () => { await handleStreaming(iii, rec); expect(rec.turn_count).toBe(1); - expect(rec.turn_end_emitted).toBe(false); + // createChannel failure → synthetic error → finalizeAssistant sets turn_end_emitted = true + expect(rec.turn_end_emitted).toBe(true); expect(calls.some((c) => c.function_id === 'approval::consume')).toBe(false); - expect(calls.some((c) => c.function_id === 'stream::set')).toBe(false); + // stream::set is called by emit(message_complete) and emit(turn_end) in the error path + expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); }); 
it('exhausts max_turns and transitions to tearing_down', async () => { @@ -99,7 +121,7 @@ describe('handleStreaming turn start', () => { }); describe('handleStreaming', () => { - it('transitions to assistant_finished with synthetic error when createChannel fails', async () => { + it('transitions to tearing_down with synthetic error when createChannel fails', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIii({ createChannel: async () => { @@ -118,12 +140,72 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('assistant_finished'); + expect(rec.state).toBe('tearing_down'); expect(rec.last_assistant?.stop_reason).toBe('error'); expect(rec.last_assistant?.error_message).toContain('create_channel failed'); }); - it('captures provider done frame and transitions to assistant_finished', async () => { + it('streaming completion emits message_complete, persists, and routes to function_execute when calls exist', async () => { + const finalMsg = assistant({ + content: [ + { + type: 'function_call', + id: 'fc-1', + function_id: 'shell::run', + arguments: { command: 'ls' }, + }, + ], + }); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii, calls } = fakeIiiWithDone(finalMsg); + + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + }); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + + await handleStreaming(iii, rec); + + // emitted message_complete via stream::set trigger + expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); + // assistant persisted + 
expect(saveSpy).toHaveBeenCalledOnce(); + // routed to function_execute (NOT assistant_finished) + expect(rec.state).toBe('function_execute'); + expect(rec.last_assistant).toEqual(finalMsg); + expect(rec.function_results).toEqual([]); + expect(rec.work).toBeUndefined(); + }); + + it('routes to steering_check when the assistant made no calls', async () => { + const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii } = fakeIiiWithDone(finalMsg); + + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + }); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + + await handleStreaming(iii, rec); + + expect(rec.state).toBe('steering_check'); + expect(rec.last_assistant).toEqual(finalMsg); + }); + + it('captures provider done frame and routes correctly (text-only → steering_check)', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const finalMsg = assistant({ content: [{ type: 'text', text: 'done reply' }] }); let deliver: ((msg: string) => void) | null = null; @@ -158,207 +240,72 @@ describe('handleStreaming', () => { vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); await handleStreaming(iii, rec); - expect(rec.state).toBe('assistant_finished'); + expect(rec.state).toBe('steering_check'); expect(rec.last_assistant).toEqual(finalMsg); }); -}); - -describe('handleFinished', () => { - 
it('throws when last_assistant is missing', async () => { - const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_finished' }; - const { iii } = fakeIii(); - - await expect(handleFinished(iii, rec)).rejects.toThrow( - 'assistant_finished without last_assistant', - ); - }); it('routes error assistant to tearing_down without persisting transcript', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ stop_reason: 'error', error_message: 'auth failed' }), - }; - const { iii } = fakeIii(); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const finalMsg = assistant({ stop_reason: 'error', error_message: 'auth failed' }); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii } = fakeIiiWithDone(finalMsg); + + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + }); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - await handleFinished(iii, rec); + await handleStreaming(iii, rec); expect(rec.state).toBe('tearing_down'); expect(rec.turn_end_emitted).toBe(true); expect(saveSpy).not.toHaveBeenCalled(); }); - it('routes text-only assistant to steering_check and persists message', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant(), - }; - const { iii } = fakeIii(); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - - await handleFinished(iii, rec); - - expect(rec.state).toBe('steering_check'); - 
expect(rec.pending_function_calls).toEqual([]); - expect(saveSpy).toHaveBeenCalledOnce(); - }); - - it('prepares function calls and transitions to function_execute', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ - content: [ - { - type: 'function_call', - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, - }, - ], - }), - }; - const { iii } = fakeIii(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - const saveExecutedSpy = vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - - await handleFinished(iii, rec); + it('does NOT duplicate the assistant message on re-entry', async () => { + const finalMsg = assistant({ + content: [ + { + type: 'function_call', + id: 'toolu_42', + function_id: 'shell::run', + arguments: { command: 'pwd' }, + }, + ], + }); + // Simulate re-entry: messages already contain the assistant message + let storedMessages: unknown[] = [finalMsg]; - expect(rec.state).toBe('function_execute'); - expect(rec.function_results).toEqual([]); - expect(rec.pending_function_calls).toEqual([ - { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, - ]); - expect(saveExecutedSpy).toHaveBeenCalledWith(iii, 's1', []); - expect(savePreparedSpy).toHaveBeenCalledWith(iii, 's1', [ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, - blocked: null, - }, - ]); - }); + const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; + const { iii } = fakeIiiWithDone(finalMsg); - it('does NOT duplicate the assistant message when handleFinished re-enters', async () => { - // Idempotency guard: a durable retry / crash-before-transitionTo can - // replay handleFinished with the same 
last_assistant. Re-pushing a - // tool-call assistant makes Anthropic reject the next request with - // "each tool_use must have a unique id". - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ - content: [ - { - type: 'function_call', - id: 'toolu_42', - function_id: 'shell::run', - arguments: { command: 'pwd' }, - }, - ], - }), - }; - const { iii } = fakeIii(); - let storedMessages: unknown[] = []; + vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + }); vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { storedMessages = msgs as never; }); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); + vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); + vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); - await handleFinished(iii, rec); - // Re-entry: same record before the transition was durably observed. 
- rec.state = 'assistant_finished'; - await handleFinished(iii, rec); + await handleStreaming(iii, rec); const asstMsgs = (storedMessages as Array<{ role?: string }>).filter( (m) => m.role === 'assistant', ); expect(asstMsgs).toHaveLength(1); }); - - it('unwraps agent_trigger wrappers when preparing function calls', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ - content: [ - { - type: 'function_call', - id: 'fc-wrap', - function_id: TOOL_NAME, - arguments: { function: 'shell::run', payload: { command: 'ls' } }, - }, - { - type: 'function_call', - id: 'fc-direct', - function_id: 'shell::echo', - arguments: { text: 'hi' }, - }, - ], - }), - }; - const { iii } = fakeIii(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - - await handleFinished(iii, rec); - - expect(rec.state).toBe('function_execute'); - const prepared = savePreparedSpy.mock.calls[0]?.[2]; - expect(prepared).toEqual([ - { - function_call: { id: 'fc-wrap', function_id: 'shell::run', arguments: { command: 'ls' } }, - blocked: null, - }, - { - function_call: { id: 'fc-direct', function_id: 'shell::echo', arguments: { text: 'hi' } }, - blocked: null, - }, - ]); - }); - - it('blocks agent_trigger calls with missing or empty function at prepare time', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1'), - state: 'assistant_finished', - last_assistant: assistant({ - content: [ - { - type: 'function_call', - id: 'fc-bad', - function_id: TOOL_NAME, - arguments: { payload: { command: 'ls' } }, - }, - ], - }), - }; - const { iii } = fakeIii(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 
'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'saveExecutedCalls').mockResolvedValue(undefined); - const savePreparedSpy = vi.spyOn(persistence, 'savePreparedCalls').mockResolvedValue(undefined); - - await handleFinished(iii, rec); - - expect(rec.state).toBe('function_execute'); - const prepared = savePreparedSpy.mock.calls[0]?.[2]; - expect(prepared?.[0]?.function_call).toEqual({ - id: 'fc-bad', - function_id: '', - arguments: { command: 'ls' }, - }); - expect(prepared?.[0]?.blocked?.details).toMatchObject({ error: 'missing_function' }); - }); }); diff --git a/harness/tests/turn-orchestrator/wake.test.ts b/harness/tests/turn-orchestrator/wake.test.ts index 115fa729..e09ae12c 100644 --- a/harness/tests/turn-orchestrator/wake.test.ts +++ b/harness/tests/turn-orchestrator/wake.test.ts @@ -11,7 +11,7 @@ describe('shouldWakeStep', () => { it('accepts transitions to another stepable state', () => { expect(shouldWakeStep('provisioning', 'assistant_streaming')).toBe(true); - expect(shouldWakeStep('assistant_finished', 'function_execute')).toBe(true); + expect(shouldWakeStep('assistant_streaming', 'function_execute')).toBe(true); }); it('rejects terminal state (stopped)', () => { From 41468e4d950165fcc407d35a50b89f413ccedc0b Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 18:47:34 -0300 Subject: [PATCH 07/41] feat(turn-orchestrator): legacy-record migration shim --- harness/src/turn-orchestrator/persistence.ts | 14 ++++++++++- .../turn-orchestrator/persistence.test.ts | 24 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts index 30863afb..586c52d5 100644 --- a/harness/src/turn-orchestrator/persistence.ts +++ b/harness/src/turn-orchestrator/persistence.ts @@ -49,10 +49,22 @@ async function stateSet(iii: ISdk, key: string, value: unknown): Promise { } } +/** Defensive coercion for records persisted before 
assistant_finished was + * removed. Drain-before-cutover is preferred; this prevents a crash on an + * in-flight legacy record. */ +export function migrateLegacyRecord(rec: TurnStateRecord): TurnStateRecord { + if ((rec.state as string) === 'assistant_finished') { + const asst = rec.last_assistant; + const hasCalls = !!asst && asst.content.some((b) => b.type === 'function_call'); + return { ...rec, state: hasCalls ? 'function_execute' : 'steering_check' }; + } + return rec; +} + export async function loadRecord(iii: ISdk, session_id: string): Promise { const v = await stateGet(iii, turnStateKey(session_id)); if (!v || typeof v !== 'object') return null; - return v as TurnStateRecord; + return migrateLegacyRecord(v as TurnStateRecord); } /** Persist the record with no UI event and no FSM wake — for mid-handler, diff --git a/harness/tests/turn-orchestrator/persistence.test.ts b/harness/tests/turn-orchestrator/persistence.test.ts index 2d86c551..b749a01b 100644 --- a/harness/tests/turn-orchestrator/persistence.test.ts +++ b/harness/tests/turn-orchestrator/persistence.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect, vi } from 'vitest'; import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { migrateLegacyRecord } from '../../src/turn-orchestrator/persistence.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; describe('writeRecord', () => { @@ -16,3 +17,26 @@ describe('writeRecord', () => { expect(calls).not.toContain('stream::set'); // no agent::events emit }); }); + +describe('migrateLegacyRecord', () => { + it('coerces legacy assistant_finished -> function_execute when last_assistant has calls', () => { + const legacy: any = { + session_id: 's1', state: 'assistant_finished', turn_count: 1, + last_assistant: { role: 'assistant', content: [{ type: 'function_call', id: 'c1', function_id: 'x::y', arguments: {} }] }, + pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 1, 
updated_at_ms: 1, + }; + expect(migrateLegacyRecord(legacy).state).toBe('function_execute'); + }); + it('coerces legacy assistant_finished -> steering_check when no calls', () => { + const legacy: any = { + session_id: 's1', state: 'assistant_finished', turn_count: 1, + last_assistant: { role: 'assistant', content: [{ type: 'text', text: 'hi' }] }, + pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 1, updated_at_ms: 1, + }; + expect(migrateLegacyRecord(legacy).state).toBe('steering_check'); + }); + it('leaves current records untouched', () => { + const cur: any = { session_id: 's1', state: 'function_execute', turn_count: 1, pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 1, updated_at_ms: 1 }; + expect(migrateLegacyRecord(cur).state).toBe('function_execute'); + }); +}); From ad27601622498672873c7e88776bce950d9123a6 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 18:47:57 -0300 Subject: [PATCH 08/41] refactor(turn-orchestrator): cap max_turns in steering_check --- .../states/assistant-streaming.ts | 31 -------- .../states/steering-check.ts | 38 +++++++++ .../tests/turn-orchestrator/assistant.test.ts | 20 ----- .../tests/turn-orchestrator/steering.test.ts | 78 +++++++++++++++++++ 4 files changed, 116 insertions(+), 51 deletions(-) diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts index 0a38bcb3..a5e9433a 100644 --- a/harness/src/turn-orchestrator/states/assistant-streaming.ts +++ b/harness/src/turn-orchestrator/states/assistant-streaming.ts @@ -104,37 +104,6 @@ async function finalizeAssistant(iii: ISdk, rec: TurnStateRecord): Promise } export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { - if (rec.max_turns !== undefined && rec.turn_count >= rec.max_turns) { - const cap = rec.max_turns ?? 
0; - const exhausted: AssistantMessage = { - role: 'assistant', - content: [{ type: 'text', text: `loop stopped: max_turns (${cap}) reached` }], - stop_reason: 'end', - error_message: null, - error_kind: null, - usage: null, - model: '', - provider: '', - timestamp: Date.now(), - }; - await emit(iii, rec.session_id, { - type: 'message_complete', - message: exhausted, - body_streamed: false, - }); - await emit(iii, rec.session_id, { - type: 'turn_end', - message: exhausted, - function_results: [], - }); - rec.turn_end_emitted = true; - rec.last_assistant = exhausted; - const messages = await persistence.loadMessages(iii, rec.session_id); - messages.push(exhausted); - await persistence.saveMessages(iii, rec.session_id, messages); - transitionTo(rec, 'tearing_down'); - return; - } rec.turn_count++; rec.turn_end_emitted = false; rec.assistant_body_streamed = false; diff --git a/harness/src/turn-orchestrator/states/steering-check.ts b/harness/src/turn-orchestrator/states/steering-check.ts index 77a0f6a5..a24d0c25 100644 --- a/harness/src/turn-orchestrator/states/steering-check.ts +++ b/harness/src/turn-orchestrator/states/steering-check.ts @@ -73,6 +73,36 @@ function abortedMessage(): AssistantMessage { }; } +function maxTurnsReached(rec: TurnStateRecord): boolean { + return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; +} + +function maxTurnsAssistant(cap: number): AssistantMessage { + return { + role: 'assistant', + content: [{ type: 'text', text: `loop stopped: max_turns (${cap}) reached` }], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: '', + provider: '', + timestamp: Date.now(), + }; +} + +async function endForMaxTurns(iii: ISdk, rec: TurnStateRecord): Promise { + const msg = maxTurnsAssistant(rec.max_turns ?? 
0); + rec.last_assistant = msg; + const messages = await persistence.loadMessages(iii, rec.session_id); + messages.push(msg); + await persistence.saveMessages(iii, rec.session_id, messages); + await emit(iii, rec.session_id, { type: 'message_complete', message: msg, body_streamed: false }); + await emit(iii, rec.session_id, { type: 'turn_end', message: msg, function_results: [] }); + rec.turn_end_emitted = true; + transitionTo(rec, 'tearing_down'); +} + async function emitTurnEndOnce(iii: ISdk, rec: TurnStateRecord): Promise { if (rec.turn_end_emitted) return; const last = @@ -128,6 +158,10 @@ export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); }); - it('exhausts max_turns and transitions to tearing_down', async () => { - const rec: TurnStateRecord = { - ...newRecord('s1', 2), - state: 'assistant_streaming', - turn_count: 2, - }; - const { iii, calls } = fakeIii(); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - await handleStreaming(iii, rec); - - expect(rec.state).toBe('tearing_down'); - expect(rec.turn_end_emitted).toBe(true); - expect(rec.last_assistant?.content[0]).toEqual({ - type: 'text', - text: 'loop stopped: max_turns (2) reached', - }); - expect(saveSpy).toHaveBeenCalledOnce(); - expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); - }); }); describe('handleStreaming', () => { diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index 28477cb1..ec09c5a1 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -213,4 +213,82 @@ describe('handleSteering', () => { }), ); }); + + it('caps at max_turns: emits a max_turns assistant + message_complete + turn_end and tears down instead of continuing', async () => { + const { 
iii } = makeIii(); + const rec = steeringRec('s1', { + max_turns: 2, + turn_count: 2, + function_results: [{ role: 'function_result', content: [] }] as never, + }); + const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('tearing_down'); + expect(rec.turn_end_emitted).toBe(true); + expect(rec.last_assistant?.content[0]).toEqual( + expect.objectContaining({ type: 'text', text: expect.stringContaining('max_turns') }), + ); + expect(emitSpy).toHaveBeenCalledWith( + iii, + 's1', + expect.objectContaining({ type: 'message_complete' }), + ); + expect(emitSpy).toHaveBeenCalledWith( + iii, + 's1', + expect.objectContaining({ type: 'turn_end' }), + ); + expect(loadSpy).toHaveBeenCalledWith(iii, 's1'); + expect(saveSpy).toHaveBeenCalledWith( + iii, + 's1', + expect.arrayContaining([ + expect.objectContaining({ + content: expect.arrayContaining([ + expect.objectContaining({ text: expect.stringContaining('max_turns') }), + ]), + }), + ]), + ); + }); + + it('caps at max_turns via steering route: tears down instead of continuing to assistant_streaming', async () => { + const { iii } = makeIii({ steeringItems: [userMessage('steer-me')] }); + const rec = steeringRec('s1', { + max_turns: 3, + turn_count: 3, + }); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('tearing_down'); + expect(rec.turn_end_emitted).toBe(true); + expect(rec.last_assistant?.content[0]).toEqual( + expect.objectContaining({ text: expect.stringContaining('max_turns') }), + ); + }); + + it('continues to assistant_streaming when under max_turns (continue_after_function 
route)', async () => { + const { iii } = makeIii(); + const rec = steeringRec('s1', { + max_turns: 5, + turn_count: 2, + function_results: [{ role: 'function_result', content: [] }] as never, + }); + vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleSteering(iii, rec); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + }); }); From 8917a68737ee131cfe3fcf44ec26593eb30bdd49 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 18:55:27 -0300 Subject: [PATCH 09/41] refactor(turn-orchestrator): drop staging keys, fold function_schemas into run_request --- harness/src/turn-orchestrator/persistence.ts | 81 ------------------- harness/src/turn-orchestrator/run-request.ts | 2 + harness/src/turn-orchestrator/state.ts | 1 - .../states/assistant-streaming.ts | 3 +- .../turn-orchestrator/states/provisioning.ts | 4 +- harness/src/types/function.ts | 4 +- .../tests/turn-orchestrator/assistant.test.ts | 14 ++-- .../turn-orchestrator/provisioning.test.ts | 12 ++- .../turn-orchestrator/run-request.test.ts | 10 +++ 9 files changed, 28 insertions(+), 103 deletions(-) diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts index 586c52d5..065c86d4 100644 --- a/harness/src/turn-orchestrator/persistence.ts +++ b/harness/src/turn-orchestrator/persistence.ts @@ -5,13 +5,11 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentMessage } from '../types/agent-message.js'; -import type { FunctionCall, FunctionResult } from '../types/function.js'; import { type RunRequest, parseRunRequest } from './run-request.js'; import { type ExecutedEntry, type PreparedEntry, type TurnStateRecord, - functionSchemasKey, lastSessionTreeLenKey, messagesKey, runRequestKey, @@ -195,85 +193,6 @@ 
export async function loadRunRequest(iii: ISdk, session_id: string): Promise) : {}); } -export async function saveFunctionSchemas( - iii: ISdk, - session_id: string, - schemas: unknown, -): Promise { - await stateSet(iii, functionSchemasKey(session_id), schemas); -} - -export async function loadFunctionSchemas(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, functionSchemasKey(session_id)); - return Array.isArray(v) ? v : []; -} - -const PREPARED_KEY = 'function_prepared'; -const EXECUTED_KEY = 'function_executed'; - -const stagingKey = (sid: string, suffix: string) => `session/${sid}/${suffix}`; - -async function stagingGet(iii: ISdk, session_id: string, suffix: string): Promise { - const v = await stateGet(iii, stagingKey(session_id, suffix)); - return Array.isArray(v) ? v : []; -} - - -export async function savePreparedCalls( - iii: ISdk, - session_id: string, - prepared: PreparedEntry[], -): Promise { - const payload = prepared.map((e) => ({ - function_call: e.function_call, - blocked: e.blocked, - pre_approved: e.pre_approved ?? false, - })); - await stateSet(iii, stagingKey(session_id, PREPARED_KEY), payload); -} - -export async function loadPreparedCalls(iii: ISdk, session_id: string): Promise { - const items = await stagingGet(iii, session_id, PREPARED_KEY); - const out: PreparedEntry[] = []; - for (const it of items) { - if (!it || typeof it !== 'object') continue; - const obj = it as Record; - const fc = obj.function_call as FunctionCall | undefined; - if (!fc) continue; - const blocked = (obj.blocked as FunctionResult | null) ?? 
null; - const pre_approved = obj.pre_approved === true; - out.push({ function_call: fc, blocked, pre_approved }); - } - return out; -} - -export async function saveExecutedCalls( - iii: ISdk, - session_id: string, - executed: ExecutedEntry[], -): Promise { - await stateSet(iii, stagingKey(session_id, EXECUTED_KEY), executed); -} - -export async function loadExecutedCalls(iii: ISdk, session_id: string): Promise { - const items = await stagingGet(iii, session_id, EXECUTED_KEY); - const out: ExecutedEntry[] = []; - for (const it of items) { - if (!it || typeof it !== 'object') continue; - const obj = it as Record; - const fc = obj.function_call as FunctionCall | undefined; - const result = obj.result as FunctionResult | undefined; - if (!fc || !result) continue; - out.push({ - function_call: fc, - result, - is_error: typeof obj.is_error === 'boolean' ? obj.is_error : false, - duration_ms: typeof obj.duration_ms === 'number' ? obj.duration_ms : 0, - }); - } - return out; -} - export function findExecutedCall( executed: ExecutedEntry[], function_call_id: string, diff --git a/harness/src/turn-orchestrator/run-request.ts b/harness/src/turn-orchestrator/run-request.ts index 23d8ca5a..168ff52d 100644 --- a/harness/src/turn-orchestrator/run-request.ts +++ b/harness/src/turn-orchestrator/run-request.ts @@ -12,6 +12,7 @@ export type RunRequest = { model: string; mode: Mode | null; system_prompt: string; + function_schemas: unknown[]; }; function parseMode(value: unknown): Mode | null { @@ -24,5 +25,6 @@ export function parseRunRequest(raw: Record): RunRequest { model: typeof raw.model === 'string' ? raw.model : '', mode: parseMode(raw.mode), system_prompt: typeof raw.system_prompt === 'string' ? raw.system_prompt : '', + function_schemas: Array.isArray(raw.function_schemas) ? 
raw.function_schemas : [], }; } diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 8774bfc0..0c47e2e6 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -86,7 +86,6 @@ export function isTerminal(rec: TurnStateRecord): boolean { export const messagesKey = (sid: string) => `session/${sid}/messages`; export const turnStateKey = (sid: string) => `session/${sid}/turn_state`; export const runRequestKey = (sid: string) => `session/${sid}/run_request`; -export const functionSchemasKey = (sid: string) => `session/${sid}/function_schemas`; export const lastSessionTreeLenKey = (sid: string) => `session/${sid}/session_tree_mirror_len`; export const eventCounterKey = (sid: string) => `session/${sid}/event_counter`; export const abortSignalKey = (sid: string) => `session/${sid}/abort_signal`; diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts index a5e9433a..c40381d5 100644 --- a/harness/src/turn-orchestrator/states/assistant-streaming.ts +++ b/harness/src/turn-orchestrator/states/assistant-streaming.ts @@ -110,10 +110,9 @@ export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise< const request = await persistence.loadRunRequest(iii, rec.session_id); let messages = await persistence.loadMessages(iii, rec.session_id); - const schemas = await persistence.loadFunctionSchemas(iii, rec.session_id); const { provider, model, system_prompt } = request; - const tools = (Array.isArray(schemas) ? schemas : []) as AgentFunction[]; + const tools = (Array.isArray(request.function_schemas) ? 
request.function_schemas : []) as AgentFunction[]; const decision = decide({ provider, model }); const targetFn = targetFunctionId(decision); diff --git a/harness/src/turn-orchestrator/states/provisioning.ts b/harness/src/turn-orchestrator/states/provisioning.ts index 8ca88284..ecc8d80d 100644 --- a/harness/src/turn-orchestrator/states/provisioning.ts +++ b/harness/src/turn-orchestrator/states/provisioning.ts @@ -74,8 +74,6 @@ export async function handleProvisioning( ): Promise { const request = await persistence.loadRunRequest(iii, rec.session_id); - await persistence.saveFunctionSchemas(iii, rec.session_id, [agentTriggerTool()]); - const override = request.system_prompt.length > 0 ? request.system_prompt : null; const [skillsIndex, bodies] = await Promise.all([ @@ -84,7 +82,7 @@ export async function handleProvisioning( ]); const prompt = buildSystemPrompt(bodies, null, override, request.mode, skillsIndex); - const updated: RunRequest = { ...request, system_prompt: prompt }; + const updated: RunRequest = { ...request, system_prompt: prompt, function_schemas: [agentTriggerTool()] }; await persistence.saveRunRequest(iii, rec.session_id, updated); transitionTo(rec, 'assistant_streaming'); diff --git a/harness/src/types/function.ts b/harness/src/types/function.ts index 3bf69979..a061df03 100644 --- a/harness/src/types/function.ts +++ b/harness/src/types/function.ts @@ -46,12 +46,12 @@ export type FunctionResult = { terminate?: boolean; }; -/** Prepared call entry persisted in the FSM's `function_prepared` staging. */ +/** Prepared call entry used during FSM function execution. */ export type PreparedFunctionCall = | { kind: 'prepared'; function_call: FunctionCall } | { kind: 'immediate'; result: FunctionResult; is_error: boolean }; -/** Finalized call entry persisted in `function_executed`. */ +/** Finalized call entry after function execution completes. 
*/ export type FinalizedFunctionCall = { function_call: FunctionCall; result: FunctionResult; diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index c0e6b490..656033e0 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -82,9 +82,9 @@ describe('handleStreaming turn start', () => { model: 'gpt-4o', mode: null, system_prompt: '', + function_schemas: [], }); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); await handleStreaming(iii, rec); @@ -113,9 +113,9 @@ describe('handleStreaming', () => { model: 'gpt-4o', mode: null, system_prompt: '', + function_schemas: [], }); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); await handleStreaming(iii, rec); @@ -144,9 +144,9 @@ describe('handleStreaming', () => { model: 'gpt-4o', mode: null, system_prompt: '', + function_schemas: [], }); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); @@ -173,9 +173,9 @@ describe('handleStreaming', () => { model: 'gpt-4o', mode: null, system_prompt: '', + function_schemas: [], }); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); @@ -216,9 +216,9 @@ describe('handleStreaming', () => { model: 'gpt-4o', mode: null, system_prompt: 
'', + function_schemas: [], }); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); @@ -238,9 +238,9 @@ describe('handleStreaming', () => { model: 'gpt-4o', mode: null, system_prompt: '', + function_schemas: [], }); vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); @@ -273,12 +273,12 @@ describe('handleStreaming', () => { model: 'gpt-4o', mode: null, system_prompt: '', + function_schemas: [], }); vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { storedMessages = msgs as never; }); - vi.spyOn(persistence, 'loadFunctionSchemas').mockResolvedValue([]); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); await handleStreaming(iii, rec); diff --git a/harness/tests/turn-orchestrator/provisioning.test.ts b/harness/tests/turn-orchestrator/provisioning.test.ts index 251ec424..18bd297c 100644 --- a/harness/tests/turn-orchestrator/provisioning.test.ts +++ b/harness/tests/turn-orchestrator/provisioning.test.ts @@ -62,16 +62,13 @@ describe('handleProvisioning', () => { model: 'gpt-4', mode: 'agent', system_prompt: '', + function_schemas: [], }); - const saveSchemas = vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); await handleProvisioning(iii, cfg, rec); expect(rec.state).toBe('assistant_streaming'); - expect(saveSchemas).toHaveBeenCalledWith(iii, 's1', [ - expect.objectContaining({ name: 
'agent_trigger' }), - ]); expect(saveRunRequest).toHaveBeenCalledWith( iii, 's1', @@ -79,6 +76,7 @@ describe('handleProvisioning', () => { provider: 'openai', model: 'gpt-4', system_prompt: expect.stringContaining('operating in agent mode'), + function_schemas: [expect.objectContaining({ name: 'agent_trigger' })], }), ); expect(calls.some((c) => c.function_id === 'directory::skills::index')).toBe(true); @@ -95,8 +93,8 @@ describe('handleProvisioning', () => { model: 'gpt-4', mode: null, system_prompt: 'custom override', + function_schemas: [], }); - vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); await handleProvisioning(iii, cfg, rec); @@ -118,8 +116,8 @@ describe('handleProvisioning', () => { model: '', mode: null, system_prompt: '', + function_schemas: [], }); - vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); await handleProvisioning(iii, cfg, rec); @@ -176,8 +174,8 @@ describe('register', () => { model: '', mode: null, system_prompt: '', + function_schemas: [], }); - vi.spyOn(persistence, 'saveFunctionSchemas').mockResolvedValue(); vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); const { iii, getHandler, getId } = captureHandler(); diff --git a/harness/tests/turn-orchestrator/run-request.test.ts b/harness/tests/turn-orchestrator/run-request.test.ts index 745557b8..2a6a4cfd 100644 --- a/harness/tests/turn-orchestrator/run-request.test.ts +++ b/harness/tests/turn-orchestrator/run-request.test.ts @@ -8,6 +8,7 @@ describe('parseRunRequest', () => { model: '', mode: null, system_prompt: '', + function_schemas: [], }); }); @@ -17,6 +18,7 @@ describe('parseRunRequest', () => { model: 'gpt-4', mode: null, system_prompt: 'hi', + function_schemas: [], }); }); @@ -33,6 +35,14 @@ describe('parseRunRequest', () => { model: '', mode: null, system_prompt: '', + 
function_schemas: [], }); }); }); + +describe('parseRunRequest function_schemas', () => { + it('defaults to [] and carries an array', () => { + expect(parseRunRequest({}).function_schemas).toEqual([]); + expect(parseRunRequest({ function_schemas: [{ name: 'x' }] }).function_schemas).toHaveLength(1); + }); +}); From 7a63df317cae6e9852ead50f3c178464838edc8e Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 19:00:10 -0300 Subject: [PATCH 10/41] docs(turn-orchestrator): update for rewrite (states, work, error model, TurnStateView) --- harness/docs/workers/turn-orchestrator.md | 206 +++++++++++++--------- 1 file changed, 124 insertions(+), 82 deletions(-) diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index 4619cbd3..d912a0c0 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -6,91 +6,124 @@ provisioning, assistant, function-execute, steering, and tearing-down. ## Purpose This is the heart of the bundle. `run::start` opens a session and returns -immediately; the rest of the work happens inside the durable `turn::step` -state machine, woken once per state transition by a publish to the -`turn::step_requested` topic. The FSM provisions the sandbox, streams the -assistant turn from a provider, executes any returned function calls -through `dispatchWithHook`, emits `agent::events` for the -harness fanout, and persists everything to iii state so the run survives -restarts. - -`dispatchWithHook` in [agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) is the single -dispatcher every agent-issued tool call passes through. It runs `consultBefore` before forwarding to the target function -id. `consultBefore` triggers `policy::check_permissions` directly (5 s -timeout) and maps the reply to allow / deny / pending. 
Fail-closed: policy +immediately; the rest of the work happens inside per-state durable functions +(`turn::provisioning`, `turn::assistant_streaming`, …), each enqueued onto +the `turn-step` FIFO queue via `wakeState` ([wake.ts](harness/src/turn-orchestrator/wake.ts)). +Saving the record with a new non-terminal, non-parking state automatically +enqueues the next handler (`saveRecord` in +[persistence.ts](harness/src/turn-orchestrator/persistence.ts) calls `shouldWakeStep` then `wakeState`). + +Every per-state handler is wrapped by `runTransition` +([run-transition.ts](harness/src/turn-orchestrator/run-transition.ts)): +load record → null-check → stale-skip → handle → save. This owns the +crash-isolation contract: + +- An unexpected handler throw routes the session to the `failed` terminal + (acked so the durable queue stops retrying) and surfaces `message_complete{stop_reason:'error'}` + `agent_end` to the UI. +- A handler may throw `TransientError` + ([errors.ts](harness/src/turn-orchestrator/errors.ts)) to opt into the + queue's retry/backoff/DLQ instead of the terminal path. + +`dispatchWithHook` in [agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) +is the single chokepoint every agent-issued function call passes through. +It runs `consultBefore` before forwarding to the target function id. +`consultBefore` triggers `policy::check_permissions` directly (5 s timeout) +and maps the reply to `allow` / `deny` / `pending`. Fail-closed: policy unreachable → deny with a `gate_unavailable` `DenialEnvelope`. ## Registered functions -- `run::start` — Start a durable agent session and return immediately. -- `turn::step` — Run one durable state machine transition for a session. -- `turn::get_state` — Read the current `TurnStateRecord` for a session (or null for unknown sessions). UI clients use this on reload to recover any in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. 
+- `run::start` — Persist run config and messages, seed `turn_state` to + `provisioning`, and wake the FSM via `saveRecord`. +- `turn::provisioning` — FSM step: build system prompt + single `agent_trigger` schema, write enriched `run_request`, advance to `assistant_streaming`. +- `turn::assistant_streaming` — FSM step: stream the turn over a provider channel; on completion emit `message_complete`, persist the assistant message (dup-guarded), route to `function_execute` / `steering_check` / `tearing_down`. +- `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call, checkpoint per-call via `writeRecord`, park to `function_awaiting_approval` on a `pending` gate reply, finalize results into messages + emit `turn_end`, route to `steering_check` / `tearing_down`. +- `turn::function_awaiting_approval` — FSM step: read decisions for `awaiting_approval[]`; fold them into `rec.work.batch` (`allow` → `pre_approved`, `deny`/`aborted` → `blocked`); clear `awaiting_approval`, advance to `function_execute`. +- `turn::steering_check` — FSM step: check abort signal, drain `steering`/`followup` inboxes, enforce `max_turns` cap (emits synthetic `max_turns` message + `turn_end` → `tearing_down`), route to `assistant_streaming` / `tearing_down`. +- `turn::tearing_down` — FSM step: emit `agent_end`, advance to `stopped`. +- `turn::get_state` — One-shot reader returning a lean `TurnStateView` (from `schemas.ts:toView`) for a session. UI clients call this on reload to recover in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. Returns `null` for unknown sessions. - `turn::is_abort_signal_set` — Condition function bound to the agent-scope state trigger; matches `state:created`/`state:updated` writes that set `session//abort_signal` to `true`. 
-- `turn::on_abort_signal` — State trigger adapter: publishes `turn::step_requested` when the abort signal is set so the FSM advances on the next safe boundary. -- `turn::is_stepable_record_write` — Condition function bound to the record-written state trigger; matches `turn_state` writes whose `new_value.state` is non-terminal and non-parking (i.e. excludes `stopped` and `function_awaiting_approval`). -- `turn::on_record_written` — State trigger adapter: directly triggers `turn::step` for the affected session, so saving the record is itself the wake-up event. -- `turn::is_turn_state_write` — Condition function bound to the turn-state-changed trigger; matches every `state:created` / `state:updated` write to `session//turn_state` regardless of FSM state. -- `turn::on_turn_state_changed` — State trigger adapter: emits a `turn_state_changed` agent event carrying the full new (and prior) `TurnStateRecord` so the UI can derive pending approvals from state. +- `turn::on_abort_signal` — State trigger adapter: enqueues `turn::{current_state}` (via `wakeFromRecord`) when the abort signal is set so the FSM observes the abort on the next safe boundary. ## Triggers -- **Durable subscriber** on `turn::step_requested` → `turn::step`. Registered in [src/turn-orchestrator/subscriber.ts](harness/src/turn-orchestrator/subscriber.ts). Each `step` loads the `TurnStateRecord`, runs one transition, saves it back, and re-publishes `turn::step_requested` unless the run is terminal **or** paused on approvals (`function_awaiting_approval`). Paused turns are woken when `approval::resolve` or abort triggers a per-call `turn::approval_resume` function (see [workers/approval-gate.md](workers/approval-gate.md)). -- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_abort_signal_set` → `turn::on_abort_signal`. Registered in [src/turn-orchestrator/on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts). 
Publishes `turn::step_requested` the moment `session//abort_signal` is set to `true`, so the FSM advances to `steering_check` (and observes the abort) on the next safe boundary without waiting for the current step to time out. -- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_stepable_record_write` → `turn::on_record_written`. Registered in [src/turn-orchestrator/on-record-written.ts](harness/src/turn-orchestrator/on-record-written.ts). Directly triggers `turn::step` for the affected session on every non-terminal, non-parking `session//turn_state` write. Replaces the imperative `publishStep` self-publish — saving the record is now the wake. -- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_turn_state_write` → `turn::on_turn_state_changed`. Registered in [src/turn-orchestrator/on-turn-state-changed.ts](harness/src/turn-orchestrator/on-turn-state-changed.ts). Fires on every `session//turn_state` write (created or updated) and emits a `turn_state_changed` event to `agent::events` carrying the full new (and prior) record so the UI can derive pending approvals from state rather than from a signal event. +- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_abort_signal_set` → `turn::on_abort_signal`. Registered in [on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts). Enqueues the handler for the session's current persisted state the moment `session//abort_signal` is set to `true`, so the FSM advances to `steering_check` without waiting for the current step to time out. + +The record-written wake is now inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state calls `wakeState` directly. 
Similarly, `turn_state_changed` events are emitted inline from `persistRecord` via `emitTurnStateChanged` ([turn-state-write.ts](harness/src/turn-orchestrator/turn-state-write.ts)) — there is no separate `on-turn-state-changed` state trigger. + +Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` or abort triggers each per-call `turn::approval_resume` function (see [approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) and [workers/approval-gate.md](workers/approval-gate.md)). `recoverPendingApprovals` re-registers these resume functions at worker startup for sessions that were parked before a restart. ## Turn FSM -The full FSM, transitions, and dispatch table lives in -[src/turn-orchestrator/transitions.ts](harness/src/turn-orchestrator/transitions.ts). -The 11 states from -[src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts): +Each state is a registered `turn::{state}` function executed via +`runTransition` and enqueued onto the `turn-step` FIFO queue by `wakeState`. +The 8 states from [state.ts](harness/src/turn-orchestrator/state.ts): -| State | Handler | Role | +| State | Handler file | Role | |---|---|---| -| `provisioning` | [states/provisioning.ts](harness/src/turn-orchestrator/states/provisioning.ts) | Boot the sandbox, prime the system prompt, fetch function schemas. | -| `awaiting_assistant` | [states/assistant.ts](harness/src/turn-orchestrator/states/assistant.ts) | Request an assistant turn via `provider::::stream`. | -| `assistant_streaming` | same | Drain the provider channel; relay `message_update` (token/thinking deltas) on `agent::events`. Tool args appear at `function_execution_start` when execute runs — no `turn_start` or streaming `function_execution_update` events. | -| `assistant_finished` | same | Persist the final `AssistantMessage`; pick next state. 
| -| `function_prepare` | [states/functions.ts](harness/src/turn-orchestrator/states/functions.ts) | Snapshot the pending function calls. | -| `function_execute` | same | Run each call via `dispatchWithHook` (pre-approved resume calls use `triggerFunctionCall` and skip the gate). If the gate returns `pending`, append the call to `awaiting_approval` and transition to `function_awaiting_approval` (the rest of the batch is left for the resumed step). Each call is bracketed by a `function_execution_start` / `function_execution_end` pair; the `end` event carries `duration_ms` (wall-clock between the matching start and end), persisted on `ExecutedEntry` so resumed runs replay the original timing instead of the ~0ms it takes to re-emit. Approval wait time is naturally excluded — pending calls return without an end emit, and the resumed step re-emits a fresh start that resets the timer. | -| `function_awaiting_approval` | same (`handleAwaitingApproval`) | Read `approvals//` for every entry in `awaiting_approval`. While any decision is still missing, return without stepping (the next `turn::approval_resume` invoke will wake `turn::step`). When all decisions are present, fold them into the prepared snapshot — `allow` → `pre_approved: true`, `deny`/`aborted` → `blocked` with a denial result — clear `awaiting_approval`, and transition back to `function_execute`. | -| `function_finalize` | same | Persist results; emit `function_call_end` + `turn_end` events. | -| `steering_check` | [states/steering.ts](harness/src/turn-orchestrator/states/steering.ts) | Decide whether to continue, stop, or hit `max_turns`. | -| `tearing_down` | [states/tearing-down.ts](harness/src/turn-orchestrator/states/tearing-down.ts) | Emit `agent_end` once, free the sandbox if any. | -| `stopped` | (no-op) | Terminal. Idempotent. 
| - -`dispatchWithHook` in [agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) -now returns one of three shapes: `{ kind: 'result' }`, `{ kind: 'deny' }`, -or `{ kind: 'pending' }`. Pending is what triggers the +| `provisioning` | [states/provisioning.ts](harness/src/turn-orchestrator/states/provisioning.ts) | Fetch skills index + default-skill bodies, build system prompt, write enriched `run_request` (with `function_schemas: [agentTriggerTool()]`), → `assistant_streaming`. | +| `assistant_streaming` | [states/assistant-streaming.ts](harness/src/turn-orchestrator/states/assistant-streaming.ts) | Increment `turn_count`; create channel; trigger provider stream; relay `message_update` deltas; on completion call `finalizeAssistant` which emits `message_complete`, persists the assistant message (dup-guarded), then routes → `function_execute` (has calls) / `steering_check` (no calls) / `tearing_down` (error/aborted). | +| `function_execute` | [states/function-execute.ts](harness/src/turn-orchestrator/states/function-execute.ts) | Build batch from `rec.last_assistant` (or reuse existing `rec.work`); for each call: emit `function_execution_start`, skip if already executed, dispatch via `dispatchWithHook`; if `pending` → append to `awaiting_approval`, register `turn::approval_resume`, → `function_awaiting_approval`; otherwise commit result (silent `writeRecord` checkpoint) + emit `function_execution_end`; after batch: fold results into messages + emit `turn_end` → `steering_check` / `tearing_down`. | +| `function_awaiting_approval` | [states/function-awaiting-approval.ts](harness/src/turn-orchestrator/states/function-awaiting-approval.ts) | Read decision for each `awaiting_approval[]` entry; if any is still missing → return (park); when all present, fold into `rec.work.batch` (`allow` → `pre_approved: true`; `deny`/`aborted` → `blocked` with denial result); clear `awaiting_approval` → `function_execute`. 
| +| `steering_check` | [states/steering-check.ts](harness/src/turn-orchestrator/states/steering-check.ts) | Priority route: abort → `tearing_down`; steering msg → `assistant_streaming` (unless `max_turns` reached); followup msg → `assistant_streaming` (unless `max_turns` reached); function results present → `assistant_streaming` (unless `max_turns` reached); else emit `turn_end` once → `tearing_down`. `max_turns` path emits a synthetic `message_complete` + `turn_end`. | +| `tearing_down` | [states/tearing-down.ts](harness/src/turn-orchestrator/states/tearing-down.ts) | Emit `agent_end` → `stopped`. | +| `stopped` | (no handler) | Terminal. Idempotent. | +| `failed` | (set by `runTransition` on unexpected throw) | Terminal. Carries `error: {kind, message}` on the record. Emits `message_complete{stop_reason:'error'}` + `agent_end` so the UI sees the reason. A handler may throw `TransientError` to use the queue's retry/DLQ instead. | + +`NON_STEPABLE_STATES` in [wake.ts](harness/src/turn-orchestrator/wake.ts) are +`stopped`, `failed`, and `function_awaiting_approval` — `saveRecord` does not +enqueue a handler for these. + +`dispatchWithHook` returns one of three shapes: `{ kind: 'result' }`, +`{ kind: 'deny' }`, or `{ kind: 'pending' }`. `pending` triggers the `function_awaiting_approval` park. ## State keys -All keys live under iii state scope `agent`. From -[src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts): +All keys live under iii state scope `agent`. Key helpers are defined in +[state.ts](harness/src/turn-orchestrator/state.ts); persistence helpers in +[persistence.ts](harness/src/turn-orchestrator/persistence.ts). | Key shape | Purpose | |---|---| -| `session//turn_state` | Serialised `TurnStateRecord`. | -| `session//messages` | Active path `AgentMessage[]`; mirrored into `session-tree::*` on every save. | -| `session//run_request` | The original `run::start` payload (provider, model, system_prompt, mode, image, idle_timeout_secs). 
| -| `session//sandbox_id` | Active sandbox handle. | -| `session//function_schemas` | Cached tool schemas exposed to the model. | -| `session//tool_schemas` | Legacy alias of `function_schemas`. | -| `session//session_tree_mirror_len` | High-water mark so the messages mirror is incremental. | -| `session//last_compaction_at` | Last entry id the compactor wrote. | -| `session//last_compaction_consumed_at` | Last compaction the loader applied. | +| `session//turn_state` | Serialised `TurnStateRecord` (incl. `work?: TurnWork` and `error?: {kind, message}`). | +| `session//messages` | Active path `AgentMessage[]`; mirrored into `session-tree::*` on every save (inline in `persistence.saveMessages`). | +| `session//run_request` | The `run::start` payload enriched by `provisioning` to include `function_schemas: [agentTriggerTool()]` and the assembled `system_prompt`. Typed as `RunRequest` ([run-request.ts](harness/src/turn-orchestrator/run-request.ts)). | +| `session//session_tree_mirror_len` | High-water mark so the session-tree messages mirror is incremental. The session-tree mirror is still inline in `persistence.saveMessages` — its relocation to a reactive subscriber is tracked as a follow-up, not done. | | `session//event_counter` | Monotonic counter for `agent::events` sequence numbers. | -| `session//abort_signal` | Set by `router::abort` to interrupt a streaming turn. | -| `session//function_prepared` | Snapshot of pending function calls for the current turn. Each entry carries `pre_approved` / `blocked` flags so resumed approvals can short-circuit re-dispatch. | -| `session//function_executed` | Results of the current turn's function calls. | -| `session//tool_prepared`, `session//tool_executed` | Legacy aliases of the two above. | +| `session//abort_signal` | Set by `router::abort` via `performAbortSideEffects` to interrupt a streaming turn. 
| + +Keys that no longer exist: `function_prepared`, `function_executed`, +`function_schemas` (standalone), `tool_prepared`, `tool_executed`, +`tool_schemas`, `sandbox_id`, `last_compaction_at`, +`last_compaction_consumed_at` — these were removed in the rewrite. + +The `TurnStateRecord` carries `work?: TurnWork` (inline `{batch: PreparedEntry[]; results: ExecutedEntry[]}`) in place of the former separate state keys. `PreparedEntry`, `ExecutedEntry`, and `TurnWork` are all defined in [state.ts](harness/src/turn-orchestrator/state.ts). + +## UI events + +`turn_state_changed` is emitted inline by `persistRecord` (via +[turn-state-write.ts](harness/src/turn-orchestrator/turn-state-write.ts)) +on every `saveRecord` / `persistRecord` call. It carries a lean +`TurnStateView` (not the full `TurnStateRecord`) as `new_value` (and +`old_value` when updating). `TurnStateView` is defined in +[schemas.ts](harness/src/turn-orchestrator/schemas.ts) and contains: +`session_id`, `state`, `turn_count`, `max_turns`, `awaiting_approval`, `error`. + +`turn::get_state` also returns a `TurnStateView` (via `toView`), not the full +record, so heavy internal fields (`work`, `last_assistant`) are never sent to +consumers. + +## Approval chokepoint -The `TurnStateRecord` also carries an optional `awaiting_approval: -AwaitingApprovalEntry[]` field — populated when `function_execute` is -parked, drained when `function_awaiting_approval` folds the resolved -decisions back into the prepared snapshot. +Unchanged from prior design: `dispatchWithHook` → `consultBefore` → +`policy::check_permissions` (5 s timeout, fail-closed). A `needs_approval` +reply returns `{ kind: 'pending' }` from `dispatchWithHook`, which parks the +session to `function_awaiting_approval` and registers a per-call +`turn::approval_resume` function. 
`approval::resolve` (or abort via +`performAbortSideEffects`) triggers that resume function, which persists the +decision to scope `approvals` and calls `wakeFromRecord` to re-enqueue the +session's current state handler. ## Configuration @@ -98,7 +131,7 @@ From the top-level `turn-orchestrator` section of [config.yaml](harness/config.yaml): - `system_default_skills` (default `["iii://iii-directory/index"]`) — - skills the bootstrap step downloads into the session's system prompt + skill URIs the bootstrap step downloads into the session's system prompt context. ## Dependencies @@ -113,24 +146,33 @@ From | File | Purpose | |---|---| | [src/turn-orchestrator/main.ts](harness/src/turn-orchestrator/main.ts) | Binary entry point. | -| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes `run::start`, per-state `turn::{state}` handlers, abort-signal trigger, and kicks off the bootstrap. | -| [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state`, and wakes the FSM via the record-written state trigger. | -| [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader that returns the current `TurnStateRecord` for a session. UI clients call this on reload to recover in-progress modals; the orchestrator owns the state schema/key layout so clients never read iii state directly. | -| [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | The dispatcher chokepoint; `dispatchWithHook` runs `consultBefore` before triggering the function and returns `result` / `deny` / `pending`. 
| -| [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — calls `policy::check_permissions` directly (5 s timeout) and maps the reply via `parsePolicyReply` (`approval-gate/schemas.ts`) to `allow` / `pending` / `deny`; fails closed with a `gate_unavailable` envelope. `publishAfter` still routes through `hook-fanout::publish_collect` for the after-hook fanout path. | -| [src/turn-orchestrator/approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) | Per-call `turn::approval_resume` registration, handler (persist + `turn::step`), and startup recovery for parked sessions. | -| [src/turn-orchestrator/abort.ts](harness/src/turn-orchestrator/abort.ts) | `performAbortSideEffects` — writes `session//abort_signal = true` and, for turns paused on approvals, triggers each `turn::approval_resume` fn with `{decision: 'aborted'}`. | -| [src/turn-orchestrator/on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts) | State trigger adapter — `turn::is_abort_signal_set` (condition) + `turn::on_abort_signal` (handler) — publishes `turn::step_requested` whenever `session//abort_signal` is set to `true`. | -| [src/turn-orchestrator/subscriber.ts](harness/src/turn-orchestrator/subscriber.ts) | `turn::step` durable subscriber. Skips the auto re-publish of `turn::step_requested` while the record is in `function_awaiting_approval` (per-call resume fns own that wake). | -| [src/turn-orchestrator/transitions.ts](harness/src/turn-orchestrator/transitions.ts) | State → handler dispatch table. | -| [src/turn-orchestrator/states/*.ts](harness/src/turn-orchestrator/states/) | One file per FSM state; `states/functions.ts` owns `function_prepare`, `function_execute`, `function_awaiting_approval`, and `function_finalize`. | -| [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) | `TurnState`, `TurnStateRecord` (now with `awaiting_approval?: AwaitingApprovalEntry[]`), state-key helpers. 
| -| [src/turn-orchestrator/persistence.ts](harness/src/turn-orchestrator/persistence.ts) | Load/save helpers + the `session-tree::*` messages mirror. `PreparedEntry` now carries `pre_approved` so resumed turns can dispatch the call without re-asking the gate. | +| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions: `run::start`, per-state `turn::{state}` handlers, abort-signal trigger, approval-resume recovery, `turn::get_state`. | +| [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state` to `provisioning` via `saveRecord` (which wakes the FSM). | +| [src/turn-orchestrator/run-transition.ts](harness/src/turn-orchestrator/run-transition.ts) | Shared FSM transition runner: load → null-check → stale-skip → handle → save. Routes to `failed` on unexpected throw; re-throws `TransientError` for queue retry. | +| [src/turn-orchestrator/wake.ts](harness/src/turn-orchestrator/wake.ts) | `wakeState` / `wakeFromRecord` — enqueue `turn::{state}` onto the `turn-step` FIFO queue; `shouldRunStep` gates non-stepable states. | +| [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `AbortSignalWriteEventSchema`. | +| [src/turn-orchestrator/run-request.ts](harness/src/turn-orchestrator/run-request.ts) | `RunRequest` type and `parseRunRequest` — the typed, parsed form of `session//run_request` (includes `function_schemas`). | +| [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader returning `TurnStateView \| null`.
| +| [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | Dispatcher chokepoint: `dispatchWithHook` (consult + trigger), `triggerFunctionCall` (trigger/decode/error), `agentTriggerTool` (schema), `unwrapAgentTrigger`. | +| [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — `policy::check_permissions` (5 s, fail-closed) → `allow` / `pending` / `deny`. `publishAfter` — `hook-fanout::publish_collect` for after-hook fanout. | +| [src/turn-orchestrator/approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) | Per-call `turn::approval_resume` registration and handler (persist decision + `wakeFromRecord`); `recoverPendingApprovals` re-registers at startup. | +| [src/turn-orchestrator/abort.ts](harness/src/turn-orchestrator/abort.ts) | `performAbortSideEffects` — writes `session//abort_signal = true` and triggers each `turn::approval_resume` with `{decision: 'aborted'}` for parked sessions. | +| [src/turn-orchestrator/on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts) | State trigger adapter — `turn::is_abort_signal_set` (condition) + `turn::on_abort_signal` (handler, calls `wakeFromRecord`). | +| [src/turn-orchestrator/turn-state-write.ts](harness/src/turn-orchestrator/turn-state-write.ts) | `emitTurnStateChanged` — inline UI notification emitting `turn_state_changed` with lean `TurnStateView`. Called from `persistRecord`. | +| [src/turn-orchestrator/states/provisioning.ts](harness/src/turn-orchestrator/states/provisioning.ts) | `turn::provisioning` handler. | +| [src/turn-orchestrator/states/assistant-streaming.ts](harness/src/turn-orchestrator/states/assistant-streaming.ts) | `turn::assistant_streaming` handler. | +| [src/turn-orchestrator/states/function-execute.ts](harness/src/turn-orchestrator/states/function-execute.ts) | `turn::function_execute` handler. 
| +| [src/turn-orchestrator/states/function-awaiting-approval.ts](harness/src/turn-orchestrator/states/function-awaiting-approval.ts) | `turn::function_awaiting_approval` handler. | +| [src/turn-orchestrator/states/steering-check.ts](harness/src/turn-orchestrator/states/steering-check.ts) | `turn::steering_check` handler. | +| [src/turn-orchestrator/states/tearing-down.ts](harness/src/turn-orchestrator/states/tearing-down.ts) | `turn::tearing_down` handler. | +| [src/turn-orchestrator/states/index.ts](harness/src/turn-orchestrator/states/index.ts) | Re-exports per-state `register` functions. | +| [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) | `TurnState`, `TurnStateRecord`, `TurnWork`, `PreparedEntry`, `ExecutedEntry`, `AwaitingApprovalEntry`, state-key helpers, `newRecord`, `transitionTo`. | +| [src/turn-orchestrator/persistence.ts](harness/src/turn-orchestrator/persistence.ts) | Load/save helpers: `loadRecord` (with legacy `assistant_finished` migration), `saveRecord` (persist + wake), `persistRecord` (persist + UI event, no wake), `writeRecord` (silent checkpoint), `saveMessages` (+ session-tree mirror). | +| [src/turn-orchestrator/errors.ts](harness/src/turn-orchestrator/errors.ts) | `TransientError` (opt into queue retry), `ContextOverflowError`, `CompactionBusyError`. | | [src/turn-orchestrator/events.ts](harness/src/turn-orchestrator/events.ts) | `emit(iii, sid, event)` — appends a sequenced `AgentEvent` to the `agent::events` stream. | -| [src/turn-orchestrator/on-record-written.ts](harness/src/turn-orchestrator/on-record-written.ts) | State-trigger adapter — `turn::is_stepable_record_write` (condition) + `turn::on_record_written` (handler) — directly triggers `turn::step` on every non-terminal, non-parking `turn_state` write. Replaces the imperative `publishStep` self-publish so saving the record is itself the wake.
| -| [src/turn-orchestrator/on-turn-state-changed.ts](harness/src/turn-orchestrator/on-turn-state-changed.ts) | State-trigger adapter — `turn::is_turn_state_write` (condition) + `turn::on_turn_state_changed` (handler) — emits `turn_state_changed` to `agent::events` on every `turn_state` write (created or updated). Carries the full new (and prior) `TurnStateRecord` so the console can derive pending approvals from state rather than from a signal event. | -| [src/turn-orchestrator/provider-router.ts](harness/src/turn-orchestrator/provider-router.ts) | Picks `provider::::stream` for the run's `provider` field. | -| [src/turn-orchestrator/system-prompt.ts](harness/src/turn-orchestrator/system-prompt.ts) | Builds the system prompt from `run_request.system_prompt` + bootstrap skills. | -| [src/turn-orchestrator/bootstrap.ts](harness/src/turn-orchestrator/bootstrap.ts) | Best-effort skill download via `directory::skills::download`. | +| [src/turn-orchestrator/preflight.ts](harness/src/turn-orchestrator/preflight.ts) | `runPreflight` — context-compaction check before each provider call. | +| [src/turn-orchestrator/provider-router.ts](harness/src/turn-orchestrator/provider-router.ts) | `decide` + `targetFunctionId` — pick `provider::::stream` for the run's `provider` field. | +| [src/turn-orchestrator/system-prompt.ts](harness/src/turn-orchestrator/system-prompt.ts) | `buildSystemPrompt` — assembles system prompt from request, bootstrap skills, skills index. | +| [src/turn-orchestrator/bootstrap.ts](harness/src/turn-orchestrator/bootstrap.ts) | Best-effort skill download via `directory::skills::download` at startup. | | [src/turn-orchestrator/config.ts](harness/src/turn-orchestrator/config.ts) | Loads the worker's config slice. | | [src/turn-orchestrator/iii.worker.yaml](harness/src/turn-orchestrator/iii.worker.yaml) | Worker manifest. 
| From 73d5666ec43e75095586e2a14245818968b40bd3 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 19:07:39 -0300 Subject: [PATCH 11/41] fix(turn-orchestrator): guard failed in shouldRunStep; no phantom start on re-entry --- .../states/function-execute.ts | 18 +++++--- harness/src/turn-orchestrator/wake.ts | 10 +++-- .../tests/turn-orchestrator/functions.test.ts | 45 +++++++++++++++++++ .../turn-orchestrator/persistence.test.ts | 42 ----------------- harness/tests/turn-orchestrator/wake.test.ts | 18 ++++++++ 5 files changed, 81 insertions(+), 52 deletions(-) delete mode 100644 harness/tests/turn-orchestrator/persistence.test.ts diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts index b02a910d..629b5215 100644 --- a/harness/src/turn-orchestrator/states/function-execute.ts +++ b/harness/src/turn-orchestrator/states/function-execute.ts @@ -213,14 +213,10 @@ export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 58163f19..275d9e8f 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -103,6 +103,51 @@ describe('handleExecute new flow', () => { expect(rec.function_results[0]?.function_call_id).toBe('fc-1'); }); + it('does not re-emit function_execution_start for already-executed calls on re-entry', async () => { + const emitted: Array<{ type: string; function_call_id?: string }> = []; + vi.spyOn(events, 'emit').mockImplementation(async (_iii, _sid, ev: never) => { + emitted.push(ev as { type: string; function_call_id?: string }); + }); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ + kind: 'result', + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, terminate: false }, + }); + const iii = { trigger: 
vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec: TurnStateRecord = newRecord('s1'); + rec.state = 'function_execute'; + // Re-entry: fc-1 already in results (executed before park), fc-2 still pending. + rec.work = { + batch: [ + { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + { function_call: { id: 'fc-2', function_id: 'shell::run', arguments: {} }, blocked: null }, + ], + results: [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: { + content: [{ type: 'text' as const, text: 'done' }], + details: {}, + terminate: false, + }, + is_error: false, + duration_ms: 5, + }, + ], + }; + mockFinalizePersistence(); + + await handleExecute(iii, rec); + + const starts = emitted + .filter((e) => e.type === 'function_execution_start') + .map((e) => e.function_call_id); + expect(starts).toEqual(['fc-2']); // fc-1 NOT restarted on re-entry + const fc1Ends = emitted.filter( + (e) => e.type === 'function_execution_end' && e.function_call_id === 'fc-1', + ); + expect(fc1Ends).toHaveLength(1); // fc-1 end replayed exactly once + }); + it('pushes the call onto awaiting_approval and transitions to function_awaiting_approval on pending', async () => { const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); dispatchSpy.mockResolvedValueOnce({ kind: 'pending' }); diff --git a/harness/tests/turn-orchestrator/persistence.test.ts b/harness/tests/turn-orchestrator/persistence.test.ts deleted file mode 100644 index b749a01b..00000000 --- a/harness/tests/turn-orchestrator/persistence.test.ts +++ /dev/null @@ -1,42 +0,0 @@ -import { describe, it, expect, vi } from 'vitest'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { migrateLegacyRecord } from '../../src/turn-orchestrator/persistence.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -describe('writeRecord', () => { - it('writes turn_state without emitting 
turn_state_changed', async () => { - const calls: string[] = []; - const iii = { - trigger: vi.fn(async ({ function_id }: any) => { - calls.push(function_id); - return null; - }), - } as any; - await persistence.writeRecord(iii, newRecord('s1')); - expect(calls).toContain('state::set'); - expect(calls).not.toContain('stream::set'); // no agent::events emit - }); -}); - -describe('migrateLegacyRecord', () => { - it('coerces legacy assistant_finished -> function_execute when last_assistant has calls', () => { - const legacy: any = { - session_id: 's1', state: 'assistant_finished', turn_count: 1, - last_assistant: { role: 'assistant', content: [{ type: 'function_call', id: 'c1', function_id: 'x::y', arguments: {} }] }, - pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 1, updated_at_ms: 1, - }; - expect(migrateLegacyRecord(legacy).state).toBe('function_execute'); - }); - it('coerces legacy assistant_finished -> steering_check when no calls', () => { - const legacy: any = { - session_id: 's1', state: 'assistant_finished', turn_count: 1, - last_assistant: { role: 'assistant', content: [{ type: 'text', text: 'hi' }] }, - pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 1, updated_at_ms: 1, - }; - expect(migrateLegacyRecord(legacy).state).toBe('steering_check'); - }); - it('leaves current records untouched', () => { - const cur: any = { session_id: 's1', state: 'function_execute', turn_count: 1, pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 1, updated_at_ms: 1 }; - expect(migrateLegacyRecord(cur).state).toBe('function_execute'); - }); -}); diff --git a/harness/tests/turn-orchestrator/wake.test.ts b/harness/tests/turn-orchestrator/wake.test.ts index e09ae12c..f3a5b753 100644 --- a/harness/tests/turn-orchestrator/wake.test.ts +++ b/harness/tests/turn-orchestrator/wake.test.ts @@ -91,4 +91,22 @@ describe('wakeFromRecord', () => { await 
wakeFromRecord(iii, 'sess-y'); expect(iii.trigger).toHaveBeenCalledTimes(1); }); + + it('no-ops when session is failed (no turn::failed handler exists)', async () => { + const rec = newRecord('sess-z'); + rec.state = 'failed'; + const triggers: Array<{ function_id: string }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') return rec; + triggers.push(req); + return null; + }), + } as unknown as ISdk; + + await wakeFromRecord(iii, 'sess-z'); + // only the state::get read — no enqueue of an unregistered turn::failed + expect(iii.trigger).toHaveBeenCalledTimes(1); + expect(triggers).toHaveLength(0); + }); }); From 2ce503b81e259ce1794953e736f431dab1b12434 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 20:51:44 -0300 Subject: [PATCH 12/41] refactor(turn-orchestrator): extract skillIdFromUri, options-object system prompt - Add skillIdFromUri; remove the iii:// strip duplicated across system-prompt, provisioning, and bootstrap. - buildSystemPrompt takes a SystemPromptOptions object instead of four positional optionals, removing the unreadable bare-null call sites. - Delete the dead-in-production cwd path (no run-request source fed it). - Drop stale 'Mirrors *.rs' doc comments; use function-schema terminology. --- harness/src/turn-orchestrator/bootstrap.ts | 9 ++- harness/src/turn-orchestrator/run-start.ts | 3 +- .../turn-orchestrator/states/provisioning.ts | 19 +++--- .../src/turn-orchestrator/system-prompt.ts | 30 +++++++--- .../turn-orchestrator/system-prompt.test.ts | 59 ++++++++++--------- 5 files changed, 70 insertions(+), 50 deletions(-) diff --git a/harness/src/turn-orchestrator/bootstrap.ts b/harness/src/turn-orchestrator/bootstrap.ts index 26ba0291..948194da 100644 --- a/harness/src/turn-orchestrator/bootstrap.ts +++ b/harness/src/turn-orchestrator/bootstrap.ts @@ -1,18 +1,17 @@ /** - * Best-effort fetch of default skills at boot. 
Mirrors - * `turn-orchestrator/src/bootstrap.rs`. Failures are logged and never - * abort startup. + * Best-effort download of default-skill namespaces at boot. Failures are logged + * and never abort startup. */ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { TurnOrchestratorConfig } from './config.js'; +import { skillIdFromUri } from './system-prompt.js'; export async function run(iii: ISdk, cfg: TurnOrchestratorConfig): Promise { const namespaces = new Set(); for (const uri of cfg.system_default_skills) { - const id = uri.startsWith('iii://') ? uri.slice('iii://'.length) : uri; - const ns = id.split('/')[0]; + const ns = skillIdFromUri(uri).split('/')[0]; if (ns) namespaces.add(ns); } for (const ns of namespaces) { diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index d6112a95..99f232e1 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -1,5 +1,5 @@ /** - * `run::start`. Mirrors `turn-orchestrator/src/run_start.rs`. + * `run::start`. Persist run config + messages and seed the FSM at `provisioning`. * * **Incoming**: flat run request from `harness::trigger` (`body.payload` after * `HarnessTriggerInputSchema` parse); console/web sends @@ -20,6 +20,7 @@ export async function execute(iii: ISdk, payload: RunStartPayload): Promise { async function fetchDefaultSkills(iii: ISdk, uris: readonly string[]): Promise { const bodies: DefaultSkillBody[] = []; for (const uri of uris) { - const id = uri.startsWith('iii://') ? 
uri.slice('iii://'.length) : uri; - const body = await fetchSkill(iii, id); + const body = await fetchSkill(iii, skillIdFromUri(uri)); bodies.push(defaultSkillBody(uri, body)); } return bodies; @@ -80,7 +85,7 @@ export async function handleProvisioning( fetchSkillsIndex(iii), fetchDefaultSkills(iii, cfg.system_default_skills), ]); - const prompt = buildSystemPrompt(bodies, null, override, request.mode, skillsIndex); + const prompt = buildSystemPrompt(bodies, { override, mode: request.mode, skillsIndex }); const updated: RunRequest = { ...request, system_prompt: prompt, function_schemas: [agentTriggerTool()] }; await persistence.saveRunRequest(iii, rec.session_id, updated); @@ -102,7 +107,7 @@ export function register(iii: ISdk, cfg: TurnOrchestratorConfig): void { }, { description: - 'Run one durable FSM transition for session in state provisioning: materialize tool schemas, build system prompt, advance to assistant_streaming.', + 'Run one durable FSM transition for session in state provisioning: build the system prompt, attach the agent_trigger function schema, advance to assistant_streaming.', }, ); } diff --git a/harness/src/turn-orchestrator/system-prompt.ts b/harness/src/turn-orchestrator/system-prompt.ts index 0f5b4e0e..443b632d 100644 --- a/harness/src/turn-orchestrator/system-prompt.ts +++ b/harness/src/turn-orchestrator/system-prompt.ts @@ -1,10 +1,17 @@ /** - * System-prompt assembly. Mirrors - * `turn-orchestrator/src/system_prompt.rs`. + * System-prompt assembly: turns the run's mode, default-skill bodies, and the + * skills index into the single system prompt string sent to the provider. */ export type Mode = 'plan' | 'ask' | 'agent'; +const III_URI_PREFIX = 'iii://'; + +/** Bare skill id from a skill URI (`iii://a/b` → `a/b`; bare ids pass through). */ +export function skillIdFromUri(uri: string): string { + return uri.startsWith(III_URI_PREFIX) ? 
uri.slice(III_URI_PREFIX.length) : uri; +} + const MODE_PARAGRAPHS: Record = { plan: `You are operating in plan mode: investigate first, then produce a concise numbered plan. 1. Investigate everything needed to fully plan — explore relevant functions, skills, and code via \`agent_trigger\` as needed. @@ -58,20 +65,25 @@ export type DefaultSkillBody = { }; export function defaultSkillBody(uri: string, body: string | null): DefaultSkillBody { - const id = uri.startsWith('iii://') ? uri.slice('iii://'.length) : uri; - return { uri, id, body }; + return { uri, id: skillIdFromUri(uri), body }; } +export type SystemPromptOptions = { + /** Caller-supplied prompt; when non-empty it is returned verbatim. */ + override?: string | null; + /** Operating mode; prepends a mode paragraph before the identity preamble. */ + mode?: Mode | null; + /** Skills index block appended after the preamble. */ + skillsIndex?: string | null; +}; + export function buildSystemPrompt( skills: DefaultSkillBody[], - cwd?: string | null, - override?: string | null, - mode?: Mode | null, - skillsIndex?: string | null, + opts: SystemPromptOptions = {}, ): string { + const { override, mode, skillsIndex } = opts; if (override && override.length > 0) return override; let out = isMode(mode) ? 
`${MODE_PARAGRAPHS[mode]}\n\n${IDENTITY_PREAMBLE}` : IDENTITY_PREAMBLE; - if (cwd && cwd.length > 0) out += `\n\nWorking directory: ${cwd}`; if (skillsIndex && skillsIndex.length > 0) out += `\n\n${skillsIndex}`; for (const s of skills) { out += `\n\n# ${s.uri}\n\n`; diff --git a/harness/tests/turn-orchestrator/system-prompt.test.ts b/harness/tests/turn-orchestrator/system-prompt.test.ts index d7473af2..da7ac12f 100644 --- a/harness/tests/turn-orchestrator/system-prompt.test.ts +++ b/harness/tests/turn-orchestrator/system-prompt.test.ts @@ -1,35 +1,38 @@ import { describe, expect, it } from 'vitest'; -import { buildSystemPrompt, defaultSkillBody } from '../../src/turn-orchestrator/system-prompt.js'; +import { + buildSystemPrompt, + defaultSkillBody, + skillIdFromUri, +} from '../../src/turn-orchestrator/system-prompt.js'; describe('buildSystemPrompt', () => { it('non-empty override returns verbatim', () => { - expect(buildSystemPrompt([defaultSkillBody('iii://iii', 'body')], '/tmp', 'custom')).toBe( + expect(buildSystemPrompt([defaultSkillBody('iii://iii', 'body')], { override: 'custom' })).toBe( 'custom', ); }); it('empty override falls through to canonical assembly', () => { - const out = buildSystemPrompt([defaultSkillBody('iii://iii', 'BODY')], '/tmp', ''); + const out = buildSystemPrompt([defaultSkillBody('iii://iii', 'BODY')], { override: '' }); expect(out).toContain('You are an iii agent worker'); - expect(out).toContain('/tmp'); expect(out).toContain('BODY'); }); it('failed skill produces recovery stub with bare id', () => { - const out = buildSystemPrompt([defaultSkillBody('iii://iii', null)], null); + const out = buildSystemPrompt([defaultSkillBody('iii://iii', null)]); expect(out).toContain('# iii://iii'); expect(out).toContain('directory::skills::get { id: "iii" }'); }); it('preamble identity preserved', () => { - const out = buildSystemPrompt([], null); + const out = buildSystemPrompt([]); expect(out).toContain('You are an iii agent worker.'); 
expect(out).toContain('agent_trigger'); expect(out).toContain('directory::skills::get'); }); it('preamble teaches the @fn() pill syntax', () => { - const out = buildSystemPrompt([], null); + const out = buildSystemPrompt([]); expect(out).toContain('@fn()'); expect(out).toContain('@fn(directory::skills::get)'); }); @@ -39,22 +42,22 @@ describe('buildSystemPrompt', () => { // index straight to a function call, guess field names, and burn // turns on retries. The preamble must explicitly tell them to fetch // the per-function skill body first. - const out = buildSystemPrompt([], null); + const out = buildSystemPrompt([]); expect(out).toContain('FIRST time'); expect(out).toContain('/'); expect(out).toContain('sandbox/exec'); }); it('skills appear in config order', () => { - const out = buildSystemPrompt( - [defaultSkillBody('iii://iii', 'AAA'), defaultSkillBody('iii://shell', 'BBB')], - null, - ); + const out = buildSystemPrompt([ + defaultSkillBody('iii://iii', 'AAA'), + defaultSkillBody('iii://shell', 'BBB'), + ]); expect(out.indexOf('AAA')).toBeLessThan(out.indexOf('BBB')); }); it('mode plan prepends planner paragraph before identity preamble', () => { - const out = buildSystemPrompt([], null, null, 'plan'); + const out = buildSystemPrompt([], { mode: 'plan' }); expect(out).toContain('operating in plan mode'); expect(out.indexOf('operating in plan mode')).toBeLessThan( out.indexOf('You are an iii agent worker'), @@ -62,7 +65,7 @@ describe('buildSystemPrompt', () => { }); it('mode ask prepends ask paragraph before identity preamble', () => { - const out = buildSystemPrompt([], null, null, 'ask'); + const out = buildSystemPrompt([], { mode: 'ask' }); expect(out).toContain('operating in ask mode'); expect(out.indexOf('operating in ask mode')).toBeLessThan( out.indexOf('You are an iii agent worker'), @@ -70,7 +73,7 @@ describe('buildSystemPrompt', () => { }); it('mode agent prepends agent paragraph before identity preamble', () => { - const out = buildSystemPrompt([], 
null, null, 'agent'); + const out = buildSystemPrompt([], { mode: 'agent' }); expect(out).toContain('operating in agent mode'); expect(out.indexOf('operating in agent mode')).toBeLessThan( out.indexOf('You are an iii agent worker'), @@ -78,7 +81,7 @@ describe('buildSystemPrompt', () => { }); it('omitting mode preserves the canonical preamble verbatim (no mode paragraph)', () => { - const out = buildSystemPrompt([], null); + const out = buildSystemPrompt([]); expect(out.startsWith('You are an iii agent worker')).toBe(true); expect(out).not.toContain('operating in plan mode'); expect(out).not.toContain('operating in ask mode'); @@ -86,30 +89,23 @@ describe('buildSystemPrompt', () => { }); it('mode null behaves like omitted (backwards compat for non-console callers)', () => { - const out = buildSystemPrompt([], null, null, null); + const out = buildSystemPrompt([], { mode: null }); expect(out.startsWith('You are an iii agent worker')).toBe(true); expect(out).not.toContain('operating in'); }); it('non-empty override wins over mode (override returned verbatim)', () => { - const out = buildSystemPrompt([], '/tmp', 'custom-override', 'plan'); + const out = buildSystemPrompt([], { override: 'custom-override', mode: 'plan' }); expect(out).toBe('custom-override'); }); - it('mode interacts with cwd and skills: paragraph, preamble, cwd, skill body in order', () => { - const out = buildSystemPrompt( - [defaultSkillBody('iii://iii', 'SKILLBODY')], - '/work', - null, - 'agent', - ); + it('mode interacts with skills: paragraph, preamble, skill body in order', () => { + const out = buildSystemPrompt([defaultSkillBody('iii://iii', 'SKILLBODY')], { mode: 'agent' }); const pAgent = out.indexOf('operating in agent mode'); const pIdentity = out.indexOf('You are an iii agent worker'); - const pCwd = out.indexOf('/work'); const pSkill = out.indexOf('SKILLBODY'); expect(pAgent).toBeLessThan(pIdentity); - expect(pIdentity).toBeLessThan(pCwd); - expect(pCwd).toBeLessThan(pSkill); + 
expect(pIdentity).toBeLessThan(pSkill); }); }); @@ -125,3 +121,10 @@ describe('defaultSkillBody', () => { expect(s.id).toBe('iii'); }); }); + +describe('skillIdFromUri', () => { + it('strips the iii:// scheme and passes bare ids through', () => { + expect(skillIdFromUri('iii://iii-directory/index')).toBe('iii-directory/index'); + expect(skillIdFromUri('iii-directory/index')).toBe('iii-directory/index'); + }); +}); From 364c5cb5dbe703a994c18c31c0ccc86eb20dbc70 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 20:55:12 -0300 Subject: [PATCH 13/41] refactor(turn-orchestrator): extract provider-stream from assistant-streaming - New provider-stream.ts owns channel creation, the concurrent provider trigger, and the read loop. A MessagePump bridges channel.onMessage to an async iterator, replacing the hand-rolled messageQueue/resolveNext/done. - assistant-streaming.ts drops 259->154 lines; handleStreaming is now linear orchestration over streamProviderTurn + finalizeAssistant. - Unify the two synthetic-error paths: createChannel failure now also emits a message_update, so the UI surfaces the error like the channel-closed path. - Add provider-stream.test.ts covering done/error frames, the per-delta callback, trigger rejection, create_channel failure, and bad-frame skips. --- .../src/turn-orchestrator/provider-stream.ts | 149 +++++++++++++ .../states/assistant-streaming.ts | 209 +++++------------- .../turn-orchestrator/provider-stream.test.ts | 168 ++++++++++++++ 3 files changed, 369 insertions(+), 157 deletions(-) create mode 100644 harness/src/turn-orchestrator/provider-stream.ts create mode 100644 harness/tests/turn-orchestrator/provider-stream.test.ts diff --git a/harness/src/turn-orchestrator/provider-stream.ts b/harness/src/turn-orchestrator/provider-stream.ts new file mode 100644 index 00000000..30ad42c2 --- /dev/null +++ b/harness/src/turn-orchestrator/provider-stream.ts @@ -0,0 +1,149 @@ +/** + * Provider streaming. 
Turns an iii stream channel plus the provider trigger into + * a single final `AssistantMessage`, hiding the pull-based message pump behind an + * async iterator. + * + * `streamProviderTurn` owns channel creation, the concurrent provider trigger, + * and the read loop. The caller supplies how to build the provider input (it + * needs the channel's writer ref) and a per-delta callback used to emit UI + * `message_update` events. + */ + +import type { ISdk, StreamChannelRef } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import type { AssistantMessage } from '../types/agent-message.js'; +import type { ProviderStreamInput } from '../types/provider.js'; +import type { AssistantMessageEvent } from '../types/stream-event.js'; + +const PROVIDER_STREAM_TIMEOUT_MS = 300_000; + +type Channel = Awaited>; + +/** + * Bridges a push callback (`channel.reader.onMessage`) to async iteration. + * `push` buffers a message and wakes a pending `drain`; `end` terminates the + * iterator once the buffer is empty. + */ +class MessagePump { + private readonly items: string[] = []; + private wake: (() => void) | null = null; + private ended = false; + + push(item: string): void { + this.items.push(item); + this.signal(); + } + + end(): void { + this.ended = true; + this.signal(); + } + + private signal(): void { + if (this.wake) { + const wake = this.wake; + this.wake = null; + wake(); + } + } + + async *drain(): AsyncGenerator { + while (true) { + while (this.items.length > 0) { + const item = this.items.shift(); + if (item !== undefined) yield item; + } + if (this.ended) return; + await new Promise((resolve) => { + this.wake = resolve; + }); + } + } +} + +/** Outcome of a provider turn: the final message, or the reason none arrived. */ +export type ProviderTurnResult = { + final: AssistantMessage | null; + /** Set when the provider trigger threw; null when the channel just closed. 
*/ + error: string | null; +}; + +/** Strip iii invocation-error prefixes so the surfaced message reads cleanly. */ +export function formatProviderError(err: unknown): string { + const raw = err instanceof Error ? err.message : String(err); + return raw + .replace(/^IIIInvocationError:\s*/i, '') + .replace(/^invocation_failed:\s*/i, '') + .trim(); +} + +/** The assistant message a stream event carries (final for done/error, else the partial). */ +function eventMessage(ev: AssistantMessageEvent): AssistantMessage | null { + if (ev.type === 'done') return ev.message; + if (ev.type === 'error') return ev.error; + if ('partial' in ev) return ev.partial; + return null; +} + +function parseEvent(text: string, session_id: string): AssistantMessageEvent | null { + try { + return JSON.parse(text) as AssistantMessageEvent; + } catch (err) { + logger.warn('decode AssistantMessageEvent failed', { session_id, err: String(err) }); + return null; + } +} + +export async function streamProviderTurn( + iii: ISdk, + params: { + session_id: string; + targetFn: string; + buildInput: (writerRef: StreamChannelRef) => ProviderStreamInput; + onDelta: (partial: AssistantMessage, event: AssistantMessageEvent) => Promise; + }, +): Promise { + let channel: Channel; + try { + channel = await iii.createChannel(); + } catch (err) { + logger.warn('createChannel failed; falling back to synthetic error', { err: String(err) }); + return { final: null, error: `create_channel failed: ${String(err)}` }; + } + + const pump = new MessagePump(); + channel.reader.onMessage((msg: string) => pump.push(msg)); + channel.reader.stream.resume(); + + let error: string | null = null; + const triggerPromise = iii + .trigger({ + function_id: params.targetFn, + payload: params.buildInput(channel.writerRef as StreamChannelRef), + timeoutMs: PROVIDER_STREAM_TIMEOUT_MS, + }) + .catch((err) => { + logger.warn('provider stream trigger failed', { targetFn: params.targetFn, err: String(err) }); + error = 
formatProviderError(err); + pump.end(); + return null; + }); + + let final: AssistantMessage | null = null; + for await (const text of pump.drain()) { + const event = parseEvent(text, params.session_id); + if (!event) continue; + if (event.type === 'done' || event.type === 'error') { + final = eventMessage(event); + break; + } + const partial = eventMessage(event); + if (partial) { + final = partial; + await params.onDelta(partial, event); + } + } + pump.end(); + await triggerPromise; + return { final, error }; +} diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts index c40381d5..332e6b2a 100644 --- a/harness/src/turn-orchestrator/states/assistant-streaming.ts +++ b/harness/src/turn-orchestrator/states/assistant-streaming.ts @@ -1,31 +1,24 @@ /** - * `turn::assistant_streaming`. Start turn, stream provider response, finalize, and route onward. + * `turn::assistant_streaming`. Stream one provider turn, persist the assistant + * message, and route onward. * * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
*/ -import type { ISdk, StreamChannelRef } from '../../runtime/iii.js'; +import type { ISdk } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; import type { AssistantMessage } from '../../types/agent-message.js'; import type { AgentFunction } from '../../types/function.js'; -import type { ProviderStreamInput } from '../../types/provider.js'; -import type { AssistantMessageEvent } from '../../types/stream-event.js'; import { emit } from '../events.js'; import * as persistence from '../persistence.js'; import { runPreflight } from '../preflight.js'; import { buildInput, decide, targetFunctionId } from '../provider-router.js'; +import { streamProviderTurn } from '../provider-stream.js'; import { runTransition } from '../run-transition.js'; import { type TurnStateRecord, transitionTo } from '../state.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -function eventPartial(ev: AssistantMessageEvent): AssistantMessage | null { - if ('partial' in ev) return ev.partial; - if (ev.type === 'done') return ev.message; - if (ev.type === 'error') return ev.error; - return null; -} - function syntheticErrorAssistant( provider: string, model: string, @@ -44,18 +37,35 @@ function syntheticErrorAssistant( }; } -function formatProviderError(err: unknown): string { - const raw = err instanceof Error ? err.message : String(err); - return raw - .replace(/^IIIInvocationError:\s*/i, '') - .replace(/^invocation_failed:\s*/i, '') - .trim(); -} - function isErrorOrAborted(asst: AssistantMessage): boolean { return asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; } +/** Append the assistant message unless a re-entry already persisted it. 
*/ +async function persistAssistantOnce( + iii: ISdk, + rec: TurnStateRecord, + asst: AssistantMessage, +): Promise { + const messages = await persistence.loadMessages(iii, rec.session_id); + const last = messages[messages.length - 1]; + const dup = + last && + last.role === 'assistant' && + last.timestamp === asst.timestamp && + last.model === asst.model && + last.provider === asst.provider; + if (dup) { + logger.warn('finalizeAssistant: skipping duplicate assistant push (re-entry detected)', { + session_id: rec.session_id, + timestamp: asst.timestamp, + }); + return; + } + messages.push(asst); + await persistence.saveMessages(iii, rec.session_id, messages); +} + async function finalizeAssistant(iii: ISdk, rec: TurnStateRecord): Promise { const asst = rec.last_assistant; if (!asst) throw new Error('assistant_streaming finalize without last_assistant'); @@ -66,33 +76,15 @@ async function finalizeAssistant(iii: ISdk, rec: TurnStateRecord): Promise body_streamed: rec.assistant_body_streamed === true, }); - const errored = isErrorOrAborted(asst); - if (!errored) { - const messages = await persistence.loadMessages(iii, rec.session_id); - const last = messages[messages.length - 1]; - const dup = - last && - last.role === 'assistant' && - last.timestamp === asst.timestamp && - last.model === asst.model && - last.provider === asst.provider; - if (!dup) { - messages.push(asst); - await persistence.saveMessages(iii, rec.session_id, messages); - } else { - logger.warn('finalizeAssistant: skipping duplicate assistant push (re-entry detected)', { - session_id: rec.session_id, - timestamp: asst.timestamp, - }); - } - } - - if (errored) { + if (isErrorOrAborted(asst)) { await emit(iii, rec.session_id, { type: 'turn_end', message: asst, function_results: [] }); rec.turn_end_emitted = true; transitionTo(rec, 'tearing_down'); return; } + + await persistAssistantOnce(iii, rec, asst); + const hasCalls = asst.content.some((b) => b.type === 'function_call'); if (!hasCalls) { 
transitionTo(rec, 'steering_check'); @@ -110,134 +102,37 @@ export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise< const request = await persistence.loadRunRequest(iii, rec.session_id); let messages = await persistence.loadMessages(iii, rec.session_id); - const { provider, model, system_prompt } = request; - const tools = (Array.isArray(request.function_schemas) ? request.function_schemas : []) as AgentFunction[]; - + const tools = (Array.isArray(request.function_schemas) + ? request.function_schemas + : []) as AgentFunction[]; const decision = decide({ provider, model }); - const targetFn = targetFunctionId(decision); - const preflightResult = await runPreflight( - iii, - rec.session_id, - messages, - decision.provider, - model, - ); - if (preflightResult === 'compacted') { + if ((await runPreflight(iii, rec.session_id, messages, decision.provider, model)) === 'compacted') { messages = await persistence.loadMessages(iii, rec.session_id); } - let channel: Awaited>; - try { - channel = await iii.createChannel(); - } catch (err) { - logger.warn('createChannel failed; falling back to synthetic error', { - err: String(err), - }); - rec.last_assistant = syntheticErrorAssistant( - decision.provider, - decision.model, - `create_channel failed: ${String(err)}`, - ); - await finalizeAssistant(iii, rec); - return; - } - - const messageQueue: string[] = []; - let done = false; - let resolveNext: (() => void) | null = null; - channel.reader.onMessage((msg: string) => { - messageQueue.push(msg); - if (resolveNext) { - const fn = resolveNext; - resolveNext = null; - fn(); - } - }); - channel.reader.stream.resume(); - - const input: ProviderStreamInput = buildInput( - decision, - channel.writerRef as StreamChannelRef, - system_prompt, - messages, - tools, - ); - - let triggerError: string | null = null; - const triggerPromise = iii - .trigger({ - function_id: targetFn, - payload: input, - timeoutMs: 300_000, - }) - .catch((err) => { - 
logger.warn('provider stream trigger failed', { targetFn, err: String(err) }); - triggerError = formatProviderError(err); - done = true; - if (resolveNext) { - const fn = resolveNext; - resolveNext = null; - fn(); + const { final, error } = await streamProviderTurn(iii, { + session_id: rec.session_id, + targetFn: targetFunctionId(decision), + buildInput: (writerRef) => buildInput(decision, writerRef, system_prompt, messages, tools), + onDelta: async (partial, event) => { + await emit(iii, rec.session_id, { type: 'message_update', message: partial, llm_event: event }); + if (event.type === 'text_delta' || event.type === 'thinking_delta') { + rec.assistant_body_streamed = true; } - return null; - }); - - const readPromise = (async (): Promise => { - let final: AssistantMessage | null = null; - while (!done) { - while (messageQueue.length > 0) { - const text = messageQueue.shift(); - if (text === undefined) break; - let event: AssistantMessageEvent | null = null; - try { - event = JSON.parse(text) as AssistantMessageEvent; - } catch (err) { - logger.warn('decode AssistantMessageEvent failed', { - session_id: rec.session_id, - err: String(err), - }); - continue; - } - const partial = eventPartial(event); - if (partial) final = partial; - if (event.type !== 'done' && event.type !== 'error') { - if (partial) { - await emit(iii, rec.session_id, { - type: 'message_update', - message: partial, - llm_event: event, - }); - if (event.type === 'text_delta' || event.type === 'thinking_delta') { - rec.assistant_body_streamed = true; - } - } - continue; - } - if (event.type === 'done') final = event.message; - else final = event.error; - done = true; - break; - } - if (done) break; - await new Promise((r) => { - resolveNext = r; - }); - } - return final; - })(); + }, + }); - const [, finalMsg] = await Promise.all([triggerPromise, readPromise]); - if (finalMsg) { - rec.last_assistant = finalMsg; + if (final) { + rec.last_assistant = final; } else { - const errorText = triggerError 
?? 'provider channel closed without final'; - const synthetic = syntheticErrorAssistant(decision.provider, decision.model, errorText); + const reason = error ?? 'provider channel closed without final'; + const synthetic = syntheticErrorAssistant(decision.provider, decision.model, reason); await emit(iii, rec.session_id, { type: 'message_update', message: synthetic, - llm_event: { type: 'text_delta', partial: synthetic, delta: errorText }, + llm_event: { type: 'text_delta', partial: synthetic, delta: reason }, }); rec.last_assistant = synthetic; } diff --git a/harness/tests/turn-orchestrator/provider-stream.test.ts b/harness/tests/turn-orchestrator/provider-stream.test.ts new file mode 100644 index 00000000..cf213001 --- /dev/null +++ b/harness/tests/turn-orchestrator/provider-stream.test.ts @@ -0,0 +1,168 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; +import type { ProviderStreamInput } from '../../src/types/provider.js'; +import type { AssistantMessageEvent } from '../../src/types/stream-event.js'; +import { + formatProviderError, + streamProviderTurn, +} from '../../src/turn-orchestrator/provider-stream.js'; + +function assistant(overrides: Partial = {}): AssistantMessage { + return { + role: 'assistant', + content: [{ type: 'text', text: 'hi' }], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: 'gpt-4o', + provider: 'openai', + timestamp: 1, + ...overrides, + }; +} + +/** + * Fake iii: its channel delivers the given events (JSON-encoded) synchronously + * on `stream.resume()`, and `trigger` resolves to null unless `triggerRejects`. 
+ */ +function fakeIii(opts: { + events?: unknown[]; + triggerRejects?: unknown; + createChannelThrows?: unknown; +}): ISdk { + return { + createChannel: async () => { + if (opts.createChannelThrows) throw opts.createChannelThrows; + let deliver: ((m: string) => void) | null = null; + return { + writerRef: {}, + reader: { + onMessage: (cb: (m: string) => void) => { + deliver = cb; + }, + stream: { + resume: () => { + for (const e of opts.events ?? []) deliver?.(JSON.stringify(e)); + }, + }, + }, + }; + }, + trigger: async () => { + if (opts.triggerRejects) throw opts.triggerRejects; + return null; + }, + } as unknown as ISdk; +} + +const baseParams = { + session_id: 's1', + targetFn: 'provider::openai::stream', + buildInput: () => ({}) as unknown as ProviderStreamInput, + onDelta: async () => {}, +}; + +describe('streamProviderTurn', () => { + it('returns the done frame as the final message', async () => { + const finalMsg = assistant({ content: [{ type: 'text', text: 'done' }] }); + const iii = fakeIii({ events: [{ type: 'done', message: finalMsg }] }); + + const result = await streamProviderTurn(iii, baseParams); + + expect(result.final).toEqual(finalMsg); + expect(result.error).toBeNull(); + }); + + it('invokes onDelta per partial and tracks the latest before done', async () => { + const p1 = assistant({ content: [{ type: 'text', text: 'a' }] }); + const p2 = assistant({ content: [{ type: 'text', text: 'ab' }] }); + const finalMsg = assistant({ content: [{ type: 'text', text: 'abc' }] }); + const seen: AssistantMessageEvent[] = []; + const iii = fakeIii({ + events: [ + { type: 'text_delta', partial: p1, delta: 'a' }, + { type: 'text_delta', partial: p2, delta: 'b' }, + { type: 'done', message: finalMsg }, + ], + }); + + const result = await streamProviderTurn(iii, { + ...baseParams, + onDelta: async (_partial, event) => { + seen.push(event); + }, + }); + + expect(seen.map((e) => e.type)).toEqual(['text_delta', 'text_delta']); + 
expect(result.final).toEqual(finalMsg); + expect(result.error).toBeNull(); + }); + + it('surfaces the error frame as the final message', async () => { + const errMsg = assistant({ stop_reason: 'error', error_message: 'boom' }); + const iii = fakeIii({ events: [{ type: 'error', error: errMsg }] }); + + const result = await streamProviderTurn(iii, baseParams); + + expect(result.final).toEqual(errMsg); + expect(result.error).toBeNull(); + }); + + it('returns a cleaned error when the provider trigger rejects', async () => { + const iii = fakeIii({ triggerRejects: new Error('IIIInvocationError: upstream 500') }); + + const result = await streamProviderTurn(iii, baseParams); + + expect(result.final).toBeNull(); + expect(result.error).toBe('upstream 500'); + }); + + it('returns a create_channel error when the channel cannot be created', async () => { + const iii = fakeIii({ createChannelThrows: new Error('channel unavailable') }); + + const result = await streamProviderTurn(iii, baseParams); + + expect(result.final).toBeNull(); + expect(result.error).toContain('create_channel failed'); + }); + + it('skips undecodable frames and still completes on done', async () => { + const finalMsg = assistant({ content: [{ type: 'text', text: 'ok' }] }); + const onDelta = vi.fn(async () => {}); + const iii = { + createChannel: async () => { + let deliver: ((m: string) => void) | null = null; + return { + writerRef: {}, + reader: { + onMessage: (cb: (m: string) => void) => { + deliver = cb; + }, + stream: { + resume: () => { + deliver?.('not json'); + deliver?.(JSON.stringify({ type: 'done', message: finalMsg })); + }, + }, + }, + }; + }, + trigger: async () => null, + } as unknown as ISdk; + + const result = await streamProviderTurn(iii, { ...baseParams, onDelta }); + + expect(result.final).toEqual(finalMsg); + expect(onDelta).not.toHaveBeenCalled(); + }); +}); + +describe('formatProviderError', () => { + it('strips iii invocation-error prefixes', () => { + 
expect(formatProviderError(new Error('IIIInvocationError: nope'))).toBe('nope'); + expect(formatProviderError(new Error('invocation_failed: nope'))).toBe('nope'); + expect(formatProviderError('plain string')).toBe('plain string'); + }); +}); From 2d948159529701ad5dad60a09c85e0c254229929 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 20:57:12 -0300 Subject: [PATCH 14/41] refactor(turn-orchestrator): simplify function_execute finalize + dedup - Replace the stale 55-line finalize comment and the convoluted tail-walk dedup (incomingIds/existingResultIds/unseen boundary) with a small persistedResultIds helper: a turn's results are the trailing run of function_result messages, so skip ids already present there. - Make augmentFunctionCall pure (it mutated the arguments object in place). - Extract applyAfterHook and toFunctionResultMessage from finalizeExecutedCalls. - Fix the stale 'handleFinalize:' log label. --- .../states/function-execute.ts | 211 ++++++++---------- 1 file changed, 99 insertions(+), 112 deletions(-) diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts index 629b5215..0731858b 100644 --- a/harness/src/turn-orchestrator/states/function-execute.ts +++ b/harness/src/turn-orchestrator/states/function-execute.ts @@ -8,20 +8,27 @@ import type { ISdk } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; import type { AgentEvent } from '../../types/agent-event.js'; -import type { - AgentMessage, - AssistantMessage, - FunctionResultMessage, -} from '../../types/agent-message.js'; +import type { AgentMessage, AssistantMessage, FunctionResultMessage } from '../../types/agent-message.js'; import type { FunctionCall, FunctionResult } from '../../types/function.js'; -import { dispatchWithHook, isErrorResult, missingFunctionResult, triggerFunctionCall, unwrapAgentTrigger } from '../agent-trigger.js'; +import { + dispatchWithHook, + isErrorResult, + 
missingFunctionResult, + triggerFunctionCall, + unwrapAgentTrigger, +} from '../agent-trigger.js'; import { registerApprovalResume } from '../approval-resume.js'; import { emit } from '../events.js'; import { publishAfter } from '../hook.js'; import * as persistence from '../persistence.js'; -import type { ExecutedEntry } from '../persistence.js'; import { runTransition } from '../run-transition.js'; -import { type PreparedEntry, type TurnWork, type TurnStateRecord, transitionTo } from '../state.js'; +import { + type ExecutedEntry, + type PreparedEntry, + type TurnWork, + type TurnStateRecord, + transitionTo, +} from '../state.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; function buildFunctionExecutionEnd( @@ -40,25 +47,23 @@ function buildFunctionExecutionEnd( }; } +/** + * Attach the call's identity + session to its arguments so the target function + * receives the routing context. Pure: never mutates `fc` or its arguments. + */ function augmentFunctionCall(fc: FunctionCall, session_id: string): FunctionCall { - let augmented_args: unknown; - if (fc.arguments && typeof fc.arguments === 'object' && !Array.isArray(fc.arguments)) { - augmented_args = { ...(fc.arguments as Record) }; - } else { - augmented_args = { arguments: fc.arguments }; - } - if (typeof augmented_args === 'object' && augmented_args !== null) { - const obj = augmented_args as Record; - obj.session_id = session_id; - obj.function_call_id = fc.id; - obj.function_id = fc.function_id; - obj.function_call = { - id: fc.id, - function_id: fc.function_id, - arguments: fc.arguments, - }; - } - return { id: fc.id, function_id: fc.function_id, arguments: augmented_args }; + const baseArgs = + fc.arguments && typeof fc.arguments === 'object' && !Array.isArray(fc.arguments) + ? 
(fc.arguments as Record) + : { arguments: fc.arguments }; + const augmented = { + ...baseArgs, + session_id, + function_call_id: fc.id, + function_id: fc.function_id, + function_call: { id: fc.id, function_id: fc.function_id, arguments: fc.arguments }, + }; + return { id: fc.id, function_id: fc.function_id, arguments: augmented }; } function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { @@ -81,6 +86,12 @@ function buildBatch(asst: AssistantMessage): PreparedEntry[] { }); } +function upsertExecutedCall(executed: ExecutedEntry[], entry: ExecutedEntry): void { + const idx = executed.findIndex((e) => e.function_call.id === entry.function_call.id); + if (idx >= 0) executed[idx] = entry; + else executed.push(entry); +} + function ensureWork(rec: TurnStateRecord): TurnWork { if (!rec.work) { const asst = rec.last_assistant; @@ -101,7 +112,7 @@ async function commitExecutedCall( ): Promise { const duration_ms = Date.now() - startedAt; const error = is_error ?? isErrorResult(result); - persistence.upsertExecutedCall(work.results, { + upsertExecutedCall(work.results, { function_call: fc, result, is_error: error, @@ -111,90 +122,73 @@ async function commitExecutedCall( await emit(iii, rec.session_id, buildFunctionExecutionEnd(fc, result, error, duration_ms)); } -async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { - const work = rec.work ?? 
{ batch: [], results: [] }; - const executed: ExecutedEntry[] = work.results; - const function_results: FunctionResultMessage[] = []; - let all_terminate = executed.length > 0; - for (const e of executed) { - let result = e.result; - const merged = await publishAfter(iii, e.function_call, result); - if ( - merged && - typeof merged === 'object' && - Array.isArray((merged as Record).content) - ) { - result = merged as FunctionResult; - } - if (!result.terminate) all_terminate = false; - function_results.push({ - role: 'function_result', - function_call_id: e.function_call.id, - function_id: e.function_call.function_id, - content: result.content, - details: result.details, - is_error: e.is_error, - timestamp: Date.now(), - }); +/** Run the registered after-hook and adopt its merged result when it returns one. */ +async function applyAfterHook(iii: ISdk, entry: ExecutedEntry): Promise { + const merged = await publishAfter(iii, entry.function_call, entry.result); + if ( + merged && + typeof merged === 'object' && + Array.isArray((merged as Record).content) + ) { + return merged as FunctionResult; } - const messages = await persistence.loadMessages(iii, rec.session_id); - // Idempotency guard: handleFinalize can re-enter (durable trigger retry, - // step-fanout race, crash mid-finalize before transitionTo persists). - // executedCalls is only cleared at the start of the NEXT handlePrepare, - // so a second run reads the SAME results and would push duplicates into - // flat-state. Skip any function_result whose function_call_id is already - // present. Anthropic rejects duplicate `tool_result` blocks with id: - // "each tool_use must have a single result. Found multiple tool_result - // blocks with id: toolu_..." - // and any provider's wire-messages flush would produce them otherwise. 
- // Only the most-recent function_result block matters for dedup — - // duplicates only appear when the re-entry runs against a slice - // we already wrote in this same finalize, so walking from the tail - // and stopping once we pass the boundary of pre-existing results - // is sufficient. Pre-fix this scanned every message from the head - // on every finalize, which grew O(history) per turn for a guard - // that only ever protects against ~10 entries. - const incomingIds = new Set(); - for (const r of function_results) incomingIds.add(r.function_call_id); - const existingResultIds = new Set(); + return entry.result; +} + +function toFunctionResultMessage(entry: ExecutedEntry, result: FunctionResult): FunctionResultMessage { + return { + role: 'function_result', + function_call_id: entry.function_call.id, + function_id: entry.function_call.function_id, + content: result.content, + details: result.details, + is_error: entry.is_error, + timestamp: Date.now(), + }; +} + +/** + * Function_call_ids already persisted for the current turn. Results are appended + * right after the assistant that requested them, so they form the trailing run + * of `function_result` messages; the first non-result from the tail is the turn + * boundary. + */ +function persistedResultIds(messages: AgentMessage[]): Set { + const ids = new Set(); for (let i = messages.length - 1; i >= 0; i--) { const m = messages[i]; - if (!m) continue; - if (m.role === 'function_result') { - existingResultIds.add(m.function_call_id); - continue; - } - if (m.role === 'assistant') { - // Once we cross an assistant boundary BEFORE seeing any - // pending incoming id we've passed the turn this finalize - // is writing for — earlier function_result blocks can't be - // duplicates of `function_results`. 
- let unseen = false; - for (const id of incomingIds) { - if (!existingResultIds.has(id)) { - unseen = true; - break; - } - } - if (!unseen) break; - } + if (m?.role === 'function_result') ids.add(m.function_call_id); + else break; } - let appended = 0; - for (const r of function_results) { - if (existingResultIds.has(r.function_call_id)) continue; - messages.push(r as AgentMessage); - existingResultIds.add(r.function_call_id); - appended++; + return ids; +} + +async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { + const executed = rec.work?.results ?? []; + const function_results: FunctionResultMessage[] = []; + let allTerminate = executed.length > 0; + for (const entry of executed) { + const result = await applyAfterHook(iii, entry); + if (!result.terminate) allTerminate = false; + function_results.push(toFunctionResultMessage(entry, result)); } - if (appended < function_results.length) { - logger.warn('handleFinalize: skipped duplicate function_results (re-entry detected)', { + + // Idempotency: a durable retry / step-fanout race can replay finalize with the + // same work after the results were appended but before the transition + // persisted. Re-appending duplicate function_result blocks makes providers + // reject the turn ("multiple tool_result blocks with id ..."), so drop any id + // already present in this turn's trailing results. 
+ const messages = await persistence.loadMessages(iii, rec.session_id); + const alreadyPersisted = persistedResultIds(messages); + const fresh = function_results.filter((r) => !alreadyPersisted.has(r.function_call_id)); + if (fresh.length < function_results.length) { + logger.warn('finalizeExecutedCalls: skipped duplicate function_results (re-entry detected)', { session_id: rec.session_id, total: function_results.length, - appended, - skipped: function_results.length - appended, + skipped: function_results.length - fresh.length, }); } - await persistence.saveMessages(iii, rec.session_id, messages); + await persistence.saveMessages(iii, rec.session_id, [...messages, ...fresh]); const asst = rec.last_assistant; rec.function_results = function_results; @@ -205,7 +199,7 @@ async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { @@ -217,7 +211,7 @@ export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise e.function_call.id === fc.id); if (existing) { await emit( iii, @@ -236,14 +230,7 @@ export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise Date: Sun, 24 May 2026 20:58:50 -0300 Subject: [PATCH 15/41] refactor(turn-orchestrator): abort uses canonical agent-scope key helpers Replace the local STATE_SCOPE_AGENT constant and inlined abort_signal key string with AGENT_SCOPE and abortSignalKey() from state.ts. 
--- harness/src/turn-orchestrator/abort.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/harness/src/turn-orchestrator/abort.ts b/harness/src/turn-orchestrator/abort.ts index 45b56281..b7103929 100644 --- a/harness/src/turn-orchestrator/abort.ts +++ b/harness/src/turn-orchestrator/abort.ts @@ -8,13 +8,12 @@ import { approvalResumeFnId } from '../approval-gate/schemas.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import * as persistence from './persistence.js'; - -const STATE_SCOPE_AGENT = 'agent'; +import { AGENT_SCOPE, abortSignalKey } from './state.js'; export async function performAbortSideEffects(iii: ISdk, session_id: string): Promise { await trigger(iii, 'state::set', { - scope: STATE_SCOPE_AGENT, - key: `session/${session_id}/abort_signal`, + scope: AGENT_SCOPE, + key: abortSignalKey(session_id), value: true, }); From 27d6d15f86c78e6e01a95a9f5c950d675ed0a78f Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 20:59:36 -0300 Subject: [PATCH 16/41] refactor(turn-orchestrator): steering_check abort read uses AGENT_SCOPE --- harness/src/turn-orchestrator/states/steering-check.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/harness/src/turn-orchestrator/states/steering-check.ts b/harness/src/turn-orchestrator/states/steering-check.ts index a24d0c25..b28674ef 100644 --- a/harness/src/turn-orchestrator/states/steering-check.ts +++ b/harness/src/turn-orchestrator/states/steering-check.ts @@ -10,7 +10,7 @@ import type { AgentMessage, AssistantMessage } from '../../types/agent-message.j import { emit } from '../events.js'; import * as persistence from '../persistence.js'; import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, abortSignalKey, transitionTo } from '../state.js'; +import { AGENT_SCOPE, type TurnStateRecord, abortSignalKey, transitionTo } from '../state.js'; import { TurnStepPayloadSchema, type TurnStepPayload 
} from '../schemas.js'; export type SteeringRoute = @@ -38,7 +38,7 @@ async function abortSet(iii: ISdk, session_id: string): Promise { try { const v = await iii.trigger({ function_id: 'state::get', - payload: { scope: 'agent', key: abortSignalKey(session_id) }, + payload: { scope: AGENT_SCOPE, key: abortSignalKey(session_id) }, }); return v === true; } catch { From 77d992a59ac49173ad2af0394ad5ee7358411c59 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 21:01:50 -0300 Subject: [PATCH 17/41] refactor(turn-orchestrator): one synthetic-assistant factory Collapse four hand-built synthetic AssistantMessage literals (assistant stream error, transition failure, steering abort, max_turns stop) into a single syntheticAssistant() helper built on emptyAssistant. The empty turn_end fallback in steering_check now uses emptyAssistant directly. --- .../src/turn-orchestrator/run-transition.ts | 17 +----- .../states/assistant-streaming.ts | 26 +++------ .../states/steering-check.ts | 56 ++++--------------- .../turn-orchestrator/synthetic-assistant.ts | 33 +++++++++++ 4 files changed, 55 insertions(+), 77 deletions(-) create mode 100644 harness/src/turn-orchestrator/synthetic-assistant.ts diff --git a/harness/src/turn-orchestrator/run-transition.ts b/harness/src/turn-orchestrator/run-transition.ts index d1b1dd78..890ae4e6 100644 --- a/harness/src/turn-orchestrator/run-transition.ts +++ b/harness/src/turn-orchestrator/run-transition.ts @@ -11,12 +11,12 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import type { AssistantMessage } from '../types/agent-message.js'; import { TransientError } from './errors.js'; import { emit } from './events.js'; import * as persistence from './persistence.js'; import { type TurnStepPayload, type TurnStepResult } from './schemas.js'; import { type TurnState, type TurnStateRecord, transitionTo } from './state.js'; +import { syntheticAssistant } from './synthetic-assistant.js'; export 
type TransitionHandler = (iii: ISdk, rec: TurnStateRecord) => Promise; @@ -46,19 +46,8 @@ async function failTransition( // Surface the failure to the live UI (mirrors the graceful error path): // message_complete{stop_reason:'error'} → the translator emits a `stop-reason` // event so the user sees WHY; a bare agent_end renders as a silent end. - // error_kind:'transient' matches syntheticErrorAssistant's union usage; - // the UI translator only reads stop_reason, not error_kind. - const failed: AssistantMessage = { - role: 'assistant', - content: [{ type: 'text', text: rec.error.message }], - stop_reason: 'error', - error_message: rec.error.message, - error_kind: 'transient', - usage: null, - model: '', - provider: '', - timestamp: Date.now(), - }; + // (The UI translator reads stop_reason, not error_kind.) + const failed = syntheticAssistant({ stop_reason: 'error', text: rec.error.message }); await emit(iii, rec.session_id, { type: 'message_complete', message: failed, body_streamed: false }); const messages = await persistence.loadMessages(iii, rec.session_id); diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts index 332e6b2a..b8bfe217 100644 --- a/harness/src/turn-orchestrator/states/assistant-streaming.ts +++ b/harness/src/turn-orchestrator/states/assistant-streaming.ts @@ -18,24 +18,7 @@ import { streamProviderTurn } from '../provider-stream.js'; import { runTransition } from '../run-transition.js'; import { type TurnStateRecord, transitionTo } from '../state.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -function syntheticErrorAssistant( - provider: string, - model: string, - reason: string, -): AssistantMessage { - return { - role: 'assistant', - content: [{ type: 'text', text: reason }], - stop_reason: 'error', - error_message: reason, - error_kind: 'transient', - usage: null, - model, - provider, - timestamp: Date.now(), - }; -} +import 
{ syntheticAssistant } from '../synthetic-assistant.js'; function isErrorOrAborted(asst: AssistantMessage): boolean { return asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; @@ -128,7 +111,12 @@ export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise< rec.last_assistant = final; } else { const reason = error ?? 'provider channel closed without final'; - const synthetic = syntheticErrorAssistant(decision.provider, decision.model, reason); + const synthetic = syntheticAssistant({ + stop_reason: 'error', + text: reason, + provider: decision.provider, + model: decision.model, + }); await emit(iii, rec.session_id, { type: 'message_update', message: synthetic, diff --git a/harness/src/turn-orchestrator/states/steering-check.ts b/harness/src/turn-orchestrator/states/steering-check.ts index b28674ef..d3dc9278 100644 --- a/harness/src/turn-orchestrator/states/steering-check.ts +++ b/harness/src/turn-orchestrator/states/steering-check.ts @@ -6,12 +6,13 @@ */ import type { ISdk } from '../../runtime/iii.js'; -import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; +import { type AgentMessage, emptyAssistant } from '../../types/agent-message.js'; import { emit } from '../events.js'; import * as persistence from '../persistence.js'; import { runTransition } from '../run-transition.js'; import { AGENT_SCOPE, type TurnStateRecord, abortSignalKey, transitionTo } from '../state.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; export type SteeringRoute = | 'abort' @@ -59,40 +60,15 @@ async function drainQueue(iii: ISdk, name: string, session_id: string): Promise< return []; } -function abortedMessage(): AssistantMessage { - return { - role: 'assistant', - content: [], - stop_reason: 'aborted', - error_message: 'aborted', - error_kind: 'transient', - usage: null, - model: 'harness', - provider: 'harness', - timestamp: 
Date.now(), - }; -} - function maxTurnsReached(rec: TurnStateRecord): boolean { return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; } -function maxTurnsAssistant(cap: number): AssistantMessage { - return { - role: 'assistant', - content: [{ type: 'text', text: `loop stopped: max_turns (${cap}) reached` }], - stop_reason: 'end', - error_message: null, - error_kind: null, - usage: null, - model: '', - provider: '', - timestamp: Date.now(), - }; -} - async function endForMaxTurns(iii: ISdk, rec: TurnStateRecord): Promise { - const msg = maxTurnsAssistant(rec.max_turns ?? 0); + const msg = syntheticAssistant({ + stop_reason: 'end', + text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, + }); rec.last_assistant = msg; const messages = await persistence.loadMessages(iii, rec.session_id); messages.push(msg); @@ -105,19 +81,7 @@ async function endForMaxTurns(iii: ISdk, rec: TurnStateRecord): Promise { async function emitTurnEndOnce(iii: ISdk, rec: TurnStateRecord): Promise { if (rec.turn_end_emitted) return; - const last = - rec.last_assistant ?? - ({ - role: 'assistant', - content: [], - stop_reason: 'end', - error_message: null, - error_kind: null, - usage: null, - model: '', - provider: '', - timestamp: Date.now(), - } as AssistantMessage); + const last = rec.last_assistant ?? emptyAssistant(); await emit(iii, rec.session_id, { type: 'turn_end', message: last, @@ -140,7 +104,11 @@ export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise Date: Sun, 24 May 2026 21:02:58 -0300 Subject: [PATCH 18/41] refactor(turn-orchestrator): drop dead pending_function_calls field The field was only ever written (= []) and never read. Remove it from TurnStateRecord, its schema, newRecord, the function_execute finalize assignment, and the test fixtures that set it. Also drop the stale 'Mirrors src/state.rs' header comment. 
--- harness/src/turn-orchestrator/state.ts | 47 ++++++++++++++++--- .../states/function-execute.ts | 1 - .../turn-orchestrator/approval-resume.test.ts | 35 -------------- .../awaiting-approval.test.ts | 1 - .../tests/turn-orchestrator/functions.test.ts | 1 - .../turn-orchestrator/run-transition.test.ts | 2 +- 6 files changed, 40 insertions(+), 47 deletions(-) diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 0c47e2e6..3d7aaf26 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -1,8 +1,16 @@ /** - * TurnState + TurnStateRecord + state-key helpers. Mirrors - * `turn-orchestrator/src/state.rs`. + * TurnState + TurnStateRecord + agent-scope key helpers. + * + * All turn-orchestrator persistence uses iii scope {@link AGENT_SCOPE} with + * keys from the helpers below (`session/<sid>/turn_state`, etc.). Because + * `state::list` returns values without keys, recovery paths filter listed + * values with {@link parseTurnStateRecord} rather than key-prefix matching. */ +/** iii-state scope for turn FSM records, flat messages, run_request, etc. 
*/ +export const AGENT_SCOPE = 'agent' as const; + +import { z } from 'zod'; import type { AssistantMessage, FunctionResultMessage } from '../types/agent-message.js'; import type { FunctionCall, FunctionResult } from '../types/function.js'; @@ -46,7 +54,6 @@ export type TurnStateRecord = { turn_count: number; max_turns?: number; last_assistant?: AssistantMessage | null; - pending_function_calls: FunctionCall[]; function_results: FunctionResultMessage[]; turn_end_emitted: boolean; started_at_ms: number; @@ -58,6 +65,35 @@ export type TurnStateRecord = { error?: { kind: string; message: string }; }; +const TURN_STATES = [ + 'provisioning', + 'assistant_streaming', + 'function_execute', + 'function_awaiting_approval', + 'steering_check', + 'tearing_down', + 'stopped', + 'failed', +] as const satisfies readonly TurnState[]; + +/** Minimal structural guard for persisted turn_state — nested fields pass through. */ +export const TurnStateRecordSchema = z + .object({ + session_id: z.string(), + state: z.enum(TURN_STATES), + turn_count: z.number().catch(0), + function_results: z.array(z.unknown()).catch([]), + turn_end_emitted: z.boolean().catch(false), + started_at_ms: z.number().catch(0), + updated_at_ms: z.number().catch(0), + }) + .passthrough(); + +export function parseTurnStateRecord(raw: unknown): TurnStateRecord | null { + const result = TurnStateRecordSchema.safeParse(raw); + return result.success ? 
(result.data as TurnStateRecord) : null; +} + export function newRecord(session_id: string, max_turns?: number): TurnStateRecord { const now = Date.now(); return { @@ -66,7 +102,6 @@ export function newRecord(session_id: string, max_turns?: number): TurnStateReco turn_count: 0, max_turns, last_assistant: null, - pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: now, @@ -79,10 +114,6 @@ export function transitionTo(rec: TurnStateRecord, next: TurnState): void { rec.updated_at_ms = Date.now(); } -export function isTerminal(rec: TurnStateRecord): boolean { - return rec.state === 'stopped' || rec.state === 'failed'; -} - export const messagesKey = (sid: string) => `session/${sid}/messages`; export const turnStateKey = (sid: string) => `session/${sid}/turn_state`; export const runRequestKey = (sid: string) => `session/${sid}/run_request`; diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts index 0731858b..2f77afa2 100644 --- a/harness/src/turn-orchestrator/states/function-execute.ts +++ b/harness/src/turn-orchestrator/states/function-execute.ts @@ -192,7 +192,6 @@ async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { session_id: 's1', state: 'function_awaiting_approval', turn_count: 0, - pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 0, @@ -175,7 +174,6 @@ describe('recoverPendingApprovals', () => { session_id: 's2', state: 'stopped', turn_count: 0, - pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 0, @@ -196,7 +194,6 @@ describe('recoverPendingApprovals', () => { session_id: 's1', state: 'function_awaiting_approval', turn_count: 0, - pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 0, @@ -208,36 +205,4 @@ describe('recoverPendingApprovals', () => { 
expect(registered.has('turn::approval_resume::s1/fc-1')).toBe(true); expect(registered.size).toBe(1); }); - - it('uses keyed list rows when state::list returns session//turn_state keys', async () => { - const { iii, registered } = makeIiiWithRegistry(new Map()); - const listSpy = iii.trigger as ReturnType; - listSpy.mockImplementation(async ({ function_id }: { function_id: string }) => { - if (function_id === 'state::list') { - return { - items: [ - { key: 'session/s1/messages', value: [{ role: 'user', content: 'hi' }] }, - { - key: 'session/s1/turn_state', - value: { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - pending_function_calls: [], - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - awaiting_approval: [{ function_call_id: 'fc-9', function_id: 'tool::z', args: {} }], - }, - }, - ], - }; - } - return null; - }); - await recoverPendingApprovals(iii); - expect(registered.has('turn::approval_resume::s1/fc-9')).toBe(true); - expect(registered.size).toBe(1); - }); }); diff --git a/harness/tests/turn-orchestrator/awaiting-approval.test.ts b/harness/tests/turn-orchestrator/awaiting-approval.test.ts index 8f116a0d..1d09384b 100644 --- a/harness/tests/turn-orchestrator/awaiting-approval.test.ts +++ b/harness/tests/turn-orchestrator/awaiting-approval.test.ts @@ -26,7 +26,6 @@ function recordWith( turn_count: 0, max_turns: undefined, last_assistant: null, - pending_function_calls: [], function_results: [], turn_end_emitted: false, started_at_ms: 0, diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 275d9e8f..6107a7e2 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -368,7 +368,6 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); expect(rec.state).toBe('steering_check'); - expect(rec.pending_function_calls).toEqual([]); 
expect(rec.function_results).toHaveLength(1); // No turn_end emitted when last_assistant is null expect(emitSpy.mock.calls.some((call) => call[2]?.type === 'turn_end')).toBe(false); diff --git a/harness/tests/turn-orchestrator/run-transition.test.ts b/harness/tests/turn-orchestrator/run-transition.test.ts index 6f0ab85e..7f13321a 100644 --- a/harness/tests/turn-orchestrator/run-transition.test.ts +++ b/harness/tests/turn-orchestrator/run-transition.test.ts @@ -114,7 +114,7 @@ function fakeIii(record: unknown) { describe('runTransition error model', () => { const base = { session_id: 's1', state: 'function_execute', turn_count: 1, - pending_function_calls: [], function_results: [], turn_end_emitted: false, + function_results: [], turn_end_emitted: false, started_at_ms: 1, updated_at_ms: 1, }; From 3a24b3560ebe713540e4c7d3da49fbda9c8e3e2b Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 21:04:27 -0300 Subject: [PATCH 19/41] refactor(turn-orchestrator): drop stale rust-mirror comments, align terminology - Remove the last 'Mirrors *.rs' header comments (events, persistence). - events.ts uses AGENT_SCOPE instead of a local 'agent' constant. - 'tool call' -> 'function call' in agent-trigger/function-execute comments (the agent_trigger tool schema name stays: it is the LLM-facing tool). - Name the policy consult timeout (POLICY_TIMEOUT_MS) like HOOK_TIMEOUT_MS. 
--- .../src/turn-orchestrator/agent-trigger.ts | 4 +- harness/src/turn-orchestrator/events.ts | 9 +- harness/src/turn-orchestrator/hook.ts | 4 +- harness/src/turn-orchestrator/persistence.ts | 181 ++++-------------- .../states/function-execute.ts | 2 +- 5 files changed, 50 insertions(+), 150 deletions(-) diff --git a/harness/src/turn-orchestrator/agent-trigger.ts b/harness/src/turn-orchestrator/agent-trigger.ts index 6912b545..ddb500e2 100644 --- a/harness/src/turn-orchestrator/agent-trigger.ts +++ b/harness/src/turn-orchestrator/agent-trigger.ts @@ -1,8 +1,8 @@ /** - * Agent tool-call dispatcher + approval chokepoint. + * Agent function-call dispatcher + approval chokepoint. * * `dispatchWithHook` is the single chokepoint for FSM-issued calls: every - * agent tool call goes through `consultBefore` before reaching the inner + * agent function call goes through `consultBefore` before reaching the inner * trigger. `triggerFunctionCall` is the shared trigger/decode/error path * used by both the hook gate and pre-approved resume execution. */ diff --git a/harness/src/turn-orchestrator/events.ts b/harness/src/turn-orchestrator/events.ts index 12bfb7db..fdf8be7c 100644 --- a/harness/src/turn-orchestrator/events.ts +++ b/harness/src/turn-orchestrator/events.ts @@ -1,15 +1,14 @@ /** - * Emit AgentEvent frames on `agent::events`. Mirrors - * `turn-orchestrator/src/events.rs`. + * Emit AgentEvent frames on `agent::events`, one per call with a per-session + * monotonic sequence number. 
*/ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentEvent } from '../types/agent-event.js'; -import { eventCounterKey } from './state.js'; +import { AGENT_SCOPE, eventCounterKey } from './state.js'; export const EVENTS_STREAM = 'agent::events'; -const STATE_SCOPE = 'agent'; export function formatItemId(session_id: string, seq: number): string { return `${session_id}-${seq.toString().padStart(8, '0')}`; @@ -20,7 +19,7 @@ async function nextSeq(iii: ISdk, session_id: string): Promise { const resp = await iii.trigger({ function_id: 'state::update', payload: { - scope: STATE_SCOPE, + scope: AGENT_SCOPE, key: eventCounterKey(session_id), ops: [{ type: 'increment', path: '', by: 1 }], }, diff --git a/harness/src/turn-orchestrator/hook.ts b/harness/src/turn-orchestrator/hook.ts index b76fcc10..a442a9b2 100644 --- a/harness/src/turn-orchestrator/hook.ts +++ b/harness/src/turn-orchestrator/hook.ts @@ -21,6 +21,8 @@ import type { FunctionCall } from '../types/function.js'; export const TOPIC_AFTER = 'agent::after_function_call'; export const HOOK_TIMEOUT_MS = 500; +/** Fail-closed budget for the synchronous policy consult before a call. */ +export const POLICY_TIMEOUT_MS = 5_000; export type HookOutcome = | { kind: 'allow' } @@ -45,7 +47,7 @@ export async function consultBefore(iii: ISdk, function_call: FunctionCall): Pro function_id: function_call.function_id, args: function_call.arguments as CheckPermissionsPayload['args'], }, - timeoutMs: 5_000, + timeoutMs: POLICY_TIMEOUT_MS, }); switch (reply.decision) { case 'allow': diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts index 065c86d4..92ce5eff 100644 --- a/harness/src/turn-orchestrator/persistence.ts +++ b/harness/src/turn-orchestrator/persistence.ts @@ -1,98 +1,67 @@ /** - * State load/save helpers. Mirrors `turn-orchestrator/src/persistence.rs`. + * State load/save helpers. 
All `state::*` I/O goes through + * `../runtime/state.js` (agent scope). */ +import { stateGet, stateListValues, stateSet } from '../runtime/state.js'; import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; import type { AgentMessage } from '../types/agent-message.js'; +import { parseFlatMessages } from './flat-messages.js'; import { type RunRequest, parseRunRequest } from './run-request.js'; import { - type ExecutedEntry, - type PreparedEntry, + AGENT_SCOPE, type TurnStateRecord, - lastSessionTreeLenKey, messagesKey, + parseTurnStateRecord, runRequestKey, turnStateKey, } from './state.js'; import { toView } from './schemas.js'; +import { mirrorMessagesToSessionTree } from './session-tree-mirror.js'; import { emitTurnStateChanged } from './turn-state-write.js'; import { shouldWakeStep, wakeState } from './wake.js'; -export type { ExecutedEntry, PreparedEntry } from './state.js'; +const agentGet = (iii: ISdk, key: string) => stateGet(iii, AGENT_SCOPE, key); +const agentSet = (iii: ISdk, key: string, value: unknown) => + stateSet(iii, AGENT_SCOPE, key, value); -const SCOPE = 'agent'; +// --- turn_state --- -async function stateGet(iii: ISdk, key: string): Promise { - try { - const v = await iii.trigger({ - function_id: 'state::get', - payload: { scope: SCOPE, key }, - }); - return v === null || v === undefined ? null : v; - } catch (err) { - logger.warn('persistence state::get failed', { key, err: String(err) }); - return null; - } -} - -async function stateSet(iii: ISdk, key: string, value: unknown): Promise { - try { - await iii.trigger({ - function_id: 'state::set', - payload: { scope: SCOPE, key, value }, - }); - } catch (err) { - logger.warn('persistence state::set failed', { key, err: String(err) }); - } -} - -/** Defensive coercion for records persisted before assistant_finished was - * removed. Drain-before-cutover is preferred; this prevents a crash on an - * in-flight legacy record. 
*/ -export function migrateLegacyRecord(rec: TurnStateRecord): TurnStateRecord { - if ((rec.state as string) === 'assistant_finished') { - const asst = rec.last_assistant; - const hasCalls = !!asst && asst.content.some((b) => b.type === 'function_call'); - return { ...rec, state: hasCalls ? 'function_execute' : 'steering_check' }; - } - return rec; +export async function loadRecord(iii: ISdk, session_id: string): Promise { + return parseTurnStateRecord(await agentGet(iii, turnStateKey(session_id))); } -export async function loadRecord(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, turnStateKey(session_id)); - if (!v || typeof v !== 'object') return null; - return migrateLegacyRecord(v as TurnStateRecord); +/** All turn_state values in agent scope; non-records are dropped by shape parse. */ +export async function listAgentTurnStateRecords(iii: ISdk): Promise { + const values = await stateListValues(iii, { scope: AGENT_SCOPE }); + return values + .map((value) => parseTurnStateRecord(value)) + .filter((rec): rec is TurnStateRecord => rec !== null); } -/** Persist the record with no UI event and no FSM wake — for mid-handler, - * same-state checkpoints (e.g. per function-call result during execute). */ +/** Silent checkpoint — no UI event, no FSM wake. */ export async function writeRecord(iii: ISdk, rec: TurnStateRecord): Promise { - await stateSet(iii, turnStateKey(rec.session_id), rec); + await agentSet(iii, turnStateKey(rec.session_id), rec); } -/** - * Persist turn_state and emit UI event — no FSM wake (mid-handler saves). - * Pass `previous` (the pre-write record) to skip the `state::get` that would - * otherwise re-read it; omit it and the prior value is loaded here. - */ -export async function persistRecord( +async function persistRecord( iii: ISdk, rec: TurnStateRecord, previous?: TurnStateRecord | null, -): Promise { - const prev = previous !== undefined ? 
previous : await loadRecord(iii, rec.session_id); - const eventType = prev === null ? 'state:created' : 'state:updated'; - - await stateSet(iii, turnStateKey(rec.session_id), rec); +): Promise { + const result = await agentSet(iii, turnStateKey(rec.session_id), rec); + const prev = + previous !== undefined ? previous : parseTurnStateRecord(result?.old_value ?? null); await emitTurnStateChanged( iii, rec.session_id, - eventType, - toView(rec) as unknown as Record, - prev !== null ? (toView(prev) as unknown as Record) : undefined, + prev == null ? 'state:created' : 'state:updated', + toView(rec), + prev != null ? toView(prev) : undefined, ); + + return prev; } export async function saveRecord( @@ -100,17 +69,17 @@ export async function saveRecord( rec: TurnStateRecord, previous?: TurnStateRecord | null, ): Promise { - const prev = previous !== undefined ? previous : await loadRecord(iii, rec.session_id); - await persistRecord(iii, rec, prev); + const prev = await persistRecord(iii, rec, previous); if (shouldWakeStep(prev?.state ?? null, rec.state)) { await wakeState(iii, rec.session_id, rec.state); } } +// --- messages --- + export async function loadMessages(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, messagesKey(session_id)); - return Array.isArray(v) ? 
(v as AgentMessage[]) : []; + return parseFlatMessages(await agentGet(iii, messagesKey(session_id))); } export async function saveMessages( @@ -118,90 +87,20 @@ export async function saveMessages( session_id: string, messages: AgentMessage[], ): Promise { - await stateSet(iii, messagesKey(session_id), messages); + await agentSet(iii, messagesKey(session_id), messages); await mirrorMessagesToSessionTree(iii, session_id, messages); } -async function mirrorMessagesToSessionTree( - iii: ISdk, - session_id: string, - messages: AgentMessage[], -): Promise { - const lastKey = lastSessionTreeLenKey(session_id); - const last = await stateGet(iii, lastKey); - const alreadyMirrored = typeof last === 'number' ? last : 0; - if (messages.length <= alreadyMirrored) return; - if (alreadyMirrored === 0) { - try { - await iii.trigger({ - function_id: 'session-tree::ensure', - payload: { session_id }, - }); - } catch (err) { - logger.warn('session-tree::ensure failed; mirror skipped', { - session_id, - err: String(err), - }); - return; - } - } - let lastAppended: string | null = null; - if (alreadyMirrored > 0) { - try { - const resp = await iii.trigger }>({ - function_id: 'session-tree::messages', - payload: { session_id }, - }); - const arr = resp?.messages; - if (Array.isArray(arr) && arr.length > 0) { - const tail = arr[arr.length - 1]; - lastAppended = tail?.entry_id ?? null; - } - } catch (err) { - logger.warn('session-tree::messages read failed mid-mirror; skipping', { - session_id, - err: String(err), - }); - return; - } - } - for (const msg of messages.slice(alreadyMirrored)) { - try { - const resp = await iii.trigger({ - function_id: 'session-tree::append', - payload: { session_id, parent_id: lastAppended, message: msg }, - }); - lastAppended = resp?.entry_id ?? 
lastAppended; - } catch (err) { - logger.warn('session-tree::append mirror failed', { session_id, err: String(err) }); - return; - } - } - await stateSet(iii, lastKey, messages.length); -} +// --- run_request --- export async function saveRunRequest( iii: ISdk, session_id: string, - request: unknown, + request: RunRequest, ): Promise { - await stateSet(iii, runRequestKey(session_id), request); + await agentSet(iii, runRequestKey(session_id), request); } export async function loadRunRequest(iii: ISdk, session_id: string): Promise { - const v = await stateGet(iii, runRequestKey(session_id)); - return parseRunRequest(v && typeof v === 'object' ? (v as Record) : {}); -} - -export function findExecutedCall( - executed: ExecutedEntry[], - function_call_id: string, -): ExecutedEntry | undefined { - return executed.find((e) => e.function_call.id === function_call_id); -} - -export function upsertExecutedCall(executed: ExecutedEntry[], entry: ExecutedEntry): void { - const idx = executed.findIndex((e) => e.function_call.id === entry.function_call.id); - if (idx >= 0) executed[idx] = entry; - else executed.push(entry); + return parseRunRequest(await agentGet(iii, runRequestKey(session_id))); } diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts index 2f77afa2..8fcdec0b 100644 --- a/harness/src/turn-orchestrator/states/function-execute.ts +++ b/harness/src/turn-orchestrator/states/function-execute.ts @@ -209,7 +209,7 @@ export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise e.function_call.id === fc.id); if (existing) { await emit( From 519a61158aaf301253a59cf50912955c57dab658 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Sun, 24 May 2026 21:13:16 -0300 Subject: [PATCH 20/41] refactor(turn-orchestrator): unexport internal-only helpers and schemas decodeOrPassthrough, formatItemId, TurnStateRecordSchema, and SessionIdPayloadSchema are each used only within their own file 
(the public wrappers are triggerFunctionCall, emit, parseTurnStateRecord, and the derived payload schemas). Drop the unnecessary export to shrink the module surface. --- harness/src/turn-orchestrator/agent-trigger.ts | 2 +- harness/src/turn-orchestrator/events.ts | 2 +- harness/src/turn-orchestrator/schemas.ts | 2 +- harness/src/turn-orchestrator/state.ts | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/harness/src/turn-orchestrator/agent-trigger.ts b/harness/src/turn-orchestrator/agent-trigger.ts index ddb500e2..67faa8f2 100644 --- a/harness/src/turn-orchestrator/agent-trigger.ts +++ b/harness/src/turn-orchestrator/agent-trigger.ts @@ -73,7 +73,7 @@ function denialResult(denial: DenialEnvelope): FunctionResult { }; } -export function decodeOrPassthrough(value: unknown): FunctionResult { +function decodeOrPassthrough(value: unknown): FunctionResult { if ( value && typeof value === 'object' && diff --git a/harness/src/turn-orchestrator/events.ts b/harness/src/turn-orchestrator/events.ts index fdf8be7c..9f50b009 100644 --- a/harness/src/turn-orchestrator/events.ts +++ b/harness/src/turn-orchestrator/events.ts @@ -10,7 +10,7 @@ import { AGENT_SCOPE, eventCounterKey } from './state.js'; export const EVENTS_STREAM = 'agent::events'; -export function formatItemId(session_id: string, seq: number): string { +function formatItemId(session_id: string, seq: number): string { return `${session_id}-${seq.toString().padStart(8, '0')}`; } diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index 79516932..2c83ed1b 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -11,7 +11,7 @@ import type { AwaitingApprovalEntry, TurnState, TurnStateRecord } from './state. import type { Mode } from './system-prompt.js'; /** Shared `{ session_id }` payload — `turn::{state}` steps and `turn::get_state`. 
*/ -export const SessionIdPayloadSchema = z.object({ +const SessionIdPayloadSchema = z.object({ session_id: z.string().min(1), }); diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 3d7aaf26..2e18da0c 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -77,7 +77,7 @@ const TURN_STATES = [ ] as const satisfies readonly TurnState[]; /** Minimal structural guard for persisted turn_state — nested fields pass through. */ -export const TurnStateRecordSchema = z +const TurnStateRecordSchema = z .object({ session_id: z.string(), state: z.enum(TURN_STATES), From 5d4cf7554e31c345f03e0dfb8ad1c8443f57ba55 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 05:31:57 -0300 Subject: [PATCH 21/41] wip: turn-orchestrator cleanup in progress Snapshot of uncommitted work moved off the harness-trigger PR branch. --- console/web/src/lib/backend/real.ts | 10 +- harness/docs/architecture.md | 46 ++-- harness/docs/workers/context-compaction.md | 4 +- harness/docs/workers/turn-orchestrator.md | 4 +- harness/src/approval-gate/schemas.ts | 38 +-- harness/src/context-compaction/config.ts | 17 -- harness/src/context-compaction/lease.ts | 9 +- harness/src/context-compaction/overflow.ts | 4 +- harness/src/llm-budget/store.ts | 93 +++---- harness/src/models-catalog/state.ts | 11 +- harness/src/runtime/state.ts | 245 ++++++++++-------- harness/src/session/tree/store.ts | 82 ++---- .../src/turn-orchestrator/approval-resume.ts | 38 +-- .../src/turn-orchestrator/flat-messages.ts | 14 + harness/src/turn-orchestrator/run-request.ts | 33 ++- .../turn-orchestrator/session-tree-mirror.ts | 71 +++++ .../src/turn-orchestrator/turn-state-write.ts | 5 +- harness/src/turn-orchestrator/wake.ts | 13 +- harness/tests/approval-gate/resolve.test.ts | 21 +- harness/tests/approval-gate/schemas.test.ts | 24 +- .../tests/context-compaction/lease.test.ts | 4 +- .../integration/approval-resume.e2e.test.ts | 7 +- 
.../integration/on-record-written.e2e.test.ts | 13 +- harness/tests/runtime/state-client.test.ts | 57 ++++ harness/tests/runtime/state-list.test.ts | 21 +- harness/tests/session/tree/store.test.ts | 2 +- .../turn-orchestrator/flat-messages.test.ts | 16 ++ .../parse-turn-state-record.test.ts | 35 +++ .../persistence-prepared.test.ts | 2 +- .../turn-orchestrator/run-request.test.ts | 12 + harness/tests/turn-orchestrator/state.test.ts | 10 +- 31 files changed, 506 insertions(+), 455 deletions(-) create mode 100644 harness/src/turn-orchestrator/flat-messages.ts create mode 100644 harness/src/turn-orchestrator/session-tree-mirror.ts create mode 100644 harness/tests/runtime/state-client.test.ts create mode 100644 harness/tests/turn-orchestrator/flat-messages.test.ts create mode 100644 harness/tests/turn-orchestrator/parse-turn-state-record.test.ts diff --git a/console/web/src/lib/backend/real.ts b/console/web/src/lib/backend/real.ts index 02164d7a..2dc768ec 100644 --- a/console/web/src/lib/backend/real.ts +++ b/console/web/src/lib/backend/real.ts @@ -274,14 +274,10 @@ async function realCompactSession( } if (resp?.status === 'busy') return { status: 'busy' } if (resp?.status === 'overflow') { - // Accepts both `message` and (legacy) `reason` during rollout. - const wire = resp as { message?: unknown; reason?: unknown } const message = - typeof wire.message === 'string' - ? wire.message - : typeof wire.reason === 'string' - ? wire.reason - : 'unknown summariser error' + typeof resp.message === 'string' + ? 
resp.message + : 'unknown summariser error' return { status: 'overflow', message } } if (resp?.status === 'empty') return surfaceEmpty() diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 16db4fc1..0d9f9c11 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -76,7 +76,7 @@ flowchart LR client -- "approval::resolve" --> approval approval -- "trigger turn::approval_resume::/" --> turnOrch turnOrch -- "state::set approvals//" --> state - turnOrch -- "iii.trigger turn::step" --> turnOrch + turnOrch -- "enqueue turn::{state} on turn-step queue" --> turnOrch provAnth -- "auth::get_token" --> auth provOAI -- "auth::get_token" --> auth @@ -94,33 +94,35 @@ flowchart LR ## Turn FSM [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) -defines an 11-state durable FSM. Every transition is driven by the -`turn::step` durable subscriber, which is woken by a publish to the -`turn::step_requested` topic — either by the orchestrator itself -(re-publish at the end of a step), by a per-call -`turn::approval_resume` handler (when a human decision or abort lands), or by -the orchestrator's own `abort_signal` state trigger. +defines an 8-state durable FSM. Each state is a registered `turn::{state}` +function executed via `runTransition` and enqueued onto the `turn-step` FIFO +queue by `wakeState` ([wake.ts](harness/src/turn-orchestrator/wake.ts)). +`saveRecord` calls `shouldWakeStep` then `wakeState` when the persisted state +transitions to a stepable state. Paused or terminal sessions are also woken by +per-call `turn::approval_resume` handlers (approval/abort) or +`turn::on_abort_signal` (abort signal state trigger), both via `wakeFromRecord`. 
```mermaid stateDiagram-v2 [*] --> provisioning - provisioning --> awaiting_assistant - awaiting_assistant --> assistant_streaming - assistant_streaming --> assistant_finished - assistant_finished --> function_prepare: has function calls - assistant_finished --> steering_check: no function calls - function_prepare --> function_execute - function_execute --> function_finalize: all calls resolved (allow/deny) - function_execute --> function_awaiting_approval: any call needs_approval - function_awaiting_approval --> function_awaiting_approval: decision(s) still missing + provisioning --> assistant_streaming + assistant_streaming --> function_execute: has function calls + assistant_streaming --> steering_check: no function calls + assistant_streaming --> tearing_down: error or aborted + function_execute --> function_awaiting_approval: any call needs approval + function_execute --> steering_check: batch complete + function_execute --> tearing_down: all calls terminate session function_awaiting_approval --> function_execute: all decisions written - function_finalize --> steering_check - steering_check --> awaiting_assistant: continue + steering_check --> assistant_streaming: continue turn steering_check --> tearing_down: stop or max turns tearing_down --> stopped stopped --> [*] + failed --> [*] ``` +`failed` is a terminal state set by `runTransition` when a handler throws +unexpectedly (unless it opts into queue retry via `TransientError`). + ## Approval flow The orchestrator consults `policy::check_permissions` directly inside @@ -128,7 +130,7 @@ The orchestrator consults `policy::check_permissions` directly inside the before path. The orchestrator parks the turn in `function_awaiting_approval`, registers a `turn::approval_resume` function per pending call, and waits until `approval::resolve` (or abort) triggers that function, which persists the -decision and invokes `turn::step`. +decision and invokes `wakeFromRecord` to re-enqueue the current state handler. 
```mermaid sequenceDiagram @@ -145,12 +147,12 @@ sequenceDiagram Harness-->>Turn: deny + DenialEnvelope → DenialResult else no rule (needs_approval) Harness-->>Turn: needs_approval → park in function_awaiting_approval - Note over Turn,Bus: Orchestrator stops re-publishing turn::step_requested.
The TurnStateRecord.awaiting_approval list pins the open calls. + Note over Turn,Bus: saveRecord does not wake stepable handlers for
function_awaiting_approval. awaiting_approval pins open calls. User->>Gate: approval::resolve(decision, reason) Gate->>Turn: trigger turn::approval_resume::/ Turn->>Bus: state::set approvals// = {decision, reason} - Turn->>Turn: turn::step → function_awaiting_approval reads
approvals// for each pending entry - Turn->>Turn: fold decisions into prepared snapshot,
transition back to function_execute + Turn->>Turn: wakeFromRecord → function_awaiting_approval reads
approvals// for each pending entry + Turn->>Turn: fold decisions into work batch,
transition back to function_execute end ``` diff --git a/harness/docs/workers/context-compaction.md b/harness/docs/workers/context-compaction.md index 5cda2227..7a4ea3c3 100644 --- a/harness/docs/workers/context-compaction.md +++ b/harness/docs/workers/context-compaction.md @@ -138,8 +138,7 @@ usable = max(0, model.input_limit − COMPACT_RESERVED_TOKENS) ``` If `model.input_limit` is zero, it falls back to -`model.context_window − model.output_tokens`. `COMPACT_TRIGGER_TOKENS` (deprecated) -acts as a hard cap on the result if set, preserving old behaviour. +`model.context_window − model.output_tokens`. A session with a 200 k-token model reserves 20 k by default and triggers at 180 k. A 32 k model triggers at 12 k with the same defaults. @@ -218,7 +217,6 @@ All knobs are env-driven; no `config.yaml` fields are read. | `COMPACT_TOOL_OUTPUT_MAX_CHARS` | `2000` | Per-output character cap applied before sending to the summariser. | | `COMPACT_BUSY_TIMEOUT_MS` | `30000` | Max ms `compact_now` / `compact_session` waits for the compaction lease before returning `{ status: 'busy' }`. Sized to cover a typical summariser stream (10–30s) so user-initiated `/compact` doesn't race the async TurnEnd path. | | `COMPACT_PRUNE_PROTECTED_TOOLS` | _(empty)_ | Comma-separated function IDs whose outputs are never pruned. | -| `COMPACT_TRIGGER_TOKENS` | _(deprecated)_ | If set, caps `usable()` to this value. Preserves pre-v2 behaviour. Prefer `COMPACT_RESERVED_TOKENS` instead. | The summariser provider and model are always inherited from the session's own selection. 
Routing goes through `turn-orchestrator/provider-router`, diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index d912a0c0..5df3212e 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -166,8 +166,8 @@ From | [src/turn-orchestrator/states/steering-check.ts](harness/src/turn-orchestrator/states/steering-check.ts) | `turn::steering_check` handler. | | [src/turn-orchestrator/states/tearing-down.ts](harness/src/turn-orchestrator/states/tearing-down.ts) | `turn::tearing_down` handler. | | [src/turn-orchestrator/states/index.ts](harness/src/turn-orchestrator/states/index.ts) | Re-exports per-state `register` functions. | -| [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) | `TurnState`, `TurnStateRecord`, `TurnWork`, `PreparedEntry`, `ExecutedEntry`, `AwaitingApprovalEntry`, state-key helpers, `newRecord`, `transitionTo`, `isTerminal`. | -| [src/turn-orchestrator/persistence.ts](harness/src/turn-orchestrator/persistence.ts) | Load/save helpers: `loadRecord` (with legacy `assistant_finished` migration), `saveRecord` (persist + wake), `persistRecord` (persist + UI event, no wake), `writeRecord` (silent checkpoint), `saveMessages` (+ session-tree mirror). | +| [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) | `TurnState`, `TurnStateRecord`, `TurnWork`, `PreparedEntry`, `ExecutedEntry`, `AwaitingApprovalEntry`, state-key helpers, `newRecord`, `transitionTo`. | +| [src/turn-orchestrator/persistence.ts](harness/src/turn-orchestrator/persistence.ts) | Load/save helpers: `loadRecord`, `saveRecord` (persist + wake), `persistRecord` (persist + UI event, no wake), `writeRecord` (silent checkpoint), `saveMessages` (+ session-tree mirror). | | [src/turn-orchestrator/errors.ts](harness/src/turn-orchestrator/errors.ts) | `TransientError` (opt into queue retry), `ContextOverflowError`, `CompactionBusyError`. 
| | [src/turn-orchestrator/events.ts](harness/src/turn-orchestrator/events.ts) | `emit(iii, sid, event)` — appends a sequenced `AgentEvent` to the `agent::events` stream. | | [src/turn-orchestrator/preflight.ts](harness/src/turn-orchestrator/preflight.ts) | `runPreflight` — context-compaction check before each provider call. | diff --git a/harness/src/approval-gate/schemas.ts b/harness/src/approval-gate/schemas.ts index d3025e6a..2929dff9 100644 --- a/harness/src/approval-gate/schemas.ts +++ b/harness/src/approval-gate/schemas.ts @@ -36,52 +36,38 @@ const denialEnvelopeSchema = z.object({ export type DenialEnvelope = z.infer; /** - * Wire payload for `approval::resolve`. Accepts `function_call_id` or the - * legacy `tool_call_id` alias; output always has `function_call_id` set. - * Rejects "/" in either id at the boundary — it is the reserved separator in - * the state key, so a slashed id is refused here rather than thrown on later. + * Wire payload for `approval::resolve`. Rejects "/" in ids at the boundary — + * it is the reserved separator in the state key. */ export const ResolvePayloadSchema = z .object({ session_id: z.string().min(1), - function_call_id: z.string().min(1).optional(), - tool_call_id: z.string().min(1).optional(), + function_call_id: z.string().min(1), decision: wireDecisionSchema, reason: z.string().nullable().optional(), }) - .transform((v, ctx) => { - const fnId = v.function_call_id ?? 
v.tool_call_id; - if (!fnId) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['function_call_id'], - message: 'function_call_id or tool_call_id is required', - }); - return z.NEVER; - } + .superRefine((v, ctx) => { if (v.session_id.includes('/')) { ctx.addIssue({ code: z.ZodIssueCode.custom, path: ['session_id'], message: 'session_id must not contain "/"', }); - return z.NEVER; } - if (fnId.includes('/')) { + if (v.function_call_id.includes('/')) { ctx.addIssue({ code: z.ZodIssueCode.custom, path: ['function_call_id'], message: 'function_call_id must not contain "/"', }); - return z.NEVER; } - return { - session_id: v.session_id, - function_call_id: fnId, - decision: v.decision, - reason: v.reason ?? null, - }; - }); + }) + .transform((v) => ({ + session_id: v.session_id, + function_call_id: v.function_call_id, + decision: v.decision, + reason: v.reason ?? null, + })); export type ResolvePayloadInput = z.input; const policyReplySchema = z.discriminatedUnion('decision', [ diff --git a/harness/src/context-compaction/config.ts b/harness/src/context-compaction/config.ts index 19989cf4..1f9ac4a9 100644 --- a/harness/src/context-compaction/config.ts +++ b/harness/src/context-compaction/config.ts @@ -58,20 +58,3 @@ export function pruneProtectedTools(): string[] { .map((s) => s.trim()) .filter(Boolean); } - -// Deprecated. Hard upper bound on usable() to keep existing deployments -// from regressing. One-shot warning on first read. -let deprecatedTriggerTokensWarned = false; -export function deprecatedTriggerTokensCap(): number | undefined { - const v = process.env.COMPACT_TRIGGER_TOKENS; - if (!v) return undefined; - if (!deprecatedTriggerTokensWarned) { - deprecatedTriggerTokensWarned = true; - // eslint-disable-next-line no-console - console.warn( - '[context-compaction] COMPACT_TRIGGER_TOKENS is deprecated; use COMPACT_RESERVED_TOKENS. Treating as hard cap on usable().', - ); - } - const n = Number.parseInt(v, 10); - return Number.isFinite(n) && n > 0 ? 
n : undefined; -} diff --git a/harness/src/context-compaction/lease.ts b/harness/src/context-compaction/lease.ts index 32471d3f..4c1b127a 100644 --- a/harness/src/context-compaction/lease.ts +++ b/harness/src/context-compaction/lease.ts @@ -25,12 +25,9 @@ export function mintLeaseNonce(): string { } export function readLeaseTimestampSecs(v: unknown): number { - if (!v) return 0; - if (typeof v === 'number') return Math.floor(v); - if (typeof v === 'object') { - const ts = (v as Record).ts; - if (typeof ts === 'number') return Math.floor(ts / 1000); - } + if (!v || typeof v !== 'object') return 0; + const ts = (v as Record).ts; + if (typeof ts === 'number') return Math.floor(ts / 1000); return 0; } diff --git a/harness/src/context-compaction/overflow.ts b/harness/src/context-compaction/overflow.ts index 0d3e88ab..0b88a60f 100644 --- a/harness/src/context-compaction/overflow.ts +++ b/harness/src/context-compaction/overflow.ts @@ -1,7 +1,6 @@ import { MAX_PRESERVE_RECENT_TOKENS, MIN_PRESERVE_RECENT_TOKENS, - deprecatedTriggerTokensCap, preserveRecentTokensOverride, reservedTokens, } from './config.js'; @@ -33,8 +32,7 @@ export function usable(input: { model: ModelLike; reserved?: number }): number { model.limit.input > 0 ? Math.max(0, model.limit.input - reserved) : Math.max(0, model.limit.context - model.limit.output); - const cap = deprecatedTriggerTokensCap(); - return cap !== undefined ? 
Math.min(base, cap) : base; + return base; } export function isOverflow(input: { diff --git a/harness/src/llm-budget/store.ts b/harness/src/llm-budget/store.ts index 4206f4d6..52b442d5 100644 --- a/harness/src/llm-budget/store.ts +++ b/harness/src/llm-budget/store.ts @@ -3,6 +3,7 @@ */ import type { ISdk } from '../runtime/iii.js'; +import { createState } from '../runtime/state.js'; import { type Budget, SCOPE, @@ -12,71 +13,45 @@ import { spendLogKey, } from './types.js'; -async function stateSet(iii: ISdk, key: string, value: unknown): Promise { - await iii.trigger({ - function_id: 'state::set', - payload: { scope: SCOPE, key, value }, - }); +function strictState(iii: ISdk) { + return createState(iii, { tolerant: false }); } -async function stateGetValue(iii: ISdk, key: string): Promise { - const resp = await iii.trigger({ - function_id: 'state::get', - payload: { scope: SCOPE, key }, - }); - if (resp === null || resp === undefined) return null; - if (resp && typeof resp === 'object' && 'value' in (resp as Record)) { - const v = (resp as Record).value; - return v === null || v === undefined ? 
null : v; - } - return resp; +function isBudget(v: unknown): v is Budget { + return ( + v !== null && + typeof v === 'object' && + typeof (v as Budget).id === 'string' && + typeof (v as Budget).ceiling_usd === 'number' + ); } -async function stateList(iii: ISdk, prefix: string): Promise { - const resp = await iii.trigger({ - function_id: 'state::list', - payload: { scope: SCOPE, prefix }, - }); - if (Array.isArray(resp)) return resp; - if (resp && typeof resp === 'object') { - const items = (resp as Record).items; - if (Array.isArray(items)) return items; - } - return []; -} - -async function stateDelete(iii: ISdk, key: string): Promise { - await iii.trigger({ - function_id: 'state::delete', - payload: { scope: SCOPE, key }, - }); +function isSpendLogEntry(v: unknown, budget_id: string): v is SpendLogEntry { + return ( + v !== null && + typeof v === 'object' && + (v as SpendLogEntry).budget_id === budget_id && + !('ceiling_usd' in (v as Record)) + ); } export async function loadBudget(iii: ISdk, id: string): Promise { - const v = await stateGetValue(iii, budgetKey(id)); + const v = await strictState(iii).get({ scope: SCOPE, key: budgetKey(id) }); if (v === null) return null; - return v as Budget; + return isBudget(v) ? v : null; } export async function saveBudget(iii: ISdk, b: Budget): Promise { - await stateSet(iii, budgetKey(b.id), b); + await strictState(iii).set({ scope: SCOPE, key: budgetKey(b.id), value: b }); } export async function deleteBudgetRecord(iii: ISdk, id: string): Promise { - await stateDelete(iii, budgetKey(id)); + await strictState(iii).delete({ scope: SCOPE, key: budgetKey(id) }); } export async function listAllBudgets(iii: ISdk): Promise { - const items = await stateList(iii, 'budget:'); - const out: Budget[] = []; - for (const v of items) { - const inner = - v && typeof v === 'object' && 'value' in (v as Record) - ? 
(v as Record).value - : v; - if (inner && typeof inner === 'object' && (inner as Budget).id) out.push(inner as Budget); - } - return out; + const items = await strictState(iii).list({ scope: SCOPE }); + return items.filter(isBudget); } export async function saveSpendLog( @@ -85,7 +60,7 @@ export async function saveSpendLog( period_start: number, e: SpendLogEntry, ): Promise { - await stateSet(iii, spendLogKey(id, period_start), e); + await strictState(iii).set({ scope: SCOPE, key: spendLogKey(id, period_start), value: e }); } export async function saveResetLog( @@ -96,20 +71,14 @@ export async function saveResetLog( suffix: string, e: SpendLogEntry, ): Promise { - await stateSet(iii, resetLogKey(id, period_start, ts, suffix), e); + await strictState(iii).set({ + scope: SCOPE, + key: resetLogKey(id, period_start, ts, suffix), + value: e, + }); } export async function listSpendLogs(iii: ISdk, budget_id: string): Promise { - const items = await stateList(iii, `spend_log:${budget_id}:`); - const out: SpendLogEntry[] = []; - for (const v of items) { - const inner = - v && typeof v === 'object' && 'value' in (v as Record) - ? 
(v as Record).value - : v; - if (inner && typeof inner === 'object' && (inner as SpendLogEntry).budget_id === budget_id) { - out.push(inner as SpendLogEntry); - } - } - return out; + const items = await strictState(iii).list({ scope: SCOPE }); + return items.filter((v): v is SpendLogEntry => isSpendLogEntry(v, budget_id)); } diff --git a/harness/src/models-catalog/state.ts b/harness/src/models-catalog/state.ts index dee5e543..4e5644ab 100644 --- a/harness/src/models-catalog/state.ts +++ b/harness/src/models-catalog/state.ts @@ -6,7 +6,7 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { stateGet, stateList, stateSet } from '../runtime/state.js'; +import { stateGet, stateListValues, stateSet } from '../runtime/state.js'; import { type ListFilter, loadEmbeddedCatalog } from './catalog.js'; import { MODELS_KEY_PREFIX, MODELS_SCOPE, type Model, supportsModel } from './types.js'; @@ -22,7 +22,7 @@ export function modelKey(provider: string, id: string): string { export async function seedStateIfEmpty(iii: ISdk, _cfg: StateConfig): Promise { try { - const items = await stateList(iii, MODELS_SCOPE, MODELS_KEY_PREFIX); + const items = await stateListValues(iii, { scope: MODELS_SCOPE }); if (items.length > 0) return; const catalog = await loadEmbeddedCatalog(); for (const m of catalog) { @@ -37,10 +37,9 @@ export async function seedStateIfEmpty(iii: ISdk, _cfg: StateConfig): Promise { - const items = await stateList(iii, MODELS_SCOPE, MODELS_KEY_PREFIX); - const fromState = items - .map((v) => v as Model | null) - .filter((m): m is Model => Boolean(m && typeof m === 'object' && m.id)); + const fromState = (await stateListValues(iii, { scope: MODELS_SCOPE })).filter( + (m): m is Model => Boolean(m && typeof m === 'object' && m.id), + ); const source = fromState.length > 0 ? 
fromState : await loadEmbeddedCatalog(); return source .filter((m) => filter.provider === undefined || m.provider === filter.provider) diff --git a/harness/src/runtime/state.ts b/harness/src/runtime/state.ts index 6a47279d..d06500a0 100644 --- a/harness/src/runtime/state.ts +++ b/harness/src/runtime/state.ts @@ -1,43 +1,55 @@ /** - * Tiny `state::*` wrappers. Mirrors - * `turn-orchestrator/src/persistence.rs::state_get` / `state_set`. + * `state::*` client aligned with `iii-sdk/state` (`IState`). * - * All helpers are tolerant: trigger errors degrade to `null` / `[]` and - * are logged at warn level so a single failed read never aborts a turn. + * Tolerant helpers (default) mirror turn-orchestrator persistence: trigger + * errors degrade to `null` / `[]` and are logged at warn level so a single + * failed read never aborts a turn. Use `createState(iii, { tolerant: false })` + * when storage errors should propagate (session store, llm-budget). */ import type { ISdk } from 'iii-sdk'; -import type { StateListInput } from 'iii-sdk/state'; +import type { UpdateOp } from 'iii-sdk/stream'; +import type { + DeleteResult, + IState, + StateDeleteInput, + StateGetInput, + StateListInput, + StateSetInput, + StateSetResult, + StateUpdateInput, + StateUpdateResult, +} from 'iii-sdk/state'; import { logger } from './otel.js'; -export type { StateListInput } from 'iii-sdk/state'; - -// Mirrors engine `UpdateOp` (sdk/packages/rust/iii/src/types.rs). Variants -// that target a JSON field (`set`, `increment`, `decrement`, `remove`) take -// a required `path` string — `""` (FieldPath::root) means "the whole value". -// `merge`/`append` accept an optional MergePath (string or array of strings). 
-export type StateUpdateOp = - | { type: 'set'; path: string; value: unknown } - | { type: 'merge'; path?: string | string[]; value: Record } - | { type: 'append'; path?: string | string[]; value: unknown } - | { type: 'increment'; path: string; by: number } - | { type: 'decrement'; path: string; by: number } - | { type: 'remove'; path: string } - | { type: string; [k: string]: unknown }; - -/** One row from a keyed `state::list` envelope (`{ items: [...] }`), when present. */ -export type StateListKeyedEntry = { - key?: string; - value?: unknown; +export type { UpdateOp } from 'iii-sdk/stream'; +export type { + DeleteResult, + IState, + StateDeleteInput, + StateGetInput, + StateListInput, + StateSetInput, + StateSetResult, + StateUpdateInput, + StateUpdateResult, +} from 'iii-sdk/state'; + +export type CreateStateOptions = { + /** When true (default), log and return null/[] on trigger failure. */ + tolerant?: boolean; }; +type StateListGroupsResult = { groups: string[] }; + +function normalizeGetResult(v: unknown): T | null { + if (v === null || v === undefined) return null; + return v as T; +} + /** Raw list rows before value unwrap; `null` when the response is not a list. */ export function stateListResponseRows(response: unknown): unknown[] | null { if (Array.isArray(response)) return response; - if (response && typeof response === 'object') { - const items = (response as Record).items; - if (Array.isArray(items)) return items; - } return null; } @@ -51,9 +63,8 @@ function unwrapStateListEntry(entry: unknown): T { /** * Normalizes a `state::list` trigger result to stored values. * - * Official iii returns a flat `T[]` ({@link StateListInput} only). Some - * deployments also wrap rows as `{ value }` or `{ items: [{ key, value }] }`; - * we accept those shapes so harness workers stay compatible. + * Official iii returns a flat `T[]`. Some bridge deployments wrap rows as + * `{ value }`; we accept that shape for compatibility. 
*/ export function parseStateListValues(response: unknown): T[] { const arr = stateListResponseRows(response); @@ -61,106 +72,126 @@ export function parseStateListValues(response: unknown): T[] { return arr.map((entry) => unwrapStateListEntry(entry)); } -/** Keyed rows when the list response includes `key` (not returned by stock iii). */ -export function parseStateListKeyedEntries(response: unknown): StateListKeyedEntry[] { - const arr = stateListResponseRows(response); - if (!arr) return []; - return arr.map((entry) => { - if (entry && typeof entry === 'object') { - const row = entry as Record; - return { - key: typeof row.key === 'string' ? row.key : undefined, - value: row.value !== undefined ? row.value : entry, - }; +export function createState(iii: ISdk, opts: CreateStateOptions = {}): IState { + const tolerant = opts.tolerant !== false; + + async function run( + op: string, + context: Record, + fn: () => Promise, + fallback: T, + ): Promise { + try { + return await fn(); + } catch (err) { + if (tolerant) { + logger.warn(`${op} failed`, { ...context, err: String(err) }); + return fallback; + } + throw err; } - return { value: entry }; - }); + } + + return { + get: (input: StateGetInput): Promise => + run('state::get', { scope: input.scope, key: input.key }, async () => { + const v = await iii.trigger({ + function_id: 'state::get', + payload: input, + }); + return normalizeGetResult(v); + }, null), + + set: (input: StateSetInput): Promise | null> => + run('state::set', { scope: input.scope, key: input.key }, async () => { + const result = await iii.trigger>({ + function_id: 'state::set', + payload: input, + }); + return result ?? null; + }, null), + + delete: (input: StateDeleteInput): Promise => + run('state::delete', { scope: input.scope, key: input.key }, async () => { + const result = await iii.trigger({ + function_id: 'state::delete', + payload: input, + }); + return result ?? 
{}; + }, {}), + + list: (input: StateListInput): Promise => + run('state::list', { scope: input.scope }, async () => { + const resp = await iii.trigger({ + function_id: 'state::list', + payload: input, + }); + return parseStateListValues(resp); + }, []), + + update: (input: StateUpdateInput): Promise | null> => + run('state::update', { scope: input.scope, key: input.key }, async () => { + const result = await iii.trigger>({ + function_id: 'state::update', + payload: input, + }); + return result ?? null; + }, null), + }; } -export async function stateGet(iii: ISdk, scope: string, key: string): Promise { +/** Lists all scope names that contain state data. */ +export async function stateListGroups( + iii: ISdk, + opts: CreateStateOptions = {}, +): Promise { + const tolerant = opts.tolerant !== false; try { - const v = await iii.trigger({ - function_id: 'state::get', - payload: { scope, key }, + const result = await iii.trigger, StateListGroupsResult | string[]>({ + function_id: 'state::list_groups', + payload: {}, }); - if (v === null || v === undefined) return null; - return v; + if (Array.isArray(result)) return result; + return result?.groups ?? 
[]; } catch (err) { - logger.warn('state::get failed', { scope, key, err: String(err) }); - return null; + if (tolerant) { + logger.warn('state::list_groups failed', { err: String(err) }); + return []; + } + throw err; } } +// --- Tolerant (scope, key) ergonomics for turn-orchestrator --- + +const tolerantState = (iii: ISdk) => createState(iii, { tolerant: true }); + +export async function stateGet(iii: ISdk, scope: string, key: string): Promise { + return tolerantState(iii).get({ scope, key }); +} + export async function stateSet( iii: ISdk, scope: string, key: string, value: unknown, -): Promise { - try { - await iii.trigger({ - function_id: 'state::set', - payload: { scope, key, value }, - }); - } catch (err) { - logger.warn('state::set failed', { scope, key, err: String(err) }); - } +): Promise | null> { + return tolerantState(iii).set({ scope, key, value }); } export async function stateDelete(iii: ISdk, scope: string, key: string): Promise { - try { - await iii.trigger({ - function_id: 'state::delete', - payload: { scope, key }, - }); - } catch (err) { - logger.warn('state::delete failed', { scope, key, err: String(err) }); - } + await tolerantState(iii).delete({ scope, key }); } -/** - * Lists all values in a scope using the iii SDK contract (`StateListInput`). - */ export async function stateListValues(iii: ISdk, input: StateListInput): Promise { - try { - const resp = await iii.trigger({ - function_id: 'state::list', - payload: input, - }); - return parseStateListValues(resp); - } catch (err) { - logger.warn('state::list failed', { scope: input.scope, err: String(err) }); - return []; - } -} - -/** - * @deprecated Third argument `prefix` is not sent to iii (engine lists the - * whole scope). Kept for call-site stability; filter returned values locally - * if you need key-prefix semantics. 
- */ -export async function stateList(iii: ISdk, scope: string, _prefix?: string): Promise { - return stateListValues(iii, { scope }); + return tolerantState(iii).list(input); } -/** - * `state::update` applies one or more atomic ops and returns the - * `{ old_value, new_value }` envelope. - */ export async function stateUpdate( iii: ISdk, scope: string, key: string, - ops: StateUpdateOp[], -): Promise<{ old_value?: unknown; new_value?: unknown } | null> { - try { - const v = await iii.trigger({ - function_id: 'state::update', - payload: { scope, key, ops }, - }); - return v ?? null; - } catch (err) { - logger.warn('state::update failed', { scope, key, err: String(err) }); - return null; - } + ops: UpdateOp[], +): Promise | null> { + return tolerantState(iii).update({ scope, key, ops }); } diff --git a/harness/src/session/tree/store.ts b/harness/src/session/tree/store.ts index 72e3b7e2..d6c2cf44 100644 --- a/harness/src/session/tree/store.ts +++ b/harness/src/session/tree/store.ts @@ -14,7 +14,7 @@ import type { ISdk } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; -import { parseStateListValues, stateListResponseRows } from '../../runtime/state.js'; +import { createState } from '../../runtime/state.js'; import { type SessionEntry, SessionError, type SessionMeta, entryTimestamp } from './types.js'; export interface SessionStore { @@ -76,14 +76,15 @@ function entriesScope(session_id: string): string { } export class IiiStateSessionStore implements SessionStore { - constructor(private readonly iii: ISdk) {} + private readonly state; + + constructor(iii: ISdk) { + this.state = createState(iii, { tolerant: false }); + } async create(meta: SessionMeta): Promise { try { - await this.iii.trigger({ - function_id: 'state::set', - payload: { scope: META_SCOPE, key: meta.session_id, value: meta }, - }); + await this.state.set({ scope: META_SCOPE, key: meta.session_id, value: meta }); } catch (e) { throw new SessionError('storage', `state::set 
meta: ${String(e)}`); } @@ -91,13 +92,10 @@ export class IiiStateSessionStore implements SessionStore { async append(session_id: string, entry: SessionEntry): Promise { try { - await this.iii.trigger({ - function_id: 'state::set', - payload: { - scope: entriesScope(session_id), - key: entry.id, - value: entry, - }, + await this.state.set({ + scope: entriesScope(session_id), + key: entry.id, + value: entry, }); } catch (e) { throw new SessionError('storage', `state::set entry: ${String(e)}`); @@ -114,19 +112,12 @@ export class IiiStateSessionStore implements SessionStore { } async loadEntries(session_id: string): Promise { - let resp: unknown; + let entries: SessionEntry[]; try { - resp = await this.iii.trigger({ - function_id: 'state::list', - payload: { scope: entriesScope(session_id) }, - }); + entries = await this.state.list({ scope: entriesScope(session_id) }); } catch (e) { throw new SessionError('storage', `state::list entries: ${String(e)}`); } - if (!stateListResponseRows(resp)) { - throw new SessionError('storage', 'state::list returned non-array'); - } - const entries = parseStateListValues(resp); // PR #150: sort by (timestamp, id) so resumed approval replies that // arrive after the session paused appear in correct transcript order // even when their entry ids are non-monotonic. 
@@ -139,46 +130,32 @@ export class IiiStateSessionStore implements SessionStore { } async loadMeta(session_id: string): Promise { - let resp: unknown; + let resp: SessionMeta | null; try { - resp = await this.iii.trigger({ - function_id: 'state::get', - payload: { scope: META_SCOPE, key: session_id }, - }); + resp = await this.state.get({ scope: META_SCOPE, key: session_id }); } catch (e) { throw new SessionError('storage', `state::get meta: ${String(e)}`); } - if (resp === null || resp === undefined) { + if (resp === null) { throw new SessionError('not_found', session_id); } - return resp as SessionMeta; + return resp; } async list(): Promise { - let resp: unknown; try { - resp = await this.iii.trigger({ - function_id: 'state::list', - payload: { scope: META_SCOPE }, - }); + return await this.state.list({ scope: META_SCOPE }); } catch (e) { throw new SessionError('storage', `state::list meta: ${String(e)}`); } - if (!stateListResponseRows(resp)) { - throw new SessionError('storage', 'state::list returned non-array'); - } - return parseStateListValues(resp); } async updateEntry(session_id: string, entry_id: string, updated: SessionEntry): Promise { try { - await this.iii.trigger({ - function_id: 'state::set', - payload: { - scope: entriesScope(session_id), - key: entry_id, - value: updated, - }, + await this.state.set({ + scope: entriesScope(session_id), + key: entry_id, + value: updated, }); } catch (e) { throw new SessionError('storage', `state::set updateEntry: ${String(e)}`); @@ -195,16 +172,9 @@ export class IiiStateSessionStore implements SessionStore { } private async refreshMetaUpdatedAt(session_id: string): Promise { - const value = await this.iii.trigger({ - function_id: 'state::get', - payload: { scope: META_SCOPE, key: session_id }, - }); - if (value === null || value === undefined) return; - const meta = value as SessionMeta; - meta.updated_at = Date.now(); - await this.iii.trigger({ - function_id: 'state::set', - payload: { scope: META_SCOPE, key: 
session_id, value: meta }, - }); + const value = await this.state.get({ scope: META_SCOPE, key: session_id }); + if (value === null) return; + const meta = { ...value, updated_at: Date.now() }; + await this.state.set({ scope: META_SCOPE, key: session_id, value: meta }); } } diff --git a/harness/src/turn-orchestrator/approval-resume.ts b/harness/src/turn-orchestrator/approval-resume.ts index 93acf7b7..817c9e9a 100644 --- a/harness/src/turn-orchestrator/approval-resume.ts +++ b/harness/src/turn-orchestrator/approval-resume.ts @@ -12,23 +12,12 @@ import { } from '../approval-gate/schemas.js'; import type { FunctionRef, ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { - parseStateListKeyedEntries, - parseStateListValues, - stateGet, - stateSet, -} from '../runtime/state.js'; +import { stateGet, stateSet } from '../runtime/state.js'; +import { listAgentTurnStateRecords } from './persistence.js'; import type { TurnStateRecord } from './state.js'; import { wakeFromRecord } from './wake.js'; const resumeRefs = new Map(); -const TURN_STATE_KEY_RE = /^session\/[^/]+\/turn_state$/; - -function isTurnStateRecord(value: unknown): value is TurnStateRecord { - if (!value || typeof value !== 'object') return false; - const rec = value as Record; - return typeof rec.session_id === 'string' && typeof rec.state === 'string'; -} /** Agent-scope turn_state still parked on human approval. */ function pausedApprovalCalls( @@ -124,30 +113,9 @@ export function clearApprovalResumeRegistry(): void { resumeRefs.clear(); } -/** Turn_state rows from `state::list` on scope agent (not every value in the scope). 
*/ -async function listTurnStateRecords(iii: ISdk): Promise { - try { - const resp = await iii.trigger({ - function_id: 'state::list', - payload: { scope: 'agent' }, - }); - const keyed = parseStateListKeyedEntries(resp); - if (keyed.some((entry) => typeof entry.key === 'string')) { - return keyed - .filter((entry) => entry.key && TURN_STATE_KEY_RE.test(entry.key)) - .map((entry) => entry.value) - .filter(isTurnStateRecord); - } - return parseStateListValues(resp).filter(isTurnStateRecord); - } catch (err) { - logger.warn('approval resume: state::list failed during recovery', { err: String(err) }); - return []; - } -} - /** Re-register resume fns for sessions still paused on approval after worker restart. */ export async function recoverPendingApprovals(iii: ISdk): Promise { - const records = await listTurnStateRecords(iii); + const records = await listAgentTurnStateRecords(iii); for (const rec of records) { const paused = pausedApprovalCalls(rec); diff --git a/harness/src/turn-orchestrator/flat-messages.ts b/harness/src/turn-orchestrator/flat-messages.ts new file mode 100644 index 00000000..e2d4589a --- /dev/null +++ b/harness/src/turn-orchestrator/flat-messages.ts @@ -0,0 +1,14 @@ +/** + * Parser for the flat `session//messages` agent-scope array. + */ + +import { z } from 'zod'; +import type { AgentMessage } from '../types/agent-message.js'; + +const FlatMessagesSchema = z + .array(z.custom((v) => v != null && typeof v === 'object')) + .catch([]); + +export function parseFlatMessages(raw: unknown): AgentMessage[] { + return FlatMessagesSchema.parse(raw ?? []); +} diff --git a/harness/src/turn-orchestrator/run-request.ts b/harness/src/turn-orchestrator/run-request.ts index 168ff52d..85307222 100644 --- a/harness/src/turn-orchestrator/run-request.ts +++ b/harness/src/turn-orchestrator/run-request.ts @@ -5,26 +5,23 @@ * instead of re-guarding `unknown` fields. 
*/ +import { z } from 'zod'; import type { Mode } from './system-prompt.js'; -export type RunRequest = { - provider: string; - model: string; - mode: Mode | null; - system_prompt: string; - function_schemas: unknown[]; -}; +export const RunRequestSchema = z.object({ + provider: z.string().catch(''), + model: z.string().catch(''), + mode: z + .unknown() + .transform((v): Mode | null => + v === 'plan' || v === 'ask' || v === 'agent' ? v : null, + ), + system_prompt: z.string().catch(''), + function_schemas: z.array(z.unknown()).catch([]), +}); -function parseMode(value: unknown): Mode | null { - return value === 'plan' || value === 'ask' || value === 'agent' ? value : null; -} +export type RunRequest = z.infer; -export function parseRunRequest(raw: Record): RunRequest { - return { - provider: typeof raw.provider === 'string' ? raw.provider : '', - model: typeof raw.model === 'string' ? raw.model : '', - mode: parseMode(raw.mode), - system_prompt: typeof raw.system_prompt === 'string' ? raw.system_prompt : '', - function_schemas: Array.isArray(raw.function_schemas) ? raw.function_schemas : [], - }; +export function parseRunRequest(raw: unknown): RunRequest { + return RunRequestSchema.parse(raw ?? {}); } diff --git a/harness/src/turn-orchestrator/session-tree-mirror.ts b/harness/src/turn-orchestrator/session-tree-mirror.ts new file mode 100644 index 00000000..e44fa409 --- /dev/null +++ b/harness/src/turn-orchestrator/session-tree-mirror.ts @@ -0,0 +1,71 @@ +/** + * Incrementally mirrors flat agent messages into the session-tree store. 
+ */ + +import { z } from 'zod'; +import { stateGet, stateSet } from '../runtime/state.js'; +import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import type { AgentMessage } from '../types/agent-message.js'; +import { AGENT_SCOPE, lastSessionTreeLenKey } from './state.js'; + +const MirrorLenSchema = z.coerce.number().int().nonnegative().catch(0); + +export function parseMirrorLen(raw: unknown): number { + return MirrorLenSchema.parse(raw ?? 0); +} + +export async function mirrorMessagesToSessionTree( + iii: ISdk, + session_id: string, + messages: AgentMessage[], +): Promise { + const lastKey = lastSessionTreeLenKey(session_id); + const alreadyMirrored = parseMirrorLen(await stateGet(iii, AGENT_SCOPE, lastKey)); + if (messages.length <= alreadyMirrored) return; + + if (alreadyMirrored === 0) { + const ensured = await triggerSessionTree(iii, 'session-tree::ensure', { session_id }); + if (!ensured) return; + } + + let lastAppended: string | null = null; + if (alreadyMirrored > 0) { + const resp = await triggerSessionTree<{ messages?: Array<{ entry_id?: string }> }>( + iii, + 'session-tree::messages', + { session_id }, + ); + if (!resp) return; + const tail = resp.messages?.at(-1); + lastAppended = tail?.entry_id ?? null; + } + + for (const msg of messages.slice(alreadyMirrored)) { + const resp = await triggerSessionTree<{ entry_id?: string }>( + iii, + 'session-tree::append', + { session_id, parent_id: lastAppended, message: msg }, + ); + if (!resp) return; + lastAppended = resp.entry_id ?? 
lastAppended; + } + + await stateSet(iii, AGENT_SCOPE, lastKey, messages.length); +} + +async function triggerSessionTree( + iii: ISdk, + function_id: string, + payload: Record, +): Promise { + try { + return await iii.trigger({ function_id, payload }); + } catch (err) { + logger.warn(`${function_id} failed; session-tree mirror skipped`, { + session_id: payload.session_id, + err: String(err), + }); + return null; + } +} diff --git a/harness/src/turn-orchestrator/turn-state-write.ts b/harness/src/turn-orchestrator/turn-state-write.ts index 296f4a7e..18e2b855 100644 --- a/harness/src/turn-orchestrator/turn-state-write.ts +++ b/harness/src/turn-orchestrator/turn-state-write.ts @@ -6,13 +6,14 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import { emit } from './events.js'; +import type { TurnStateView } from './schemas.js'; export async function emitTurnStateChanged( iii: ISdk, session_id: string, event_type: 'state:created' | 'state:updated', - new_value: Record, - old_value?: Record, + new_value: TurnStateView, + old_value?: TurnStateView, ): Promise { try { await emit(iii, session_id, { diff --git a/harness/src/turn-orchestrator/wake.ts b/harness/src/turn-orchestrator/wake.ts index b6945f08..258192ec 100644 --- a/harness/src/turn-orchestrator/wake.ts +++ b/harness/src/turn-orchestrator/wake.ts @@ -6,7 +6,7 @@ import { TriggerAction, type ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import * as persistence from './persistence.js'; -import { isTerminal, type TurnState, type TurnStateRecord } from './state.js'; +import { type TurnState } from './state.js'; export const TURN_STEP_QUEUE = 'turn-step'; @@ -19,15 +19,6 @@ export function shouldWakeStep(previousState: TurnState | null, newState: TurnSt return true; } -/** Guard before enqueueing from approval/abort — skip terminal sessions. 
- * Must exclude BOTH `stopped` and `failed`: there is no `turn::failed` - * handler, so waking a failed session would enqueue an unregistered - * function (function_not_found → retries → DLQ, blocking the session's - * FIFO group). `isTerminal` covers both. */ -export function shouldRunStep(rec: TurnStateRecord | null): boolean { - if (!rec) return false; - return !isTerminal(rec); -} export async function wakeState(iii: ISdk, session_id: string, state: TurnState): Promise { try { @@ -44,6 +35,6 @@ export async function wakeState(iii: ISdk, session_id: string, state: TurnState) /** Enqueue the handler for the session's current persisted state (approval/abort). */ export async function wakeFromRecord(iii: ISdk, session_id: string): Promise { const rec = await persistence.loadRecord(iii, session_id); - if (!rec || !shouldRunStep(rec)) return; + if (!rec || rec.state === 'stopped' || rec.state === 'failed') return; await wakeState(iii, session_id, rec.state); } diff --git a/harness/tests/approval-gate/resolve.test.ts b/harness/tests/approval-gate/resolve.test.ts index 689b0756..c859cb6d 100644 --- a/harness/tests/approval-gate/resolve.test.ts +++ b/harness/tests/approval-gate/resolve.test.ts @@ -27,17 +27,6 @@ describe('handleResolveRequest — routing the decision', () => { ]); }); - it('prefers function_call_id over a conflicting legacy tool_call_id', async () => { - const { iii, resumeCalls } = fakeIii(); - await handleResolveRequest(iii, { - session_id: 's1', - function_call_id: 'canonical', - tool_call_id: 'legacy', - decision: 'allow', - }); - expect(resumeCalls[0]?.function_id).toBe('turn::approval_resume::s1/canonical'); - }); - it('never emits to the agent::events stream (denial flows via execution_end)', async () => { const { iii, streamSets } = fakeIii(); await handleResolveRequest(iii, { @@ -66,23 +55,23 @@ describe('handleResolveRequest — hostile / malformed input is rejected, not cr expect(calls).toHaveLength(0); }); - it('returns invalid_payload when the 
resolved id (via tool_call_id) contains a slash', async () => { + it('returns invalid_payload and fires nothing when function_call_id is missing', async () => { const { iii, calls } = fakeIii(); const out = await handleResolveRequest(iii, { session_id: 's1', - tool_call_id: 'fc/evil', decision: 'allow', - }); + } as never); expect(out).toEqual({ ok: false, error: 'invalid_payload' }); expect(calls).toHaveLength(0); }); - it('returns invalid_payload and fires nothing when both ids are missing', async () => { + it('returns invalid_payload when function_call_id contains a slash', async () => { const { iii, calls } = fakeIii(); const out = await handleResolveRequest(iii, { session_id: 's1', + function_call_id: 'fc/evil', decision: 'allow', - } as never); + }); expect(out).toEqual({ ok: false, error: 'invalid_payload' }); expect(calls).toHaveLength(0); }); diff --git a/harness/tests/approval-gate/schemas.test.ts b/harness/tests/approval-gate/schemas.test.ts index 662e6c25..f32ddbde 100644 --- a/harness/tests/approval-gate/schemas.test.ts +++ b/harness/tests/approval-gate/schemas.test.ts @@ -16,39 +16,23 @@ import { } from '../../src/approval-gate/schemas.js'; describe('ResolvePayloadSchema — id normalization & validation', () => { - it('prefers function_call_id over a conflicting tool_call_id', () => { - expect( - ResolvePayloadSchema.parse({ - session_id: 's', - function_call_id: 'canonical', - tool_call_id: 'legacy', - decision: 'allow', - }), - ).toEqual({ - session_id: 's', - function_call_id: 'canonical', - decision: 'allow', - reason: null, - }); - }); - it('coerces an omitted reason to null', () => { const parsed = ResolvePayloadSchema.parse({ session_id: 's', - tool_call_id: 'legacy', + function_call_id: 'fc-1', decision: 'deny', }); expect(parsed.reason).toBeNull(); - expect(parsed.function_call_id).toBe('legacy'); + expect(parsed.function_call_id).toBe('fc-1'); }); it.each([ - ['both ids missing', { session_id: 's', decision: 'allow' }], + ['function_call_id 
missing', { session_id: 's', decision: 'allow' }], + ['tool_call_id only (legacy)', { session_id: 's', tool_call_id: 'legacy', decision: 'allow' }], ['empty function_call_id', { session_id: 's', function_call_id: '', decision: 'allow' }], ['empty session_id', { session_id: '', function_call_id: 'fc', decision: 'allow' }], ['slash in session_id', { session_id: 'a/b', function_call_id: 'fc', decision: 'allow' }], ['slash in function_call_id', { session_id: 's', function_call_id: 'a/b', decision: 'allow' }], - ['slash via tool_call_id', { session_id: 's', tool_call_id: 'a/b', decision: 'allow' }], ['non-enum decision', { session_id: 's', function_call_id: 'fc', decision: 'maybe' }], ['numeric reason', { session_id: 's', function_call_id: 'fc', decision: 'allow', reason: 7 }], ])('rejects %s', (_label, payload) => { diff --git a/harness/tests/context-compaction/lease.test.ts b/harness/tests/context-compaction/lease.test.ts index 3f44b7d0..612e2e9b 100644 --- a/harness/tests/context-compaction/lease.test.ts +++ b/harness/tests/context-compaction/lease.test.ts @@ -25,8 +25,8 @@ describe('lease helpers', () => { expect(readLeaseTimestampSecs({ nonce: 'a', ts: 1_700_000_000_000 })).toBe(1_700_000_000); }); - it('readLeaseTimestampSecs accepts legacy bare-int (seconds)', () => { - expect(readLeaseTimestampSecs(1_700_000_000)).toBe(1_700_000_000); + it('readLeaseTimestampSecs treats bare-int values as inactive', () => { + expect(readLeaseTimestampSecs(1_700_000_000)).toBe(0); }); it('readLeaseTimestampSecs returns 0 for garbage', () => { diff --git a/harness/tests/integration/approval-resume.e2e.test.ts b/harness/tests/integration/approval-resume.e2e.test.ts index 77d20819..08b3061f 100644 --- a/harness/tests/integration/approval-resume.e2e.test.ts +++ b/harness/tests/integration/approval-resume.e2e.test.ts @@ -55,9 +55,7 @@ function fakeIii(): { message_type: 'state', }; if (isAbortSignalWrite(event)) { - queueMicrotask(() => { - void handleAbortSignalWrite(iii as unknown 
as ISdk, event); - }); + await handleAbortSignalWrite(iii as unknown as ISdk, event); } } return null; @@ -76,8 +74,7 @@ function fakeIii(): { const handler = handlers.get(function_id); if (handler) { - await handler(payload); - return null; + return handler(payload); } return null; diff --git a/harness/tests/integration/on-record-written.e2e.test.ts b/harness/tests/integration/on-record-written.e2e.test.ts index bac27500..39d72251 100644 --- a/harness/tests/integration/on-record-written.e2e.test.ts +++ b/harness/tests/integration/on-record-written.e2e.test.ts @@ -30,8 +30,13 @@ function fakeIii(): { if (function_id === 'state::set') { const p = payload as { scope: string; key: string; value: unknown }; - stateStore.set(`${p.scope}/${p.key}`, structuredClone(p.value)); - return null; + const storeKey = `${p.scope}/${p.key}`; + const old_value = stateStore.has(storeKey) + ? structuredClone(stateStore.get(storeKey)) + : null; + const new_value = structuredClone(p.value); + stateStore.set(storeKey, new_value); + return { old_value, new_value }; } if (function_id === 'state::update') { @@ -135,14 +140,14 @@ function turnStateGets(iii: ISdk, session_id: string): number { } describe('saveRecord read elimination (#5)', () => { - it('2-arg saveRecord reads turn_state exactly once (no double load)', async () => { + it('2-arg saveRecord does not pre-read turn_state (uses state::set old_value)', async () => { const { iii } = fakeIii(); const rec = newRecord('sess-r1'); rec.state = 'provisioning'; await persistence.saveRecord(iii, rec); - expect(turnStateGets(iii, 'sess-r1')).toBe(1); + expect(turnStateGets(iii, 'sess-r1')).toBe(0); }); it('saveRecord with a threaded previous reads turn_state zero times', async () => { diff --git a/harness/tests/runtime/state-client.test.ts b/harness/tests/runtime/state-client.test.ts new file mode 100644 index 00000000..7aafc63a --- /dev/null +++ b/harness/tests/runtime/state-client.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it, vi 
} from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { createState } from '../../src/runtime/state.js'; + +function makeIii(triggerImpl: (...args: unknown[]) => unknown): ISdk { + return { + trigger: vi.fn(triggerImpl), + } as unknown as ISdk; +} + +describe('createState', () => { + it('tolerant get returns null and does not throw on trigger failure', async () => { + const iii = makeIii(() => { + throw new Error('backend down'); + }); + await expect(createState(iii).get({ scope: 's', key: 'k' })).resolves.toBeNull(); + }); + + it('strict get propagates trigger failure', async () => { + const iii = makeIii(() => { + throw new Error('backend down'); + }); + await expect(createState(iii, { tolerant: false }).get({ scope: 's', key: 'k' })).rejects.toThrow( + 'backend down', + ); + }); + + it('tolerant list returns [] on trigger failure', async () => { + const iii = makeIii(() => { + throw new Error('list failed'); + }); + await expect(createState(iii).list({ scope: 's' })).resolves.toEqual([]); + }); + + it('strict list propagates trigger failure', async () => { + const iii = makeIii(() => { + throw new Error('list failed'); + }); + await expect(createState(iii, { tolerant: false }).list({ scope: 's' })).rejects.toThrow( + 'list failed', + ); + }); + + it('get normalizes undefined to null', async () => { + const iii = makeIii(async () => undefined); + await expect(createState(iii).get({ scope: 's', key: 'missing' })).resolves.toBeNull(); + }); + + it('list parses flat arrays from state::list', async () => { + const rows = [{ id: 'a' }, { id: 'b' }]; + const iii = makeIii(async ({ function_id }: { function_id: string }) => { + if (function_id === 'state::list') return rows; + return null; + }); + await expect(createState(iii).list({ scope: 'agent' })).resolves.toEqual(rows); + }); +}); diff --git a/harness/tests/runtime/state-list.test.ts b/harness/tests/runtime/state-list.test.ts index 12d5bd2e..e043d382 100644 --- 
a/harness/tests/runtime/state-list.test.ts +++ b/harness/tests/runtime/state-list.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest'; -import { parseStateListKeyedEntries, parseStateListValues } from '../../src/runtime/state.js'; +import { parseStateListValues } from '../../src/runtime/state.js'; describe('parseStateListValues', () => { it('accepts flat array (official iii shape)', () => { @@ -12,26 +12,9 @@ describe('parseStateListValues', () => { expect(parseStateListValues([{ value: inner }])).toEqual([inner]); }); - it('accepts { items: [...] } envelope', () => { - const inner = { id: 'm1' }; - expect(parseStateListValues({ items: [inner, { value: { id: 'm2' } }] })).toEqual([ - inner, - { id: 'm2' }, - ]); - }); - it('returns [] for non-array responses', () => { expect(parseStateListValues(null)).toEqual([]); expect(parseStateListValues({ ok: true })).toEqual([]); - }); -}); - -describe('parseStateListKeyedEntries', () => { - it('preserves key when present', () => { - expect( - parseStateListKeyedEntries({ - items: [{ key: 'session/s1/turn_state', value: { state: 'stopped' } }], - }), - ).toEqual([{ key: 'session/s1/turn_state', value: { state: 'stopped' } }]); + expect(parseStateListValues({ items: [{ id: 'm1' }] })).toEqual([]); }); }); diff --git a/harness/tests/session/tree/store.test.ts b/harness/tests/session/tree/store.test.ts index 78c71d03..e8a9fba2 100644 --- a/harness/tests/session/tree/store.test.ts +++ b/harness/tests/session/tree/store.test.ts @@ -17,7 +17,7 @@ function fakeIii(entries: SessionEntry[]): ISdk { return { trigger: async (req: { function_id: string }): Promise => { if (req.function_id === 'state::list') { - return { items: entries.map((e) => ({ value: e })) } as unknown as R; + return entries as unknown as R; } return null as unknown as R; }, diff --git a/harness/tests/turn-orchestrator/flat-messages.test.ts b/harness/tests/turn-orchestrator/flat-messages.test.ts new file mode 100644 index 00000000..61236a99 --- 
/dev/null +++ b/harness/tests/turn-orchestrator/flat-messages.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from 'vitest'; +import { parseFlatMessages } from '../../src/turn-orchestrator/flat-messages.js'; + +describe('parseFlatMessages', () => { + it('returns the array when messages are objects', () => { + const messages = [{ role: 'user', content: [], timestamp: 1 }]; + expect(parseFlatMessages(messages)).toEqual(messages); + }); + + it('returns [] for null, undefined, and non-arrays', () => { + expect(parseFlatMessages(null)).toEqual([]); + expect(parseFlatMessages(undefined)).toEqual([]); + expect(parseFlatMessages('bad')).toEqual([]); + expect(parseFlatMessages({})).toEqual([]); + }); +}); diff --git a/harness/tests/turn-orchestrator/parse-turn-state-record.test.ts b/harness/tests/turn-orchestrator/parse-turn-state-record.test.ts new file mode 100644 index 00000000..be83190b --- /dev/null +++ b/harness/tests/turn-orchestrator/parse-turn-state-record.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from 'vitest'; +import { newRecord, parseTurnStateRecord } from '../../src/turn-orchestrator/state.js'; + +describe('parseTurnStateRecord', () => { + it('returns a valid record for a well-formed turn_state', () => { + const rec = newRecord('sess-1'); + expect(parseTurnStateRecord(rec)).toEqual(rec); + }); + + it('returns null for null, undefined, and primitives', () => { + expect(parseTurnStateRecord(null)).toBeNull(); + expect(parseTurnStateRecord(undefined)).toBeNull(); + expect(parseTurnStateRecord('nope')).toBeNull(); + expect(parseTurnStateRecord(42)).toBeNull(); + }); + + it('returns null when required identity fields are missing', () => { + expect(parseTurnStateRecord({ state: 'provisioning' })).toBeNull(); + expect(parseTurnStateRecord({ session_id: 's1' })).toBeNull(); + expect(parseTurnStateRecord({ messages: [] })).toBeNull(); + }); + + it('applies defaults for missing scalar fields on partial records', () => { + const parsed = 
parseTurnStateRecord({ + session_id: 's1', + state: 'provisioning', + }); + expect(parsed).toMatchObject({ + session_id: 's1', + state: 'provisioning', + turn_count: 0, + turn_end_emitted: false, + }); + }); +}); diff --git a/harness/tests/turn-orchestrator/persistence-prepared.test.ts b/harness/tests/turn-orchestrator/persistence-prepared.test.ts index 29035a08..fb643130 100644 --- a/harness/tests/turn-orchestrator/persistence-prepared.test.ts +++ b/harness/tests/turn-orchestrator/persistence-prepared.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest'; -import type { PreparedEntry } from '../../src/turn-orchestrator/persistence.js'; +import type { PreparedEntry } from '../../src/turn-orchestrator/state.js'; describe('PreparedEntry with pre_approved', () => { it('accepts a pre_approved: true entry', () => { diff --git a/harness/tests/turn-orchestrator/run-request.test.ts b/harness/tests/turn-orchestrator/run-request.test.ts index 2a6a4cfd..f2ef28be 100644 --- a/harness/tests/turn-orchestrator/run-request.test.ts +++ b/harness/tests/turn-orchestrator/run-request.test.ts @@ -38,6 +38,18 @@ describe('parseRunRequest', () => { function_schemas: [], }); }); + + it('treats null and undefined as empty run request', () => { + const empty = { + provider: '', + model: '', + mode: null, + system_prompt: '', + function_schemas: [], + }; + expect(parseRunRequest(null)).toEqual(empty); + expect(parseRunRequest(undefined)).toEqual(empty); + }); }); describe('parseRunRequest function_schemas', () => { diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index e78dd510..d1179619 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -6,7 +6,7 @@ import type { TurnStateRecord, } from '../../src/turn-orchestrator/state.js'; import { - isTerminal, + AGENT_SCOPE, messagesKey, newRecord, transitionTo, @@ -76,7 +76,8 @@ describe('handleAwaitingApproval with 
empty queue', () => { }); describe('state keys', () => { - it('namespace by session', () => { + it('namespace by session under agent scope', () => { + expect(AGENT_SCOPE).toBe('agent'); expect(turnStateKey('abc')).toBe('session/abc/turn_state'); expect(messagesKey('abc')).toBe('session/abc/messages'); }); @@ -86,13 +87,14 @@ describe('state record', () => { it('newRecord starts in provisioning, non-terminal, no work', () => { const r = newRecord('s1', 5); expect(r.state).toBe('provisioning'); - expect(isTerminal(r)).toBe(false); + expect(r.state).not.toBe('stopped'); + expect(r.state).not.toBe('failed'); expect(r.work).toBeUndefined(); expect(r.max_turns).toBe(5); }); it('failed is terminal', () => { const r: TurnStateRecord = { ...newRecord('s1'), state: 'failed', error: { kind: 'bug', message: 'x' } }; - expect(isTerminal(r)).toBe(true); + expect(r.state).toBe('failed'); }); }); From 8e2e3818a97aa340d078435df385a58ff4c34565 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 07:08:31 -0300 Subject: [PATCH 22/41] feat(turn-orchestrator): add approval-decision state-event schema --- harness/src/turn-orchestrator/schemas.ts | 16 ++++++ .../turn-orchestrator/on-approval.test.ts | 55 +++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 harness/tests/turn-orchestrator/on-approval.test.ts diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index 2c83ed1b..4743dae4 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -78,3 +78,19 @@ export const AbortSignalWriteEventSchema = AgentAbortSignalWriteEventSchema.tran return { session_id }; }); export type ParsedAbortSignalWrite = z.infer; + +// --- turn::is_approval_decision / turn::on_approval (approvals-scope state event) --- +const ApprovalDecisionWriteEventSchema = z.object({ + type: z.literal('state').optional(), + scope: z.literal('approvals').optional(), + event_type: 
z.enum(['state:created', 'state:updated']), + key: z.string().regex(/^[^/]+\/[^/]+$/), + new_value: z.object({ decision: z.enum(['allow', 'deny', 'aborted']) }).passthrough(), + old_value: z.unknown().optional(), +}); + +export const ApprovalDecisionEventSchema = ApprovalDecisionWriteEventSchema.transform((data) => { + const session_id = data.key.slice(0, data.key.indexOf('/')); + return { session_id }; +}); +export type ParsedApprovalDecisionWrite = z.infer; diff --git a/harness/tests/turn-orchestrator/on-approval.test.ts b/harness/tests/turn-orchestrator/on-approval.test.ts new file mode 100644 index 00000000..8062372a --- /dev/null +++ b/harness/tests/turn-orchestrator/on-approval.test.ts @@ -0,0 +1,55 @@ +import { describe, expect, it } from 'vitest'; +import { ApprovalDecisionEventSchema } from '../../src/turn-orchestrator/schemas.js'; + +const matchingEvent = { + event_type: 'state:created' as const, + scope: 'approvals' as const, + key: 'sess-abc/fc-1', + old_value: null, + new_value: { decision: 'allow', reason: null }, + message_type: 'state', +}; + +describe('ApprovalDecisionEventSchema', () => { + it('extracts session_id from the / key', () => { + expect(ApprovalDecisionEventSchema.parse(matchingEvent)).toEqual({ session_id: 'sess-abc' }); + }); + + it('accepts deny and aborted decisions', () => { + expect( + ApprovalDecisionEventSchema.parse({ + ...matchingEvent, + new_value: { decision: 'deny', reason: 'policy' }, + }), + ).toEqual({ session_id: 'sess-abc' }); + expect( + ApprovalDecisionEventSchema.parse({ + ...matchingEvent, + new_value: { decision: 'aborted', reason: 'x' }, + }), + ).toEqual({ session_id: 'sess-abc' }); + }); + + it('rejects values without a decision', () => { + expect(() => + ApprovalDecisionEventSchema.parse({ ...matchingEvent, new_value: { reason: 'x' } }), + ).toThrow(); + }); + + it('rejects keys that are not / shaped', () => { + expect(() => + ApprovalDecisionEventSchema.parse({ ...matchingEvent, key: 
'session/sess-abc/turn_state' }), + ).toThrow(); + expect(() => + ApprovalDecisionEventSchema.parse({ ...matchingEvent, key: 'no-slash' }), + ).toThrow(); + }); + + it('rejects state:deleted and nested wrappers', () => { + expect(() => + ApprovalDecisionEventSchema.parse({ ...matchingEvent, event_type: 'state:deleted' }), + ).toThrow(); + expect(() => ApprovalDecisionEventSchema.parse({ payload: matchingEvent })).toThrow(); + expect(() => ApprovalDecisionEventSchema.parse(null)).toThrow(); + }); +}); From 6d8185c0566bf394217c36cb0830a30167854c28 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 07:09:45 -0300 Subject: [PATCH 23/41] feat(turn-orchestrator): add reactive approval state trigger --- harness/src/turn-orchestrator/on-approval.ts | 86 +++++++++++++++++++ .../turn-orchestrator/on-approval.test.ts | 73 +++++++++++++++- 2 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 harness/src/turn-orchestrator/on-approval.ts diff --git a/harness/src/turn-orchestrator/on-approval.ts b/harness/src/turn-orchestrator/on-approval.ts new file mode 100644 index 00000000..f6a84a2e --- /dev/null +++ b/harness/src/turn-orchestrator/on-approval.ts @@ -0,0 +1,86 @@ +/** + * Reactive approval wake. A `state` trigger on `scope: 'approvals'` filtered by + * the `/` decision key fires this adapter, which enqueues + * `turn::{state}` on the durable FIFO queue so the parked session re-reads its + * decisions in `function_awaiting_approval`. Mirrors `on-abort-signal.ts`. + * + * The decision write is produced by `approval::resolve` (approval-gate) or by + * `abort` — both `state::set` `approvals// = { decision, reason }`. 
+ */ + +import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; +import { listAgentTurnStateRecords } from './persistence.js'; +import { ApprovalDecisionEventSchema, type ParsedApprovalDecisionWrite } from './schemas.js'; +import { wakeFromRecord } from './wake.js'; + +export function parseApprovalDecisionWrite(event: unknown): ParsedApprovalDecisionWrite | null { + const result = ApprovalDecisionEventSchema.safeParse(event); + return result.success ? result.data : null; +} + +export function isApprovalDecisionWrite(event: unknown): boolean { + return parseApprovalDecisionWrite(event) !== null; +} + +export async function execute(iii: ISdk, write: ParsedApprovalDecisionWrite): Promise { + try { + await wakeFromRecord(iii, write.session_id); + } catch (err) { + logger.warn('turn::on_approval: wake failed', { + session_id: write.session_id, + err: String(err), + }); + } +} + +export async function handleApprovalDecisionWrite(iii: ISdk, event: unknown): Promise { + const write = parseApprovalDecisionWrite(event); + if (!write) return; + await execute(iii, write); +} + +/** Wake sessions still parked on approval (e.g. a decision arrived during downtime). 
*/ +export async function recoverParkedApprovals(iii: ISdk): Promise { + const records = await listAgentTurnStateRecords(iii); + for (const rec of records) { + if (rec.state !== 'function_awaiting_approval') continue; + try { + await wakeFromRecord(iii, rec.session_id); + } catch (err) { + logger.warn('recoverParkedApprovals: wake failed', { + session_id: rec.session_id, + err: String(err), + }); + } + } +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::is_approval_decision', + async (event: unknown) => isApprovalDecisionWrite(event), + { + description: + 'Condition: state event writes a decision to approvals// (state:created or state:updated).', + }, + ); + + iii.registerFunction( + 'turn::on_approval', + async (event: unknown) => handleApprovalDecisionWrite(iii, event), + { + description: + 'State trigger adapter on scope=approvals for decision writes; enqueues turn::{state} so the parked session reads its decision.', + }, + ); + + iii.registerTrigger({ + type: 'state', + function_id: 'turn::on_approval', + config: { + scope: 'approvals', + condition_function_id: 'turn::is_approval_decision', + }, + }); +} diff --git a/harness/tests/turn-orchestrator/on-approval.test.ts b/harness/tests/turn-orchestrator/on-approval.test.ts index 8062372a..44e96463 100644 --- a/harness/tests/turn-orchestrator/on-approval.test.ts +++ b/harness/tests/turn-orchestrator/on-approval.test.ts @@ -1,5 +1,13 @@ -import { describe, expect, it } from 'vitest'; +import { describe, expect, it, vi } from 'vitest'; +import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; +import { + execute, + handleApprovalDecisionWrite, + isApprovalDecisionWrite, + parseApprovalDecisionWrite, +} from '../../src/turn-orchestrator/on-approval.js'; import { ApprovalDecisionEventSchema } from '../../src/turn-orchestrator/schemas.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; const matchingEvent = { event_type: 'state:created' as const, @@ -53,3 
+61,66 @@ describe('ApprovalDecisionEventSchema', () => { expect(() => ApprovalDecisionEventSchema.parse(null)).toThrow(); }); }); + +function mockIiiWithTurnState(rec: ReturnType): { + iii: ISdk; + triggers: Array<{ function_id: string; payload: unknown; action?: unknown }>; +} { + const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { + if (req.function_id === 'state::get') return rec; + triggers.push(req); + return null; + }), + } as unknown as ISdk; + return { iii, triggers }; +} + +describe('parseApprovalDecisionWrite condition', () => { + it('matches an approvals decision write and extracts session_id', () => { + expect(parseApprovalDecisionWrite(matchingEvent)).toEqual({ session_id: 'sess-abc' }); + expect(isApprovalDecisionWrite(matchingEvent)).toBe(true); + }); + + it('skips writes with no decision and non-/ keys', () => { + expect(parseApprovalDecisionWrite({ ...matchingEvent, new_value: { reason: 'x' } })).toBeNull(); + expect( + parseApprovalDecisionWrite({ ...matchingEvent, key: 'session/s/turn_state' }), + ).toBeNull(); + }); +}); + +describe('handleApprovalDecisionWrite', () => { + it('extracts session_id and enqueues turn::{state}', async () => { + const rec = newRecord('sess-abc'); + rec.state = 'function_awaiting_approval'; + const { iii, triggers } = mockIiiWithTurnState(rec); + + await handleApprovalDecisionWrite(iii, matchingEvent); + + expect(triggers).toHaveLength(1); + expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); + expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); + }); + + it('no-ops on a non-matching event', async () => { + const iii = { trigger: vi.fn() } as unknown as ISdk; + await handleApprovalDecisionWrite(iii, { ...matchingEvent, new_value: { reason: 'x' } }); + 
expect(iii.trigger).not.toHaveBeenCalled(); + }); +}); + +describe('on-approval execute', () => { + it('enqueues turn::{state} on the turn-step queue', async () => { + const rec = newRecord('sess-abc'); + rec.state = 'function_awaiting_approval'; + const { iii, triggers } = mockIiiWithTurnState(rec); + + await execute(iii, { session_id: 'sess-abc' }); + + expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); + expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); + }); +}); From 836ef4e6b0720be39bf610e031eeb2b0a011765a Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 07:10:24 -0300 Subject: [PATCH 24/41] feat(turn-orchestrator): register approval trigger; wake parked sessions on boot --- harness/src/turn-orchestrator/register.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 5a906e98..018c0a02 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -5,7 +5,7 @@ import { loadOrchestratorConfig } from './config.js'; import { register as registerGetState } from './get-state.js'; import { register as registerOnAbortSignal } from './on-abort-signal.js'; import { register as registerRunStart } from './run-start.js'; -import { recoverPendingApprovals } from './approval-resume.js'; +import { recoverParkedApprovals, register as registerOnApproval } from './on-approval.js'; import { registerAssistantStreaming, registerFunctionAwaitingApproval, @@ -25,9 +25,10 @@ export async function register(iii: ISdk, ctx: { configPath: string }): Promise< registerFunctionAwaitingApproval(iii); registerSteeringCheck(iii); registerTearingDown(iii); - await recoverPendingApprovals(iii); registerGetState(iii); registerOnAbortSignal(iii); + registerOnApproval(iii); + await recoverParkedApprovals(iii); void bootstrap.run(iii, orchestratorCfg); } From 
0a6699c19c58d9048868801b94dace5ea92eb67f Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 07:11:17 -0300 Subject: [PATCH 25/41] feat(approval-gate): resolve writes decision to approvals scope --- harness/src/approval-gate/resolve.ts | 19 ++++++++++++------- harness/tests/approval-gate/resolve.test.ts | 17 +++++++++++------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/harness/src/approval-gate/resolve.ts b/harness/src/approval-gate/resolve.ts index 1827d42a..fb4f3a8b 100644 --- a/harness/src/approval-gate/resolve.ts +++ b/harness/src/approval-gate/resolve.ts @@ -1,14 +1,16 @@ /** - * Approval resolution handler. `approval::resolve` routes the decision to - * the per-call resume function owned by the turn-orchestrator. + * Approval resolution handler. `approval::resolve` persists the decision to the + * shared `approvals` scope; the turn-orchestrator's reactive trigger + * (turn::on_approval) wakes the parked session. */ import type { ISdk } from 'iii-sdk'; import { logger } from '../runtime/otel.js'; import { + STATE_SCOPE, type ResolvePayloadInput, ResolvePayloadSchema, - approvalResumeFnId, + pendingKey, resolveFunctionOptions, } from './schemas.js'; @@ -24,15 +26,18 @@ export async function handleResolveRequest( if (!parsed.success) return { ok: false, error: 'invalid_payload' }; const { session_id, function_call_id, decision, reason } = parsed.data; - const resumeFnId = approvalResumeFnId(session_id, function_call_id); try { await iii.trigger({ - function_id: resumeFnId, - payload: { decision, reason }, + function_id: 'state::set', + payload: { + scope: STATE_SCOPE, + key: pendingKey(session_id, function_call_id), + value: { decision, reason }, + }, }); } catch (err) { - logger.error('approval-gate: resume fn invoke failed', { err: String(err), resumeFnId }); + logger.error('approval-gate: decision write failed', { err: String(err), session_id }); return { ok: false, error: 'resume_failed' }; } return { ok: true }; diff --git 
a/harness/tests/approval-gate/resolve.test.ts b/harness/tests/approval-gate/resolve.test.ts index c859cb6d..3f689054 100644 --- a/harness/tests/approval-gate/resolve.test.ts +++ b/harness/tests/approval-gate/resolve.test.ts @@ -9,9 +9,9 @@ import { describe, expect, it, vi } from 'vitest'; import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; import { fakeIii } from './_helpers/fakeIii.js'; -describe('handleResolveRequest — routing the decision', () => { - it('routes to the exact per-call resume fn with a normalized payload', async () => { - const { iii, resumeCalls } = fakeIii(); +describe('handleResolveRequest — writing the decision', () => { + it('writes the decision to approvals// with a normalized payload', async () => { + const { iii, calls, resumeCalls } = fakeIii(); const out = await handleResolveRequest(iii, { session_id: 's1', function_call_id: 'fc-1', @@ -19,12 +19,17 @@ describe('handleResolveRequest — routing the decision', () => { reason: 'user cancelled', }); expect(out).toEqual({ ok: true }); - expect(resumeCalls).toEqual([ + expect(calls).toEqual([ { - function_id: 'turn::approval_resume::s1/fc-1', - payload: { decision: 'deny', reason: 'user cancelled' }, + function_id: 'state::set', + payload: { + scope: 'approvals', + key: 's1/fc-1', + value: { decision: 'deny', reason: 'user cancelled' }, + }, }, ]); + expect(resumeCalls).toHaveLength(0); }); it('never emits to the agent::events stream (denial flows via execution_end)', async () => { From d64940da8266ecaafd5ef99174cb2371440c67e9 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 07:12:08 -0300 Subject: [PATCH 26/41] feat(turn-orchestrator): abort writes aborted decisions to approvals scope --- harness/src/turn-orchestrator/abort.ts | 14 +++++++----- harness/tests/turn-orchestrator/abort.test.ts | 22 ++++++------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/harness/src/turn-orchestrator/abort.ts 
b/harness/src/turn-orchestrator/abort.ts index b7103929..b32d6dcb 100644 --- a/harness/src/turn-orchestrator/abort.ts +++ b/harness/src/turn-orchestrator/abort.ts @@ -1,10 +1,11 @@ /** * `router::abort` side-effects. The abort path writes the per-session abort - * signal and, when a turn is paused on approvals, invokes each per-call resume - * function with an aborted decision (which persists and wakes turn::step). + * signal and, when a turn is paused on approvals, writes an aborted decision to + * the `approvals` scope per parked call — the reactive approval trigger + * (turn::on_approval) then wakes the session. */ -import { approvalResumeFnId } from '../approval-gate/schemas.js'; +import { STATE_SCOPE, pendingKey } from '../approval-gate/schemas.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import * as persistence from './persistence.js'; @@ -23,9 +24,10 @@ export async function performAbortSideEffects(iii: ISdk, session_id: string): Pr } for (const entry of rec.awaiting_approval) { - await trigger(iii, approvalResumeFnId(session_id, entry.function_call_id), { - decision: 'aborted', - reason: 'session_aborted', + await trigger(iii, 'state::set', { + scope: STATE_SCOPE, + key: pendingKey(session_id, entry.function_call_id), + value: { decision: 'aborted', reason: 'session_aborted' }, }); } } diff --git a/harness/tests/turn-orchestrator/abort.test.ts b/harness/tests/turn-orchestrator/abort.test.ts index 753c9829..a8d5ea15 100644 --- a/harness/tests/turn-orchestrator/abort.test.ts +++ b/harness/tests/turn-orchestrator/abort.test.ts @@ -65,23 +65,15 @@ describe('performAbortSideEffects', () => { await performAbortSideEffects(iii, 's1'); - const resumeTriggers = triggers.filter((t) => - t.function_id.startsWith('turn::approval_resume::'), - ); - expect(resumeTriggers).toHaveLength(2); - expect(resumeTriggers.map((t) => t.function_id).sort()).toEqual([ - 'turn::approval_resume::s1/fc-1', - 
'turn::approval_resume::s1/fc-2', - ]); - for (const t of resumeTriggers) { - expect(t.payload).toMatchObject({ decision: 'aborted', reason: 'session_aborted' }); - } - - const approvalWrites = triggers + const decisionWrites = triggers .filter((t) => t.function_id === 'state::set') - .map((t) => t.payload as Record) + .map((t) => t.payload as { scope?: string; key?: string; value?: unknown }) .filter((p) => p.scope === 'approvals'); - expect(approvalWrites).toHaveLength(0); + expect(decisionWrites.map((p) => p.key).sort()).toEqual(['s1/fc-1', 's1/fc-2']); + for (const w of decisionWrites) { + expect(w.value).toEqual({ decision: 'aborted', reason: 'session_aborted' }); + } + expect(triggers.some((t) => t.function_id.startsWith('turn::approval_resume'))).toBe(false); const publishes = triggers.filter((t) => t.function_id === 'iii::durable::publish'); expect(publishes).toHaveLength(0); From 233c76c3c751fba183b804288d2ca09e0c1d6176 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 07:13:23 -0300 Subject: [PATCH 27/41] feat(turn-orchestrator): park without per-call resume registration --- .../turn-orchestrator/states/function-execute.ts | 13 +++++++++---- harness/tests/turn-orchestrator/functions.test.ts | 13 ++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts index 8fcdec0b..9b53cd94 100644 --- a/harness/src/turn-orchestrator/states/function-execute.ts +++ b/harness/src/turn-orchestrator/states/function-execute.ts @@ -8,7 +8,11 @@ import type { ISdk } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; import type { AgentEvent } from '../../types/agent-event.js'; -import type { AgentMessage, AssistantMessage, FunctionResultMessage } from '../../types/agent-message.js'; +import type { + AgentMessage, + AssistantMessage, + FunctionResultMessage, +} from '../../types/agent-message.js'; 
import type { FunctionCall, FunctionResult } from '../../types/function.js'; import { dispatchWithHook, @@ -17,7 +21,6 @@ import { triggerFunctionCall, unwrapAgentTrigger, } from '../agent-trigger.js'; -import { registerApprovalResume } from '../approval-resume.js'; import { emit } from '../events.js'; import { publishAfter } from '../hook.js'; import * as persistence from '../persistence.js'; @@ -135,7 +138,10 @@ async function applyAfterHook(iii: ISdk, entry: ExecutedEntry): Promise { it('pushes the call onto awaiting_approval and transitions to function_awaiting_approval on pending', async () => { const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); dispatchSpy.mockResolvedValueOnce({ kind: 'pending' }); - const registerResumeSpy = vi - .spyOn(approvalResumeModule, 'registerApprovalResume') - .mockReturnValue({ unregister: vi.fn() } as never); const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); @@ -167,7 +163,6 @@ describe('handleExecute new flow', () => { expect(rec.state).toBe('function_awaiting_approval'); expect(rec.awaiting_approval).toHaveLength(1); expect(rec.awaiting_approval?.[0]?.function_call_id).toBe('fc-1'); - expect(registerResumeSpy).toHaveBeenCalledWith(iii, 's1', 'fc-1'); // work.batch should still be populated (re-entry will continue from it) expect(rec.work?.batch).toHaveLength(1); }); @@ -323,9 +318,7 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; - rec.last_assistant = makeAssistant([ - { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - ]); + rec.last_assistant = makeAssistant([{ id: 'fc-1', function_id: 'shell::run', arguments: {} }]); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -432,7 +425,9 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } 
as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; - rec.last_assistant = makeAssistant([{ id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }]); + rec.last_assistant = makeAssistant([ + { id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }, + ]); let storedMessages: unknown[] = []; vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); From 1e6b96748d20dc4745cdb512224ada0986683b0a Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 07:15:37 -0300 Subject: [PATCH 28/41] refactor(turn-orchestrator): remove per-call approval-resume machinery --- harness/src/approval-gate/iii.worker.yaml | 2 +- harness/src/approval-gate/schemas.ts | 6 +- harness/src/index.ts | 2 +- .../src/turn-orchestrator/approval-resume.ts | 128 ----------- harness/tests/approval-gate/schemas.test.ts | 6 +- .../integration/approval-resume.e2e.test.ts | 59 +++-- .../turn-orchestrator/approval-resume.test.ts | 208 ------------------ 7 files changed, 30 insertions(+), 381 deletions(-) delete mode 100644 harness/src/turn-orchestrator/approval-resume.ts delete mode 100644 harness/tests/turn-orchestrator/approval-resume.test.ts diff --git a/harness/src/approval-gate/iii.worker.yaml b/harness/src/approval-gate/iii.worker.yaml index 384ebc64..0e54c698 100644 --- a/harness/src/approval-gate/iii.worker.yaml +++ b/harness/src/approval-gate/iii.worker.yaml @@ -4,7 +4,7 @@ language: node deploy: binary manifest: package.json bin: iii-approval-gate -description: Registers approval::resolve; routes decisions to per-call turn::approval_resume functions owned by the turn-orchestrator. +description: Registers approval::resolve; persists human decisions to the approvals scope (turn-orchestrator reacts via turn::on_approval). 
runtime: kind: node diff --git a/harness/src/approval-gate/schemas.ts b/harness/src/approval-gate/schemas.ts index 2929dff9..cccba622 100644 --- a/harness/src/approval-gate/schemas.ts +++ b/harness/src/approval-gate/schemas.ts @@ -107,12 +107,8 @@ export const ApprovalResumePayloadSchema = z.object({ reason: z.string().nullable(), }); -export function approvalResumeFnId(session_id: string, function_call_id: string): string { - return `turn::approval_resume::${pendingKey(session_id, function_call_id)}`; -} - export const resolveFunctionOptions = { description: - 'Flip an approval to allow or deny. Invokes the per-call resume function to persist and wake the turn.', + 'Flip an approval to allow or deny. Persists the decision to the approvals scope to wake the parked turn.', request_format: zodToJsonSchema(ResolvePayloadSchema, { name: 'ResolvePayload' }), } as RegisterFunctionOptions; diff --git a/harness/src/index.ts b/harness/src/index.ts index 78e7f15b..ab7aec59 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -49,7 +49,7 @@ const WORKERS: readonly WorkerDefinition[] = [ { name: 'approval-gate', description: - 'Registers approval::resolve; routes human decisions to per-call turn::approval_resume functions owned by the turn-orchestrator.', + 'Registers approval::resolve; persists human decisions to the approvals scope (turn-orchestrator reacts via turn::on_approval).', register: (iii) => registerApprovalGate(iii), }, { diff --git a/harness/src/turn-orchestrator/approval-resume.ts b/harness/src/turn-orchestrator/approval-resume.ts deleted file mode 100644 index 817c9e9a..00000000 --- a/harness/src/turn-orchestrator/approval-resume.ts +++ /dev/null @@ -1,128 +0,0 @@ -/** - * Per-call resume functions for parked approvals. Registered when a call - * enters `function_awaiting_approval`; invoked by `approval::resolve` or - * abort. Persists to scope `approvals` and enqueues `turn::{state}` via wakeFromRecord. 
- */ - -import { - ApprovalResumePayloadSchema, - STATE_SCOPE, - approvalResumeFnId, - pendingKey, -} from '../approval-gate/schemas.js'; -import type { FunctionRef, ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import { stateGet, stateSet } from '../runtime/state.js'; -import { listAgentTurnStateRecords } from './persistence.js'; -import type { TurnStateRecord } from './state.js'; -import { wakeFromRecord } from './wake.js'; - -const resumeRefs = new Map(); - -/** Agent-scope turn_state still parked on human approval. */ -function pausedApprovalCalls( - rec: TurnStateRecord, -): { session_id: string; function_call_ids: string[] } | null { - if (rec.state !== 'function_awaiting_approval') return null; - - const session_id = rec.session_id; - if (!session_id) return null; - - const function_call_ids = (rec.awaiting_approval ?? []) - .map((entry) => entry.function_call_id) - .filter((id) => id.length > 0); - - return function_call_ids.length > 0 ? { session_id, function_call_ids } : null; -} - -function hasStoredDecision(value: unknown): boolean { - if (!value || typeof value !== 'object') return false; - const decision = (value as Record).decision; - return decision === 'allow' || decision === 'deny' || decision === 'aborted'; -} - -function unregisterApprovalResume(fnId: string): void { - const ref = resumeRefs.get(fnId); - if (!ref) return; - try { - ref.unregister(); - } catch {} - resumeRefs.delete(fnId); -} - -async function handleApprovalResume( - iii: ISdk, - session_id: string, - function_call_id: string, - payload: unknown, -): Promise { - const fnId = approvalResumeFnId(session_id, function_call_id); - if (!resumeRefs.has(fnId)) { - return; - } - const parsed = ApprovalResumePayloadSchema.safeParse(payload); - if (!parsed.success) { - logger.warn('approval resume: malformed payload', { - fnId, - err: String(parsed.error.issues[0]?.message ?? 
'unknown'), - }); - return; - } - - const key = pendingKey(session_id, function_call_id); - const existing = await stateGet(iii, STATE_SCOPE, key); - if (!hasStoredDecision(existing)) { - await stateSet(iii, STATE_SCOPE, key, { - decision: parsed.data.decision, - reason: parsed.data.reason, - }); - } - - try { - await wakeFromRecord(iii, session_id); - } catch (err) { - logger.warn('approval resume: turn step wake failed', { session_id, err: String(err) }); - } - - unregisterApprovalResume(fnId); -} - -export function registerApprovalResume( - iii: ISdk, - session_id: string, - function_call_id: string, -): FunctionRef { - const fnId = approvalResumeFnId(session_id, function_call_id); - const existing = resumeRefs.get(fnId); - if (existing) return existing; - - const ref = iii.registerFunction( - fnId, - async (payload: unknown) => handleApprovalResume(iii, session_id, function_call_id, payload), - { - description: - 'Resume a parked approval: persist decision to approvals scope and enqueue turn::{state}.', - }, - ); - resumeRefs.set(fnId, ref); - return ref; -} - -/** Clears in-memory resume refs (unit tests only). */ -export function clearApprovalResumeRegistry(): void { - resumeRefs.clear(); -} - -/** Re-register resume fns for sessions still paused on approval after worker restart. 
*/ -export async function recoverPendingApprovals(iii: ISdk): Promise { - const records = await listAgentTurnStateRecords(iii); - - for (const rec of records) { - const paused = pausedApprovalCalls(rec); - if (!paused) continue; - - for (const function_call_id of paused.function_call_ids) { - registerApprovalResume(iii, paused.session_id, function_call_id); - } - } -} diff --git a/harness/tests/approval-gate/schemas.test.ts b/harness/tests/approval-gate/schemas.test.ts index f32ddbde..66aac23a 100644 --- a/harness/tests/approval-gate/schemas.test.ts +++ b/harness/tests/approval-gate/schemas.test.ts @@ -9,7 +9,6 @@ import { describe, expect, it } from 'vitest'; import { ApprovalResumePayloadSchema, ResolvePayloadSchema, - approvalResumeFnId, parsePolicyReply, pendingKey, resolveFunctionOptions, @@ -86,8 +85,8 @@ describe('parsePolicyReply — fail closed', () => { }); describe('state-key derivation — separator integrity', () => { - it('derives turn::approval_resume::/', () => { - expect(approvalResumeFnId('sess-1', 'fc-1')).toBe('turn::approval_resume::sess-1/fc-1'); + it('derives /', () => { + expect(pendingKey('sess-1', 'fc-1')).toBe('sess-1/fc-1'); }); it.each([ @@ -95,7 +94,6 @@ describe('state-key derivation — separator integrity', () => { ['function_call', 'a', 'b/c'], ])('throws if the %s id smuggles a slash', (_which, session, fcall) => { expect(() => pendingKey(session, fcall)).toThrow(); - expect(() => approvalResumeFnId(session, fcall)).toThrow(); }); }); diff --git a/harness/tests/integration/approval-resume.e2e.test.ts b/harness/tests/integration/approval-resume.e2e.test.ts index 08b3061f..fd4a9b4a 100644 --- a/harness/tests/integration/approval-resume.e2e.test.ts +++ b/harness/tests/integration/approval-resume.e2e.test.ts @@ -1,13 +1,13 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; +import { describe, expect, it, vi } from 'vitest'; import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; -import { - 
clearApprovalResumeRegistry, - registerApprovalResume, -} from '../../src/turn-orchestrator/approval-resume.js'; import { handleAbortSignalWrite, isAbortSignalWrite, } from '../../src/turn-orchestrator/on-abort-signal.js'; +import { + handleApprovalDecisionWrite, + isApprovalDecisionWrite, +} from '../../src/turn-orchestrator/on-approval.js'; import type { ISdk } from '../../src/runtime/iii.js'; import { newRecord, turnStateKey } from '../../src/turn-orchestrator/state.js'; @@ -16,6 +16,11 @@ async function flushMicrotasks(): Promise { await Promise.resolve(); } +/** + * Fake iii where `state::set` re-emits a state event and feeds it to the + * matching reactive trigger (abort on the agent scope, approval decisions on + * the approvals scope) — exercising the producer → trigger → wake path. + */ function fakeIii(): { iii: ISdk; wakeTriggers: Array<{ session_id: string; function_id: string }>; @@ -23,13 +28,8 @@ function fakeIii(): { } { const stateStore = new Map(); const wakeTriggers: Array<{ session_id: string; function_id: string }> = []; - const handlers = new Map Promise>(); const iii = { - registerFunction: vi.fn((fnId: string, handler: (payload: unknown) => Promise) => { - handlers.set(fnId, handler); - return { unregister: vi.fn() }; - }), trigger: vi.fn( async ({ function_id, @@ -45,18 +45,18 @@ function fakeIii(): { const fullKey = `${p.scope}/${p.key}`; const old_value = stateStore.get(fullKey) ?? null; stateStore.set(fullKey, p.value); - if (p.scope === 'agent') { - const event = { - event_type: old_value == null ? 'state:created' : 'state:updated', - scope: p.scope, - key: p.key, - old_value, - new_value: p.value, - message_type: 'state', - }; - if (isAbortSignalWrite(event)) { - await handleAbortSignalWrite(iii as unknown as ISdk, event); - } + const event = { + event_type: old_value == null ? 
'state:created' : 'state:updated', + scope: p.scope, + key: p.key, + old_value, + new_value: p.value, + message_type: 'state', + }; + if (p.scope === 'agent' && isAbortSignalWrite(event)) { + await handleAbortSignalWrite(iii as unknown as ISdk, event); + } else if (p.scope === 'approvals' && isApprovalDecisionWrite(event)) { + await handleApprovalDecisionWrite(iii as unknown as ISdk, event); } return null; } @@ -72,11 +72,6 @@ function fakeIii(): { return null; } - const handler = handlers.get(function_id); - if (handler) { - return handler(payload); - } - return null; }, ), @@ -85,17 +80,12 @@ function fakeIii(): { return { iii: iii as unknown as ISdk, wakeTriggers, stateStore }; } -describe('approval resume reactive trigger', () => { - afterEach(() => { - clearApprovalResumeRegistry(); - }); - - it('approval::resolve via resume fn automatically enqueues turn::{state}', async () => { +describe('approval reactive trigger', () => { + it('approval::resolve persists the decision and the trigger enqueues turn::{state}', async () => { const { iii, wakeTriggers, stateStore } = fakeIii(); const rec = newRecord('sess-x'); rec.state = 'function_awaiting_approval'; stateStore.set(`agent/${turnStateKey('sess-x')}`, rec); - registerApprovalResume(iii, 'sess-x', 'fc-1'); const out = await handleResolveRequest(iii, { session_id: 'sess-x', @@ -106,6 +96,7 @@ describe('approval resume reactive trigger', () => { await flushMicrotasks(); + expect(stateStore.get('approvals/sess-x/fc-1')).toEqual({ decision: 'allow', reason: null }); expect(wakeTriggers).toHaveLength(1); expect(wakeTriggers[0]).toMatchObject({ session_id: 'sess-x', diff --git a/harness/tests/turn-orchestrator/approval-resume.test.ts b/harness/tests/turn-orchestrator/approval-resume.test.ts deleted file mode 100644 index 3a688b01..00000000 --- a/harness/tests/turn-orchestrator/approval-resume.test.ts +++ /dev/null @@ -1,208 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import { TriggerAction, 
type ISdk } from '../../src/runtime/iii.js'; -import { approvalResumeFnId } from '../../src/approval-gate/schemas.js'; -import { - clearApprovalResumeRegistry, - recoverPendingApprovals, - registerApprovalResume, -} from '../../src/turn-orchestrator/approval-resume.js'; - -type RegisteredFn = { - fnId: string; - handler: (payload: unknown) => Promise; - unregister: ReturnType; -}; - -import { - newRecord, - turnStateKey, - type TurnStateRecord, -} from '../../src/turn-orchestrator/state.js'; - -function makeIiiWithRegistry( - stateStore = new Map(), - agentTurnStates: TurnStateRecord[] = [], -) { - const registered = new Map(); - const wakeCalls: Array<{ session_id: string; action?: unknown; function_id?: string }> = []; - - const iii = { - registerFunction: vi.fn((fnId: string, handler: (payload: unknown) => Promise) => { - const entry: RegisteredFn = { - fnId, - handler, - unregister: vi.fn(), - }; - registered.set(fnId, entry); - return { unregister: entry.unregister }; - }), - trigger: vi.fn( - async ({ - function_id, - payload, - action, - }: { - function_id: string; - payload: unknown; - action?: unknown; - }) => { - if (function_id === 'state::get') { - const p = payload as { scope: string; key: string }; - return stateStore.get(`${p.scope}/${p.key}`) ?? 
null; - } - if (function_id === 'state::set') { - const p = payload as { scope: string; key: string; value: unknown }; - stateStore.set(`${p.scope}/${p.key}`, p.value); - return null; - } - if (function_id === 'state::list') { - return agentTurnStates; - } - if (function_id.startsWith('turn::') && function_id !== 'turn::on_abort_signal') { - const p = payload as { session_id: string }; - wakeCalls.push({ - session_id: p.session_id, - action, - function_id, - }); - return null; - } - return null; - }, - ), - } as unknown as ISdk; - - return { iii, registered, wakeCalls, stateStore }; -} - -afterEach(() => { - clearApprovalResumeRegistry(); - vi.restoreAllMocks(); -}); - -describe('registerApprovalResume', () => { - it('registers turn::approval_resume::s1/fc-1 on first call', () => { - const { iii, registered } = makeIiiWithRegistry(); - registerApprovalResume(iii, 's1', 'fc-1'); - expect(iii.registerFunction).toHaveBeenCalledWith( - 'turn::approval_resume::s1/fc-1', - expect.any(Function), - expect.objectContaining({ description: expect.any(String) }), - ); - expect(registered.has('turn::approval_resume::s1/fc-1')).toBe(true); - expect(approvalResumeFnId('s1', 'fc-1')).toBe('turn::approval_resume::s1/fc-1'); - }); - - it('returns the same ref when registered twice', () => { - const { iii, registered } = makeIiiWithRegistry(); - const a = registerApprovalResume(iii, 's1', 'fc-1'); - const b = registerApprovalResume(iii, 's1', 'fc-1'); - expect(a).toBe(b); - expect(iii.registerFunction).toHaveBeenCalledTimes(1); - expect(registered.size).toBe(1); - }); -}); - -describe('approval resume handler', () => { - it('persists decision, enqueues turn::{state}, and unregisters', async () => { - const { iii, registered, wakeCalls, stateStore } = makeIiiWithRegistry(); - const rec = newRecord('s1'); - rec.state = 'function_awaiting_approval'; - stateStore.set(`agent/${turnStateKey('s1')}`, rec); - registerApprovalResume(iii, 's1', 'fc-1'); - const entry = 
registered.get('turn::approval_resume::s1/fc-1'); - if (!entry) throw new Error('handler not registered'); - await entry.handler({ decision: 'allow', reason: null }); - - expect(stateStore.get('approvals/s1/fc-1')).toEqual({ decision: 'allow', reason: null }); - expect(wakeCalls).toEqual([ - { - session_id: 's1', - function_id: 'turn::function_awaiting_approval', - action: TriggerAction.Enqueue({ queue: 'turn-step' }), - }, - ]); - expect(entry!.unregister).toHaveBeenCalled(); - }); - - it('does not overwrite an existing decision (idempotent persist)', async () => { - const { iii, registered, stateStore } = makeIiiWithRegistry(); - stateStore.set('approvals/s1/fc-1', { decision: 'aborted', reason: 'session_aborted' }); - registerApprovalResume(iii, 's1', 'fc-1'); - const entry = registered.get('turn::approval_resume::s1/fc-1'); - if (!entry) throw new Error('handler not registered'); - await entry.handler({ - decision: 'allow', - reason: null, - }); - expect(stateStore.get('approvals/s1/fc-1')).toEqual({ - decision: 'aborted', - reason: 'session_aborted', - }); - }); - - it('does not enqueue turn::{state} again after unregister on second invoke', async () => { - const { iii, registered, wakeCalls } = makeIiiWithRegistry(); - registerApprovalResume(iii, 's1', 'fc-1'); - const entry = registered.get('turn::approval_resume::s1/fc-1'); - if (!entry) throw new Error('handler not registered'); - await entry.handler({ decision: 'deny', reason: 'nope' }); - wakeCalls.length = 0; - - await entry.handler({ decision: 'allow', reason: null }); - expect(wakeCalls).toHaveLength(0); - }); -}); - -describe('recoverPendingApprovals', () => { - it('re-registers resume fns for sessions in function_awaiting_approval', async () => { - const { iii, registered } = makeIiiWithRegistry(new Map(), [ - { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - awaiting_approval: [ 
- { function_call_id: 'fc-1', function_id: 'tool::x', args: {} }, - { function_call_id: 'fc-2', function_id: 'tool::y', args: {} }, - ], - }, - { - session_id: 's2', - state: 'stopped', - turn_count: 0, - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - }, - ]); - await recoverPendingApprovals(iii); - expect(registered.has('turn::approval_resume::s1/fc-1')).toBe(true); - expect(registered.has('turn::approval_resume::s1/fc-2')).toBe(true); - expect(registered.has('turn::approval_resume::s2/fc-1')).toBe(false); - }); - - it('ignores non-turn_state agent scope values (null, messages, etc.)', async () => { - const { iii, registered } = makeIiiWithRegistry(new Map(), [ - null, - { messages: [] }, - { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - awaiting_approval: [{ function_call_id: 'fc-1', function_id: 'tool::x', args: {} }], - }, - ]); - await recoverPendingApprovals(iii); - expect(registered.has('turn::approval_resume::s1/fc-1')).toBe(true); - expect(registered.size).toBe(1); - }); -}); From f7a07f6b3a7794c84a8b4d055abbf3279593fa98 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 08:57:34 -0300 Subject: [PATCH 29/41] feat(turn-orchestrator): add finishSession terminal helper --- harness/src/turn-orchestrator/finish.ts | 16 ++++++++++ .../tests/turn-orchestrator/finish.test.ts | 32 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 harness/src/turn-orchestrator/finish.ts create mode 100644 harness/tests/turn-orchestrator/finish.test.ts diff --git a/harness/src/turn-orchestrator/finish.ts b/harness/src/turn-orchestrator/finish.ts new file mode 100644 index 00000000..ae285f8c --- /dev/null +++ b/harness/src/turn-orchestrator/finish.ts @@ -0,0 +1,16 @@ +/** + * Terminal teardown: emit the final `agent_end` with the full transcript and + * stop the session. 
Called inline by the FSM paths that end a turn (replaces + * the former standalone `tearing_down` state). + */ + +import type { ISdk } from '../runtime/iii.js'; +import { emit } from './events.js'; +import * as persistence from './persistence.js'; +import { type TurnStateRecord, transitionTo } from './state.js'; + +export async function finishSession(iii: ISdk, rec: TurnStateRecord): Promise { + const messages = await persistence.loadMessages(iii, rec.session_id); + await emit(iii, rec.session_id, { type: 'agent_end', messages }); + transitionTo(rec, 'stopped'); +} diff --git a/harness/tests/turn-orchestrator/finish.test.ts b/harness/tests/turn-orchestrator/finish.test.ts new file mode 100644 index 00000000..f08606c7 --- /dev/null +++ b/harness/tests/turn-orchestrator/finish.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { finishSession } from '../../src/turn-orchestrator/finish.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; + +describe('finishSession', () => { + it('emits agent_end with the transcript and sets state to stopped', async () => { + const messages = [ + { role: 'user' as const, content: [{ type: 'text' as const, text: 'hi' }], timestamp: 1 }, + ]; + vi.spyOn(persistence, 'loadMessages').mockResolvedValue(messages as never); + const emitted: Array<{ type: string; messages?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + if (req.function_id === 'stream::set') { + emitted.push((req.payload as { data: { type: string; messages?: unknown } }).data); + } + return null; + }), + } as unknown as ISdk; + + const rec = newRecord('s1'); + rec.state = 'steering_check'; + await finishSession(iii, rec); + + expect(rec.state).toBe('stopped'); + const agentEnd = emitted.find((e) => e.type === 'agent_end'); + 
expect(agentEnd).toBeDefined(); + expect(agentEnd?.messages).toEqual(messages); + }); +}); From fa5f7bda2b3a520910d09ca03710ad0072b07aaf Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 08:59:58 -0300 Subject: [PATCH 30/41] feat(turn-orchestrator): end turns via finishSession instead of tearing_down --- .../states/assistant-streaming.ts | 19 +++++++++---- .../states/function-execute.ts | 7 ++++- .../states/steering-check.ts | 7 +++-- .../tests/turn-orchestrator/assistant.test.ts | 6 ++-- .../tests/turn-orchestrator/functions.test.ts | 28 +++++++++++++++++++ .../tests/turn-orchestrator/steering.test.ts | 19 +++++-------- 6 files changed, 60 insertions(+), 26 deletions(-) diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts index b8bfe217..34494119 100644 --- a/harness/src/turn-orchestrator/states/assistant-streaming.ts +++ b/harness/src/turn-orchestrator/states/assistant-streaming.ts @@ -11,6 +11,7 @@ import { logger } from '../../runtime/otel.js'; import type { AssistantMessage } from '../../types/agent-message.js'; import type { AgentFunction } from '../../types/function.js'; import { emit } from '../events.js'; +import { finishSession } from '../finish.js'; import * as persistence from '../persistence.js'; import { runPreflight } from '../preflight.js'; import { buildInput, decide, targetFunctionId } from '../provider-router.js'; @@ -62,7 +63,7 @@ async function finalizeAssistant(iii: ISdk, rec: TurnStateRecord): Promise if (isErrorOrAborted(asst)) { await emit(iii, rec.session_id, { type: 'turn_end', message: asst, function_results: [] }); rec.turn_end_emitted = true; - transitionTo(rec, 'tearing_down'); + await finishSession(iii, rec); return; } @@ -86,12 +87,14 @@ export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise< const request = await persistence.loadRunRequest(iii, rec.session_id); let messages = await 
persistence.loadMessages(iii, rec.session_id); const { provider, model, system_prompt } = request; - const tools = (Array.isArray(request.function_schemas) - ? request.function_schemas - : []) as AgentFunction[]; + const tools = ( + Array.isArray(request.function_schemas) ? request.function_schemas : [] + ) as AgentFunction[]; const decision = decide({ provider, model }); - if ((await runPreflight(iii, rec.session_id, messages, decision.provider, model)) === 'compacted') { + if ( + (await runPreflight(iii, rec.session_id, messages, decision.provider, model)) === 'compacted' + ) { messages = await persistence.loadMessages(iii, rec.session_id); } @@ -100,7 +103,11 @@ export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise< targetFn: targetFunctionId(decision), buildInput: (writerRef) => buildInput(decision, writerRef, system_prompt, messages, tools), onDelta: async (partial, event) => { - await emit(iii, rec.session_id, { type: 'message_update', message: partial, llm_event: event }); + await emit(iii, rec.session_id, { + type: 'message_update', + message: partial, + llm_event: event, + }); if (event.type === 'text_delta' || event.type === 'thinking_delta') { rec.assistant_body_streamed = true; } diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts index 9b53cd94..0224b9f5 100644 --- a/harness/src/turn-orchestrator/states/function-execute.ts +++ b/harness/src/turn-orchestrator/states/function-execute.ts @@ -22,6 +22,7 @@ import { unwrapAgentTrigger, } from '../agent-trigger.js'; import { emit } from '../events.js'; +import { finishSession } from '../finish.js'; import { publishAfter } from '../hook.js'; import * as persistence from '../persistence.js'; import { runTransition } from '../run-transition.js'; @@ -204,7 +205,11 @@ async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { diff --git 
a/harness/src/turn-orchestrator/states/steering-check.ts b/harness/src/turn-orchestrator/states/steering-check.ts index d3dc9278..32545b7a 100644 --- a/harness/src/turn-orchestrator/states/steering-check.ts +++ b/harness/src/turn-orchestrator/states/steering-check.ts @@ -8,6 +8,7 @@ import type { ISdk } from '../../runtime/iii.js'; import { type AgentMessage, emptyAssistant } from '../../types/agent-message.js'; import { emit } from '../events.js'; +import { finishSession } from '../finish.js'; import * as persistence from '../persistence.js'; import { runTransition } from '../run-transition.js'; import { AGENT_SCOPE, type TurnStateRecord, abortSignalKey, transitionTo } from '../state.js'; @@ -76,7 +77,7 @@ async function endForMaxTurns(iii: ISdk, rec: TurnStateRecord): Promise { await emit(iii, rec.session_id, { type: 'message_complete', message: msg, body_streamed: false }); await emit(iii, rec.session_id, { type: 'turn_end', message: msg, function_results: [] }); rec.turn_end_emitted = true; - transitionTo(rec, 'tearing_down'); + await finishSession(iii, rec); } async function emitTurnEndOnce(iii: ISdk, rec: TurnStateRecord): Promise { @@ -121,7 +122,7 @@ export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { // stream::set is called by emit(message_complete) and emit(turn_end) in the error path expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); }); - - }); describe('handleStreaming', () => { @@ -120,7 +118,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('tearing_down'); + expect(rec.state).toBe('stopped'); expect(rec.last_assistant?.stop_reason).toBe('error'); expect(rec.last_assistant?.error_message).toContain('create_channel failed'); }); @@ -246,7 +244,7 @@ describe('handleStreaming', () => { await handleStreaming(iii, rec); - expect(rec.state).toBe('tearing_down'); + expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); 
expect(saveSpy).not.toHaveBeenCalled(); }); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 9fad841f..55f7a3f6 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -102,6 +102,34 @@ describe('handleExecute new flow', () => { expect(rec.function_results[0]?.function_call_id).toBe('fc-1'); }); + it('finishes the session when every function result terminates', async () => { + const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; + const rec: TurnStateRecord = newRecord('s1'); + rec.state = 'function_execute'; + rec.work = { + batch: [ + { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + ], + results: [ + { + function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: { + content: [{ type: 'text' as const, text: 'bye' }], + details: {}, + terminate: true, + }, + is_error: false, + duration_ms: 1, + }, + ], + }; + mockFinalizePersistence(); + + await handleExecute(iii, rec); + + expect(rec.state).toBe('stopped'); + }); + it('does not re-emit function_execution_start for already-executed calls on re-entry', async () => { const emitted: Array<{ type: string; function_call_id?: string }> = []; vi.spyOn(events, 'emit').mockImplementation(async (_iii, _sid, ev: never) => { diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index ec09c5a1..f79f95dd 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -81,7 +81,7 @@ describe('handleSteering', () => { await handleSteering(iii, rec); - expect(rec.state).toBe('tearing_down'); + expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); expect(rec.last_assistant?.stop_reason).toBe('aborted'); expect(loadSpy).toHaveBeenCalledWith(iii, 's1'); @@ -183,18 +183,17 
@@ describe('handleSteering', () => { expect(emitSpy).not.toHaveBeenCalled(); }); - it('end_turn: emits turn_end once and transitions to tearing_down', async () => { + it('end_turn: emits turn_end then finishes the session (agent_end + stopped)', async () => { const { iii } = makeIii(); const rec = steeringRec('s1'); const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - const loadSpy = vi.spyOn(persistence, 'loadMessages'); await handleSteering(iii, rec); - expect(rec.state).toBe('tearing_down'); + expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); - expect(loadSpy).not.toHaveBeenCalled(); + expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'agent_end' })); }); it('reads abort via state::get on abort_signal key', async () => { @@ -227,7 +226,7 @@ describe('handleSteering', () => { await handleSteering(iii, rec); - expect(rec.state).toBe('tearing_down'); + expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); expect(rec.last_assistant?.content[0]).toEqual( expect.objectContaining({ type: 'text', text: expect.stringContaining('max_turns') }), @@ -237,11 +236,7 @@ describe('handleSteering', () => { 's1', expect.objectContaining({ type: 'message_complete' }), ); - expect(emitSpy).toHaveBeenCalledWith( - iii, - 's1', - expect.objectContaining({ type: 'turn_end' }), - ); + expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); expect(loadSpy).toHaveBeenCalledWith(iii, 's1'); expect(saveSpy).toHaveBeenCalledWith( iii, @@ -268,7 +263,7 @@ describe('handleSteering', () => { await handleSteering(iii, rec); - expect(rec.state).toBe('tearing_down'); + expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); expect(rec.last_assistant?.content[0]).toEqual( expect.objectContaining({ text: expect.stringContaining('max_turns') }), 
From 988978e77b7c0f6426745aa49e8efdbaf70c1958 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 09:02:37 -0300 Subject: [PATCH 31/41] refactor(turn-orchestrator): remove tearing_down state --- harness/src/index.ts | 2 +- harness/src/turn-orchestrator/finish.ts | 2 +- harness/src/turn-orchestrator/iii.worker.yaml | 2 +- harness/src/turn-orchestrator/main.ts | 2 +- harness/src/turn-orchestrator/register.ts | 2 - harness/src/turn-orchestrator/state.ts | 2 - harness/src/turn-orchestrator/states/index.ts | 1 - .../turn-orchestrator/states/tearing-down.ts | 34 -------------- .../tests/turn-orchestrator/assistant.test.ts | 4 +- .../tests/turn-orchestrator/steering.test.ts | 2 +- .../turn-orchestrator/tearing-down.test.ts | 47 ------------------- harness/tests/turn-orchestrator/wake.test.ts | 2 +- 12 files changed, 8 insertions(+), 94 deletions(-) delete mode 100644 harness/src/turn-orchestrator/states/tearing-down.ts delete mode 100644 harness/tests/turn-orchestrator/tearing-down.test.ts diff --git a/harness/src/index.ts b/harness/src/index.ts index ab7aec59..7e834461 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -43,7 +43,7 @@ const WORKERS: readonly WorkerDefinition[] = [ { name: 'turn-orchestrator', description: - 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, steering, tearing-down.', + 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, and steering.', register: (iii, ctx) => registerTurnOrchestrator(iii, ctx), }, { diff --git a/harness/src/turn-orchestrator/finish.ts b/harness/src/turn-orchestrator/finish.ts index ae285f8c..c66e75a0 100644 --- a/harness/src/turn-orchestrator/finish.ts +++ b/harness/src/turn-orchestrator/finish.ts @@ -1,7 +1,7 @@ /** * Terminal teardown: emit the final `agent_end` with the full transcript and * stop the session. 
Called inline by the FSM paths that end a turn (replaces - * the former standalone `tearing_down` state). + * the former standalone teardown state). */ import type { ISdk } from '../runtime/iii.js'; diff --git a/harness/src/turn-orchestrator/iii.worker.yaml b/harness/src/turn-orchestrator/iii.worker.yaml index 5341f13f..31086458 100644 --- a/harness/src/turn-orchestrator/iii.worker.yaml +++ b/harness/src/turn-orchestrator/iii.worker.yaml @@ -4,7 +4,7 @@ language: node deploy: binary manifest: package.json bin: iii-turn-orchestrator -description: Durable run::start state machine that drives each agent turn through provisioning, assistant, function-execute, steering, and tearing-down. +description: Durable run::start state machine that drives each agent turn through provisioning, assistant, function-execute, and steering. runtime: kind: node diff --git a/harness/src/turn-orchestrator/main.ts b/harness/src/turn-orchestrator/main.ts index 983cf93c..9880794f 100644 --- a/harness/src/turn-orchestrator/main.ts +++ b/harness/src/turn-orchestrator/main.ts @@ -5,6 +5,6 @@ import { register } from './register.js'; await bootstrapWorker({ name: 'turn-orchestrator', description: - 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, steering, tearing-down.', + 'Durable run::start state machine driving each agent turn through provisioning, assistant, function-execute, and steering.', register: (iii, ctx) => register(iii, ctx), }); diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 018c0a02..09a4882a 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -12,7 +12,6 @@ import { registerFunctionExecute, registerProvisioning, registerSteeringCheck, - registerTearingDown, } from './states/index.js'; export async function register(iii: ISdk, ctx: { configPath: string }): Promise { @@ -24,7 +23,6 @@ export async function 
register(iii: ISdk, ctx: { configPath: string }): Promise< registerFunctionExecute(iii); registerFunctionAwaitingApproval(iii); registerSteeringCheck(iii); - registerTearingDown(iii); registerGetState(iii); registerOnAbortSignal(iii); registerOnApproval(iii); diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 2e18da0c..22744a9d 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -20,7 +20,6 @@ export type TurnState = | 'function_execute' | 'function_awaiting_approval' | 'steering_check' - | 'tearing_down' | 'stopped' | 'failed'; @@ -71,7 +70,6 @@ const TURN_STATES = [ 'function_execute', 'function_awaiting_approval', 'steering_check', - 'tearing_down', 'stopped', 'failed', ] as const satisfies readonly TurnState[]; diff --git a/harness/src/turn-orchestrator/states/index.ts b/harness/src/turn-orchestrator/states/index.ts index 100b1743..b56ef19b 100644 --- a/harness/src/turn-orchestrator/states/index.ts +++ b/harness/src/turn-orchestrator/states/index.ts @@ -7,4 +7,3 @@ export { register as registerAssistantStreaming } from './assistant-streaming.js export { register as registerFunctionExecute } from './function-execute.js'; export { register as registerFunctionAwaitingApproval } from './function-awaiting-approval.js'; export { register as registerSteeringCheck } from './steering-check.js'; -export { register as registerTearingDown } from './tearing-down.js'; diff --git a/harness/src/turn-orchestrator/states/tearing-down.ts b/harness/src/turn-orchestrator/states/tearing-down.ts deleted file mode 100644 index d9af5420..00000000 --- a/harness/src/turn-orchestrator/states/tearing-down.ts +++ /dev/null @@ -1,34 +0,0 @@ -/** - * `turn::tearing_down`. Emit `agent_end` and transition to `stopped`. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
- */ - -import type { ISdk } from '../../runtime/iii.js'; -import type { AgentMessage } from '../../types/agent-message.js'; -import { emit } from '../events.js'; -import * as persistence from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -export async function handleTearingDown(iii: ISdk, rec: TurnStateRecord): Promise { - const messages: AgentMessage[] = await persistence.loadMessages(iii, rec.session_id); - await emit(iii, rec.session_id, { type: 'agent_end', messages }); - transitionTo(rec, 'stopped'); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::tearing_down', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'tearing_down', handleTearingDown, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state tearing_down: emit agent_end and mark stopped.', - }, - ); -} diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index adbc9979..1df99865 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -99,7 +99,7 @@ describe('handleStreaming turn start', () => { }); describe('handleStreaming', () => { - it('transitions to tearing_down with synthetic error when createChannel fails', async () => { + it('stops with a synthetic error when createChannel fails', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIii({ createChannel: async () => { @@ -226,7 +226,7 @@ describe('handleStreaming', () => { expect(rec.last_assistant).toEqual(finalMsg); }); - it('routes error assistant to tearing_down without persisting transcript', async () => { + it('stops on an error assistant 
without persisting transcript', async () => { const finalMsg = assistant({ stop_reason: 'error', error_message: 'auth failed' }); const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIiiWithDone(finalMsg); diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index f79f95dd..d71373b1 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -72,7 +72,7 @@ function steeringRec( } describe('handleSteering', () => { - it('abort: persists aborted assistant, emits turn_end, transitions to tearing_down', async () => { + it('abort: persists aborted assistant, emits turn_end, stops the session', async () => { const { iii } = makeIii({ abort: true }); const rec = steeringRec('s1'); const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); diff --git a/harness/tests/turn-orchestrator/tearing-down.test.ts b/harness/tests/turn-orchestrator/tearing-down.test.ts deleted file mode 100644 index cc3ed045..00000000 --- a/harness/tests/turn-orchestrator/tearing-down.test.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import type { AgentMessage } from '../../src/types/agent-message.js'; -import * as events from '../../src/turn-orchestrator/events.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; -import { handleTearingDown } from '../../src/turn-orchestrator/states/tearing-down.js'; - -type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; - -function fakeIii(): { iii: ISdk; calls: TriggerCall[] } { - const calls: TriggerCall[] = []; - const iii = { - trigger: async (req: { - function_id: string; - payload: T; - timeoutMs?: number; - }): Promise => { 
- calls.push({ - function_id: req.function_id, - payload: req.payload, - timeoutMs: req.timeoutMs, - }); - return null as R; - }, - } as unknown as ISdk; - return { iii, calls }; -} - -afterEach(() => { - vi.restoreAllMocks(); -}); - -describe('handleTearingDown', () => { - it('transitions to stopped and emits agent_end with session messages', async () => { - const rec: TurnStateRecord = { ...newRecord('s1'), state: 'tearing_down' }; - const messages: AgentMessage[] = [{ role: 'user', content: 'hi' }]; - const { iii } = fakeIii(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue(messages); - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleTearingDown(iii, rec); - - expect(rec.state).toBe('stopped'); - expect(emitSpy).toHaveBeenCalledWith(iii, 's1', { type: 'agent_end', messages }); - }); -}); diff --git a/harness/tests/turn-orchestrator/wake.test.ts b/harness/tests/turn-orchestrator/wake.test.ts index f3a5b753..dd732781 100644 --- a/harness/tests/turn-orchestrator/wake.test.ts +++ b/harness/tests/turn-orchestrator/wake.test.ts @@ -15,7 +15,7 @@ describe('shouldWakeStep', () => { }); it('rejects terminal state (stopped)', () => { - expect(shouldWakeStep('tearing_down', 'stopped')).toBe(false); + expect(shouldWakeStep('steering_check', 'stopped')).toBe(false); }); it('rejects function_awaiting_approval (orchestrator parks here)', () => { From a2bbd92135f9671f530053bd82035104bb77d59c Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 10:45:08 -0300 Subject: [PATCH 32/41] test(turn-orchestrator): remove tautological and duplicated tests Drop tests that assert properties set on a literal (type-shape only) or duplicate behavior covered elsewhere: - persistence-prepared.test.ts: asserted PreparedEntry fields it just set; PreparedEntry is exercised for real in functions/awaiting-approval tests - agent-trigger.test.ts: DispatchResult shape block read back the discriminant it constructed; real behavior is 
covered by dispatchWithHook - state.test.ts: removed negative-tautology asserts, the AwaitingApprovalEntry shape test, the handleAwaitingApproval empty-queue case (dup of awaiting-approval.test.ts), and the duplicated newRecord test (work-undefined check folded into the kept one) No behavioral coverage lost: 228 -> 217 passing, tsc clean. --- .../turn-orchestrator/agent-trigger.test.ts | 24 ------- .../persistence-prepared.test.ts | 21 ------ harness/tests/turn-orchestrator/state.test.ts | 70 ++----------------- 3 files changed, 4 insertions(+), 111 deletions(-) delete mode 100644 harness/tests/turn-orchestrator/persistence-prepared.test.ts diff --git a/harness/tests/turn-orchestrator/agent-trigger.test.ts b/harness/tests/turn-orchestrator/agent-trigger.test.ts index ed73f488..47e57782 100644 --- a/harness/tests/turn-orchestrator/agent-trigger.test.ts +++ b/harness/tests/turn-orchestrator/agent-trigger.test.ts @@ -1,6 +1,5 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import { IIIInvocationError, type ISdk } from '../../src/runtime/iii.js'; -import type { DispatchResult } from '../../src/turn-orchestrator/agent-trigger.js'; import { TOOL_NAME, agentTriggerTool, @@ -29,29 +28,6 @@ describe('agent_trigger tool schema', () => { }); }); -describe('DispatchResult shape', () => { - it('result variant carries a FunctionResult', () => { - const r: DispatchResult = { - kind: 'result', - result: { content: [], details: {}, terminate: false }, - }; - expect(r.kind).toBe('result'); - }); - - it('deny variant carries a denial FunctionResult', () => { - const r: DispatchResult = { - kind: 'deny', - result: { content: [], details: { status: 'denied' }, terminate: false }, - }; - expect(r.kind).toBe('deny'); - }); - - it('pending variant carries no result', () => { - const r: DispatchResult = { kind: 'pending' }; - expect(r.kind).toBe('pending'); - }); -}); - describe('isErrorResult', () => { it('treats details.error as error', () => { expect( diff --git 
a/harness/tests/turn-orchestrator/persistence-prepared.test.ts b/harness/tests/turn-orchestrator/persistence-prepared.test.ts deleted file mode 100644 index fb643130..00000000 --- a/harness/tests/turn-orchestrator/persistence-prepared.test.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import type { PreparedEntry } from '../../src/turn-orchestrator/state.js'; - -describe('PreparedEntry with pre_approved', () => { - it('accepts a pre_approved: true entry', () => { - const entry: PreparedEntry = { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - pre_approved: true, - }; - expect(entry.pre_approved).toBe(true); - }); - - it('defaults pre_approved to undefined', () => { - const entry: PreparedEntry = { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - }; - expect(entry.pre_approved).toBeUndefined(); - }); -}); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index d1179619..3079faa9 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -1,10 +1,5 @@ import { describe, expect, it } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import type { - AwaitingApprovalEntry, - TurnState, - TurnStateRecord, -} from '../../src/turn-orchestrator/state.js'; +import type { TurnStateRecord } from '../../src/turn-orchestrator/state.js'; import { AGENT_SCOPE, messagesKey, @@ -12,15 +7,14 @@ import { transitionTo, turnStateKey, } from '../../src/turn-orchestrator/state.js'; -import { handleAwaitingApproval } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; describe('TurnStateRecord', () => { - it('starts in provisioning', () => { + it('starts in provisioning with no work and the given max_turns', () => { const r = newRecord('s1', 32); expect(r.state).toBe('provisioning'); 
expect(r.session_id).toBe('s1'); expect(r.max_turns).toBe(32); - expect(r.state).not.toBe('stopped'); + expect(r.work).toBeUndefined(); }); it('transitionTo stopped marks terminal', () => { @@ -28,51 +22,11 @@ describe('TurnStateRecord', () => { transitionTo(r, 'stopped'); expect(r.state).toBe('stopped'); }); -}); - -describe('function_awaiting_approval state', () => { - it('accepts function_awaiting_approval as a TurnState value', () => { - const rec = newRecord('s1'); - transitionTo(rec, 'function_awaiting_approval' as TurnState); - expect(rec.state).toBe('function_awaiting_approval'); - }); - - it('is non-terminal', () => { - const rec = newRecord('s1'); - transitionTo(rec, 'function_awaiting_approval' as TurnState); - expect(rec.state).not.toBe('stopped'); - }); -}); -describe('awaiting_approval field', () => { - it('defaults to undefined on fresh records', () => { + it('awaiting_approval defaults to undefined on fresh records', () => { const rec: TurnStateRecord = newRecord('s1'); expect(rec.awaiting_approval).toBeUndefined(); }); - - it('accepts AwaitingApprovalEntry items', () => { - const rec: TurnStateRecord = newRecord('s1'); - const entry: AwaitingApprovalEntry = { - function_call_id: 'fc-1', - function_id: 'shell::run', - args: { command: 'ls' }, - }; - rec.awaiting_approval = [entry]; - expect(rec.awaiting_approval).toHaveLength(1); - expect(rec.awaiting_approval[0].function_call_id).toBe('fc-1'); - }); -}); - -describe('handleAwaitingApproval with empty queue', () => { - it('advances to function_execute when awaiting_approval is empty', async () => { - const rec = newRecord('s1'); - transitionTo(rec, 'function_awaiting_approval'); - rec.awaiting_approval = []; - - await handleAwaitingApproval({} as ISdk, rec); - - expect(rec.state).toBe('function_execute'); - }); }); describe('state keys', () => { @@ -82,19 +36,3 @@ describe('state keys', () => { expect(messagesKey('abc')).toBe('session/abc/messages'); }); }); - -describe('state record', () => { - 
it('newRecord starts in provisioning, non-terminal, no work', () => { - const r = newRecord('s1', 5); - expect(r.state).toBe('provisioning'); - expect(r.state).not.toBe('stopped'); - expect(r.state).not.toBe('failed'); - expect(r.work).toBeUndefined(); - expect(r.max_turns).toBe(5); - }); - - it('failed is terminal', () => { - const r: TurnStateRecord = { ...newRecord('s1'), state: 'failed', error: { kind: 'bug', message: 'x' } }; - expect(r.state).toBe('failed'); - }); -}); From 4666085242f1a57fc87d04469803900ce89b8710 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Mon, 25 May 2026 15:11:46 -0300 Subject: [PATCH 33/41] refactor(turn-orchestrator): cut reactive-trigger RPC overhead, remove dead abort path - After-function-call hook skips publish_collect when no durable subscriber is registered for the topic (subscriber-presence cache), removing a fixed ~500ms collect wait per executed tool-call result on the turn critical path. - Context compaction subscribes to a dedicated `agent::turn_end` stream (mirrored by the event producer) instead of the full `agent::events` firehose, so it wakes once per turn instead of on every agent event. - Session-create fanout rides a dedicated `session_index` scope marker written once at first persist; the trigger matches by scope alone, dropping the per-write `harness::session::is_create_event` condition RPC. - Remove the orphaned state-based abort subgraph (abort.ts, on-abort-signal.ts, the `turn_abort` scope, the steering_check abort route): it had no production producer (abort/cancel is handled at the ACP layer) and the `router::abort` entrypoint it depended on does not exist. Drops the dead `!router::abort` kernel-deny entry. 
--- harness/docs/architecture.md | 15 +- harness/docs/workers/approval-gate.md | 6 +- harness/docs/workers/harness.md | 5 +- harness/docs/workers/turn-orchestrator.md | 23 +- harness/src/context-compaction/register.ts | 9 +- harness/src/harness/fanout/sessions-poll.ts | 47 ++-- harness/src/turn-orchestrator/abort.ts | 41 ---- harness/src/turn-orchestrator/events.ts | 54 +++-- harness/src/turn-orchestrator/hook.ts | 14 +- .../src/turn-orchestrator/on-abort-signal.ts | 81 ------- harness/src/turn-orchestrator/on-approval.ts | 6 +- harness/src/turn-orchestrator/persistence.ts | 8 + harness/src/turn-orchestrator/register.ts | 2 - harness/src/turn-orchestrator/schemas.ts | 16 -- harness/src/turn-orchestrator/state.ts | 9 +- .../states/steering-check.ts | 53 +---- .../turn-orchestrator/subscriber-presence.ts | 79 +++++++ .../turn-end-subscription.test.ts | 25 ++ .../harness/fanout/sessions-poll.test.ts | 94 ++++++++ harness/tests/harness/policy.test.ts | 1 - .../integration/approval-resume.e2e.test.ts | 73 +----- .../integration/on-record-written.e2e.test.ts | 36 +++ harness/tests/turn-orchestrator/abort.test.ts | 81 ------- .../tests/turn-orchestrator/events.test.ts | 51 ++++ harness/tests/turn-orchestrator/hook.test.ts | 47 +++- .../turn-orchestrator/on-abort-signal.test.ts | 219 ------------------ .../tests/turn-orchestrator/steering.test.ts | 95 +------- .../subscriber-presence.test.ts | 69 ++++++ iii-permissions.yaml | 1 - 29 files changed, 525 insertions(+), 735 deletions(-) delete mode 100644 harness/src/turn-orchestrator/abort.ts delete mode 100644 harness/src/turn-orchestrator/on-abort-signal.ts create mode 100644 harness/src/turn-orchestrator/subscriber-presence.ts create mode 100644 harness/tests/context-compaction/turn-end-subscription.test.ts create mode 100644 harness/tests/harness/fanout/sessions-poll.test.ts delete mode 100644 harness/tests/turn-orchestrator/abort.test.ts create mode 100644 harness/tests/turn-orchestrator/events.test.ts delete mode 100644 
harness/tests/turn-orchestrator/on-abort-signal.test.ts create mode 100644 harness/tests/turn-orchestrator/subscriber-presence.test.ts diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 0d9f9c11..7c5a93b1 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -85,8 +85,8 @@ flowchart LR state -- "agent::events stream" --> harness state -- "agent::events stream" --> compact - state -- "state trigger (scope=agent, abort_signal)" --> turnOrch - state -- "state trigger (scope=agent, turn_state created)" --> harness + state -- "state trigger (scope=approvals)" --> turnOrch + state -- "state trigger (scope=session_index)" --> harness harness -- "ui::session::event::" --> client compact -- "session-tree::compact" --> session ``` @@ -99,8 +99,8 @@ function executed via `runTransition` and enqueued onto the `turn-step` FIFO queue by `wakeState` ([wake.ts](harness/src/turn-orchestrator/wake.ts)). `saveRecord` calls `shouldWakeStep` then `wakeState` when the persisted state transitions to a stepable state. Paused or terminal sessions are also woken by -per-call `turn::approval_resume` handlers (approval/abort) or -`turn::on_abort_signal` (abort signal state trigger), both via `wakeFromRecord`. +the approval-decision state trigger (`turn::on_approval` on scope `approvals`) +via `wakeFromRecord`. ```mermaid stateDiagram-v2 @@ -159,11 +159,6 @@ sequenceDiagram Fail-closed: policy unreachable (transport error or 5 s timeout) → `consultBefore` denies the call with a `gate_unavailable` envelope. -Abort: `router::abort` writes `session//abort_signal = true` (waking -the orchestrator through its own `agent`-scope state trigger) and, if the -turn is paused on approvals, triggers each registered -`turn::approval_resume` function with `{decision: 'aborted'}`. 
- ## Kernel deny list [iii-permissions.yaml](iii-permissions.yaml) at the workspace root is the @@ -177,7 +172,7 @@ Deny shorthands (`!function_id` in the YAML): `approval::resolve`, `state::update`, `state::delete`, `stream::set`, `iii::durable::publish`, `auth::set_token`, `auth::delete_token`, `oauth::anthropic::login`, `oauth::openai-codex::login`, `run::start`, -`router::stream_assistant`, `router::abort`. +`router::stream_assistant`. Bare-string allow rules: `state::get`, `state::list`, `models::list`, `models::get`, `models::supports`, `auth::get_token`, diff --git a/harness/docs/workers/approval-gate.md b/harness/docs/workers/approval-gate.md index 17579004..86ea07ae 100644 --- a/harness/docs/workers/approval-gate.md +++ b/harness/docs/workers/approval-gate.md @@ -27,10 +27,6 @@ and route it to the correct per-call resume function. 4. The resume handler writes `approvals//` (if not already set), invokes `turn::step`, and unregisters the resume fn. 5. `handleAwaitingApproval` reads all decisions, folds them into the prepared snapshot, and returns to `function_execute`. -Abort uses the same resume path: `performAbortSideEffects` triggers each -registered resume fn with `{ decision: 'aborted', reason: 'session_aborted' }` -instead of calling `approval::resolve`. - ## Registered functions - `approval::resolve` — Validates the payload and triggers the per-call resume function. Returns `{ ok: true }` or `{ ok: false, error: 'invalid_payload' | 'resume_failed' }`. @@ -46,7 +42,7 @@ All decision records use scope `approvals` (constant `STATE_SCOPE` in | Key shape | Value | Purpose | |---|---|---| -| `/` | `{ decision: 'allow' \| 'deny' \| 'aborted', reason: string \| null }` | Written by the resume handler when an operator resolves or abort fires. `handleAwaitingApproval` reads these keys while the turn is in `function_awaiting_approval`. 
| +| `/` | `{ decision: 'allow' \| 'deny' \| 'aborted', reason: string \| null }` | Written by the resume handler when an operator resolves. `handleAwaitingApproval` reads these keys while the turn is in `function_awaiting_approval`. | Pending calls are tracked on the turn record (`awaiting_approval[]`), not as separate rows under `approvals` until a decision lands. diff --git a/harness/docs/workers/harness.md b/harness/docs/workers/harness.md index 9245181a..58a186f0 100644 --- a/harness/docs/workers/harness.md +++ b/harness/docs/workers/harness.md @@ -24,13 +24,12 @@ that drive transitions; its fan-out trigger is a passive stream subscriber. - `harness::fs::read_inline` — Read a host file via shell::fs::read, drain its channel, and return a `{content:[{text}], details:{size, truncated, bytes_read}}` envelope (max 256 KiB inline by default). - `policy::check_permissions` — Evaluate a function call against the current `iii-permissions.yaml`. Returns `{ decision: "allow" | "deny" | "needs_approval", rule_id?, matched_constraint? }`. - `harness::fanout::agent_event_handler` — Internal: `agent::events` fanout handler. -- `harness::session::is_create_event` — Internal condition function bound to the sessions state trigger; matches `state:created` writes to `session//turn_state`. -- `harness::fanout::session_created` — Internal handler invoked by the sessions state trigger; fans the new session id out to every all-sessions subscriber via `ui::sessions::changed::`. +- `harness::fanout::session_created` — Internal handler invoked by the sessions state trigger; fans the new session id out to every all-sessions subscriber via `ui::sessions::changed::`. Gates in-handler on the `state:created` marker. ## Triggers - **Stream subscriber** on `agent::events` → `harness::fanout::agent_event_handler`. Registered by [src/harness/fanout/agent-events.ts](harness/src/harness/fanout/agent-events.ts). 
-- **State trigger** on `scope: agent` gated by `condition_function_id: harness::session::is_create_event` → `harness::fanout::session_created`. Lives in [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts). This replaced the previous 1 Hz `state::list` diff loop: new sessions now reach all-sessions subscribers reactively, on the same `turn_state` write that creates them. +- **State trigger** on `scope: session_index` (no `condition_function_id`) → `harness::fanout::session_created`. Lives in [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts). The turn-orchestrator writes a one-time `session_index/` marker when a session's `turn_state` is first persisted, so the trigger matches in-engine by scope alone — no per-write condition predicate. (This itself replaced an earlier 1 Hz `state::list` diff loop.) The fanout handler forwards every `agent::events` frame to the per-browser endpoint `ui::session::event::` for each browser whose diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index 5df3212e..4c3a80be 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -39,19 +39,15 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. - `turn::assistant_streaming` — FSM step: stream the turn over a provider channel; on completion emit `message_complete`, persist the assistant message (dup-guarded), route to `function_execute` / `steering_check` / `tearing_down`. - `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call, checkpoint per-call via `writeRecord`, park to `function_awaiting_approval` on a `pending` gate reply, finalize results into messages + emit `turn_end`, route to `steering_check` / `tearing_down`. 
- `turn::function_awaiting_approval` — FSM step: read decisions for `awaiting_approval[]`; fold them into `rec.work.batch` (`allow` → `pre_approved`, `deny`/`aborted` → `blocked`); clear `awaiting_approval`, advance to `function_execute`. -- `turn::steering_check` — FSM step: check abort signal, drain `steering`/`followup` inboxes, enforce `max_turns` cap (emits synthetic `max_turns` message + `turn_end` → `tearing_down`), route to `assistant_streaming` / `tearing_down`. +- `turn::steering_check` — FSM step: drain `steering`/`followup` inboxes, enforce `max_turns` cap (emits synthetic `max_turns` message + `turn_end` → `tearing_down`), route to `assistant_streaming` / `tearing_down`. - `turn::tearing_down` — FSM step: emit `agent_end`, advance to `stopped`. - `turn::get_state` — One-shot reader returning a lean `TurnStateView` (from `schemas.ts:toView`) for a session. UI clients call this on reload to recover in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. Returns `null` for unknown sessions. -- `turn::is_abort_signal_set` — Condition function bound to the agent-scope state trigger; matches `state:created`/`state:updated` writes that set `session//abort_signal` to `true`. -- `turn::on_abort_signal` — State trigger adapter: enqueues `turn::{current_state}` (via `wakeFromRecord`) when the abort signal is set so the FSM observes the abort on the next safe boundary. ## Triggers -- **State trigger** on `scope: agent` gated by `condition_function_id: turn::is_abort_signal_set` → `turn::on_abort_signal`. Registered in [on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts). Enqueues the handler for the session's current persisted state the moment `session//abort_signal` is set to `true`, so the FSM advances to `steering_check` without waiting for the current step to time out. 
- The record-written wake is now inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state calls `wakeState` directly. Similarly, `turn_state_changed` events are emitted inline from `persistRecord` via `emitTurnStateChanged` ([turn-state-write.ts](harness/src/turn-orchestrator/turn-state-write.ts)) — there is no separate `on-turn-state-changed` state trigger. -Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` or abort triggers each per-call `turn::approval_resume` function (see [approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) and [workers/approval-gate.md](workers/approval-gate.md)). `recoverPendingApprovals` re-registers these resume functions at worker startup for sessions that were parked before a restart. +Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` triggers each per-call `turn::approval_resume` function (see [approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) and [workers/approval-gate.md](workers/approval-gate.md)). `recoverPendingApprovals` re-registers these resume functions at worker startup for sessions that were parked before a restart. ## Turn FSM @@ -65,7 +61,7 @@ The 8 states from [state.ts](harness/src/turn-orchestrator/state.ts): | `assistant_streaming` | [states/assistant-streaming.ts](harness/src/turn-orchestrator/states/assistant-streaming.ts) | Increment `turn_count`; create channel; trigger provider stream; relay `message_update` deltas; on completion call `finalizeAssistant` which emits `message_complete`, persists the assistant message (dup-guarded), then routes → `function_execute` (has calls) / `steering_check` (no calls) / `tearing_down` (error/aborted). 
| | `function_execute` | [states/function-execute.ts](harness/src/turn-orchestrator/states/function-execute.ts) | Build batch from `rec.last_assistant` (or reuse existing `rec.work`); for each call: emit `function_execution_start`, skip if already executed, dispatch via `dispatchWithHook`; if `pending` → append to `awaiting_approval`, register `turn::approval_resume`, → `function_awaiting_approval`; otherwise commit result (silent `writeRecord` checkpoint) + emit `function_execution_end`; after batch: fold results into messages + emit `turn_end` → `steering_check` / `tearing_down`. | | `function_awaiting_approval` | [states/function-awaiting-approval.ts](harness/src/turn-orchestrator/states/function-awaiting-approval.ts) | Read decision for each `awaiting_approval[]` entry; if any is still missing → return (park); when all present, fold into `rec.work.batch` (`allow` → `pre_approved: true`; `deny`/`aborted` → `blocked` with denial result); clear `awaiting_approval` → `function_execute`. | -| `steering_check` | [states/steering-check.ts](harness/src/turn-orchestrator/states/steering-check.ts) | Priority route: abort → `tearing_down`; steering msg → `assistant_streaming` (unless `max_turns` reached); followup msg → `assistant_streaming` (unless `max_turns` reached); function results present → `assistant_streaming` (unless `max_turns` reached); else emit `turn_end` once → `tearing_down`. `max_turns` path emits a synthetic `message_complete` + `turn_end`. | +| `steering_check` | [states/steering-check.ts](harness/src/turn-orchestrator/states/steering-check.ts) | Priority route: steering msg → `assistant_streaming` (unless `max_turns` reached); followup msg → `assistant_streaming` (unless `max_turns` reached); function results present → `assistant_streaming` (unless `max_turns` reached); else emit `turn_end` once → `stopped`. `max_turns` path emits a synthetic `message_complete` + `turn_end`. 
| | `tearing_down` | [states/tearing-down.ts](harness/src/turn-orchestrator/states/tearing-down.ts) | Emit `agent_end` → `stopped`. | | `stopped` | (no handler) | Terminal. Idempotent. | | `failed` | (set by `runTransition` on unexpected throw) | Terminal. Carries `error: {kind, message}` on the record. Emits `message_complete{stop_reason:'error'}` + `agent_end` so the UI sees the reason. A handler may throw `TransientError` to use the queue's retry/DLQ instead. | @@ -91,7 +87,6 @@ All keys live under iii state scope `agent`. Key helpers are defined in | `session//run_request` | The `run::start` payload enriched by `provisioning` to include `function_schemas: [agentTriggerTool()]` and the assembled `system_prompt`. Typed as `RunRequest` ([run-request.ts](harness/src/turn-orchestrator/run-request.ts)). | | `session//session_tree_mirror_len` | High-water mark so the session-tree messages mirror is incremental. The session-tree mirror is still inline in `persistence.saveMessages` — its relocation to a reactive subscriber is tracked as a follow-up, not done. | | `session//event_counter` | Monotonic counter for `agent::events` sequence numbers. | -| `session//abort_signal` | Set by `router::abort` via `performAbortSideEffects` to interrupt a streaming turn. | Keys that no longer exist: `function_prepared`, `function_executed`, `function_schemas` (standalone), `tool_prepared`, `tool_executed`, @@ -120,9 +115,9 @@ Unchanged from prior design: `dispatchWithHook` → `consultBefore` → `policy::check_permissions` (5 s timeout, fail-closed). A `needs_approval` reply returns `{ kind: 'pending' }` from `dispatchWithHook`, which parks the session to `function_awaiting_approval` and registers a per-call -`turn::approval_resume` function. `approval::resolve` (or abort via -`performAbortSideEffects`) triggers that resume function, which persists the -decision to scope `approvals` and calls `wakeFromRecord` to re-enqueue the +`turn::approval_resume` function. 
`approval::resolve` triggers that resume +function, which persists the decision to scope `approvals` and calls +`wakeFromRecord` to re-enqueue the session's current state handler. ## Configuration @@ -146,18 +141,16 @@ From | File | Purpose | |---|---| | [src/turn-orchestrator/main.ts](harness/src/turn-orchestrator/main.ts) | Binary entry point. | -| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions: `run::start`, per-state `turn::{state}` handlers, abort-signal trigger, approval-resume recovery, `turn::get_state`. | +| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions: `run::start`, per-state `turn::{state}` handlers, approval-resume recovery, `turn::get_state`. | | [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state` to `provisioning` via `saveRecord` (which wakes the FSM). | | [src/turn-orchestrator/run-transition.ts](harness/src/turn-orchestrator/run-transition.ts) | Shared FSM transition runner: load → null-check → stale-skip → handle → save. Routes to `failed` on unexpected throw; re-throws `TransientError` for queue retry. | | [src/turn-orchestrator/wake.ts](harness/src/turn-orchestrator/wake.ts) | `wakeState` / `wakeFromRecord` — enqueue `turn::{state}` onto the `turn-step` FIFO queue; `shouldWakeStep` gates non-stepable states. | -| [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `AbortSignalWriteEventSchema`. | +| [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `ApprovalDecisionEventSchema`. 
| | [src/turn-orchestrator/run-request.ts](harness/src/turn-orchestrator/run-request.ts) | `RunRequest` type and `parseRunRequest` — the typed, parsed form of `session//run_request` (includes `function_schemas`). | | [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader returning `TurnStateView \| null`. | | [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | Dispatcher chokepoint: `dispatchWithHook` (consult + trigger), `triggerFunctionCall` (trigger/decode/error), `agentTriggerTool` (schema), `unwrapAgentTrigger`. | | [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — `policy::check_permissions` (5 s, fail-closed) → `allow` / `pending` / `deny`. `publishAfter` — `hook-fanout::publish_collect` for after-hook fanout. | | [src/turn-orchestrator/approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) | Per-call `turn::approval_resume` registration and handler (persist decision + `wakeFromRecord`); `recoverPendingApprovals` re-registers at startup. | -| [src/turn-orchestrator/abort.ts](harness/src/turn-orchestrator/abort.ts) | `performAbortSideEffects` — writes `session//abort_signal = true` and triggers each `turn::approval_resume` with `{decision: 'aborted'}` for parked sessions. | -| [src/turn-orchestrator/on-abort-signal.ts](harness/src/turn-orchestrator/on-abort-signal.ts) | State trigger adapter — `turn::is_abort_signal_set` (condition) + `turn::on_abort_signal` (handler, calls `wakeFromRecord`). | | [src/turn-orchestrator/turn-state-write.ts](harness/src/turn-orchestrator/turn-state-write.ts) | `emitTurnStateChanged` — inline UI notification emitting `turn_state_changed` with lean `TurnStateView`. Called from `persistRecord`. | | [src/turn-orchestrator/states/provisioning.ts](harness/src/turn-orchestrator/states/provisioning.ts) | `turn::provisioning` handler. 
| | [src/turn-orchestrator/states/assistant-streaming.ts](harness/src/turn-orchestrator/states/assistant-streaming.ts) | `turn::assistant_streaming` handler. | diff --git a/harness/src/context-compaction/register.ts b/harness/src/context-compaction/register.ts index b103a00e..30e61626 100644 --- a/harness/src/context-compaction/register.ts +++ b/harness/src/context-compaction/register.ts @@ -11,7 +11,10 @@ import { } from './model-resolver.js'; import { prune } from './prune.js'; -const AGENT_EVENTS_STREAM = 'agent::events'; +// Compaction only acts on turn_end, so it subscribes to the dedicated +// turn_end stream (mirrored by the producer) rather than the full +// agent::events firehose — one wake per turn instead of per event. +const TURN_END_STREAM = 'agent::turn_end'; // Sized so preserveRecentBudget clamps to its 2k minimum when the real // model is unknown — compaction is best-effort, not fatal. @@ -61,7 +64,7 @@ export async function register(iii: ISdk): Promise { }, { description: - 'Internal: subscribes to agent::events; triggers async compaction on TurnEnd when running tokens exceed usable(model).', + 'Internal: subscribes to agent::turn_end; triggers async compaction on TurnEnd when running tokens exceed usable(model).', }, ); @@ -183,6 +186,6 @@ export async function register(iii: ISdk): Promise { iii.registerTrigger({ type: 'stream', function_id: 'context-compaction::on_agent_event', - config: { stream_name: AGENT_EVENTS_STREAM }, + config: { stream_name: TURN_END_STREAM }, }); } diff --git a/harness/src/harness/fanout/sessions-poll.ts b/harness/src/harness/fanout/sessions-poll.ts index 94218d10..b5198ed6 100644 --- a/harness/src/harness/fanout/sessions-poll.ts +++ b/harness/src/harness/fanout/sessions-poll.ts @@ -1,38 +1,29 @@ import type { ISdk, Trigger } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; +import { SESSION_INDEX_SCOPE } from '../../turn-orchestrator/state.js'; import type { FanoutState } from 
'../ui-subscribe.js'; export const SESSION_CREATED_HANDLER_FN_ID = 'harness::fanout::session_created'; -export const SESSION_CREATE_CONDITION_FN_ID = 'harness::session::is_create_event'; -const SESSION_RECORD_KEY_RE = /^session\/[^/]+\/turn_state$/; -function extractSessionId(key: string): string | null { - const m = SESSION_RECORD_KEY_RE.exec(key); - if (!m) return null; - return key.slice('session/'.length, key.length - '/turn_state'.length); +/** + * A new session is signalled by a one-time marker write on the dedicated + * `session_index` scope (key = session id), made by the turn-orchestrator when + * `turn_state` is first persisted. The state trigger matches that scope in + * engine — no `condition_function_id` RPC per agent-scope write — so this + * handler is the sole gate: it acts only on the `state:created` marker. + */ +function sessionCreatedId(event: unknown): string | null { + const obj = (event ?? {}) as Record; + if (obj.event_type !== 'state:created') return null; + const key = typeof obj.key === 'string' ? obj.key : ''; + return key.length > 0 ? key : null; } export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { - const conditionRef = iii.registerFunction( - SESSION_CREATE_CONDITION_FN_ID, - async (event: unknown) => { - const obj = (event ?? {}) as Record; - const event_type = typeof obj.event_type === 'string' ? obj.event_type : null; - const key = typeof obj.key === 'string' ? obj.key : null; - return event_type === 'state:created' && !!key && SESSION_RECORD_KEY_RE.test(key); - }, - { - description: - 'Condition: state event is a new session record (event_type=state:created, scope=agent, key=session//turn_state).', - }, - ); - const handlerRef = iii.registerFunction( SESSION_CREATED_HANDLER_FN_ID, async (event: unknown) => { - const obj = (event ?? {}) as Record; - const key = typeof obj.key === 'string' ? 
obj.key : ''; - const session_id = extractSessionId(key); + const session_id = sessionCreatedId(event); if (!session_id) return null; const payload = { added: [session_id], removed: [] as string[] }; for (const browser_id of state.allSubscribers()) { @@ -48,7 +39,7 @@ export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { }, { description: - 'Internal: fans out a single newly-created session id to ui::sessions::changed::.', + 'Internal: fans out a newly-created session id to ui::sessions::changed::.', }, ); @@ -57,10 +48,7 @@ export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { trigger = iii.registerTrigger({ type: 'state', function_id: SESSION_CREATED_HANDLER_FN_ID, - config: { - scope: 'agent', - condition_function_id: SESSION_CREATE_CONDITION_FN_ID, - }, + config: { scope: SESSION_INDEX_SCOPE }, }); } catch (err) { logger.warn('sessions state trigger registration failed', { err: String(err) }); @@ -73,8 +61,5 @@ export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { try { handlerRef.unregister(); } catch {} - try { - conditionRef.unregister(); - } catch {} }; } diff --git a/harness/src/turn-orchestrator/abort.ts b/harness/src/turn-orchestrator/abort.ts deleted file mode 100644 index b32d6dcb..00000000 --- a/harness/src/turn-orchestrator/abort.ts +++ /dev/null @@ -1,41 +0,0 @@ -/** - * `router::abort` side-effects. The abort path writes the per-session abort - * signal and, when a turn is paused on approvals, writes an aborted decision to - * the `approvals` scope per parked call — the reactive approval trigger - * (turn::on_approval) then wakes the session. 
- */ - -import { STATE_SCOPE, pendingKey } from '../approval-gate/schemas.js'; -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import * as persistence from './persistence.js'; -import { AGENT_SCOPE, abortSignalKey } from './state.js'; - -export async function performAbortSideEffects(iii: ISdk, session_id: string): Promise { - await trigger(iii, 'state::set', { - scope: AGENT_SCOPE, - key: abortSignalKey(session_id), - value: true, - }); - - const rec = await persistence.loadRecord(iii, session_id); - if (!rec || rec.state !== 'function_awaiting_approval' || !rec.awaiting_approval?.length) { - return; - } - - for (const entry of rec.awaiting_approval) { - await trigger(iii, 'state::set', { - scope: STATE_SCOPE, - key: pendingKey(session_id, entry.function_call_id), - value: { decision: 'aborted', reason: 'session_aborted' }, - }); - } -} - -async function trigger(iii: ISdk, function_id: string, payload: unknown): Promise { - try { - await iii.trigger({ function_id, payload }); - } catch (err) { - logger.warn(`abort side-effect failed: ${function_id}`, { err: String(err) }); - } -} diff --git a/harness/src/turn-orchestrator/events.ts b/harness/src/turn-orchestrator/events.ts index 9f50b009..8ea5b95e 100644 --- a/harness/src/turn-orchestrator/events.ts +++ b/harness/src/turn-orchestrator/events.ts @@ -1,6 +1,7 @@ /** * Emit AgentEvent frames on `agent::events`, one per call with a per-session - * monotonic sequence number. + * monotonic sequence number. `turn_end` frames are additionally mirrored onto + * the dedicated `agent::turn_end` stream (see TURN_END_STREAM). */ import type { ISdk } from '../runtime/iii.js'; @@ -9,11 +10,43 @@ import type { AgentEvent } from '../types/agent-event.js'; import { AGENT_SCOPE, eventCounterKey } from './state.js'; export const EVENTS_STREAM = 'agent::events'; +/** + * Dedicated stream carrying only `turn_end` frames. 
Compaction subscribes here + * instead of the full `agent::events` firehose so it wakes once per turn rather + * than on every event (token updates, function lifecycle, …). + */ +export const TURN_END_STREAM = 'agent::turn_end'; function formatItemId(session_id: string, seq: number): string { return `${session_id}-${seq.toString().padStart(8, '0')}`; } +function isTurnEnd(event: AgentEvent): boolean { + return (event as { type?: string }).type === 'turn_end'; +} + +async function setStream( + iii: ISdk, + stream_name: string, + session_id: string, + item_id: string, + event: AgentEvent, +): Promise { + try { + await iii.trigger({ + function_id: 'stream::set', + payload: { stream_name, group_id: session_id, item_id, data: event }, + }); + } catch (err) { + logger.warn('stream::set failed', { + stream_name, + session_id, + item_id, + err: String(err), + }); + } +} + async function nextSeq(iii: ISdk, session_id: string): Promise { try { const resp = await iii.trigger({ @@ -37,21 +70,8 @@ async function nextSeq(iii: ISdk, session_id: string): Promise { export async function emit(iii: ISdk, session_id: string, event: AgentEvent): Promise { const seq = await nextSeq(iii, session_id); const item_id = formatItemId(session_id, seq); - try { - await iii.trigger({ - function_id: 'stream::set', - payload: { - stream_name: EVENTS_STREAM, - group_id: session_id, - item_id, - data: event, - }, - }); - } catch (err) { - logger.warn('stream::set agent::events failed', { - session_id, - item_id, - err: String(err), - }); + await setStream(iii, EVENTS_STREAM, session_id, item_id, event); + if (isTurnEnd(event)) { + await setStream(iii, TURN_END_STREAM, session_id, item_id, event); } } diff --git a/harness/src/turn-orchestrator/hook.ts b/harness/src/turn-orchestrator/hook.ts index a442a9b2..d8edb7e8 100644 --- a/harness/src/turn-orchestrator/hook.ts +++ b/harness/src/turn-orchestrator/hook.ts @@ -3,8 +3,11 @@ * the reply to allow / deny / pending. 
Fail-closed on transport errors: * unreachable policy → deny with `gate_unavailable`. * - * `publishAfter` still goes through hook-fanout because the after-hook is a - * pluggable merge point with multiple potential consumers. + * `publishAfter` goes through hook-fanout only when a durable subscriber is + * registered for the after-hook topic. With no subscriber the publish/collect + * would just block until its deadline and return an empty merge the caller + * discards, so it is skipped. The after-hook stays a pluggable merge point for + * any registered consumer (see subscriber-presence.ts). */ import { permissionsDenyEnvelope } from '../approval-gate/denial.js'; @@ -17,6 +20,7 @@ import type { ISdk } from '../runtime/iii.js'; export type { DenialEnvelope } from '../approval-gate/schemas.js'; import { logger } from '../runtime/otel.js'; import type { FunctionCall } from '../types/function.js'; +import { hasDurableSubscriber } from './subscriber-presence.js'; export const TOPIC_AFTER = 'agent::after_function_call'; @@ -85,6 +89,12 @@ export async function publishAfter( function_call: FunctionCall, result: unknown, ): Promise { + // No subscriber on the after-hook topic → publish_collect would just block + // until its deadline and return an empty merge that the caller discards. + // Skip the dead wait; callers treat `undefined` as "keep the original result". + if (!(await hasDurableSubscriber(iii, TOPIC_AFTER))) { + return undefined; + } try { const resp = await iii.trigger({ function_id: 'hook-fanout::publish_collect', diff --git a/harness/src/turn-orchestrator/on-abort-signal.ts b/harness/src/turn-orchestrator/on-abort-signal.ts deleted file mode 100644 index a5d2788b..00000000 --- a/harness/src/turn-orchestrator/on-abort-signal.ts +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Reactive abort wake. 
A `state` trigger on `scope: 'agent'` filtered by - * the abort_signal key shape (`session//abort_signal`) and a - * `new_value === true` write fires this adapter, which publishes - * `turn::{state}` on the durable FIFO queue so the orchestrator's FSM advances to - * `steering_check` and observes the abort flag promptly. - * - * Without this wake, a session mid-streaming would only check - * `abort_signal` after the current step completes naturally. The reactive - * trigger doesn't preempt the running step (durable subscriber publishes - * queue), but it guarantees the orchestrator runs another FSM step as - * soon as the current one finishes — which is the earliest moment we - * can safely react. - * - * **Incoming**: agent-scope `state:created` / `state:updated` on - * `session//abort_signal` with `new_value === true` (from `state::set` via - * `performAbortSideEffects` / `router::abort`). Same envelope the engine passes - * to state trigger adapters. - * - * **Outgoing**: `wakeFromRecord` enqueues `{ session_id }` on the `turn-step` queue. - */ - -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import { AbortSignalWriteEventSchema, type ParsedAbortSignalWrite } from './schemas.js'; -import { wakeFromRecord } from './wake.js'; - -export function parseAbortSignalWrite(event: unknown): ParsedAbortSignalWrite | null { - const result = AbortSignalWriteEventSchema.safeParse(event); - return result.success ? 
result.data : null; -} - -export function isAbortSignalWrite(event: unknown): boolean { - return parseAbortSignalWrite(event) !== null; -} - -export async function execute(iii: ISdk, write: ParsedAbortSignalWrite): Promise { - try { - await wakeFromRecord(iii, write.session_id); - } catch (err) { - logger.warn('turn::on_abort_signal: wake failed', { - session_id: write.session_id, - err: String(err), - }); - } -} - -export async function handleAbortSignalWrite(iii: ISdk, event: unknown): Promise { - const write = parseAbortSignalWrite(event); - if (!write) return; - await execute(iii, write); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::is_abort_signal_set', - async (event: unknown) => isAbortSignalWrite(event), - { - description: - 'Condition: state event sets session//abort_signal = true (state:created or state:updated).', - }, - ); - - iii.registerFunction( - 'turn::on_abort_signal', - async (event: unknown) => handleAbortSignalWrite(iii, event), - { - description: - 'State trigger adapter on scope=agent for abort_signal writes; enqueues turn::{state} so the orchestrator picks up the abort promptly.', - }, - ); - - iii.registerTrigger({ - type: 'state', - function_id: 'turn::on_abort_signal', - config: { - scope: 'agent', - condition_function_id: 'turn::is_abort_signal_set', - }, - }); -} diff --git a/harness/src/turn-orchestrator/on-approval.ts b/harness/src/turn-orchestrator/on-approval.ts index f6a84a2e..b67ce1d8 100644 --- a/harness/src/turn-orchestrator/on-approval.ts +++ b/harness/src/turn-orchestrator/on-approval.ts @@ -2,10 +2,10 @@ * Reactive approval wake. A `state` trigger on `scope: 'approvals'` filtered by * the `/` decision key fires this adapter, which enqueues * `turn::{state}` on the durable FIFO queue so the parked session re-reads its - * decisions in `function_awaiting_approval`. Mirrors `on-abort-signal.ts`. + * decisions in `function_awaiting_approval`. 
* - * The decision write is produced by `approval::resolve` (approval-gate) or by - * `abort` — both `state::set` `approvals// = { decision, reason }`. + * The decision write is produced by `approval::resolve` (approval-gate) — + * `state::set` `approvals// = { decision, reason }`. */ import type { ISdk } from '../runtime/iii.js'; diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts index 92ce5eff..5e88c6ad 100644 --- a/harness/src/turn-orchestrator/persistence.ts +++ b/harness/src/turn-orchestrator/persistence.ts @@ -10,6 +10,7 @@ import { parseFlatMessages } from './flat-messages.js'; import { type RunRequest, parseRunRequest } from './run-request.js'; import { AGENT_SCOPE, + SESSION_INDEX_SCOPE, type TurnStateRecord, messagesKey, parseTurnStateRecord, @@ -53,6 +54,13 @@ async function persistRecord( const prev = previous !== undefined ? previous : parseTurnStateRecord(result?.old_value ?? null); + if (prev == null) { + // First persist for this session → mark it in the session index. The + // create-fanout trigger watches that dedicated scope, so it matches in + // engine by scope alone — no per-write condition predicate. 
+ await stateSet(iii, SESSION_INDEX_SCOPE, rec.session_id, { created_at_ms: Date.now() }); + } + await emitTurnStateChanged( iii, rec.session_id, diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 09a4882a..808464f4 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -3,7 +3,6 @@ import type { ISdk } from '../runtime/iii.js'; import * as bootstrap from './bootstrap.js'; import { loadOrchestratorConfig } from './config.js'; import { register as registerGetState } from './get-state.js'; -import { register as registerOnAbortSignal } from './on-abort-signal.js'; import { register as registerRunStart } from './run-start.js'; import { recoverParkedApprovals, register as registerOnApproval } from './on-approval.js'; import { @@ -24,7 +23,6 @@ export async function register(iii: ISdk, ctx: { configPath: string }): Promise< registerFunctionAwaitingApproval(iii); registerSteeringCheck(iii); registerGetState(iii); - registerOnAbortSignal(iii); registerOnApproval(iii); await recoverParkedApprovals(iii); diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index 4743dae4..87bd7b96 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -63,22 +63,6 @@ export function toView(rec: TurnStateRecord): TurnStateView { export type GetStateResult = TurnStateView | null; -// --- turn::is_abort_signal_set / turn::on_abort_signal (agent-scope state event) --- -const AgentAbortSignalWriteEventSchema = z.object({ - type: z.literal('state').optional(), - scope: z.literal('agent').optional(), - event_type: z.enum(['state:created', 'state:updated']), - key: z.string().regex(/^session\/[^/]+\/abort_signal$/), - new_value: z.literal(true), - old_value: z.union([z.literal(true), z.literal(false), z.null()]).optional(), -}); - -export const AbortSignalWriteEventSchema = 
AgentAbortSignalWriteEventSchema.transform((data) => { - const session_id = data.key.slice('session/'.length, -'/abort_signal'.length); - return { session_id }; -}); -export type ParsedAbortSignalWrite = z.infer; - // --- turn::is_approval_decision / turn::on_approval (approvals-scope state event) --- const ApprovalDecisionWriteEventSchema = z.object({ type: z.literal('state').optional(), diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 22744a9d..cfca4f86 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -10,6 +10,14 @@ /** iii-state scope for turn FSM records, flat messages, run_request, etc. */ export const AGENT_SCOPE = 'agent' as const; +/** + * Dedicated iii-state scope indexing created sessions, keyed by `session_id`. + * A one-time marker is written here when a session's `turn_state` is first + * persisted, so the session-create fanout trigger matches in-engine by `scope` + * alone — no `condition_function_id` RPC per agent-scope `turn_state` write. 
+ */ +export const SESSION_INDEX_SCOPE = 'session_index' as const; + import { z } from 'zod'; import type { AssistantMessage, FunctionResultMessage } from '../types/agent-message.js'; import type { FunctionCall, FunctionResult } from '../types/function.js'; @@ -117,4 +125,3 @@ export const turnStateKey = (sid: string) => `session/${sid}/turn_state`; export const runRequestKey = (sid: string) => `session/${sid}/run_request`; export const lastSessionTreeLenKey = (sid: string) => `session/${sid}/session_tree_mirror_len`; export const eventCounterKey = (sid: string) => `session/${sid}/event_counter`; -export const abortSignalKey = (sid: string) => `session/${sid}/abort_signal`; diff --git a/harness/src/turn-orchestrator/states/steering-check.ts b/harness/src/turn-orchestrator/states/steering-check.ts index 32545b7a..be53f5cd 100644 --- a/harness/src/turn-orchestrator/states/steering-check.ts +++ b/harness/src/turn-orchestrator/states/steering-check.ts @@ -1,5 +1,5 @@ /** - * `turn::steering_check`. Drains steering / followup inboxes and the abort flag, then routes onward. + * `turn::steering_check`. Drains steering / followup inboxes, then routes onward. * * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
@@ -11,43 +11,24 @@ import { emit } from '../events.js'; import { finishSession } from '../finish.js'; import * as persistence from '../persistence.js'; import { runTransition } from '../run-transition.js'; -import { AGENT_SCOPE, type TurnStateRecord, abortSignalKey, transitionTo } from '../state.js'; +import { type TurnStateRecord, transitionTo } from '../state.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; import { syntheticAssistant } from '../synthetic-assistant.js'; -export type SteeringRoute = - | 'abort' - | 'steering' - | 'followup' - | 'continue_after_function' - | 'end_turn'; +export type SteeringRoute = 'steering' | 'followup' | 'continue_after_function' | 'end_turn'; /** Pure priority router — no I/O. */ export function route( - abort: boolean, has_steering: boolean, has_followup: boolean, has_function_results: boolean, ): SteeringRoute { - if (abort) return 'abort'; if (has_steering) return 'steering'; if (has_followup) return 'followup'; if (has_function_results) return 'continue_after_function'; return 'end_turn'; } -async function abortSet(iii: ISdk, session_id: string): Promise { - try { - const v = await iii.trigger({ - function_id: 'state::get', - payload: { scope: AGENT_SCOPE, key: abortSignalKey(session_id) }, - }); - return v === true; - } catch { - return false; - } -} - async function drainQueue(iii: ISdk, name: string, session_id: string): Promise { try { const resp = await iii.trigger({ @@ -92,39 +73,15 @@ async function emitTurnEndOnce(iii: ISdk, rec: TurnStateRecord): Promise { } export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { - const abort = await abortSet(iii, rec.session_id); - const steering = abort ? [] : await drainQueue(iii, 'steering', rec.session_id); - const followup = - abort || steering.length > 0 ? 
[] : await drainQueue(iii, 'followup', rec.session_id); + const steering = await drainQueue(iii, 'steering', rec.session_id); + const followup = steering.length > 0 ? [] : await drainQueue(iii, 'followup', rec.session_id); const decision = route( - abort, steering.length > 0, followup.length > 0, rec.function_results.length > 0, ); switch (decision) { - case 'abort': { - const aborted = syntheticAssistant({ - stop_reason: 'aborted', - provider: 'harness', - model: 'harness', - }); - const messages = await persistence.loadMessages(iii, rec.session_id); - messages.push(aborted); - await persistence.saveMessages(iii, rec.session_id, messages); - rec.last_assistant = aborted; - if (!rec.turn_end_emitted) { - await emit(iii, rec.session_id, { - type: 'turn_end', - message: aborted, - function_results: [], - }); - rec.turn_end_emitted = true; - } - await finishSession(iii, rec); - break; - } case 'steering': case 'followup': { if (maxTurnsReached(rec)) { diff --git a/harness/src/turn-orchestrator/subscriber-presence.ts b/harness/src/turn-orchestrator/subscriber-presence.ts new file mode 100644 index 00000000..cbb014ac --- /dev/null +++ b/harness/src/turn-orchestrator/subscriber-presence.ts @@ -0,0 +1,79 @@ +/** + * Subscriber-presence cache for durable pub/sub topics. + * + * The after-function-call hook publishes to a durable topic and then blocks + * waiting for subscriber replies. When no worker subscribes to that topic the + * wait is pure dead time on the turn's critical path (the collector only exits + * on its deadline). This module answers "does anyone subscribe to ?" by + * querying `engine::triggers::list` for a `durable:subscriber` trigger bound to + * the topic (the registration shape consumers use — see iii queue worker). + * + * The answer is cached with a short TTL so the engine isn't queried per call. 
+ * Hook subscribers are a deploy-time concern (workers register at startup), so + * a coarse TTL is fine; a newly-registered subscriber is picked up within one + * TTL window, which is consistent with the hook's existing "late arrivals may + * be dropped" contract. TTL is preferred over an `engine::functions-available` + * invalidation trigger to avoid adding another always-on trigger. + */ + +import { TriggerInfo } from 'iii-sdk'; +import type { ISdk } from '../runtime/iii.js'; +import { logger } from '../runtime/otel.js'; + +/** How long a subscriber-presence answer stays fresh before re-querying. */ +export const SUBSCRIBER_CACHE_TTL_MS = 30_000; + +const DURABLE_SUBSCRIBER_TYPE = 'durable:subscriber'; + +type CacheEntry = { has: boolean; at: number }; +const cache = new Map(); + +/** Clear the presence cache. Test seam; not used in production. */ +export function resetSubscriberCache(): void { + cache.clear(); +} + +function topicOf(config: unknown): unknown { + if (config && typeof config === 'object') { + return (config as Record).topic; + } + return undefined; +} + +/** + * True if any worker subscribes to `topic` via a durable (queue) subscriber. + * + * Result is cached per topic for `SUBSCRIBER_CACHE_TTL_MS`. On query failure + * this returns `true` (fail-safe): callers fall back to their normal + * publish/collect behavior rather than silently dropping a hook. + * + * `now` is injectable for testing the TTL; production callers omit it. + */ +export async function hasDurableSubscriber( + iii: ISdk, + topic: string, + now: number = Date.now(), +): Promise { + const cached = cache.get(topic); + if (cached && now - cached.at < SUBSCRIBER_CACHE_TTL_MS) { + return cached.has; + } + + try { + const resp = await iii.trigger({ + function_id: 'engine::triggers::list', + payload: {}, + }); + const has = (resp.triggers ?? 
[]).some( + (t) => t.trigger_type === DURABLE_SUBSCRIBER_TYPE && topicOf(t.config) === topic, + ); + cache.set(topic, { has, at: now }); + return has; + } catch (err) { + logger.warn('subscriber presence check failed; assuming subscribers exist', { + topic, + err: String(err), + }); + return true; + } +} diff --git a/harness/tests/context-compaction/turn-end-subscription.test.ts b/harness/tests/context-compaction/turn-end-subscription.test.ts new file mode 100644 index 00000000..9cf86e80 --- /dev/null +++ b/harness/tests/context-compaction/turn-end-subscription.test.ts @@ -0,0 +1,25 @@ +import { describe, expect, it, vi } from 'vitest'; +import { register } from '../../src/context-compaction/register.js'; +import type { ISdk } from '../../src/runtime/iii.js'; + +describe('context-compaction stream subscription', () => { + it('subscribes to agent::turn_end, not the full agent::events firehose', async () => { + const registerTrigger = vi.fn(); + const iii = { + registerFunction: vi.fn(), + registerTrigger, + trigger: vi.fn(async () => null), + } as unknown as ISdk; + + await register(iii); + + const streamTriggers = registerTrigger.mock.calls + .map((c) => c[0] as { type?: string; function_id?: string; config?: { stream_name?: string } }) + .filter( + (t) => t?.type === 'stream' && t?.function_id === 'context-compaction::on_agent_event', + ); + + expect(streamTriggers).toHaveLength(1); + expect(streamTriggers[0].config?.stream_name).toBe('agent::turn_end'); + }); +}); diff --git a/harness/tests/harness/fanout/sessions-poll.test.ts b/harness/tests/harness/fanout/sessions-poll.test.ts new file mode 100644 index 00000000..71a64f4f --- /dev/null +++ b/harness/tests/harness/fanout/sessions-poll.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it, vi } from 'vitest'; +import { spawnSessionsPoll } from '../../../src/harness/fanout/sessions-poll.js'; +import { FanoutState } from '../../../src/harness/ui-subscribe.js'; +import type { ISdk } from 
'../../../src/runtime/iii.js'; + +type Handler = (event: unknown) => Promise; + +// Session-create fanout now rides a dedicated `session_index` scope (marker +// written once at creation). The state trigger has NO condition_function_id, so +// the engine hands every write on that scope to the handler — the handler is +// the sole gate. These tests hammer that gate and the registration shape. +function setup(subscribers: string[] = []) { + const handlers = new Map(); + const triggers: Array<{ type?: string; function_id?: string; config?: Record }> = + []; + const sent: Array<{ function_id: string; payload: unknown }> = []; + const iii = { + registerFunction: vi.fn((id: string, h: Handler) => { + handlers.set(id, h); + return { unregister() {} }; + }), + registerTrigger: vi.fn((t) => { + triggers.push(t); + return { unregister() {} }; + }), + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + sent.push(req); + return null; + }), + } as unknown as ISdk; + + const state = new FanoutState(); + for (const b of subscribers) state.subscribe(b, null); + spawnSessionsPoll(iii, state); + return { handlers, triggers, sent }; +} + +const createEvent = (over: Record = {}) => ({ + event_type: 'state:created' as const, + scope: 'session_index' as const, + key: 'sess-1', + old_value: null, + new_value: { created_at_ms: 1 }, + message_type: 'state', + ...over, +}); + +function changedCalls(sent: Array<{ function_id: string; payload: unknown }>) { + return sent.filter((s) => s.function_id.startsWith('ui::sessions::changed::')); +} + +describe('spawnSessionsPoll registration (eliminates the per-write predicate RPC)', () => { + it('registers a scope-only session_index trigger with NO condition_function_id and no predicate fn', () => { + const { handlers, triggers } = setup(); + + expect([...handlers.keys()]).not.toContain('harness::session::is_create_event'); + expect([...handlers.keys()]).toContain('harness::fanout::session_created'); + + const t = 
triggers.find((x) => x.function_id === 'harness::fanout::session_created'); + expect(t?.type).toBe('state'); + expect(t?.config?.scope).toBe('session_index'); + expect(t?.config?.condition_function_id).toBeUndefined(); + }); +}); + +describe('session_created handler (sole gate)', () => { + it('fans out the new session id to every all-sessions subscriber', async () => { + const { handlers, sent } = setup(['b1', 'b2']); + const handler = handlers.get('harness::fanout::session_created'); + + await handler?.(createEvent({ key: 'sess-1' })); + + const changed = changedCalls(sent); + expect(changed.map((c) => c.function_id).sort()).toEqual([ + 'ui::sessions::changed::b1', + 'ui::sessions::changed::b2', + ]); + expect(changed[0]?.payload).toEqual({ added: ['sess-1'], removed: [] }); + }); + + it.each([ + ['state:updated (not a new session)', { event_type: 'state:updated' }], + // The dangerous one: a delete must NOT report the session as "added". + ['state:deleted (removed session)', { event_type: 'state:deleted', new_value: null }], + ['empty key', { key: '' }], + ])('does NOT fan out on %s', async (_label, over) => { + const { handlers, sent } = setup(['b1']); + const handler = handlers.get('harness::fanout::session_created'); + + await handler?.(createEvent(over)); + + expect(changedCalls(sent)).toHaveLength(0); + }); +}); diff --git a/harness/tests/harness/policy.test.ts b/harness/tests/harness/policy.test.ts index d5b95a30..06550262 100644 --- a/harness/tests/harness/policy.test.ts +++ b/harness/tests/harness/policy.test.ts @@ -562,7 +562,6 @@ describe('shipped iii-permissions.yaml', () => { 'auth::delete_token', 'run::start', 'router::stream_assistant', - 'router::abort', ]; it('kernel surfaces are denied unconditionally — hostile args cannot dodge them', async () => { diff --git a/harness/tests/integration/approval-resume.e2e.test.ts b/harness/tests/integration/approval-resume.e2e.test.ts index fd4a9b4a..75a9e550 100644 --- 
a/harness/tests/integration/approval-resume.e2e.test.ts +++ b/harness/tests/integration/approval-resume.e2e.test.ts @@ -1,9 +1,5 @@ import { describe, expect, it, vi } from 'vitest'; import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; -import { - handleAbortSignalWrite, - isAbortSignalWrite, -} from '../../src/turn-orchestrator/on-abort-signal.js'; import { handleApprovalDecisionWrite, isApprovalDecisionWrite, @@ -18,8 +14,8 @@ async function flushMicrotasks(): Promise { /** * Fake iii where `state::set` re-emits a state event and feeds it to the - * matching reactive trigger (abort on the agent scope, approval decisions on - * the approvals scope) — exercising the producer → trigger → wake path. + * approval reactive trigger on the `approvals` scope — exercising the + * producer → trigger → wake path. */ function fakeIii(): { iii: ISdk; @@ -53,9 +49,7 @@ function fakeIii(): { new_value: p.value, message_type: 'state', }; - if (p.scope === 'agent' && isAbortSignalWrite(event)) { - await handleAbortSignalWrite(iii as unknown as ISdk, event); - } else if (p.scope === 'approvals' && isApprovalDecisionWrite(event)) { + if (p.scope === 'approvals' && isApprovalDecisionWrite(event)) { await handleApprovalDecisionWrite(iii as unknown as ISdk, event); } return null; @@ -104,65 +98,4 @@ describe('approval reactive trigger', () => { }); }); - it('writing session//abort_signal=true enqueues turn::{state}', async () => { - const { iii, wakeTriggers, stateStore } = fakeIii(); - const rec = newRecord('sess-abort'); - rec.state = 'assistant_streaming'; - stateStore.set(`agent/${turnStateKey('sess-abort')}`, rec); - - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-abort/abort_signal', - value: true, - }, - }); - - await flushMicrotasks(); - - expect(wakeTriggers).toHaveLength(1); - expect(wakeTriggers[0]).toMatchObject({ - session_id: 'sess-abort', - function_id: 'turn::assistant_streaming', - }); - }); - 
- it('writing session//abort_signal=false does NOT trigger (condition rejects clears)', async () => { - const { iii, wakeTriggers, stateStore } = fakeIii(); - const rec = newRecord('sess-clear'); - rec.state = 'function_execute'; - stateStore.set(`agent/${turnStateKey('sess-clear')}`, rec); - - await iii.trigger({ - function_id: 'state::set', - payload: { scope: 'agent', key: 'session/sess-clear/abort_signal', value: true }, - }); - await flushMicrotasks(); - wakeTriggers.length = 0; - - await iii.trigger({ - function_id: 'state::set', - payload: { scope: 'agent', key: 'session/sess-clear/abort_signal', value: false }, - }); - await flushMicrotasks(); - - expect(wakeTriggers).toHaveLength(0); - }); - - it('writing an unrelated agent-scope key does NOT trigger', async () => { - const { iii, wakeTriggers } = fakeIii(); - - await iii.trigger({ - function_id: 'state::set', - payload: { - scope: 'agent', - key: 'session/sess-x/turn_state', - value: { state: 'function_execute' }, - }, - }); - await Promise.resolve(); - - expect(wakeTriggers).toHaveLength(0); - }); }); diff --git a/harness/tests/integration/on-record-written.e2e.test.ts b/harness/tests/integration/on-record-written.e2e.test.ts index 39d72251..3ca6c402 100644 --- a/harness/tests/integration/on-record-written.e2e.test.ts +++ b/harness/tests/integration/on-record-written.e2e.test.ts @@ -139,6 +139,42 @@ function turnStateGets(iii: ISdk, session_id: string): number { ).length; } +describe('session index marker (create-fanout source)', () => { + it('a newly-created session writes a session_index marker keyed by session id', async () => { + const { iii, stateStore } = fakeIii(); + const rec = newRecord('sess-new'); + rec.state = 'provisioning'; + + await persistence.saveRecord(iii, rec); + + expect(stateStore.has('session_index/sess-new')).toBe(true); + }); + + it('a transition on an existing session writes NO new session_index marker', async () => { + const { iii, stateStore } = fakeIii(); + const rec = 
newRecord('sess-x'); + rec.state = 'provisioning'; + await persistence.saveRecord(iii, rec); // create → marker written + stateStore.delete('session_index/sess-x'); // clear so a re-write would be detectable + + rec.state = 'assistant_streaming'; + await persistence.saveRecord(iii, rec); // transition → must NOT re-mark + + expect(stateStore.has('session_index/sess-x')).toBe(false); + }); + + it('a threaded previous record (transition) writes no marker', async () => { + const { iii, stateStore } = fakeIii(); + const previous = newRecord('sess-y'); + previous.state = 'provisioning'; + const next = { ...previous, state: 'assistant_streaming' as const }; + + await persistence.saveRecord(iii, next, previous); + + expect(stateStore.has('session_index/sess-y')).toBe(false); + }); +}); + describe('saveRecord read elimination (#5)', () => { it('2-arg saveRecord does not pre-read turn_state (uses state::set old_value)', async () => { const { iii } = fakeIii(); diff --git a/harness/tests/turn-orchestrator/abort.test.ts b/harness/tests/turn-orchestrator/abort.test.ts deleted file mode 100644 index a8d5ea15..00000000 --- a/harness/tests/turn-orchestrator/abort.test.ts +++ /dev/null @@ -1,81 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { performAbortSideEffects } from '../../src/turn-orchestrator/abort.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -describe('performAbortSideEffects', () => { - it('sets the abort_signal flag', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(null); - - await performAbortSideEffects(iii, 's1'); - - const setCalls = 
triggers.filter((t) => t.function_id === 'state::set'); - expect( - setCalls.some( - (c) => (c.payload as Record).key === 'session/s1/abort_signal', - ), - ).toBe(true); - }); - - it('skips approval cleanup when record state is not function_awaiting_approval', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - const rec = newRecord('s1'); - rec.state = 'assistant_streaming'; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - - await performAbortSideEffects(iii, 's1'); - - const approvalWrites = triggers - .filter((t) => t.function_id === 'state::set') - .map((t) => t.payload as Record) - .filter((p) => p.scope === 'approvals'); - expect(approvalWrites).toHaveLength(0); - expect(triggers.some((t) => t.function_id === 'approval::sweep_session')).toBe(false); - }); - - it('invokes resume fns with aborted decision when paused on approval', async () => { - const triggers: Array<{ function_id: string; payload: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - const rec = newRecord('s1'); - rec.state = 'function_awaiting_approval'; - rec.awaiting_approval = [ - { function_call_id: 'fc-1', function_id: 'shell::run', args: {} }, - { function_call_id: 'fc-2', function_id: 'shell::run', args: {} }, - ]; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - - await performAbortSideEffects(iii, 's1'); - - const decisionWrites = triggers - .filter((t) => t.function_id === 'state::set') - .map((t) => t.payload as { scope?: string; key?: string; value?: unknown }) - .filter((p) => p.scope === 'approvals'); - expect(decisionWrites.map((p) => p.key).sort()).toEqual(['s1/fc-1', 's1/fc-2']); - for (const w of decisionWrites) { - 
expect(w.value).toEqual({ decision: 'aborted', reason: 'session_aborted' }); - } - expect(triggers.some((t) => t.function_id.startsWith('turn::approval_resume'))).toBe(false); - - const publishes = triggers.filter((t) => t.function_id === 'iii::durable::publish'); - expect(publishes).toHaveLength(0); - }); -}); diff --git a/harness/tests/turn-orchestrator/events.test.ts b/harness/tests/turn-orchestrator/events.test.ts new file mode 100644 index 00000000..19895795 --- /dev/null +++ b/harness/tests/turn-orchestrator/events.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { emit } from '../../src/turn-orchestrator/events.js'; +import type { AgentEvent } from '../../src/types/agent-event.js'; + +function buildSdk() { + const calls: Array<{ function_id: string; payload: Record }> = []; + const trigger = vi.fn(async (req: { function_id: string; payload?: unknown }) => { + calls.push({ + function_id: req.function_id, + payload: (req.payload ?? 
{}) as Record, + }); + if (req.function_id === 'state::update') return { old_value: 0 }; + return {}; + }); + return { iii: { trigger } as unknown as ISdk, calls }; +} + +const SID = 'sess-1'; + +describe('emit (agent event producer)', () => { + it('writes a non-turn_end event only to agent::events', async () => { + const { iii, calls } = buildSdk(); + const event = { type: 'message_update' } as unknown as AgentEvent; + + await emit(iii, SID, event); + + const sets = calls.filter((c) => c.function_id === 'stream::set'); + expect(sets.map((c) => c.payload.stream_name)).toEqual(['agent::events']); + }); + + it('mirrors a turn_end event onto the dedicated agent::turn_end stream', async () => { + const { iii, calls } = buildSdk(); + const event = { + type: 'turn_end', + message: { role: 'assistant' }, + function_results: [], + } as unknown as AgentEvent; + + await emit(iii, SID, event); + + const sets = calls.filter((c) => c.function_id === 'stream::set'); + const streams = sets.map((c) => c.payload.stream_name); + expect(streams).toContain('agent::events'); + expect(streams).toContain('agent::turn_end'); + + const mirror = sets.find((c) => c.payload.stream_name === 'agent::turn_end'); + expect(mirror?.payload.group_id).toBe(SID); + expect(mirror?.payload.data).toEqual(event); + }); +}); diff --git a/harness/tests/turn-orchestrator/hook.test.ts b/harness/tests/turn-orchestrator/hook.test.ts index 9829dd5d..914e4231 100644 --- a/harness/tests/turn-orchestrator/hook.test.ts +++ b/harness/tests/turn-orchestrator/hook.test.ts @@ -1,10 +1,11 @@ -import { describe, expect, it, vi } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { CheckPermissionsPayload, PolicyCheckReply, } from '../../src/harness/policy/check-permissions.js'; import type { ISdk } from '../../src/runtime/iii.js'; -import { consultBefore } from '../../src/turn-orchestrator/hook.js'; +import { TOPIC_AFTER, consultBefore, publishAfter } from 
'../../src/turn-orchestrator/hook.js'; +import { resetSubscriberCache } from '../../src/turn-orchestrator/subscriber-presence.js'; function fakeIii( triggerImpl: (req: { @@ -71,3 +72,45 @@ describe('consultBefore (direct policy call)', () => { expect(trigger.mock.calls[0][0].function_id).toBe('policy::check_permissions'); }); }); + +describe('publishAfter (subscriber-aware after-hook)', () => { + beforeEach(() => resetSubscriberCache()); + + const fc = { id: 'fc-1', function_id: 'shell::fs::write', arguments: { path: '/tmp/x' } }; + const result = { content: [{ type: 'text', text: 'ok' }] }; + + it('skips publish_collect when no subscriber is registered for the topic', async () => { + const trigger = vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'engine::triggers::list') return { triggers: [] }; + throw new Error(`should not call ${req.function_id}`); + }); + const iii = { trigger } as unknown as ISdk; + + const merged = await publishAfter(iii, fc, result); + + expect(merged).toBeUndefined(); + const fns = trigger.mock.calls.map((c) => c[0].function_id); + expect(fns).not.toContain('hook-fanout::publish_collect'); + }); + + it('calls publish_collect when a subscriber is registered for the topic', async () => { + const trigger = vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'engine::triggers::list') { + return { + triggers: [{ trigger_type: 'durable:subscriber', config: { topic: TOPIC_AFTER } }], + }; + } + if (req.function_id === 'hook-fanout::publish_collect') return { merged: { rewritten: true } }; + throw new Error(`unexpected ${req.function_id}`); + }); + const iii = { trigger } as unknown as ISdk; + + const merged = await publishAfter(iii, fc, result); + + expect(merged).toEqual({ rewritten: true }); + const fns = trigger.mock.calls.map((c) => c[0].function_id); + // Proves the subscriber gate ran before delegating to the primitive. 
+ expect(fns).toContain('engine::triggers::list'); + expect(fns).toContain('hook-fanout::publish_collect'); + }); +}); diff --git a/harness/tests/turn-orchestrator/on-abort-signal.test.ts b/harness/tests/turn-orchestrator/on-abort-signal.test.ts deleted file mode 100644 index e185cea3..00000000 --- a/harness/tests/turn-orchestrator/on-abort-signal.test.ts +++ /dev/null @@ -1,219 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; -import { - execute, - handleAbortSignalWrite, - isAbortSignalWrite, - parseAbortSignalWrite, -} from '../../src/turn-orchestrator/on-abort-signal.js'; -import { AbortSignalWriteEventSchema } from '../../src/turn-orchestrator/schemas.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -const matchingEvent = { - event_type: 'state:created' as const, - scope: 'agent' as const, - key: 'session/sess-abc/abort_signal', - old_value: null, - new_value: true as const, - message_type: 'state', -}; - -describe('AbortSignalWriteEventSchema', () => { - it('accepts the agent state write shape from state::set / engine triggers', () => { - expect(AbortSignalWriteEventSchema.parse(matchingEvent)).toEqual({ - session_id: 'sess-abc', - }); - }); - - it('rejects durable publish envelope shapes (not a state trigger event)', () => { - expect(() => - AbortSignalWriteEventSchema.parse({ - topic: 'turn::step_requested', - data: { session_id: 's1' }, - }), - ).toThrow(); - }); - - it('rejects nested payload wrappers', () => { - expect(() => AbortSignalWriteEventSchema.parse({ payload: matchingEvent })).toThrow(); - expect(() => AbortSignalWriteEventSchema.parse({ data: matchingEvent })).toThrow(); - }); - - it('rejects missing key, wrong new_value, or non-abort_signal keys', () => { - expect(() => AbortSignalWriteEventSchema.parse({})).toThrow(); - expect(() => - AbortSignalWriteEventSchema.parse({ - ...matchingEvent, - key: 'session/sess-abc/turn_state', - }), - 
).toThrow(); - expect(() => - AbortSignalWriteEventSchema.parse({ - ...matchingEvent, - new_value: false, - }), - ).toThrow(); - expect(() => - AbortSignalWriteEventSchema.parse({ - ...matchingEvent, - event_type: 'state:deleted', - }), - ).toThrow(); - expect(() => AbortSignalWriteEventSchema.parse(null)).toThrow(); - }); -}); - -describe('parseAbortSignalWrite condition', () => { - it('matches session//abort_signal with new_value === true', () => { - expect(parseAbortSignalWrite(matchingEvent)).toEqual({ session_id: 'sess-abc' }); - expect(isAbortSignalWrite(matchingEvent)).toBe(true); - }); - - it('matches state:updated transitioning to true', () => { - const event = { - event_type: 'state:updated' as const, - scope: 'agent' as const, - key: 'session/sess-abc/abort_signal', - old_value: false, - new_value: true as const, - message_type: 'state', - }; - expect(parseAbortSignalWrite(event)).toEqual({ session_id: 'sess-abc' }); - }); - - it('skips state:deleted', () => { - expect( - parseAbortSignalWrite({ - event_type: 'state:deleted', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: true, - new_value: null, - message_type: 'state', - }), - ).toBeNull(); - }); - - it('skips writes that set the signal to false (idempotent clears)', () => { - expect( - parseAbortSignalWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: true, - new_value: false, - message_type: 'state', - }), - ).toBeNull(); - }); - - it('skips non-abort_signal keys in the agent scope', () => { - expect( - parseAbortSignalWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: { state: 'function_execute' }, - message_type: 'state', - }), - ).toBeNull(); - }); - - it('skips top-level non-session keys', () => { - expect( - parseAbortSignalWrite({ - event_type: 'state:updated', - scope: 'agent', - key: 'harness/index/abc/last_session_id', - old_value: 
null, - new_value: 'sess-1', - message_type: 'state', - }), - ).toBeNull(); - }); -}); - -function mockIiiWithTurnState(rec: ReturnType): { - iii: ISdk; - triggers: Array<{ function_id: string; payload: unknown; action?: unknown }>; -} { - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - if (req.function_id === 'state::get') return rec; - triggers.push(req); - return null; - }), - } as unknown as ISdk; - return { iii, triggers }; -} - -describe('execute', () => { - it('enqueues turn::{state} on the turn-step FIFO queue', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'assistant_streaming'; - const { iii, triggers } = mockIiiWithTurnState(rec); - - await execute(iii, { session_id: 'sess-abc' }); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::assistant_streaming'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); - - it('swallows enqueue failures (logs only, never rethrows)', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'provisioning'; - const iii = { - trigger: vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'state::get') return rec; - throw new Error('durable down'); - }), - } as unknown as ISdk; - - await expect(execute(iii, { session_id: 'sess-abc' })).resolves.toBeUndefined(); - }); -}); - -describe('handleAbortSignalWrite', () => { - it('extracts session_id and enqueues turn::{state}', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'function_execute'; - const { iii, triggers } = mockIiiWithTurnState(rec); - - await handleAbortSignalWrite(iii, matchingEvent); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::function_execute'); - 
expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); - - it('no-ops when key does not match the abort_signal pattern', async () => { - const iii = { trigger: vi.fn() } as unknown as ISdk; - await handleAbortSignalWrite(iii, { - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/turn_state', - old_value: null, - new_value: {}, - message_type: 'state', - }); - expect(iii.trigger).not.toHaveBeenCalled(); - }); - - it('no-ops when new_value is not true (direct invoke bypasses engine condition)', async () => { - const iii = { trigger: vi.fn() } as unknown as ISdk; - await handleAbortSignalWrite(iii, { - event_type: 'state:updated', - scope: 'agent', - key: 'session/sess-abc/abort_signal', - old_value: true, - new_value: false, - message_type: 'state', - }); - expect(iii.trigger).not.toHaveBeenCalled(); - }); -}); diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index d71373b1..c153a65b 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -3,11 +3,7 @@ import type { ISdk } from '../../src/runtime/iii.js'; import type { AgentMessage } from '../../src/types/agent-message.js'; import * as events from '../../src/turn-orchestrator/events.js'; import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { - abortSignalKey, - newRecord, - type TurnStateRecord, -} from '../../src/turn-orchestrator/state.js'; +import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; import { handleSteering, route } from '../../src/turn-orchestrator/states/steering-check.js'; afterEach(() => { @@ -16,16 +12,14 @@ afterEach(() => { describe('steering route()', () => { it.each([ - [true, true, true, true, 'abort'], - [true, false, false, false, 'abort'], - [false, true, true, true, 'steering'], 
- [false, true, false, false, 'steering'], - [false, false, true, true, 'followup'], - [false, false, true, false, 'followup'], - [false, false, false, true, 'continue_after_function'], - [false, false, false, false, 'end_turn'], - ] as const)('route(%s, %s, %s, %s) -> %s', (abort, has_steering, has_followup, has_function_results, expected) => { - expect(route(abort, has_steering, has_followup, has_function_results)).toBe(expected); + [true, true, true, 'steering'], + [true, false, false, 'steering'], + [false, true, true, 'followup'], + [false, true, false, 'followup'], + [false, false, true, 'continue_after_function'], + [false, false, false, 'end_turn'], + ] as const)('route(%s, %s, %s) -> %s', (has_steering, has_followup, has_function_results, expected) => { + expect(route(has_steering, has_followup, has_function_results)).toBe(expected); }); }); @@ -34,18 +28,14 @@ function userMessage(text: string): AgentMessage { } function makeIii( - opts: { abort?: boolean; steeringItems?: AgentMessage[]; followupItems?: AgentMessage[] } = {}, + opts: { steeringItems?: AgentMessage[]; followupItems?: AgentMessage[] } = {}, ) { - const { abort = false, steeringItems = [], followupItems = [] } = opts; + const { steeringItems = [], followupItems = [] } = opts; const drainCalls: Array<{ name: string; session_id: string }> = []; const iii = { trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { - if (req.function_id === 'state::get') { - const p = req.payload as { key: string }; - if (p.key.endsWith('/abort_signal')) return abort ? 
true : null; - return null; - } + if (req.function_id === 'state::get') return null; if (req.function_id === 'session-inbox::drain') { const p = req.payload as { name: string; session_id: string }; drainCalls.push(p); @@ -72,50 +62,6 @@ function steeringRec( } describe('handleSteering', () => { - it('abort: persists aborted assistant, emits turn_end, stops the session', async () => { - const { iii } = makeIii({ abort: true }); - const rec = steeringRec('s1'); - const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(rec.state).toBe('stopped'); - expect(rec.turn_end_emitted).toBe(true); - expect(rec.last_assistant?.stop_reason).toBe('aborted'); - expect(loadSpy).toHaveBeenCalledWith(iii, 's1'); - expect(saveSpy).toHaveBeenCalledWith( - iii, - 's1', - expect.arrayContaining([expect.objectContaining({ stop_reason: 'aborted' })]), - ); - expect(emitSpy).toHaveBeenCalledWith( - iii, - 's1', - expect.objectContaining({ - type: 'turn_end', - message: expect.objectContaining({ stop_reason: 'aborted' }), - }), - ); - }); - - it('abort: skips inbox drains', async () => { - const { iii, drainCalls } = makeIii({ - abort: true, - steeringItems: [userMessage('steer')], - followupItems: [userMessage('follow')], - }); - const rec = steeringRec('s1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(drainCalls).toHaveLength(0); - }); - it('steering: appends drained messages and transitions to assistant_streaming', async () => { const steeringItems = [userMessage('steer-me')]; const { iii } = makeIii({ steeringItems }); @@ -196,23 +142,6 @@ describe('handleSteering', () => { 
expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'agent_end' })); }); - it('reads abort via state::get on abort_signal key', async () => { - const { iii } = makeIii({ abort: true }); - const rec = steeringRec('s1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleSteering(iii, rec); - - expect(iii.trigger).toHaveBeenCalledWith( - expect.objectContaining({ - function_id: 'state::get', - payload: { scope: 'agent', key: abortSignalKey('s1') }, - }), - ); - }); - it('caps at max_turns: emits a max_turns assistant + message_complete + turn_end and tears down instead of continuing', async () => { const { iii } = makeIii(); const rec = steeringRec('s1', { diff --git a/harness/tests/turn-orchestrator/subscriber-presence.test.ts b/harness/tests/turn-orchestrator/subscriber-presence.test.ts new file mode 100644 index 00000000..e1741332 --- /dev/null +++ b/harness/tests/turn-orchestrator/subscriber-presence.test.ts @@ -0,0 +1,69 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { + hasDurableSubscriber, + resetSubscriberCache, +} from '../../src/turn-orchestrator/subscriber-presence.js'; + +function fakeIii( + triggers: unknown[], + opts?: { onList?: () => void; throwOnList?: boolean }, +) { + const trigger = vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'engine::triggers::list') { + opts?.onList?.(); + if (opts?.throwOnList) throw new Error('engine unreachable'); + return { triggers }; + } + throw new Error(`unexpected function_id: ${req.function_id}`); + }); + return { iii: { trigger } as unknown as ISdk, trigger }; +} + +describe('hasDurableSubscriber', () => { + beforeEach(() => resetSubscriberCache()); + + it('returns true when a durable:subscriber trigger is bound to the topic', 
async () => { + const { iii } = fakeIii([ + { trigger_type: 'durable:subscriber', config: { topic: 'agent::after_function_call' } }, + ]); + expect(await hasDurableSubscriber(iii, 'agent::after_function_call')).toBe(true); + }); + + it('returns false when no trigger subscribes to the topic', async () => { + const { iii } = fakeIii([ + { trigger_type: 'stream', config: { stream_name: 'agent::hook_reply' } }, + { trigger_type: 'durable:subscriber', config: { topic: 'some::other::topic' } }, + ]); + expect(await hasDurableSubscriber(iii, 'agent::after_function_call')).toBe(false); + }); + + it('caches the result within the TTL (one engine query for repeated calls)', async () => { + let listCalls = 0; + const { iii } = fakeIii([], { + onList: () => { + listCalls++; + }, + }); + await hasDurableSubscriber(iii, 'agent::after_function_call', 1000); + await hasDurableSubscriber(iii, 'agent::after_function_call', 2000); + expect(listCalls).toBe(1); + }); + + it('re-queries after the TTL expires', async () => { + let listCalls = 0; + const { iii } = fakeIii([], { + onList: () => { + listCalls++; + }, + }); + await hasDurableSubscriber(iii, 'agent::after_function_call', 1000); + await hasDurableSubscriber(iii, 'agent::after_function_call', 1000 + 30_001); + expect(listCalls).toBe(2); + }); + + it('fails safe (returns true) when the engine query throws', async () => { + const { iii } = fakeIii([], { throwOnList: true }); + expect(await hasDurableSubscriber(iii, 'agent::after_function_call')).toBe(true); + }); +}); diff --git a/iii-permissions.yaml b/iii-permissions.yaml index 1edcc4e3..d8112e04 100644 --- a/iii-permissions.yaml +++ b/iii-permissions.yaml @@ -25,7 +25,6 @@ rules: - '!oauth::openai-codex::login' - '!run::start' - '!router::stream_assistant' - - '!router::abort' # Read-only / introspection (extend below for your tools). 
- state::get From c7c04725f21ad7ee674f956999521c4313c4a9fc Mon Sep 17 00:00:00 2001 From: Ytallo Date: Tue, 26 May 2026 01:20:29 +0200 Subject: [PATCH 34/41] refactor(turn-orchestrator): split state handlers into ports/process modules (#188) * refactor(turn-orchestrator): split state handlers into ports/process modules Reorganize the turn FSM into per-state directories that separate injected dependencies from pure transition logic, and consolidate the shared runtime behind typed ports. State handlers (flat states/*.ts -> per-state dirs): - assistant-streaming, function-execute, function-awaiting-approval, provisioning, steering-check each gain a ports.ts (I/O dependencies) and process.ts (transition logic); function-execute also splits out run.ts and types.ts New state-runtime/ layer: - store.ts all state::* turn-store I/O (was persistence/turn-state-write) - transcript.ts transcript idempotency helpers (was flat-messages) - turn-end.ts turn-end and FSM resume helpers (was finish) - ports.ts shared TurnStatePorts consumed by every state handler context-compaction: - handler-pipeline.ts holds the shared prune -> summarize -> flat-state path for the sync and async handlers (was emit.ts) Dead code removed (with tests): estimate, wake, subscriber-presence, turn-state-write, flat-messages, and the obsolete flat-state-key test. 945 tests pass; tsc -b clean. * style: format harness to biome 2.4.10 CI runs `biome ci` with biome 2.4.10 while the repo toolchain formats with 1.9.4, so newly written and edited files drift from CI's expected output and fail the lint gate. Apply 2.4.10 formatting across the affected harness files (no logic changes). Includes a few pre-existing base-branch files that had the same drift. 945 tests pass; tsc -b clean; `biome ci harness` reports no errors. 
--- harness/README.md | 2 +- harness/docs/architecture.md | 42 ++- harness/docs/workers/approval-gate.md | 30 +- harness/docs/workers/context-compaction.md | 14 +- harness/docs/workers/harness.md | 6 +- harness/docs/workers/session.md | 12 +- harness/docs/workers/turn-orchestrator.md | 113 ++++--- harness/src/approval-gate/schemas.ts | 5 +- harness/src/context-compaction/config.ts | 58 ++-- harness/src/context-compaction/emit.ts | 49 --- harness/src/context-compaction/flat-state.ts | 14 +- .../src/context-compaction/handler-async.ts | 57 ++-- .../context-compaction/handler-pipeline.ts | 116 ++++++++ .../src/context-compaction/handler-sync.ts | 60 ++-- harness/src/context-compaction/lease.ts | 27 +- .../src/context-compaction/model-resolver.ts | 5 +- harness/src/context-compaction/overflow.ts | 7 +- harness/src/context-compaction/register.ts | 9 +- harness/src/context-compaction/summarize.ts | 9 +- harness/src/harness/fanout/sessions-poll.ts | 13 +- harness/src/runtime/state.ts | 100 ++++--- harness/src/session/config.ts | 2 +- harness/src/session/inbox/key.ts | 2 +- .../src/turn-orchestrator/agent-trigger.ts | 8 +- .../assistant-streaming/ports.ts | 137 +++++++++ .../assistant-streaming/process.ts | 172 +++++++++++ harness/src/turn-orchestrator/estimate.ts | 12 - harness/src/turn-orchestrator/events.ts | 7 +- harness/src/turn-orchestrator/finish.ts | 16 - .../src/turn-orchestrator/flat-messages.ts | 14 - .../function-awaiting-approval/ports.ts | 39 +++ .../function-awaiting-approval/process.ts | 137 +++++++++ .../function-execute/ports.ts | 94 ++++++ .../function-execute/process.ts | 40 +++ .../turn-orchestrator/function-execute/run.ts | 232 +++++++++++++++ .../function-execute/types.ts | 54 ++++ harness/src/turn-orchestrator/get-state.ts | 4 +- harness/src/turn-orchestrator/hook.ts | 37 --- harness/src/turn-orchestrator/iii.worker.yaml | 1 - harness/src/turn-orchestrator/on-approval.ts | 11 +- harness/src/turn-orchestrator/persistence.ts | 114 ------- 
harness/src/turn-orchestrator/preflight.ts | 8 +- .../src/turn-orchestrator/provider-stream.ts | 5 +- .../provisioning/load-skills.ts | 18 ++ .../turn-orchestrator/provisioning/ports.ts | 74 +++++ .../turn-orchestrator/provisioning/process.ts | 88 ++++++ harness/src/turn-orchestrator/register.ts | 12 +- harness/src/turn-orchestrator/run-request.ts | 8 +- harness/src/turn-orchestrator/run-start.ts | 9 +- .../src/turn-orchestrator/run-transition.ts | 18 +- .../turn-orchestrator/session-tree-mirror.ts | 20 +- .../turn-orchestrator/state-runtime/ports.ts | 60 ++++ .../turn-orchestrator/state-runtime/store.ts | 167 +++++++++++ .../state-runtime/transcript.ts | 33 ++ .../state-runtime/turn-end.ts | 35 +++ harness/src/turn-orchestrator/state.ts | 51 +--- .../states/assistant-streaming.ts | 149 ---------- .../states/function-awaiting-approval.ts | 107 ------- .../states/function-execute.ts | 281 ------------------ harness/src/turn-orchestrator/states/index.ts | 9 - .../turn-orchestrator/states/provisioning.ts | 113 ------- .../states/steering-check.ts | 129 -------- .../turn-orchestrator/steering-check/ports.ts | 46 +++ .../steering-check/process.ts | 133 +++++++++ .../turn-orchestrator/subscriber-presence.ts | 79 ----- .../src/turn-orchestrator/turn-state-write.ts | 31 -- harness/src/turn-orchestrator/wake.ts | 40 --- harness/src/types/agent-event.ts | 2 +- harness/src/types/function.ts | 12 - harness/tests/_helpers/stateStoreKey.ts | 8 + .../tests/approval-gate/_helpers/fakeIii.ts | 8 +- harness/tests/approval-gate/resolve.test.ts | 5 +- harness/tests/approval-gate/schemas.test.ts | 17 +- .../e2e/full-session.test.ts | 37 ++- .../context-compaction/flat-state-key.test.ts | 25 -- .../integration/flow-sync.test.ts | 27 +- .../tests/context-compaction/lease.test.ts | 44 +-- .../turn-end-subscription.test.ts | 4 +- .../harness/fanout/sessions-poll.test.ts | 17 +- .../integration/approval-resume.e2e.test.ts | 6 +- .../integration/on-record-written.e2e.test.ts | 68 +++-- 
harness/tests/runtime/state-client.test.ts | 6 +- harness/tests/session/inbox.test.ts | 2 +- .../_helpers/mockTurnStore.ts | 42 +++ .../turn-orchestrator/agent-trigger.test.ts | 6 +- .../assistant-streaming.test.ts | 167 +++++++++++ .../tests/turn-orchestrator/assistant.test.ts | 97 ++---- .../awaiting-approval.test.ts | 60 ++-- .../tests/turn-orchestrator/estimate.test.ts | 33 -- .../tests/turn-orchestrator/finish.test.ts | 10 +- .../turn-orchestrator/flat-messages.test.ts | 16 - .../function-awaiting-approval.test.ts | 175 +++++++++++ .../function-execute.test.ts | 220 ++++++++++++++ .../tests/turn-orchestrator/functions.test.ts | 203 ++++++------- .../tests/turn-orchestrator/get-state.test.ts | 4 +- harness/tests/turn-orchestrator/hook.test.ts | 47 +-- .../provisioning-layer.test.ts | 107 +++++++ .../turn-orchestrator/provisioning.test.ts | 76 +++-- .../tests/turn-orchestrator/run-start.test.ts | 4 +- .../turn-orchestrator/run-transition.test.ts | 97 +++--- harness/tests/turn-orchestrator/state.test.ts | 13 +- .../steering-check-layer.test.ts | 174 +++++++++++ .../tests/turn-orchestrator/steering.test.ts | 60 ++-- harness/tests/turn-orchestrator/store.test.ts | 198 ++++++++++++ .../subscriber-presence.test.ts | 69 ----- .../turn-state-write.test.ts | 62 ---- harness/tests/turn-orchestrator/wake.test.ts | 112 ------- 107 files changed, 3559 insertions(+), 2385 deletions(-) delete mode 100644 harness/src/context-compaction/emit.ts create mode 100644 harness/src/context-compaction/handler-pipeline.ts create mode 100644 harness/src/turn-orchestrator/assistant-streaming/ports.ts create mode 100644 harness/src/turn-orchestrator/assistant-streaming/process.ts delete mode 100644 harness/src/turn-orchestrator/estimate.ts delete mode 100644 harness/src/turn-orchestrator/finish.ts delete mode 100644 harness/src/turn-orchestrator/flat-messages.ts create mode 100644 harness/src/turn-orchestrator/function-awaiting-approval/ports.ts create mode 100644 
harness/src/turn-orchestrator/function-awaiting-approval/process.ts create mode 100644 harness/src/turn-orchestrator/function-execute/ports.ts create mode 100644 harness/src/turn-orchestrator/function-execute/process.ts create mode 100644 harness/src/turn-orchestrator/function-execute/run.ts create mode 100644 harness/src/turn-orchestrator/function-execute/types.ts delete mode 100644 harness/src/turn-orchestrator/persistence.ts create mode 100644 harness/src/turn-orchestrator/provisioning/load-skills.ts create mode 100644 harness/src/turn-orchestrator/provisioning/ports.ts create mode 100644 harness/src/turn-orchestrator/provisioning/process.ts create mode 100644 harness/src/turn-orchestrator/state-runtime/ports.ts create mode 100644 harness/src/turn-orchestrator/state-runtime/store.ts create mode 100644 harness/src/turn-orchestrator/state-runtime/transcript.ts create mode 100644 harness/src/turn-orchestrator/state-runtime/turn-end.ts delete mode 100644 harness/src/turn-orchestrator/states/assistant-streaming.ts delete mode 100644 harness/src/turn-orchestrator/states/function-awaiting-approval.ts delete mode 100644 harness/src/turn-orchestrator/states/function-execute.ts delete mode 100644 harness/src/turn-orchestrator/states/index.ts delete mode 100644 harness/src/turn-orchestrator/states/provisioning.ts delete mode 100644 harness/src/turn-orchestrator/states/steering-check.ts create mode 100644 harness/src/turn-orchestrator/steering-check/ports.ts create mode 100644 harness/src/turn-orchestrator/steering-check/process.ts delete mode 100644 harness/src/turn-orchestrator/subscriber-presence.ts delete mode 100644 harness/src/turn-orchestrator/turn-state-write.ts delete mode 100644 harness/src/turn-orchestrator/wake.ts create mode 100644 harness/tests/_helpers/stateStoreKey.ts delete mode 100644 harness/tests/context-compaction/flat-state-key.test.ts create mode 100644 harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts create mode 100644 
harness/tests/turn-orchestrator/assistant-streaming.test.ts delete mode 100644 harness/tests/turn-orchestrator/estimate.test.ts delete mode 100644 harness/tests/turn-orchestrator/flat-messages.test.ts create mode 100644 harness/tests/turn-orchestrator/function-awaiting-approval.test.ts create mode 100644 harness/tests/turn-orchestrator/function-execute.test.ts create mode 100644 harness/tests/turn-orchestrator/provisioning-layer.test.ts create mode 100644 harness/tests/turn-orchestrator/steering-check-layer.test.ts create mode 100644 harness/tests/turn-orchestrator/store.test.ts delete mode 100644 harness/tests/turn-orchestrator/subscriber-presence.test.ts delete mode 100644 harness/tests/turn-orchestrator/turn-state-write.test.ts delete mode 100644 harness/tests/turn-orchestrator/wake.test.ts diff --git a/harness/README.md b/harness/README.md index 3a8bfcd6..ee050fa5 100644 --- a/harness/README.md +++ b/harness/README.md @@ -13,7 +13,7 @@ alongside `harness` over the iii bus. | Folder | Bus surface | Role | |---|---|---| | `src/harness/` | `ui::subscribe`/`unsubscribe`, `harness::fs::read_inline`, `policy::check_permissions` | Meta-worker; loads `iii-permissions.yaml`; spins up `ui::*` fanout pumps. | -| `src/approval-gate/` | `approval::resolve` | Routes operator decisions to per-call `turn::approval_resume` fns (registered by turn-orchestrator). | +| `src/approval-gate/` | `approval::resolve` | Persists operator decisions to scope `approvals` (turn-orchestrator reacts via `turn::on_approval`). | | `src/turn-orchestrator/` | `run::start`, `turn::{state}`, `turn::get_state` | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | | `src/session/` | `session-tree::*` (11 fns), `session-inbox::*` (3 fns) | Branching session storage + per-session inbox queues. | | `src/llm-budget/` | `budget::*` (14 fns) | Workspace + agent LLM spend caps. 
| diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 7c5a93b1..be938120 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -20,7 +20,7 @@ workers. |---|---|---|---| | harness | [src/harness/](harness/src/harness/) | Meta-worker; loads `iii-permissions.yaml`, exposes `harness::trigger` (WS ingestion bridge — see [Telemetry & trace correlation](#telemetry--trace-correlation)) / `policy::check_permissions` / `ui::*`, spins up `agent::events` fan-out. | [workers/harness.md](harness/docs/workers/harness.md) | | turn-orchestrator | [src/turn-orchestrator/](harness/src/turn-orchestrator/) | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md) | -| approval-gate | [src/approval-gate/](harness/src/approval-gate/) | Registers `approval::resolve` and shared approval wire schemas; routes decisions to per-call `turn::approval_resume` fns owned by the turn-orchestrator. | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | +| approval-gate | [src/approval-gate/](harness/src/approval-gate/) | Registers `approval::resolve` and shared approval wire schemas; persists decisions to scope `approvals` (turn-orchestrator reacts via `turn::on_approval`). | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | | session | [src/session/](harness/src/session/) | Branching session storage (`session-tree::*`) plus per-session inbox queues (`session-inbox::*`). | [workers/session.md](harness/docs/workers/session.md) | | llm-budget | [src/llm-budget/](harness/src/llm-budget/) | Workspace + agent LLM spend caps with alerts, forecast, period rollover. | [workers/llm-budget.md](harness/docs/workers/llm-budget.md) | | hook-fanout | [src/hook-fanout/](harness/src/hook-fanout/) | Generic publish-and-collect primitive over a stream topic. 
| [workers/hook-fanout.md](harness/docs/workers/hook-fanout.md) | @@ -69,13 +69,12 @@ flowchart LR turnOrch -- "provider::*::stream" --> provKimi turnOrch -- "provider::*::stream" --> provLms turnOrch -- "consultBefore: policy::check_permissions" --> harness - turnOrch -- "publishAfter → hook-fanout::publish_collect (after-hook)" --> hook turnOrch -- "session-tree::* mirror" --> session turnOrch -- "state::* persistence" --> state client -- "approval::resolve" --> approval - approval -- "trigger turn::approval_resume::/" --> turnOrch - turnOrch -- "state::set approvals//" --> state + approval -- "state::set approvals//" --> state + state -- "state trigger (scope=approvals)" --> turnOrch turnOrch -- "enqueue turn::{state} on turn-step queue" --> turnOrch provAnth -- "auth::get_token" --> auth @@ -86,7 +85,7 @@ flowchart LR state -- "agent::events stream" --> harness state -- "agent::events stream" --> compact state -- "state trigger (scope=approvals)" --> turnOrch - state -- "state trigger (scope=session_index)" --> harness + state -- "state trigger (scope=turn_state)" --> harness harness -- "ui::session::event::" --> client compact -- "session-tree::compact" --> session ``` @@ -94,13 +93,13 @@ flowchart LR ## Turn FSM [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) -defines an 8-state durable FSM. Each state is a registered `turn::{state}` +defines a 7-state durable FSM. Each state is a registered `turn::{state}` function executed via `runTransition` and enqueued onto the `turn-step` FIFO -queue by `wakeState` ([wake.ts](harness/src/turn-orchestrator/wake.ts)). -`saveRecord` calls `shouldWakeStep` then `wakeState` when the persisted state -transitions to a stepable state. Paused or terminal sessions are also woken by +queue by `TurnStore.wakeStep` ([store.ts](harness/src/turn-orchestrator/state-runtime/store.ts)). +`saveRecord` calls `shouldWakeStep` then `wakeStep` when the persisted state +transitions to a stepable state. 
Paused sessions are also woken by the approval-decision state trigger (`turn::on_approval` on scope `approvals`) -via `wakeFromRecord`. +via `TurnStore.wakeFromRecord`. ```mermaid stateDiagram-v2 @@ -108,14 +107,13 @@ stateDiagram-v2 provisioning --> assistant_streaming assistant_streaming --> function_execute: has function calls assistant_streaming --> steering_check: no function calls - assistant_streaming --> tearing_down: error or aborted + assistant_streaming --> stopped: error or aborted via finishSession function_execute --> function_awaiting_approval: any call needs approval function_execute --> steering_check: batch complete - function_execute --> tearing_down: all calls terminate session + function_execute --> stopped: all calls terminate session via finishSession function_awaiting_approval --> function_execute: all decisions written steering_check --> assistant_streaming: continue turn - steering_check --> tearing_down: stop or max turns - tearing_down --> stopped + steering_check --> stopped: stop or max turns via finishSession stopped --> [*] failed --> [*] ``` @@ -127,10 +125,10 @@ unexpectedly (unless it opts into queue retry via `TransientError`). The orchestrator consults `policy::check_permissions` directly inside `consultBefore` — `allow`, `deny`, or `pending`. There is no hook fanout on -the before path. The orchestrator parks the turn in `function_awaiting_approval`, -registers a `turn::approval_resume` function per pending call, and waits until -`approval::resolve` (or abort) triggers that function, which persists the -decision and invokes `wakeFromRecord` to re-enqueue the current state handler. +the before path. The orchestrator parks the turn in `function_awaiting_approval` +and waits until `approval::resolve` writes the decision to scope `approvals`, +which fires `turn::on_approval` and calls `wakeFromRecord` to re-enqueue the +current state handler. 
```mermaid sequenceDiagram @@ -144,15 +142,15 @@ sequenceDiagram alt rule.action == allow Harness-->>Turn: allow → dispatch the call else rule.action == deny - Harness-->>Turn: deny + DenialEnvelope → DenialResult + Harness-->>Turn: deny + DenialEnvelope → error FunctionResult else no rule (needs_approval) Harness-->>Turn: needs_approval → park in function_awaiting_approval Note over Turn,Bus: saveRecord does not wake stepable handlers for
function_awaiting_approval. awaiting_approval pins open calls. User->>Gate: approval::resolve(decision, reason) - Gate->>Turn: trigger turn::approval_resume::<session_id>/<function_call_id> - Turn->>Bus: state::set approvals/<session_id>/<function_call_id> = {decision, reason} + Gate->>Bus: state::set approvals/<session_id>/<function_call_id> = {decision, reason} + Bus-->>Turn: turn::on_approval state trigger Turn->>Turn: wakeFromRecord → function_awaiting_approval reads
approvals/<session_id>/<function_call_id> for each pending entry - Turn->>Turn: fold decisions into work batch,
transition back to function_execute + Turn->>Turn: fold decisions into work.prepared,
transition back to function_execute end ``` diff --git a/harness/docs/workers/approval-gate.md b/harness/docs/workers/approval-gate.md index 86ea07ae..b4e6c178 100644 --- a/harness/docs/workers/approval-gate.md +++ b/harness/docs/workers/approval-gate.md @@ -1,8 +1,7 @@ # approval-gate Registers `approval::resolve` and shared wire schemas for the approval path. -Per-call resume functions (`turn::approval_resume::/`) live in -the turn-orchestrator. +The turn-orchestrator reacts via the reactive `turn::on_approval` state trigger. ## Purpose @@ -10,30 +9,29 @@ The approval gate is the bus entry point for human decisions on parked tool calls. It does **not** intercept function calls on the bus — the turn-orchestrator consults `policy::check_permissions` directly inside `consultBefore`. The gate's job is to accept operator input from the console -and route it to the correct per-call resume function. +and persist the decision where the orchestrator can read it. | Policy outcome (in orchestrator) | Orchestrator effect | |---|---| | `allow` | dispatch proceeds immediately | | `deny` | dispatch short-circuits with a `DenialEnvelope` | -| `needs_approval` | orchestrator parks the call in `function_awaiting_approval` and registers a resume fn | +| `needs_approval` | orchestrator parks the call in `function_awaiting_approval` | ## Resolution flow -1. While parked, the orchestrator calls `registerApprovalResume` for each - pending call (see [approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts)). +1. While parked, the orchestrator keeps pending calls in `awaiting_approval[]` on the turn record. 2. The console calls `approval::resolve` with `{ session_id, function_call_id, decision, reason? }`. -3. `approval::resolve` triggers `turn::approval_resume::/` with the decision payload. -4. The resume handler writes `approvals//` (if not already set), invokes `turn::step`, and unregisters the resume fn. -5. 
`handleAwaitingApproval` reads all decisions, folds them into the prepared snapshot, and returns to `function_execute`. +3. `approval::resolve` writes `approvals//` via `state::set`. +4. The `turn::on_approval` state trigger (scope `approvals`) fires and calls `wakeFromRecord`. +5. `function_awaiting_approval` reads all decisions, folds them into the prepared snapshot, and returns to `function_execute`. ## Registered functions -- `approval::resolve` — Validates the payload and triggers the per-call resume function. Returns `{ ok: true }` or `{ ok: false, error: 'invalid_payload' | 'resume_failed' }`. +- `approval::resolve` — Validates the payload and persists the decision to scope `approvals`. Returns `{ ok: true }` or `{ ok: false, error: 'invalid_payload' | 'resume_failed' }`. -Per-call resume functions are registered by the turn-orchestrator, not this worker: +Reactive wake is owned by the turn-orchestrator: -- `turn::approval_resume::/` — Persists the decision to scope `approvals` and wakes `turn::step`. +- `turn::on_approval` — State trigger on scope `approvals`; enqueues `turn::{state}` for the parked session. ## State keys @@ -42,7 +40,7 @@ All decision records use scope `approvals` (constant `STATE_SCOPE` in | Key shape | Value | Purpose | |---|---|---| -| `/` | `{ decision: 'allow' \| 'deny' \| 'aborted', reason: string \| null }` | Written by the resume handler when an operator resolves. `handleAwaitingApproval` reads these keys while the turn is in `function_awaiting_approval`. | +| `/` | `{ decision: 'allow' \| 'deny' \| 'aborted', reason: string \| null }` | Written by `approval::resolve`. The `function_awaiting_approval` handler reads these keys while the turn is parked in that state. | Pending calls are tracked on the turn record (`awaiting_approval[]`), not as separate rows under `approvals` until a decision lands. @@ -82,12 +80,12 @@ no explicit dependency block. 
| File | Purpose | |---|---| | [src/approval-gate/main.ts](harness/src/approval-gate/main.ts) | Binary entry point (`iii-approval-gate`). | -| [src/approval-gate/resolve.ts](harness/src/approval-gate/resolve.ts) | Registers `approval::resolve`; triggers per-call resume fns. | -| [src/approval-gate/schemas.ts](harness/src/approval-gate/schemas.ts) | `STATE_SCOPE`, wire schemas, `parsePolicyReply`, `pendingKey`, `approvalResumeFnId`, `ResolvePayloadSchema`. | +| [src/approval-gate/resolve.ts](harness/src/approval-gate/resolve.ts) | Registers `approval::resolve`; persists decisions to scope `approvals`. | +| [src/approval-gate/schemas.ts](harness/src/approval-gate/schemas.ts) | `STATE_SCOPE`, wire schemas, `parsePolicyReply`, `pendingKey`, `ApprovalDecisionSchema`, `ResolvePayloadSchema`. | | [src/approval-gate/denial.ts](harness/src/approval-gate/denial.ts) | `permissionsDenyEnvelope` and related helpers. | | [src/approval-gate/redact.ts](harness/src/approval-gate/redact.ts) | `redact` / `clip` for safe `args_excerpt` on denials. | | [src/approval-gate/iii.worker.yaml](harness/src/approval-gate/iii.worker.yaml) | Worker manifest. | Related orchestrator code: -[approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts), +[on-approval.ts](harness/src/turn-orchestrator/on-approval.ts), [hook.ts](harness/src/turn-orchestrator/hook.ts). diff --git a/harness/docs/workers/context-compaction.md b/harness/docs/workers/context-compaction.md index 7a4ea3c3..1fdc3ce5 100644 --- a/harness/docs/workers/context-compaction.md +++ b/harness/docs/workers/context-compaction.md @@ -222,15 +222,17 @@ The summariser provider and model are always inherited from the session's own selection. Routing goes through `turn-orchestrator/provider-router`, so adding a provider there automatically covers `/compact`. 
-## State keys +## State scopes -All keys live under iii state scope `agent`: +Compaction-related keys use dedicated scopes (key = `session_id`): -| Key shape | Purpose | +| Scope | Purpose | |---|---| -| `session//compaction_lease` | `{ nonce, ts }` — held for up to `LEASE_TTL_SECS = 300 s`. Acquired by writing a unique nonce and reading it back; the first writer whose nonce survives wins. | -| `session//prune_lease` | Same nonce-and-readback pattern, separate key so the prune path does not block async compaction. | -| `session//last_compaction_at` | Wall-clock ms of the most recent successful compaction. Stamped by `stampLastCompaction`. | +| `compaction_lease` | `{ nonce, ts }` — held for up to `LEASE_TTL_SECS = 300 s`. | +| `prune_lease` | Same nonce-and-readback pattern, separate scope so the prune path does not block async compaction. | +| `last_compaction_at` | Wall-clock ms of the most recent successful compaction. Stamped by `stampLastCompaction`. | + +Flat transcript rewrites use scope `messages`, key `session_id` (see [flat-state.ts](harness/src/context-compaction/flat-state.ts)). ## Observability diff --git a/harness/docs/workers/harness.md b/harness/docs/workers/harness.md index 58a186f0..a3384754 100644 --- a/harness/docs/workers/harness.md +++ b/harness/docs/workers/harness.md @@ -29,7 +29,7 @@ that drive transitions; its fan-out trigger is a passive stream subscriber. ## Triggers - **Stream subscriber** on `agent::events` → `harness::fanout::agent_event_handler`. Registered by [src/harness/fanout/agent-events.ts](harness/src/harness/fanout/agent-events.ts). -- **State trigger** on `scope: session_index` (no `condition_function_id`) → `harness::fanout::session_created`. Lives in [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts). 
The turn-orchestrator writes a one-time `session_index/` marker when a session's `turn_state` is first persisted, so the trigger matches in-engine by scope alone — no per-write condition predicate. (This itself replaced an earlier 1 Hz `state::list` diff loop.) +- **State trigger** on `scope: turn_state` (no `condition_function_id`) → `harness::fanout::session_created`. Lives in [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts). The handler gates on `state:created` events where key = session id — the first persist of a turn record signals session creation. (This replaced the earlier `session_index` marker scope.) The fanout handler forwards every `agent::events` frame to the per-browser endpoint `ui::session::event::` for each browser whose @@ -40,7 +40,7 @@ evicted from the in-process subscription set. ## State keys The harness reads state but doesn't own any keys. The sessions state -trigger observes `session//turn_state` writes — those entries are +trigger observes `turn_state` scope `state:created` events — those entries are owned by the orchestrator (see [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md)). @@ -83,5 +83,5 @@ From [src/harness/iii.worker.yaml](harness/src/harness/iii.worker.yaml): | [src/harness/policy/types.ts](harness/src/harness/policy/types.ts) | `RuleSpec`, `ConstraintSpec`, `Decision`, `MatchedConstraint` types for `iii-permissions.yaml` rules and evaluation results. | | [src/harness/fanout/index.ts](harness/src/harness/fanout/index.ts) | Spawns the two fan-out pumps. | | [src/harness/fanout/agent-events.ts](harness/src/harness/fanout/agent-events.ts) | `agent::events` stream subscriber → per-browser fan-out. | -| [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts) | State-trigger handler that detects `session//turn_state` creates and fans the new session id out to every all-sessions subscriber via `ui::sessions::changed::`. 
(Filename kept for history; the implementation is no longer a poll loop.) | +| [src/harness/fanout/sessions-poll.ts](harness/src/harness/fanout/sessions-poll.ts) | State-trigger handler on scope `turn_state` that fans new session ids to every all-sessions subscriber via `ui::sessions::changed::`. | | [src/harness/iii.worker.yaml](harness/src/harness/iii.worker.yaml) | iii worker manifest (dependencies, install/start scripts). | diff --git a/harness/docs/workers/session.md b/harness/docs/workers/session.md index 60809967..852f4006 100644 --- a/harness/docs/workers/session.md +++ b/harness/docs/workers/session.md @@ -62,11 +62,11 @@ resumed approval replies in the correct transcript position when their ids are non-monotonic relative to wall-clock order. `session-inbox::*` (under the configured `session.state_scope`, default -`agent`): +`inbox`): -| Key shape | Value | -|---|---| -| `session//` | An append-only JSON array of opaque items. | +| Scope | Key | Value | +|---|---|---| +| `inbox` | `/` | An append-only JSON array of opaque items. | ## Configuration @@ -74,7 +74,7 @@ From the `session` section of [config.yaml](harness/config.yaml): - `store_backend` (default `iii_state`; alternative `memory`) — which `SessionStore` implementation `register()` instantiates. -- `state_scope` (default `agent`) — iii state scope used by +- `state_scope` (default `inbox`) — iii state scope used by `session-inbox::*`. Note: the tree backend uses its own hard-coded scopes (`session_tree:*`, `session_tree_meta`); only the inbox honours this setting. @@ -96,5 +96,5 @@ From [src/session/iii.worker.yaml](harness/src/session/iii.worker.yaml): | [src/session/tree/store.ts](harness/src/session/tree/store.ts) | `SessionStore` interface + `InMemoryStore` + `IiiStateSessionStore`. 
| | [src/session/tree/types.ts](harness/src/session/tree/types.ts) | `SessionEntry` (`message` / `custom_message` / `branch_summary` / `compaction`, each with an explicit `timestamp`), `SessionMeta`, `TreeNode`, `ReconcileResult`, `SessionError`, plus the `entryTimestamp` helper used by the `(timestamp, id)` sort. | | [src/session/inbox/handlers.ts](harness/src/session/inbox/handlers.ts) | Registers the three `session-inbox::*` functions. | -| [src/session/inbox/key.ts](harness/src/session/inbox/key.ts) | `inboxKey(name, session_id) → "session//"`. | +| [src/session/inbox/key.ts](harness/src/session/inbox/key.ts) | `inboxKey(name, session_id) → "/"` under scope `inbox`. | | [src/session/iii.worker.yaml](harness/src/session/iii.worker.yaml) | Worker manifest. | diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index 4c3a80be..dc2e231c 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -1,17 +1,17 @@ # turn-orchestrator Durable `run::start` state machine that drives each agent turn through -provisioning, assistant, function-execute, steering, and tearing-down. +provisioning, assistant, function-execute, steering, and session finish. ## Purpose This is the heart of the bundle. `run::start` opens a session and returns immediately; the rest of the work happens inside per-state durable functions (`turn::provisioning`, `turn::assistant_streaming`, …), each enqueued onto -the `turn-step` FIFO queue via `wakeState` ([wake.ts](harness/src/turn-orchestrator/wake.ts)). +the `turn-step` FIFO queue via `TurnStore.wakeStep`. Saving the record with a new non-terminal, non-parking state automatically enqueues the next handler (`saveRecord` in -[persistence.ts](harness/src/turn-orchestrator/persistence.ts) calls `shouldWakeStep` then `wakeState`). +[state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) calls `shouldWakeStep` then `wakeStep`). 
Every per-state handler is wrapped by `runTransition` ([run-transition.ts](harness/src/turn-orchestrator/run-transition.ts)): @@ -36,70 +36,68 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. - `run::start` — Persist run config and messages, seed `turn_state` to `provisioning`, and wake the FSM via `saveRecord`. - `turn::provisioning` — FSM step: build system prompt + single `agent_trigger` schema, write enriched `run_request`, advance to `assistant_streaming`. -- `turn::assistant_streaming` — FSM step: stream the turn over a provider channel; on completion emit `message_complete`, persist the assistant message (dup-guarded), route to `function_execute` / `steering_check` / `tearing_down`. -- `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call, checkpoint per-call via `writeRecord`, park to `function_awaiting_approval` on a `pending` gate reply, finalize results into messages + emit `turn_end`, route to `steering_check` / `tearing_down`. -- `turn::function_awaiting_approval` — FSM step: read decisions for `awaiting_approval[]`; fold them into `rec.work.batch` (`allow` → `pre_approved`, `deny`/`aborted` → `blocked`); clear `awaiting_approval`, advance to `function_execute`. -- `turn::steering_check` — FSM step: drain `steering`/`followup` inboxes, enforce `max_turns` cap (emits synthetic `max_turns` message + `turn_end` → `tearing_down`), route to `assistant_streaming` / `tearing_down`. -- `turn::tearing_down` — FSM step: emit `agent_end`, advance to `stopped`. +- `turn::assistant_streaming` — FSM step: stream the turn over a provider channel; on completion emit `message_complete`, persist the assistant message (dup-guarded), route to `function_execute` / `steering_check` / `stopped` (via `finishSession`). 
+- `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call, checkpoint per-call via `writeRecord`, park to `function_awaiting_approval` on a `pending` gate reply, finalize results into messages + emit `turn_end`, route to `steering_check` / `stopped` (via `finishSession`). +- `turn::function_awaiting_approval` — FSM step: read decisions for `awaiting_approval[]`; fold them into `rec.work.prepared` (`allow` → `pre_approved`, `deny`/`aborted` → `synthetic`); clear `awaiting_approval`, advance to `function_execute`. +- `turn::steering_check` — FSM step: drain `steering`/`followup` inboxes, enforce `max_turns` cap (emits synthetic `max_turns` message + `turn_end` → `stopped` via `finishSession`), route to `assistant_streaming` / `stopped`. - `turn::get_state` — One-shot reader returning a lean `TurnStateView` (from `schemas.ts:toView`) for a session. UI clients call this on reload to recover in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. Returns `null` for unknown sessions. ## Triggers -The record-written wake is now inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state calls `wakeState` directly. Similarly, `turn_state_changed` events are emitted inline from `persistRecord` via `emitTurnStateChanged` ([turn-state-write.ts](harness/src/turn-orchestrator/turn-state-write.ts)) — there is no separate `on-turn-state-changed` state trigger. +The record-written wake is now inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state calls `wakeStep` directly. Similarly, `turn_state_changed` events are emitted inline from `persistRecord` inside `TurnStore` — there is no separate `on-turn-state-changed` state trigger. 
-Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` triggers each per-call `turn::approval_resume` function (see [approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) and [workers/approval-gate.md](workers/approval-gate.md)). `recoverPendingApprovals` re-registers these resume functions at worker startup for sessions that were parked before a restart. +Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` writes a decision to scope `approvals`, which fires the reactive `turn::on_approval` state trigger (see [on-approval.ts](harness/src/turn-orchestrator/on-approval.ts) and [workers/approval-gate.md](workers/approval-gate.md)). `recoverParkedApprovals` re-wakes parked sessions at worker startup. ## Turn FSM Each state is a registered `turn::{state}` function executed via -`runTransition` and enqueued onto the `turn-step` FIFO queue by `wakeState`. -The 8 states from [state.ts](harness/src/turn-orchestrator/state.ts): +`runTransition` and enqueued onto the `turn-step` FIFO queue by `TurnStore.wakeStep`. +The 7 states from [state.ts](harness/src/turn-orchestrator/state.ts): | State | Handler file | Role | |---|---|---| -| `provisioning` | [states/provisioning.ts](harness/src/turn-orchestrator/states/provisioning.ts) | Fetch skills index + default-skill bodies, build system prompt, write enriched `run_request` (with `function_schemas: [agentTriggerTool()]`), → `assistant_streaming`. | -| `assistant_streaming` | [states/assistant-streaming.ts](harness/src/turn-orchestrator/states/assistant-streaming.ts) | Increment `turn_count`; create channel; trigger provider stream; relay `message_update` deltas; on completion call `finalizeAssistant` which emits `message_complete`, persists the assistant message (dup-guarded), then routes → `function_execute` (has calls) / `steering_check` (no calls) / `tearing_down` (error/aborted). 
| -| `function_execute` | [states/function-execute.ts](harness/src/turn-orchestrator/states/function-execute.ts) | Build batch from `rec.last_assistant` (or reuse existing `rec.work`); for each call: emit `function_execution_start`, skip if already executed, dispatch via `dispatchWithHook`; if `pending` → append to `awaiting_approval`, register `turn::approval_resume`, → `function_awaiting_approval`; otherwise commit result (silent `writeRecord` checkpoint) + emit `function_execution_end`; after batch: fold results into messages + emit `turn_end` → `steering_check` / `tearing_down`. | -| `function_awaiting_approval` | [states/function-awaiting-approval.ts](harness/src/turn-orchestrator/states/function-awaiting-approval.ts) | Read decision for each `awaiting_approval[]` entry; if any is still missing → return (park); when all present, fold into `rec.work.batch` (`allow` → `pre_approved: true`; `deny`/`aborted` → `blocked` with denial result); clear `awaiting_approval` → `function_execute`. | -| `steering_check` | [states/steering-check.ts](harness/src/turn-orchestrator/states/steering-check.ts) | Priority route: steering msg → `assistant_streaming` (unless `max_turns` reached); followup msg → `assistant_streaming` (unless `max_turns` reached); function results present → `assistant_streaming` (unless `max_turns` reached); else emit `turn_end` once → `stopped`. `max_turns` path emits a synthetic `message_complete` + `turn_end`. | -| `tearing_down` | [states/tearing-down.ts](harness/src/turn-orchestrator/states/tearing-down.ts) | Emit `agent_end` → `stopped`. | -| `stopped` | (no handler) | Terminal. Idempotent. | +| `provisioning` | [provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | Fetch skills index + default-skill bodies, build system prompt, write enriched `run_request` (with `function_schemas: [agentTriggerTool()]`), → `assistant_streaming`. 
| +| `assistant_streaming` | [assistant-streaming/process.ts](harness/src/turn-orchestrator/assistant-streaming/process.ts) | Increment `turn_count`; create channel; trigger provider stream; relay `message_update` deltas; on completion call `finalizeAssistantTurn` which emits `message_complete`, persists the assistant message (dup-guarded), then routes → `function_execute` (has calls) / `steering_check` (no calls) / `stopped` via `finishSession` (error/aborted). | +| `function_execute` | [function-execute/process.ts](harness/src/turn-orchestrator/function-execute/process.ts) | Build batch from `rec.last_assistant` (or reuse existing `rec.work`); for each call: emit `function_execution_start`, skip if already executed, dispatch via `dispatchWithHook`; if `pending` → append to `awaiting_approval`, → `function_awaiting_approval`; otherwise commit result (silent `writeRecord` checkpoint) + emit `function_execution_end`; after batch: fold results into messages + emit `turn_end` → `steering_check` / `stopped` via `finishSession`. | +| `function_awaiting_approval` | [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | Read decision for each `awaiting_approval[]` entry; if any is still missing → return (park); when all present, fold into `rec.work.prepared` (`allow` → `pre_approved`; `deny`/`aborted` → `synthetic` with denial result); clear `awaiting_approval` → `function_execute`. | +| `steering_check` | [steering-check/process.ts](harness/src/turn-orchestrator/steering-check/process.ts) | Priority route: steering msg → `assistant_streaming` (unless `max_turns` reached); followup msg → `assistant_streaming` (unless `max_turns` reached); function results present → `assistant_streaming` (unless `max_turns` reached); else emit `turn_end` once → `stopped` via `finishSession`. `max_turns` path emits a synthetic `message_complete` + `turn_end`. | +| `stopped` | (no handler) | Terminal. Idempotent. 
Session teardown (`agent_end`) happens inline via `TurnStatePorts.finishSession` before entering this state. | | `failed` | (set by `runTransition` on unexpected throw) | Terminal. Carries `error: {kind, message}` on the record. Emits `message_complete{stop_reason:'error'}` + `agent_end` so the UI sees the reason. A handler may throw `TransientError` to use the queue's retry/DLQ instead. | -`NON_STEPABLE_STATES` in [wake.ts](harness/src/turn-orchestrator/wake.ts) are +`NON_STEPABLE_STATES` in [store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) are `stopped`, `failed`, and `function_awaiting_approval` — `saveRecord` does not enqueue a handler for these. -`dispatchWithHook` returns one of three shapes: `{ kind: 'result' }`, -`{ kind: 'deny' }`, or `{ kind: 'pending' }`. `pending` triggers the -`function_awaiting_approval` park. +`dispatchWithHook` returns `{ kind: 'result', result }` or `{ kind: 'pending' }`. +Policy denies are returned as `{ kind: 'result' }` with a denied `FunctionResult`. +`pending` triggers the `function_awaiting_approval` park. -## State keys +## State scopes -All keys live under iii state scope `agent`. Key helpers are defined in -[state.ts](harness/src/turn-orchestrator/state.ts); persistence helpers in -[persistence.ts](harness/src/turn-orchestrator/persistence.ts). +Session-scoped iii state uses semantic scopes from +[state.ts](harness/src/turn-orchestrator/state.ts) with +`session_id` as the key. I/O goes through +[state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) (`TurnStore`). -| Key shape | Purpose | -|---|---| -| `session//turn_state` | Serialised `TurnStateRecord` (incl. `work?: TurnWork` and `error?: {kind, message}`). | -| `session//messages` | Active path `AgentMessage[]`; mirrored into `session-tree::*` on every save (inline in `persistence.saveMessages`). 
| -| `session//run_request` | The `run::start` payload enriched by `provisioning` to include `function_schemas: [agentTriggerTool()]` and the assembled `system_prompt`. Typed as `RunRequest` ([run-request.ts](harness/src/turn-orchestrator/run-request.ts)). | -| `session//session_tree_mirror_len` | High-water mark so the session-tree messages mirror is incremental. The session-tree mirror is still inline in `persistence.saveMessages` — its relocation to a reactive subscriber is tracked as a follow-up, not done. | -| `session//event_counter` | Monotonic counter for `agent::events` sequence numbers. | +| Scope | Key | Purpose | +|---|---|---| +| `turn_state` | `` | Serialised `TurnStateRecord` (incl. `work?: TurnWork` and `error?: {kind, message}`). | +| `messages` | `` | Active path `AgentMessage[]`; mirrored into `session-tree::*` on every save (inline in `TurnStore.saveMessages` / `appendMessages`). | +| `run_request` | `` | The `run::start` payload enriched by `provisioning` to include `function_schemas: [agentTriggerTool()]` and the assembled `system_prompt`. Typed as `RunRequest` ([run-request.ts](harness/src/turn-orchestrator/run-request.ts)). | +| `session_tree_mirror_len` | `` | High-water mark so the session-tree messages mirror is incremental. | +| `event_counter` | `` | Monotonic counter for `agent::events` sequence numbers. | Keys that no longer exist: `function_prepared`, `function_executed`, `function_schemas` (standalone), `tool_prepared`, `tool_executed`, `tool_schemas`, `sandbox_id`, `last_compaction_at`, `last_compaction_consumed_at` — these were removed in the rewrite. -The `TurnStateRecord` carries `work?: TurnWork` (inline `{batch: PreparedEntry[]; results: ExecutedEntry[]}`) in place of the former separate state keys. `PreparedEntry`, `ExecutedEntry`, and `TurnWork` are all defined in [state.ts](harness/src/turn-orchestrator/state.ts). 
+The `TurnStateRecord` carries `work?: TurnWork` (inline `{ prepared: PreparedCall[]; executed: Record }`) in place of the former separate state keys. `PreparedCall`, `ExecutedCall`, and `TurnWork` are defined in [function-execute/types.ts](harness/src/turn-orchestrator/function-execute/types.ts). ## UI events -`turn_state_changed` is emitted inline by `persistRecord` (via -[turn-state-write.ts](harness/src/turn-orchestrator/turn-state-write.ts)) -on every `saveRecord` / `persistRecord` call. It carries a lean +`turn_state_changed` is emitted inline by `TurnStore.saveRecord` on every +persist that goes through the full save path. It carries a lean `TurnStateView` (not the full `TurnStateRecord`) as `new_value` (and `old_value` when updating). `TurnStateView` is defined in [schemas.ts](harness/src/turn-orchestrator/schemas.ts) and contains: @@ -114,11 +112,9 @@ consumers. Unchanged from prior design: `dispatchWithHook` → `consultBefore` → `policy::check_permissions` (5 s timeout, fail-closed). A `needs_approval` reply returns `{ kind: 'pending' }` from `dispatchWithHook`, which parks the -session to `function_awaiting_approval` and registers a per-call -`turn::approval_resume` function. `approval::resolve` triggers that resume -function, which persists the decision to scope `approvals` and calls -`wakeFromRecord` to re-enqueue the -session's current state handler. +session to `function_awaiting_approval`. `approval::resolve` writes the +decision to scope `approvals`, which fires `turn::on_approval` and calls +`TurnStore.wakeFromRecord` to re-enqueue the session's current state handler. ## Configuration @@ -133,7 +129,7 @@ From the top-level `turn-orchestrator` section of From [src/turn-orchestrator/iii.worker.yaml](harness/src/turn-orchestrator/iii.worker.yaml): -`session ^0.2.0`, `hook-fanout ^0.2.0`, `provider-anthropic ^0.2.0`, +`session ^0.2.0`, `provider-anthropic ^0.2.0`, `provider-openai ^0.2.0`. 
## Source layout @@ -141,26 +137,23 @@ From | File | Purpose | |---|---| | [src/turn-orchestrator/main.ts](harness/src/turn-orchestrator/main.ts) | Binary entry point. | -| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions: `run::start`, per-state `turn::{state}` handlers, approval-resume recovery, `turn::get_state`. | +| [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions: `run::start`, per-state `turn::{state}` handlers, `turn::on_approval`, `turn::get_state`. | | [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state` to `provisioning` via `saveRecord` (which wakes the FSM). | | [src/turn-orchestrator/run-transition.ts](harness/src/turn-orchestrator/run-transition.ts) | Shared FSM transition runner: load → null-check → stale-skip → handle → save. Routes to `failed` on unexpected throw; re-throws `TransientError` for queue retry. | -| [src/turn-orchestrator/wake.ts](harness/src/turn-orchestrator/wake.ts) | `wakeState` / `wakeFromRecord` — enqueue `turn::{state}` onto the `turn-step` FIFO queue; `shouldWakeStep` gates non-stepable states. | -| [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `ApprovalDecisionEventSchema`. | -| [src/turn-orchestrator/run-request.ts](harness/src/turn-orchestrator/run-request.ts) | `RunRequest` type and `parseRunRequest` — the typed, parsed form of `session//run_request` (includes `function_schemas`). | +| [src/turn-orchestrator/state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) | `TurnStore` / `createTurnStore` — agent-scope load/save, `shouldWakeStep`, `wakeStep`, `wakeFromRecord`. 
| +| [src/turn-orchestrator/run-request.ts](harness/src/turn-orchestrator/run-request.ts) | `RunRequest` type and `parseRunRequest` — the typed, parsed form of scope `run_request` (includes `function_schemas`). | | [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader returning `TurnStateView \| null`. | | [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | Dispatcher chokepoint: `dispatchWithHook` (consult + trigger), `triggerFunctionCall` (trigger/decode/error), `agentTriggerTool` (schema), `unwrapAgentTrigger`. | -| [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — `policy::check_permissions` (5 s, fail-closed) → `allow` / `pending` / `deny`. `publishAfter` — `hook-fanout::publish_collect` for after-hook fanout. | -| [src/turn-orchestrator/approval-resume.ts](harness/src/turn-orchestrator/approval-resume.ts) | Per-call `turn::approval_resume` registration and handler (persist decision + `wakeFromRecord`); `recoverPendingApprovals` re-registers at startup. | -| [src/turn-orchestrator/turn-state-write.ts](harness/src/turn-orchestrator/turn-state-write.ts) | `emitTurnStateChanged` — inline UI notification emitting `turn_state_changed` with lean `TurnStateView`. Called from `persistRecord`. | -| [src/turn-orchestrator/states/provisioning.ts](harness/src/turn-orchestrator/states/provisioning.ts) | `turn::provisioning` handler. | -| [src/turn-orchestrator/states/assistant-streaming.ts](harness/src/turn-orchestrator/states/assistant-streaming.ts) | `turn::assistant_streaming` handler. | -| [src/turn-orchestrator/states/function-execute.ts](harness/src/turn-orchestrator/states/function-execute.ts) | `turn::function_execute` handler. | -| [src/turn-orchestrator/states/function-awaiting-approval.ts](harness/src/turn-orchestrator/states/function-awaiting-approval.ts) | `turn::function_awaiting_approval` handler. 
| -| [src/turn-orchestrator/states/steering-check.ts](harness/src/turn-orchestrator/states/steering-check.ts) | `turn::steering_check` handler. | -| [src/turn-orchestrator/states/tearing-down.ts](harness/src/turn-orchestrator/states/tearing-down.ts) | `turn::tearing_down` handler. | -| [src/turn-orchestrator/states/index.ts](harness/src/turn-orchestrator/states/index.ts) | Re-exports per-state `register` functions. | -| [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) | `TurnState`, `TurnStateRecord`, `TurnWork`, `PreparedEntry`, `ExecutedEntry`, `AwaitingApprovalEntry`, state-key helpers, `newRecord`, `transitionTo`. | -| [src/turn-orchestrator/persistence.ts](harness/src/turn-orchestrator/persistence.ts) | Load/save helpers: `loadRecord`, `saveRecord` (persist + wake), `persistRecord` (persist + UI event, no wake), `writeRecord` (silent checkpoint), `saveMessages` (+ session-tree mirror). | +| [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — `policy::check_permissions` (5 s, fail-closed) → `allow` / `pending` / `deny`. | +| [src/turn-orchestrator/on-approval.ts](harness/src/turn-orchestrator/on-approval.ts) | Reactive `turn::on_approval` state trigger on scope `approvals`; `recoverParkedApprovals` re-wakes parked sessions at startup. | +| [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `ApprovalDecisionEventSchema`. | +| [src/turn-orchestrator/state-runtime/ports.ts](harness/src/turn-orchestrator/state-runtime/ports.ts) | `TurnStatePorts` / `createTurnStatePorts` — shared dependency ports for per-state handlers (incl. `finishSession`). | +| [src/turn-orchestrator/provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | `turn::provisioning` handler and provisioning pipeline. 
| +| [src/turn-orchestrator/assistant-streaming/process.ts](harness/src/turn-orchestrator/assistant-streaming/process.ts) | `turn::assistant_streaming` handler and stream orchestration. | +| [src/turn-orchestrator/function-execute/process.ts](harness/src/turn-orchestrator/function-execute/process.ts) | `turn::function_execute` handler. | +| [src/turn-orchestrator/function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | `turn::function_awaiting_approval` handler. | +| [src/turn-orchestrator/steering-check/process.ts](harness/src/turn-orchestrator/steering-check/process.ts) | `turn::steering_check` handler. | +| [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) | `TurnState`, `TurnStateRecord`, `TurnWork`, `AwaitingApprovalEntry`, state-key helpers, `newRecord`, `transitionTo`. | | [src/turn-orchestrator/errors.ts](harness/src/turn-orchestrator/errors.ts) | `TransientError` (opt into queue retry), `ContextOverflowError`, `CompactionBusyError`. | | [src/turn-orchestrator/events.ts](harness/src/turn-orchestrator/events.ts) | `emit(iii, sid, event)` — appends a sequenced `AgentEvent` to the `agent::events` stream. | | [src/turn-orchestrator/preflight.ts](harness/src/turn-orchestrator/preflight.ts) | `runPreflight` — context-compaction check before each provider call. 
| diff --git a/harness/src/approval-gate/schemas.ts b/harness/src/approval-gate/schemas.ts index cccba622..13baebe1 100644 --- a/harness/src/approval-gate/schemas.ts +++ b/harness/src/approval-gate/schemas.ts @@ -102,11 +102,14 @@ export function pendingKey(session_id: string, function_call_id: string): string const approvalDecisionSchema = z.enum(['allow', 'deny', 'aborted']); -export const ApprovalResumePayloadSchema = z.object({ +export const ApprovalDecisionSchema = z.object({ decision: approvalDecisionSchema, reason: z.string().nullable(), }); +/** @deprecated Use ApprovalDecisionSchema */ +export const ApprovalResumePayloadSchema = ApprovalDecisionSchema; + export const resolveFunctionOptions = { description: 'Flip an approval to allow or deny. Persists the decision to the approvals scope to wake the parked turn.', diff --git a/harness/src/context-compaction/config.ts b/harness/src/context-compaction/config.ts index 1f9ac4a9..b6241dfa 100644 --- a/harness/src/context-compaction/config.ts +++ b/harness/src/context-compaction/config.ts @@ -9,6 +9,20 @@ const DEFAULT_TOOL_OUTPUT_MAX_CHARS = 2_000; // `busy` to users when async compaction is mid-flight. const DEFAULT_BUSY_TIMEOUT_MS = 30_000; +export const MIN_PRESERVE_RECENT_TOKENS = DEFAULT_MIN_PRESERVE_RECENT_TOKENS; +export const MAX_PRESERVE_RECENT_TOKENS = DEFAULT_MAX_PRESERVE_RECENT_TOKENS; + +export type CompactionConfig = Readonly<{ + reservedTokens: number; + tailTurns: number; + preserveRecentTokensOverride: number | undefined; + pruneProtect: number; + pruneMinFree: number; + toolOutputMaxChars: number; + busyTimeoutMs: number; + pruneProtectedTools: string[]; +}>; + function intEnv(name: string, def: number): number { const v = process.env[name]; if (!v) return def; @@ -16,41 +30,14 @@ function intEnv(name: string, def: number): number { return Number.isFinite(n) && n > 0 ? 
n : def; } -export function reservedTokens(): number { - return intEnv('COMPACT_RESERVED_TOKENS', DEFAULT_RESERVED_TOKENS); -} - -export function tailTurns(): number { - return intEnv('COMPACT_TAIL_TURNS', DEFAULT_TAIL_TURNS); -} - -export function preserveRecentTokensOverride(): number | undefined { +function readPreserveRecentTokensOverride(): number | undefined { const v = process.env.COMPACT_PRESERVE_RECENT_TOKENS; if (!v) return undefined; const n = Number.parseInt(v, 10); return Number.isFinite(n) && n > 0 ? n : undefined; } -export const MIN_PRESERVE_RECENT_TOKENS = DEFAULT_MIN_PRESERVE_RECENT_TOKENS; -export const MAX_PRESERVE_RECENT_TOKENS = DEFAULT_MAX_PRESERVE_RECENT_TOKENS; - -export function pruneProtect(): number { - return intEnv('COMPACT_PRUNE_PROTECT', DEFAULT_PRUNE_PROTECT); -} - -export function pruneMinFree(): number { - return intEnv('COMPACT_PRUNE_MIN_FREE', DEFAULT_PRUNE_MIN_FREE); -} - -export function toolOutputMaxChars(): number { - return intEnv('COMPACT_TOOL_OUTPUT_MAX_CHARS', DEFAULT_TOOL_OUTPUT_MAX_CHARS); -} - -export function busyTimeoutMs(): number { - return intEnv('COMPACT_BUSY_TIMEOUT_MS', DEFAULT_BUSY_TIMEOUT_MS); -} - -export function pruneProtectedTools(): string[] { +function readPruneProtectedTools(): string[] { const v = process.env.COMPACT_PRUNE_PROTECTED_TOOLS; if (!v) return []; return v @@ -58,3 +45,16 @@ export function pruneProtectedTools(): string[] { .map((s) => s.trim()) .filter(Boolean); } + +export function compactionConfig(): CompactionConfig { + return { + reservedTokens: intEnv('COMPACT_RESERVED_TOKENS', DEFAULT_RESERVED_TOKENS), + tailTurns: intEnv('COMPACT_TAIL_TURNS', DEFAULT_TAIL_TURNS), + preserveRecentTokensOverride: readPreserveRecentTokensOverride(), + pruneProtect: intEnv('COMPACT_PRUNE_PROTECT', DEFAULT_PRUNE_PROTECT), + pruneMinFree: intEnv('COMPACT_PRUNE_MIN_FREE', DEFAULT_PRUNE_MIN_FREE), + toolOutputMaxChars: intEnv('COMPACT_TOOL_OUTPUT_MAX_CHARS', DEFAULT_TOOL_OUTPUT_MAX_CHARS), + busyTimeoutMs: 
intEnv('COMPACT_BUSY_TIMEOUT_MS', DEFAULT_BUSY_TIMEOUT_MS), + pruneProtectedTools: readPruneProtectedTools(), + }; +} diff --git a/harness/src/context-compaction/emit.ts b/harness/src/context-compaction/emit.ts deleted file mode 100644 index 88d1ddaf..00000000 --- a/harness/src/context-compaction/emit.ts +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Shared helper for emitting `compaction_done` after sync/async - * handlers finish rewriting flat-state. Pre-extraction this exact - * try/catch + payload block lived byte-for-byte in both handlers; the - * helper keeps the two handlers in sync and gives the failure log a - * stable code for monitoring. - */ -import { logger } from '../runtime/otel.js'; -import type { ISdk } from '../runtime/iii.js'; -import { emit } from '../turn-orchestrator/events.js'; - -export type CompactionMode = 'sync' | 'async'; - -export interface CompactionDonePayload { - summary_text: string; - tokens_before: number; - compaction_entry_id: string; - /** First entry_id of the preserved tail; null when nothing was kept. */ - tail_start_id: string | null; -} - -/** - * Best-effort: a publish failure is logged but never thrown — the - * caller has already done the load-bearing work (rewriting flat - * state) and the UI marker is a nice-to-have. 
- */ -export async function emitCompactionDone( - iii: ISdk, - session_id: string, - mode: CompactionMode, - payload: CompactionDonePayload, -): Promise { - try { - await emit(iii, session_id, { - type: 'compaction_done', - mode, - summary_text: payload.summary_text, - tokens_before: payload.tokens_before, - compaction_entry_id: payload.compaction_entry_id, - tail_start_id: payload.tail_start_id, - }); - } catch (err) { - logger.warn(`handler-${mode}: compaction_done emit failed`, { - code: 'compaction_done_emit_failed', - session_id, - err: String(err), - }); - } -} diff --git a/harness/src/context-compaction/flat-state.ts b/harness/src/context-compaction/flat-state.ts index d6f5b5da..a0295dfe 100644 --- a/harness/src/context-compaction/flat-state.ts +++ b/harness/src/context-compaction/flat-state.ts @@ -1,20 +1,12 @@ /** - * Keep flatMessagesKey in sync with turn-orchestrator/state.ts::messagesKey. - * Importing it directly would create a package-layer cycle (orchestrator - * depends on context-compaction via preflight). A drift-guard test asserts - * the two stay identical. + * Rewrite flat transcript messages in scope `messages`. 
*/ import type { ISdk } from '../runtime/iii.js'; import { stateSet } from '../runtime/state.js'; +import { MESSAGES_SCOPE } from '../turn-orchestrator/state.js'; import type { AgentMessage, AssistantMessage } from '../types/agent-message.js'; -const FLAT_STATE_SCOPE = 'agent'; - -export function flatMessagesKey(session_id: string): string { - return `session/${session_id}/messages`; -} - export function buildSummaryMessage(summary_text: string): AssistantMessage { return { role: 'assistant', @@ -39,5 +31,5 @@ export async function rewriteFlatMessages( session_id: string, messages: AgentMessage[], ): Promise { - await stateSet(iii, FLAT_STATE_SCOPE, flatMessagesKey(session_id), messages); + await stateSet(iii, MESSAGES_SCOPE, session_id, messages); } diff --git a/harness/src/context-compaction/handler-async.ts b/harness/src/context-compaction/handler-async.ts index 2504ff52..b025aff2 100644 --- a/harness/src/context-compaction/handler-async.ts +++ b/harness/src/context-compaction/handler-async.ts @@ -7,14 +7,16 @@ import { setCurrentSpanAttribute, withSpan } from 'iii-sdk/telemetry'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { emitCompactionDone } from './emit.js'; -import { pruneMinFree, pruneProtect, pruneProtectedTools, reservedTokens } from './config.js'; -import { buildSummaryMessage, rewriteFlatMessages } from './flat-state.js'; +import { compactionConfig } from './config.js'; +import { + isSummarizeOk, + persistCompactionFlatState, + publishCompactionDone, + runSummarizeCompaction, +} from './handler-pipeline.js'; import { acquireLease, releaseLease } from './lease.js'; import { fetchModelLimit } from './model-resolver.js'; -import { type ModelLimit, isOverflow } from './overflow.js'; -import { prune } from './prune.js'; -import { summarizeAndAppend } from './summarize.js'; +import { isOverflow } from './overflow.js'; export function extractEventPayload( payload: unknown, @@ -53,10 +55,9 @@ export 
function turnEndUsage(event: unknown): Record | null { type ResolvedModel = { providerID: string; modelID: string; - modelLimit: ModelLimit; + modelLimit: { context: number; input: number; output: number }; } | null; -// Priority: event.message → last assistant in session-tree → models::get. async function resolveModelFromEvent( iii: ISdk, session_id: string, @@ -144,7 +145,7 @@ export async function handleAsync(iii: ISdk, frame: unknown): Promise { !isOverflow({ tokens: usageObj, model: { id: model.modelID, limit: model.modelLimit }, - reserved: reservedTokens(), + reserved: compactionConfig().reservedTokens, }) ) { return; @@ -159,38 +160,22 @@ export async function handleAsync(iii: ISdk, frame: unknown): Promise { } try { - await prune(iii, payload.session_id, { - protectTokens: pruneProtect(), - minFree: pruneMinFree(), - protectedTools: pruneProtectedTools(), - }); - const result = await summarizeAndAppend( + const result = await runSummarizeCompaction( iii, payload.session_id, { mode: 'async' }, - { - providerID: model.providerID, - modelID: model.modelID, - modelLimit: model.modelLimit, - }, + model, ); - const succeeded = result !== 'empty' && result.kind === 'ok'; - setCurrentSpanAttribute('used_prior_summary', succeeded); - if (succeeded) { - await rewriteFlatMessages(iii, payload.session_id, [ - buildSummaryMessage(result.summary_text), - ...result.tail_messages, - ]); - // Tell the UI we just compacted so it can insert a marker and - // re-estimate context usage. Best-effort: a publish failure must - // not leak out of the background handler. 
- await emitCompactionDone(iii, payload.session_id, 'async', { - summary_text: result.summary_text, - tokens_before: result.tokens_before, - compaction_entry_id: result.compaction_entry_id, - tail_start_id: result.tail_start_id, - }); + setCurrentSpanAttribute('used_prior_summary', isSummarizeOk(result)); + if (isSummarizeOk(result)) { + await persistCompactionFlatState( + iii, + payload.session_id, + result.summary_text, + result.tail_messages, + ); + await publishCompactionDone(iii, payload.session_id, 'async', result); } } catch (err) { logger.warn('handler-async: compaction failed', { diff --git a/harness/src/context-compaction/handler-pipeline.ts b/harness/src/context-compaction/handler-pipeline.ts new file mode 100644 index 00000000..7613d7e3 --- /dev/null +++ b/harness/src/context-compaction/handler-pipeline.ts @@ -0,0 +1,116 @@ +/** + * Shared prune → summarize → flat-state rewrite path for sync and async handlers. + */ + +import { logger } from '../runtime/otel.js'; +import type { ISdk } from '../runtime/iii.js'; +import { emit } from '../turn-orchestrator/events.js'; +import type { AgentMessage } from '../types/agent-message.js'; +import { compactionConfig } from './config.js'; +import { buildSummaryMessage, rewriteFlatMessages } from './flat-state.js'; +import type { ModelLimit } from './overflow.js'; +import { prune } from './prune.js'; +import { + type SummarizeOk, + type SummarizeOptions, + type SummarizeOutcome, + summarizeAndAppend, +} from './summarize.js'; + +export type CompactionMode = 'sync' | 'async'; + +export interface CompactionDonePayload { + summary_text: string; + tokens_before: number; + compaction_entry_id: string; + /** First entry_id of the preserved tail; null when nothing was kept. */ + tail_start_id: string | null; +} + +/** + * Best-effort: a publish failure is logged but never thrown — the + * caller has already done the load-bearing work (rewriting flat + * state) and the UI marker is a nice-to-have. 
+ */ +async function emitCompactionDone( + iii: ISdk, + session_id: string, + mode: CompactionMode, + payload: CompactionDonePayload, +): Promise { + try { + await emit(iii, session_id, { + type: 'compaction_done', + mode, + summary_text: payload.summary_text, + tokens_before: payload.tokens_before, + compaction_entry_id: payload.compaction_entry_id, + tail_start_id: payload.tail_start_id, + }); + } catch (err) { + logger.warn(`handler-${mode}: compaction_done emit failed`, { + code: 'compaction_done_emit_failed', + session_id, + err: String(err), + }); + } +} + +export type CompactionModel = { + providerID: string; + modelID: string; + modelLimit: ModelLimit; +}; + +export async function pruneSessionToolOutputs(iii: ISdk, session_id: string): Promise { + const cfg = compactionConfig(); + await prune(iii, session_id, { + protectTokens: cfg.pruneProtect, + minFree: cfg.pruneMinFree, + protectedTools: cfg.pruneProtectedTools, + }); +} + +export async function runSummarizeCompaction( + iii: ISdk, + session_id: string, + options: SummarizeOptions, + model: CompactionModel, +): Promise { + await pruneSessionToolOutputs(iii, session_id); + return summarizeAndAppend(iii, session_id, options, { + providerID: model.providerID, + modelID: model.modelID, + modelLimit: model.modelLimit, + }); +} + +export async function persistCompactionFlatState( + iii: ISdk, + session_id: string, + summary_text: string, + tail_messages: AgentMessage[], + extra?: AgentMessage[], +): Promise { + const messages: AgentMessage[] = [buildSummaryMessage(summary_text), ...tail_messages]; + if (extra) messages.push(...extra); + await rewriteFlatMessages(iii, session_id, messages); +} + +export async function publishCompactionDone( + iii: ISdk, + session_id: string, + mode: CompactionMode, + result: SummarizeOk, +): Promise { + await emitCompactionDone(iii, session_id, mode, { + summary_text: result.summary_text, + tokens_before: result.tokens_before, + compaction_entry_id: result.compaction_entry_id, 
+ tail_start_id: result.tail_start_id, + }); +} + +export function isSummarizeOk(result: SummarizeOutcome): result is SummarizeOk { + return result !== 'empty' && result.kind === 'ok'; +} diff --git a/harness/src/context-compaction/handler-sync.ts b/harness/src/context-compaction/handler-sync.ts index fe23abdd..fce870f3 100644 --- a/harness/src/context-compaction/handler-sync.ts +++ b/harness/src/context-compaction/handler-sync.ts @@ -8,15 +8,16 @@ import { setCurrentSpanAttribute, withSpan } from 'iii-sdk/telemetry'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { emitCompactionDone } from './emit.js'; import type { AgentMessage } from '../types/agent-message.js'; -import { busyTimeoutMs, pruneMinFree, pruneProtect, pruneProtectedTools } from './config.js'; -import { buildSummaryMessage, rewriteFlatMessages } from './flat-state.js'; +import { compactionConfig } from './config.js'; +import { + persistCompactionFlatState, + publishCompactionDone, + runSummarizeCompaction, +} from './handler-pipeline.js'; import { acquireLeaseWithWait, releaseLease } from './lease.js'; import type { ModelLimit } from './overflow.js'; -import { prune } from './prune.js'; import { type MessageWithEntryId, extractReplayTarget, reinjectReplay } from './replay.js'; -import { summarizeAndAppend } from './summarize.js'; export type CompactNowInput = { session_id: string; @@ -43,14 +44,18 @@ export async function handleSync(iii: ISdk, input: CompactNowInput): Promise } @@ -73,14 +78,7 @@ export async function handleSync(iii: ISdk, input: CompactNowInput): Promise replay user msg -> synthetic continue. 
let lastEntryId = result.compaction_entry_id || null; if (replay) { lastEntryId = await reinjectReplay(iii, input.session_id, replay, lastEntryId); @@ -119,26 +114,15 @@ export async function handleSync(iii: ISdk, input: CompactNowInput): Promise { - const key = leaseKey(session_id, kind); + const scope = leaseScope(kind); + const key = session_id; const now_ms = Date.now(); const now_secs = Math.floor(now_ms / 1000); // Fast path: skip the atomic set when a valid lease is clearly held. - const existing = await stateGet(iii, STATE_SCOPE, key); + const existing = await stateGet(iii, scope, key); if (existing && isLeaseActive(existing, now_secs)) return null; const nonce = mintLeaseNonce(); // path: '' targets FieldPath::root in the engine — set the whole value // atomically. Without `path`, the engine fails to deserialize the op and // stateUpdate falls into its catch + returns null. - const result = await stateUpdate(iii, STATE_SCOPE, key, [ + const result = await stateUpdate(iii, scope, key, [ { type: 'set', path: '', value: { nonce, ts: now_ms } }, ]); // stateUpdate swallows backend errors and returns null. Treat a null @@ -67,7 +71,7 @@ export async function acquireLease( // and bow out. stateUpdate is atomic, so only one caller can see // old_value == null (or expired) — exactly one winner. if (oldValue && isLeaseActive(oldValue, now_secs)) { - await stateSet(iii, STATE_SCOPE, key, oldValue); + await stateSet(iii, scope, key, oldValue); return null; } return nonce; @@ -84,8 +88,9 @@ export async function releaseLease( ourNonce: string, kind: LeaseKind = 'compaction', ): Promise { - const key = leaseKey(session_id, kind); - const stored = await stateGet(iii, STATE_SCOPE, key); + const scope = leaseScope(kind); + const key = session_id; + const stored = await stateGet(iii, scope, key); const storedNonce = stored && typeof stored === 'object' && @@ -93,12 +98,12 @@ export async function releaseLease( ? 
((stored as Record).nonce as string) : null; if (storedNonce === ourNonce) { - await stateSet(iii, STATE_SCOPE, key, null); + await stateSet(iii, scope, key, null); } } export async function stampLastCompaction(iii: ISdk, session_id: string): Promise { - await stateSet(iii, STATE_SCOPE, `session/${session_id}/last_compaction_at`, Date.now()); + await stateSet(iii, LAST_COMPACTION_AT_SCOPE, session_id, Date.now()); } export async function acquireLeaseWithWait( diff --git a/harness/src/context-compaction/model-resolver.ts b/harness/src/context-compaction/model-resolver.ts index ea012bfc..996f1e2b 100644 --- a/harness/src/context-compaction/model-resolver.ts +++ b/harness/src/context-compaction/model-resolver.ts @@ -1,3 +1,4 @@ +import { RUN_REQUEST_SCOPE } from '../turn-orchestrator/state.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { ModelLimit } from './overflow.js'; @@ -88,7 +89,7 @@ export async function resolveModelFromSession( // Fallback when no assistant message carries provider/model yet (first-turn // sessions, error-only sessions). The orchestrator writes run_request at -// agent::session//run_request during run::start. +// `run_request` scope during run::start. export async function resolveModelFromRunRequest( iii: ISdk, session_id: string, @@ -96,7 +97,7 @@ export async function resolveModelFromRunRequest( try { const req = await iii.trigger({ function_id: 'state::get', - payload: { scope: 'agent', key: `session/${session_id}/run_request` }, + payload: { scope: RUN_REQUEST_SCOPE, key: session_id }, timeoutMs: 5_000, }); const providerID = typeof req?.provider === 'string' && req.provider ? 
req.provider : null; diff --git a/harness/src/context-compaction/overflow.ts b/harness/src/context-compaction/overflow.ts index 0b88a60f..dfc9cca0 100644 --- a/harness/src/context-compaction/overflow.ts +++ b/harness/src/context-compaction/overflow.ts @@ -1,8 +1,7 @@ import { MAX_PRESERVE_RECENT_TOKENS, MIN_PRESERVE_RECENT_TOKENS, - preserveRecentTokensOverride, - reservedTokens, + compactionConfig, } from './config.js'; export type ModelLimit = { @@ -27,7 +26,7 @@ export type TokensLike = { export function usable(input: { model: ModelLike; reserved?: number }): number { const { model } = input; if (model.limit.context === 0) return 0; - const reserved = input.reserved ?? reservedTokens(); + const reserved = input.reserved ?? compactionConfig().reservedTokens; const base = model.limit.input > 0 ? Math.max(0, model.limit.input - reserved) @@ -58,7 +57,7 @@ export function preserveRecentBudget(input: { reserved?: number; override?: number; }): number { - const ovr = input.override ?? preserveRecentTokensOverride(); + const ovr = input.override ?? 
compactionConfig().preserveRecentTokensOverride; if (ovr !== undefined) return ovr; const u = usable({ model: input.model, reserved: input.reserved }); return Math.min( diff --git a/harness/src/context-compaction/register.ts b/harness/src/context-compaction/register.ts index 30e61626..9d407cf6 100644 --- a/harness/src/context-compaction/register.ts +++ b/harness/src/context-compaction/register.ts @@ -1,6 +1,6 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { pruneMinFree, pruneProtect, pruneProtectedTools } from './config.js'; +import { compactionConfig } from './config.js'; import { handleAsync } from './handler-async.js'; import { type CompactNowInput, handleSync } from './handler-sync.js'; import { acquireLease, releaseLease } from './lease.js'; @@ -118,10 +118,11 @@ export async function register(iii: ISdk): Promise { const nonce = await acquireLease(iii, session_id, 'prune'); if (!nonce) return { pruned_tokens: 0, pruned_parts: 0, scanned_parts: 0, busy: true }; try { + const cfg = compactionConfig(); return await prune(iii, session_id, { - protectTokens: pruneProtect(), - minFree: pruneMinFree(), - protectedTools: pruneProtectedTools(), + protectTokens: cfg.pruneProtect, + minFree: cfg.pruneMinFree, + protectedTools: cfg.pruneProtectedTools, }); } finally { await releaseLease(iii, session_id, nonce, 'prune'); diff --git a/harness/src/context-compaction/summarize.ts b/harness/src/context-compaction/summarize.ts index 2b634405..9e61a563 100644 --- a/harness/src/context-compaction/summarize.ts +++ b/harness/src/context-compaction/summarize.ts @@ -2,7 +2,7 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import { decide, targetFunctionId } from '../turn-orchestrator/provider-router.js'; import type { AgentMessage, AssistantMessage } from '../types/agent-message.js'; -import { preserveRecentTokensOverride, tailTurns, toolOutputMaxChars } from './config.js'; +import 
{ compactionConfig } from './config.js'; import { stampLastCompaction } from './lease.js'; import { type ModelLimit, preserveRecentBudget } from './overflow.js'; import { @@ -139,15 +139,16 @@ export async function summarizeAndAppend( const prior = completedCompactions(await loadCompactionEntries(iii, session_id)); const previousSummary = prior.at(-1)?.summary; + const cfg = compactionConfig(); const budget = preserveRecentBudget({ model: { id: model.modelID, limit: model.modelLimit }, - override: preserveRecentTokensOverride(), + override: cfg.preserveRecentTokensOverride, }); const sel = selectWithEntryIds({ entries, budget, - tailTurns: tailTurns(), + tailTurns: cfg.tailTurns, estimate: estimateTokenCount, }); if (sel.head.length === 0) { @@ -157,7 +158,7 @@ export async function summarizeAndAppend( const head_messages = sel.head.map((e) => e.message); const tail_messages: AgentMessage[] = entries.slice(sel.head.length).map((e) => e.message); const tokens_before = estimateTokenCount(head_messages); - const stripped = stripMedia(head_messages, { toolOutputMaxChars: toolOutputMaxChars() }); + const stripped = stripMedia(head_messages, { toolOutputMaxChars: cfg.toolOutputMaxChars }); const systemPrompt = buildPrompt({ previousSummary, context: [] }); const userPrompt = renderUserPrompt(stripped); diff --git a/harness/src/harness/fanout/sessions-poll.ts b/harness/src/harness/fanout/sessions-poll.ts index b5198ed6..2355c68c 100644 --- a/harness/src/harness/fanout/sessions-poll.ts +++ b/harness/src/harness/fanout/sessions-poll.ts @@ -1,16 +1,15 @@ import type { ISdk, Trigger } from '../../runtime/iii.js'; import { logger } from '../../runtime/otel.js'; -import { SESSION_INDEX_SCOPE } from '../../turn-orchestrator/state.js'; +import { TURN_STATE_SCOPE } from '../../turn-orchestrator/state.js'; import type { FanoutState } from '../ui-subscribe.js'; export const SESSION_CREATED_HANDLER_FN_ID = 'harness::fanout::session_created'; /** - * A new session is signalled by a 
one-time marker write on the dedicated - * `session_index` scope (key = session id), made by the turn-orchestrator when - * `turn_state` is first persisted. The state trigger matches that scope in - * engine — no `condition_function_id` RPC per agent-scope write — so this - * handler is the sole gate: it acts only on the `state:created` marker. + * A new session is signalled by the first `state:created` write on scope + * `turn_state` (key = session id). The state trigger matches that scope in + * engine — no `condition_function_id` RPC per turn_state update — so this + * handler is the sole gate: it acts only on `state:created`. */ function sessionCreatedId(event: unknown): string | null { const obj = (event ?? {}) as Record; @@ -48,7 +47,7 @@ export function spawnSessionsPoll(iii: ISdk, state: FanoutState): () => void { trigger = iii.registerTrigger({ type: 'state', function_id: SESSION_CREATED_HANDLER_FN_ID, - config: { scope: SESSION_INDEX_SCOPE }, + config: { scope: TURN_STATE_SCOPE }, }); } catch (err) { logger.warn('sessions state trigger registration failed', { err: String(err) }); diff --git a/harness/src/runtime/state.ts b/harness/src/runtime/state.ts index d06500a0..01762249 100644 --- a/harness/src/runtime/state.ts +++ b/harness/src/runtime/state.ts @@ -94,57 +94,79 @@ export function createState(iii: ISdk, opts: CreateStateOptions = {}): IState { return { get: (input: StateGetInput): Promise => - run('state::get', { scope: input.scope, key: input.key }, async () => { - const v = await iii.trigger({ - function_id: 'state::get', - payload: input, - }); - return normalizeGetResult(v); - }, null), + run( + 'state::get', + { scope: input.scope, key: input.key }, + async () => { + const v = await iii.trigger({ + function_id: 'state::get', + payload: input, + }); + return normalizeGetResult(v); + }, + null, + ), set: (input: StateSetInput): Promise | null> => - run('state::set', { scope: input.scope, key: input.key }, async () => { - const result = await 
iii.trigger>({ - function_id: 'state::set', - payload: input, - }); - return result ?? null; - }, null), + run( + 'state::set', + { scope: input.scope, key: input.key }, + async () => { + const result = await iii.trigger>({ + function_id: 'state::set', + payload: input, + }); + return result ?? null; + }, + null, + ), delete: (input: StateDeleteInput): Promise => - run('state::delete', { scope: input.scope, key: input.key }, async () => { - const result = await iii.trigger({ - function_id: 'state::delete', - payload: input, - }); - return result ?? {}; - }, {}), + run( + 'state::delete', + { scope: input.scope, key: input.key }, + async () => { + const result = await iii.trigger({ + function_id: 'state::delete', + payload: input, + }); + return result ?? {}; + }, + {}, + ), list: (input: StateListInput): Promise => - run('state::list', { scope: input.scope }, async () => { - const resp = await iii.trigger({ - function_id: 'state::list', - payload: input, - }); - return parseStateListValues(resp); - }, []), + run( + 'state::list', + { scope: input.scope }, + async () => { + const resp = await iii.trigger({ + function_id: 'state::list', + payload: input, + }); + return parseStateListValues(resp); + }, + [], + ), update: (input: StateUpdateInput): Promise | null> => - run('state::update', { scope: input.scope, key: input.key }, async () => { - const result = await iii.trigger>({ - function_id: 'state::update', - payload: input, - }); - return result ?? null; - }, null), + run( + 'state::update', + { scope: input.scope, key: input.key }, + async () => { + const result = await iii.trigger>({ + function_id: 'state::update', + payload: input, + }); + return result ?? null; + }, + null, + ), }; } /** Lists all scope names that contain state data. 
*/ -export async function stateListGroups( - iii: ISdk, - opts: CreateStateOptions = {}, -): Promise { +export async function stateListGroups(iii: ISdk, opts: CreateStateOptions = {}): Promise { const tolerant = opts.tolerant !== false; try { const result = await iii.trigger, StateListGroupsResult | string[]>({ diff --git a/harness/src/session/config.ts b/harness/src/session/config.ts index 21506dde..10592c7a 100644 --- a/harness/src/session/config.ts +++ b/harness/src/session/config.ts @@ -10,6 +10,6 @@ export function loadSessionConfig(cfg: Record): SessionConfig { const backend = getString(section, 'store_backend', 'iii_state'); return { store_backend: backend === 'memory' ? 'memory' : 'iii_state', - state_scope: getString(section, 'state_scope', 'agent'), + state_scope: getString(section, 'state_scope', 'inbox'), }; } diff --git a/harness/src/session/inbox/key.ts b/harness/src/session/inbox/key.ts index 1ba41883..cd1ea580 100644 --- a/harness/src/session/inbox/key.ts +++ b/harness/src/session/inbox/key.ts @@ -1,3 +1,3 @@ export function inboxKey(name: string, session_id: string): string { - return `session/${session_id}/${name}`; + return `${session_id}/${name}`; } diff --git a/harness/src/turn-orchestrator/agent-trigger.ts b/harness/src/turn-orchestrator/agent-trigger.ts index 67faa8f2..294ffd56 100644 --- a/harness/src/turn-orchestrator/agent-trigger.ts +++ b/harness/src/turn-orchestrator/agent-trigger.ts @@ -15,10 +15,7 @@ import { type DenialEnvelope, consultBefore, gateUnavailableEnvelope } from './h export const TOOL_NAME = 'agent_trigger'; -export type DispatchResult = - | { kind: 'result'; result: FunctionResult } - | { kind: 'deny'; result: FunctionResult } - | { kind: 'pending' }; +export type DispatchResult = { kind: 'result'; result: FunctionResult } | { kind: 'pending' }; export function missingFunctionResult(): FunctionResult { return errorResult({ @@ -28,7 +25,6 @@ export function missingFunctionResult(): FunctionResult { } export function 
unwrapAgentTrigger(fc: FunctionCall): FunctionCall { - if (fc.function_id !== TOOL_NAME) return fc; const args = (fc.arguments ?? {}) as Record; const fn = typeof args.function === 'string' ? args.function : ''; const payload = args.payload ?? {}; @@ -198,7 +194,7 @@ export async function dispatchWithHook( ): Promise { const outcome = await consultBefore(iii, function_call); if (outcome.kind === 'deny') { - return { kind: 'deny', result: denialResult(outcome.denial) }; + return { kind: 'result', result: denialResult(outcome.denial) }; } if (outcome.kind === 'pending') { return { kind: 'pending' }; diff --git a/harness/src/turn-orchestrator/assistant-streaming/ports.ts b/harness/src/turn-orchestrator/assistant-streaming/ports.ts new file mode 100644 index 00000000..29cc1817 --- /dev/null +++ b/harness/src/turn-orchestrator/assistant-streaming/ports.ts @@ -0,0 +1,137 @@ +/** + * Typed dependency ports and domain types for assistant_streaming. + */ + +import { z } from 'zod'; +import { logger } from '../../runtime/otel.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; +import type { AgentFunction } from '../../types/function.js'; +import type { AssistantMessageEvent } from '../../types/stream-event.js'; +import { AgentFunctionSchema } from '../../types/provider.js'; +import { emit } from '../events.js'; +import { runPreflight } from '../preflight.js'; +import { buildInput, targetFunctionId, type RouteDecision } from '../provider-router.js'; +import { streamProviderTurn } from '../provider-stream.js'; +import type { RunRequest } from '../run-request.js'; +import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; +import { isDuplicateAssistant } from '../state-runtime/transcript.js'; + +export type StreamContext = { + session_id: string; + decision: RouteDecision; + system_prompt: string; + tools: AgentFunction[]; + messages: AgentMessage[]; +}; + 
+export type StreamTurnOutcome = { + final: AssistantMessage | null; + error: string | null; + body_streamed: boolean; +}; + +export type AssistantRoute = + | { kind: 'stopped'; reason: 'error' | 'aborted' } + | { kind: 'function_execute' } + | { kind: 'steering_check' }; + +export function parseFunctionSchemas(raw: unknown[]): AgentFunction[] { + return z.array(AgentFunctionSchema).parse(raw) as AgentFunction[]; +} + +export function hasFunctionCalls(asst: AssistantMessage): boolean { + return asst.content.some((b) => b.type === 'function_call'); +} + +export function isErrorOrAborted(asst: AssistantMessage): boolean { + return asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; +} + +export class AssistantStreamingInvariantError extends Error { + constructor(message: string) { + super(message); + this.name = 'AssistantStreamingInvariantError'; + } +} + +export type DeltaHandler = ( + partial: AssistantMessage, + event: AssistantMessageEvent, +) => Promise; + +export type AssistantStreamingPorts = TurnStatePorts & { + loadRunRequest(session_id: string): Promise; + runPreflight( + session_id: string, + messages: AgentMessage[], + provider: string, + model: string, + ): Promise<'ok' | 'compacted'>; + streamTurn( + ctx: StreamContext, + onDelta: DeltaHandler, + ): Promise<{ final: AssistantMessage | null; error: string | null }>; + emitMessageUpdate( + session_id: string, + message: AssistantMessage, + event: AssistantMessageEvent, + ): Promise; + emitMessageComplete( + session_id: string, + message: AssistantMessage, + body_streamed: boolean, + ): Promise; + persistAssistantIfNew(session_id: string, asst: AssistantMessage): Promise; +}; + +export function createStreamingPorts(iii: ISdk): AssistantStreamingPorts { + const base = createTurnStatePorts(iii); + + return { + ...base, + + async runPreflight(session_id, messages, provider, model) { + return runPreflight(iii, session_id, messages, provider, model); + }, + + async streamTurn(ctx, onDelta) { + 
const { final, error } = await streamProviderTurn(iii, { + session_id: ctx.session_id, + targetFn: targetFunctionId(ctx.decision), + buildInput: (writerRef) => + buildInput(ctx.decision, writerRef, ctx.system_prompt, ctx.messages, ctx.tools), + onDelta, + }); + return { final, error }; + }, + + async emitMessageUpdate(session_id, message, event) { + await emit(iii, session_id, { + type: 'message_update', + message, + llm_event: event, + }); + }, + + async emitMessageComplete(session_id, message, body_streamed) { + await emit(iii, session_id, { + type: 'message_complete', + message, + body_streamed, + }); + }, + + async persistAssistantIfNew(session_id, asst) { + const messages = await base.loadMessages(session_id); + if (isDuplicateAssistant(messages, asst)) { + logger.warn('finalizeAssistant: skipping duplicate assistant push (re-entry detected)', { + session_id, + timestamp: asst.timestamp, + }); + return; + } + await base.appendMessages(session_id, [asst]); + }, + }; +} diff --git a/harness/src/turn-orchestrator/assistant-streaming/process.ts b/harness/src/turn-orchestrator/assistant-streaming/process.ts new file mode 100644 index 00000000..2f4b45df --- /dev/null +++ b/harness/src/turn-orchestrator/assistant-streaming/process.ts @@ -0,0 +1,172 @@ +/** + * Stream one provider turn, persist the assistant message, route onward, and register the FSM step. 
+ */ + +import type { ISdk } from '../../runtime/iii.js'; +import type { AssistantMessage } from '../../types/agent-message.js'; +import { decide } from '../provider-router.js'; +import { runTransition } from '../run-transition.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; +import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import { + AssistantStreamingInvariantError, + createStreamingPorts, + hasFunctionCalls, + isErrorOrAborted, + parseFunctionSchemas, + type AssistantRoute, + type AssistantStreamingPorts, + type StreamContext, + type StreamTurnOutcome, +} from './ports.js'; + +export function beginTurn(rec: TurnStateRecord): void { + rec.turn_count++; + rec.turn_end_emitted = false; + rec.assistant_body_streamed = false; +} + +export async function prepareStreamContext( + ports: AssistantStreamingPorts, + rec: TurnStateRecord, +): Promise { + const request = await ports.loadRunRequest(rec.session_id); + let messages = await ports.loadMessages(rec.session_id); + const { provider, model, system_prompt, function_schemas } = request; + const decision = decide({ provider, model }); + const tools = parseFunctionSchemas(function_schemas); + + if ( + (await ports.runPreflight(rec.session_id, messages, decision.provider, model)) === 'compacted' + ) { + messages = await ports.loadMessages(rec.session_id); + } + + return { + session_id: rec.session_id, + decision, + system_prompt, + tools, + messages, + }; +} + +export async function runStreamTurn( + ports: AssistantStreamingPorts, + session_id: string, + ctx: StreamContext, +): Promise { + let body_streamed = false; + + const { final, error } = await ports.streamTurn(ctx, async (partial, event) => { + await ports.emitMessageUpdate(session_id, partial, event); + if (event.type === 'text_delta' || event.type === 'thinking_delta') { + 
body_streamed = true; + } + }); + + return { final, error, body_streamed }; +} + +export function resolveAssistantMessage( + outcome: StreamTurnOutcome, + decision: StreamContext['decision'], +): AssistantMessage { + if (outcome.final) return outcome.final; + + const reason = outcome.error ?? 'provider channel closed without final'; + return syntheticAssistant({ + stop_reason: 'error', + text: reason, + provider: decision.provider, + model: decision.model, + }); +} + +/** Reason text for a synthetic error update when the provider did not return a final message. */ +export function syntheticStreamReason(outcome: StreamTurnOutcome): string | null { + if (outcome.final) return null; + return outcome.error ?? 'provider channel closed without final'; +} + +export function routeAssistantTurn(asst: AssistantMessage): AssistantRoute { + if (isErrorOrAborted(asst)) { + return { + kind: 'stopped', + reason: asst.stop_reason === 'aborted' ? 'aborted' : 'error', + }; + } + if (hasFunctionCalls(asst)) { + return { kind: 'function_execute' }; + } + return { kind: 'steering_check' }; +} + +export async function finalizeAssistantTurn( + ports: AssistantStreamingPorts, + rec: TurnStateRecord, +): Promise { + const asst = rec.last_assistant; + if (!asst) { + throw new AssistantStreamingInvariantError( + 'assistant_streaming finalize without last_assistant', + ); + } + + await ports.emitMessageComplete(rec.session_id, asst, rec.assistant_body_streamed === true); + + const route = routeAssistantTurn(asst); + + if (route.kind === 'stopped') { + await emitTurnEndOnce(ports, rec, asst); + await ports.finishSession(rec); + return; + } + + await ports.persistAssistantIfNew(rec.session_id, asst); + + if (route.kind === 'function_execute') { + rec.function_results = []; + rec.work = undefined; + transitionTo(rec, 'function_execute'); + return; + } + + transitionTo(rec, 'steering_check'); +} + +export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { + const ports = 
createStreamingPorts(iii); + beginTurn(rec); + const ctx = await prepareStreamContext(ports, rec); + const outcome = await runStreamTurn(ports, rec.session_id, ctx); + rec.last_assistant = resolveAssistantMessage(outcome, ctx.decision); + rec.assistant_body_streamed = outcome.body_streamed; + + const syntheticReason = syntheticStreamReason(outcome); + if (syntheticReason) { + await ports.emitMessageUpdate(rec.session_id, rec.last_assistant, { + type: 'text_delta', + partial: rec.last_assistant, + delta: syntheticReason, + }); + } + + await finalizeAssistantTurn(ports, rec); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::assistant_streaming', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'assistant_streaming', handleStreaming, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state assistant_streaming: start turn, stream provider response, finalize, and route onward.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/estimate.ts b/harness/src/turn-orchestrator/estimate.ts deleted file mode 100644 index 13233242..00000000 --- a/harness/src/turn-orchestrator/estimate.ts +++ /dev/null @@ -1,12 +0,0 @@ -/** - * Cheap chars/4 token estimate used for pre-flight overflow detection. - * Same heuristic as context-compaction's estimateTokenCount. 
- */ - -import type { AgentMessage } from '../types/agent-message.js'; - -export function estimateMessages(messages: AgentMessage[]): number { - let chars = 0; - for (const m of messages) chars += JSON.stringify(m).length; - return Math.floor(chars / 4); -} diff --git a/harness/src/turn-orchestrator/events.ts b/harness/src/turn-orchestrator/events.ts index 8ea5b95e..75e0ab1c 100644 --- a/harness/src/turn-orchestrator/events.ts +++ b/harness/src/turn-orchestrator/events.ts @@ -7,7 +7,8 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentEvent } from '../types/agent-event.js'; -import { AGENT_SCOPE, eventCounterKey } from './state.js'; + +const EVENT_COUNTER_SCOPE = 'event_counter'; export const EVENTS_STREAM = 'agent::events'; /** @@ -52,8 +53,8 @@ async function nextSeq(iii: ISdk, session_id: string): Promise { const resp = await iii.trigger({ function_id: 'state::update', payload: { - scope: AGENT_SCOPE, - key: eventCounterKey(session_id), + scope: EVENT_COUNTER_SCOPE, + key: session_id, ops: [{ type: 'increment', path: '', by: 1 }], }, }); diff --git a/harness/src/turn-orchestrator/finish.ts b/harness/src/turn-orchestrator/finish.ts deleted file mode 100644 index c66e75a0..00000000 --- a/harness/src/turn-orchestrator/finish.ts +++ /dev/null @@ -1,16 +0,0 @@ -/** - * Terminal teardown: emit the final `agent_end` with the full transcript and - * stop the session. Called inline by the FSM paths that end a turn (replaces - * the former standalone teardown state). 
- */ - -import type { ISdk } from '../runtime/iii.js'; -import { emit } from './events.js'; -import * as persistence from './persistence.js'; -import { type TurnStateRecord, transitionTo } from './state.js'; - -export async function finishSession(iii: ISdk, rec: TurnStateRecord): Promise { - const messages = await persistence.loadMessages(iii, rec.session_id); - await emit(iii, rec.session_id, { type: 'agent_end', messages }); - transitionTo(rec, 'stopped'); -} diff --git a/harness/src/turn-orchestrator/flat-messages.ts b/harness/src/turn-orchestrator/flat-messages.ts deleted file mode 100644 index e2d4589a..00000000 --- a/harness/src/turn-orchestrator/flat-messages.ts +++ /dev/null @@ -1,14 +0,0 @@ -/** - * Parser for the flat `session//messages` agent-scope array. - */ - -import { z } from 'zod'; -import type { AgentMessage } from '../types/agent-message.js'; - -const FlatMessagesSchema = z - .array(z.custom((v) => v != null && typeof v === 'object')) - .catch([]); - -export function parseFlatMessages(raw: unknown): AgentMessage[] { - return FlatMessagesSchema.parse(raw ?? []); -} diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts b/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts new file mode 100644 index 00000000..fd8de255 --- /dev/null +++ b/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts @@ -0,0 +1,39 @@ +/** + * Typed dependency ports and domain types for function_awaiting_approval. + */ + +import { ApprovalDecisionSchema, STATE_SCOPE } from '../../approval-gate/schemas.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { z } from 'zod'; +import type { PreparedCall } from '../function-execute/types.js'; + +export type ApprovalDecision = z.infer; + +/** Explicit control flow — replaces void + early return. 
*/ +export type AwaitingApprovalOutcome = + | { kind: 'resume_empty' } + | { kind: 'parked' } + | { kind: 'resume'; prepared: PreparedCall[] }; + +/** Decode stored approval decision from `state::get` (scope `approvals`). */ +export function parseApprovalDecision(value: unknown): ApprovalDecision | null { + const parsed = ApprovalDecisionSchema.safeParse(value); + return parsed.success ? parsed.data : null; +} + +export type AwaitingApprovalPorts = { + readDecision(session_id: string, function_call_id: string): Promise; +}; + +export function createAwaitingApprovalPorts(iii: ISdk): AwaitingApprovalPorts { + return { + async readDecision(session_id, function_call_id) { + const key = `${session_id}/${function_call_id}`; + const raw = await iii.trigger({ + function_id: 'state::get', + payload: { scope: STATE_SCOPE, key }, + }); + return parseApprovalDecision(raw); + }, + }; +} diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts new file mode 100644 index 00000000..a2e301f3 --- /dev/null +++ b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts @@ -0,0 +1,137 @@ +/** + * Read approval decisions, compute resume or park outcome, and register the FSM step. 
+ */ + +import type { ISdk } from '../../runtime/iii.js'; +import { text } from '../../types/content.js'; +import type { FunctionResult } from '../../types/function.js'; +import type { PreparedCall } from '../function-execute/types.js'; +import { runTransition } from '../run-transition.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; +import { transitionTo, type AwaitingApprovalEntry, type TurnStateRecord } from '../state.js'; +import { + createAwaitingApprovalPorts, + type ApprovalDecision, + type AwaitingApprovalOutcome, + type AwaitingApprovalPorts, +} from './ports.js'; + +export function denialResultFromDecision(decision: ApprovalDecision): FunctionResult { + const reason = + decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); + const message = + decision.decision === 'aborted' + ? `Function call aborted: ${reason}` + : `Permission denied by user: ${reason}`; + return { + content: [text(message)], + details: { + approval_denied: true, + decision: decision.decision, + reason, + }, + terminate: false, + }; +} + +export function applyDecisionToPrepared( + current: PreparedCall, + decision: ApprovalDecision, +): PreparedCall { + if (decision.decision === 'allow') { + return { route: 'pre_approved', call: current.call }; + } + return { + route: 'synthetic', + call: current.call, + result: denialResultFromDecision(decision), + }; +} + +export function foldDecisionsIntoPrepared( + prepared: readonly PreparedCall[], + awaiting: AwaitingApprovalEntry[], + decisions: ApprovalDecision[], +): PreparedCall[] { + const next = [...prepared]; + for (let i = 0; i < awaiting.length; i++) { + const entry = awaiting[i]; + const decision = decisions[i]; + if (!entry || !decision) continue; + const idx = next.findIndex((pe) => pe.call.id === entry.function_call_id); + if (idx < 0) continue; + const current = next[idx]; + if (!current) continue; + next[idx] = applyDecisionToPrepared(current, decision); + } + return 
next; +} + +export async function processAwaitingApproval( + ports: AwaitingApprovalPorts, + rec: TurnStateRecord, +): Promise { + const awaiting = rec.awaiting_approval ?? []; + if (awaiting.length === 0) { + return { kind: 'resume_empty' }; + } + + const decisions = await Promise.all( + awaiting.map((entry) => ports.readDecision(rec.session_id, entry.function_call_id)), + ); + + if (decisions.some((decision) => decision === null)) { + return { kind: 'parked' }; + } + + const prepared = foldDecisionsIntoPrepared( + rec.work?.prepared ?? [], + awaiting, + decisions as NonNullable<(typeof decisions)[number]>[], + ); + + return { kind: 'resume', prepared }; +} + +export function applyAwaitingApprovalOutcome( + rec: TurnStateRecord, + outcome: AwaitingApprovalOutcome, +): void { + if (outcome.kind === 'parked') { + return; + } + + if (outcome.kind === 'resume' && rec.work) { + rec.work = { ...rec.work, prepared: outcome.prepared }; + } + + rec.awaiting_approval = []; + transitionTo(rec, 'function_execute'); +} + +export async function runAwaitingApproval( + ports: AwaitingApprovalPorts, + rec: TurnStateRecord, +): Promise { + const outcome = await processAwaitingApproval(ports, rec); + applyAwaitingApprovalOutcome(rec, outcome); +} + +export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { + const ports = createAwaitingApprovalPorts(iii); + await runAwaitingApproval(ports, rec); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::function_awaiting_approval', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'function_awaiting_approval', handleAwaitingApproval, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state function_awaiting_approval: read approval decisions and resume.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/function-execute/ports.ts 
b/harness/src/turn-orchestrator/function-execute/ports.ts new file mode 100644 index 00000000..28dde52f --- /dev/null +++ b/harness/src/turn-orchestrator/function-execute/ports.ts @@ -0,0 +1,94 @@ +/** + * Typed dependency ports for function_execute — production wiring and test doubles. + */ + +import { z } from 'zod'; +import type { DispatchResult } from '../agent-trigger.js'; +import { dispatchWithHook, triggerFunctionCall } from '../agent-trigger.js'; +import { emit } from '../events.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { FunctionCall, FunctionResult } from '../../types/function.js'; +import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; +import type { ExecutedCall } from './types.js'; + +const RoutingEnvelopeSchema = z + .object({ + session_id: z.string(), + function_call_id: z.string(), + function_id: z.string(), + function_call: z.object({ + id: z.string(), + function_id: z.string(), + arguments: z.unknown(), + }), + }) + .catchall(z.unknown()); + +type RoutingEnvelope = z.infer; + +function baseArgs(arguments_: FunctionCall['arguments']): Record { + if (arguments_ && typeof arguments_ === 'object' && !Array.isArray(arguments_)) { + return { ...(arguments_ as Record) }; + } + return { arguments: arguments_ }; +} + +/** Attach session + call identity to arguments for policy and target functions. 
*/ +export function withRoutingEnvelope(call: FunctionCall, session_id: string): FunctionCall { + const envelope: RoutingEnvelope = { + ...baseArgs(call.arguments), + session_id, + function_call_id: call.id, + function_id: call.function_id, + function_call: { id: call.id, function_id: call.function_id, arguments: call.arguments }, + }; + RoutingEnvelopeSchema.parse(envelope); + return { id: call.id, function_id: call.function_id, arguments: envelope }; +} + +export type FunctionExecutePorts = TurnStatePorts & { + emitStart(session_id: string, call: FunctionCall): Promise; + emitEnd(session_id: string, executed: ExecutedCall): Promise; + dispatch(call: FunctionCall, session_id: string): Promise; + triggerPreApproved(call: FunctionCall): Promise; +}; + +function buildFunctionExecutionEnd(executed: ExecutedCall) { + return { + type: 'function_execution_end' as const, + function_call_id: executed.call.id, + function_id: executed.call.function_id, + result: executed.result, + is_error: executed.is_error, + duration_ms: executed.duration_ms, + }; +} + +export function createPorts(iii: ISdk): FunctionExecutePorts { + const base = createTurnStatePorts(iii); + + return { + ...base, + + async emitStart(session_id, call) { + await emit(iii, session_id, { + type: 'function_execution_start', + function_call_id: call.id, + function_id: call.function_id, + args: call.arguments, + }); + }, + + async emitEnd(session_id, executed) { + await emit(iii, session_id, buildFunctionExecutionEnd(executed)); + }, + + async dispatch(call, session_id) { + return dispatchWithHook(iii, withRoutingEnvelope(call, session_id)); + }, + + async triggerPreApproved(call) { + return triggerFunctionCall(iii, call); + }, + }; +} diff --git a/harness/src/turn-orchestrator/function-execute/process.ts b/harness/src/turn-orchestrator/function-execute/process.ts new file mode 100644 index 00000000..d6e2e3ec --- /dev/null +++ b/harness/src/turn-orchestrator/function-execute/process.ts @@ -0,0 +1,40 @@ +/** + * 
Run prepared function calls, finalize results, route onward, and register the FSM step. + */ + +import type { ISdk } from '../../runtime/iii.js'; +import { runTransition } from '../run-transition.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import { finalizeBatch, loadOrPlanWork, runBatch } from './run.js'; +import { createPorts } from './ports.js'; + +export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { + const ports = createPorts(iii); + const work = loadOrPlanWork(rec); + + const outcome = await runBatch(ports, rec, work); + + if (outcome.kind === 'parked') { + rec.work = outcome.work; + rec.awaiting_approval = [...(rec.awaiting_approval ?? []), outcome.pending]; + transitionTo(rec, 'function_awaiting_approval'); + return; + } + + await finalizeBatch(ports, rec, outcome.work); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::function_execute', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'function_execute', handleExecute, parsed); + }, + { + description: + 'Run one durable FSM transition for session in state function_execute: dispatch prepared calls and finalize results.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/function-execute/run.ts b/harness/src/turn-orchestrator/function-execute/run.ts new file mode 100644 index 00000000..481648ab --- /dev/null +++ b/harness/src/turn-orchestrator/function-execute/run.ts @@ -0,0 +1,232 @@ +/** + * Plan, execute, and finalize function call batches. 
+ */ + +import { logger } from '../../runtime/otel.js'; +import type { AssistantMessage, FunctionResultMessage } from '../../types/agent-message.js'; +import type { FunctionCallContent } from '../../types/content.js'; +import type { FunctionCall, FunctionResult } from '../../types/function.js'; +import { + TOOL_NAME, + isErrorResult, + missingFunctionResult, + unwrapAgentTrigger, +} from '../agent-trigger.js'; +import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; +import { persistedTrailingResultIds } from '../state-runtime/transcript.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import type { FunctionExecutePorts } from './ports.js'; +import { + emptyBatchWork, + preparedCallId, + type BatchOutcome, + type ExecutedCall, + type FunctionBatchWork, + type PreparedCall, + type ResolveCallResult, + type RunOneCallResult, +} from './types.js'; + +export class FunctionExecuteInvariantError extends Error { + constructor(message: string) { + super(message); + this.name = 'FunctionExecuteInvariantError'; + } +} + +function isFunctionCallBlock( + block: AssistantMessage['content'][number], +): block is FunctionCallContent { + return block.type === 'function_call'; +} + +function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { + return msg.content.filter(isFunctionCallBlock).map((b) => ({ + id: b.id, + function_id: b.function_id, + arguments: b.arguments, + })); +} + +function toPreparedCall(raw: FunctionCall): PreparedCall { + if (raw.function_id !== TOOL_NAME) { + return { route: 'synthetic', call: raw, result: missingFunctionResult() }; + } + const call = unwrapAgentTrigger(raw); + if (!call.function_id) { + return { route: 'synthetic', call, result: missingFunctionResult() }; + } + return { route: 'dispatch', call }; +} + +/** Build prepared calls from the assistant message that requested them. 
*/ +export function planBatchFromAssistant(asst: AssistantMessage): PreparedCall[] { + return extractFunctionCalls(asst).map(toPreparedCall); +} + +/** Use existing work or plan a new batch from last_assistant. */ +export function loadOrPlanWork(rec: TurnStateRecord): FunctionBatchWork { + if (rec.work) { + return rec.work; + } + const asst = rec.last_assistant; + if (!asst) { + throw new FunctionExecuteInvariantError('function_execute without last_assistant or work'); + } + return emptyBatchWork(planBatchFromAssistant(asst)); +} + +async function resolvePreparedCall( + ports: FunctionExecutePorts, + prepared: PreparedCall, + session_id: string, +): Promise { + switch (prepared.route) { + case 'synthetic': + return { kind: 'resolved', result: prepared.result, is_error: true }; + case 'pre_approved': { + const result = await ports.triggerPreApproved(prepared.call); + return { kind: 'resolved', result, is_error: isErrorResult(result) }; + } + case 'dispatch': { + const out = await ports.dispatch(prepared.call, session_id); + if (out.kind === 'pending') { + return { kind: 'pending' }; + } + return { kind: 'resolved', result: out.result, is_error: isErrorResult(out.result) }; + } + } +} + +export async function runOneCall( + ports: FunctionExecutePorts, + session_id: string, + prepared: PreparedCall, + executed: Record, +): Promise { + const call: FunctionCall = prepared.call; + + const prior = executed[call.id]; + if (prior) { + await ports.emitEnd(session_id, prior); + return { kind: 'skipped' }; + } + + await ports.emitStart(session_id, call); + const startedAt = Date.now(); + + const resolved = await resolvePreparedCall(ports, prepared, session_id); + if (resolved.kind === 'pending') { + return { kind: 'pending', call }; + } + + const entry: ExecutedCall = { + call, + result: resolved.result, + is_error: resolved.is_error, + duration_ms: Date.now() - startedAt, + }; + executed[call.id] = entry; + await ports.emitEnd(session_id, entry); + return { kind: 'executed', 
entry }; +} + +export async function runBatch( + ports: FunctionExecutePorts, + rec: TurnStateRecord, + work: FunctionBatchWork, +): Promise { + const executed = { ...work.executed }; + + for (const prepared of work.prepared) { + const outcome = await runOneCall(ports, rec.session_id, prepared, executed); + + if (outcome.kind === 'pending') { + return { + kind: 'parked', + work: { prepared: work.prepared, executed }, + pending: { + function_call_id: outcome.call.id, + function_id: outcome.call.function_id, + args: outcome.call.arguments, + }, + }; + } + + if (outcome.kind === 'executed') { + rec.work = { prepared: work.prepared, executed }; + await ports.checkpoint(rec); + } + } + + return { kind: 'completed', work: { prepared: work.prepared, executed } }; +} + +function toFunctionResultMessage( + entry: ExecutedCall, + result: FunctionResult, +): FunctionResultMessage { + return { + role: 'function_result', + function_call_id: entry.call.id, + function_id: entry.call.function_id, + content: result.content, + details: result.details, + is_error: entry.is_error, + timestamp: Date.now(), + }; +} + +/** Collect executed entries in batch order (assistant tool order). 
*/ +function executedInBatchOrder(work: FunctionBatchWork): ExecutedCall[] { + const ordered: ExecutedCall[] = []; + for (const prepared of work.prepared) { + const entry = work.executed[preparedCallId(prepared)]; + if (entry) ordered.push(entry); + } + return ordered; +} + +export async function finalizeBatch( + ports: FunctionExecutePorts, + rec: TurnStateRecord, + work: FunctionBatchWork, +): Promise { + const executed = executedInBatchOrder(work); + const function_results: FunctionResultMessage[] = []; + let allTerminate = executed.length > 0; + + for (const entry of executed) { + const result = entry.result; + if (!result.terminate) allTerminate = false; + function_results.push(toFunctionResultMessage(entry, result)); + } + + const messages = await ports.loadMessages(rec.session_id); + const alreadyPersisted = persistedTrailingResultIds(messages); + const fresh = function_results.filter((r) => !alreadyPersisted.has(r.function_call_id)); + if (fresh.length < function_results.length) { + logger.warn('finalizeBatch: skipped duplicate function_results (re-entry detected)', { + session_id: rec.session_id, + total: function_results.length, + skipped: function_results.length - fresh.length, + }); + } + if (fresh.length > 0) { + await ports.appendMessages(rec.session_id, fresh); + } + + const asst = rec.last_assistant; + rec.function_results = function_results; + rec.work = undefined; + + if (asst) { + await emitTurnEndOnce(ports, rec, asst, function_results); + } + + if (allTerminate) { + await ports.finishSession(rec); + } else { + transitionTo(rec, 'steering_check'); + } +} diff --git a/harness/src/turn-orchestrator/function-execute/types.ts b/harness/src/turn-orchestrator/function-execute/types.ts new file mode 100644 index 00000000..fda48005 --- /dev/null +++ b/harness/src/turn-orchestrator/function-execute/types.ts @@ -0,0 +1,54 @@ +/** + * Domain types for the function_execute pipeline. 
+ */ + +import type { FunctionCall, FunctionResult } from '../../types/function.js'; + +/** Exactly one execution route per prepared call: dispatch, pre_approved, or synthetic. */ +export type PreparedCall = + | { route: 'dispatch'; call: FunctionCall } + | { route: 'pre_approved'; call: FunctionCall } + | { route: 'synthetic'; call: FunctionCall; result: FunctionResult }; + +export type ExecutedCall = { + call: FunctionCall; + result: FunctionResult; + is_error: boolean; + duration_ms: number; +}; + +/** Durable mid-batch state persisted on TurnStateRecord.work. */ +export type FunctionBatchWork = { + prepared: readonly PreparedCall[]; + executed: Record; +}; + +export type PendingApproval = { + function_call_id: string; + function_id: string; + args: FunctionCall['arguments']; +}; + +/** Batch loop outcome — explicit control flow instead of early return + void. */ +export type BatchOutcome = + | { kind: 'completed'; work: FunctionBatchWork } + | { kind: 'parked'; work: FunctionBatchWork; pending: PendingApproval }; + +export type RunOneCallResult = + | { kind: 'skipped' } + | { kind: 'executed'; entry: ExecutedCall } + | { kind: 'pending'; call: FunctionCall }; + +export type ResolveCallResult = + | { kind: 'pending' } + | { kind: 'resolved'; result: FunctionResult; is_error: boolean }; + +/** Return the id of the FunctionCall wrapped by any PreparedCall variant. */ +export function preparedCallId(prepared: PreparedCall): string { + return prepared.call.id; +} + +/** Empty durable work for a fresh batch. 
*/ +export function emptyBatchWork(prepared: readonly PreparedCall[]): FunctionBatchWork { + return { prepared, executed: {} }; +} diff --git a/harness/src/turn-orchestrator/get-state.ts b/harness/src/turn-orchestrator/get-state.ts index 7aec7c03..c59c9f14 100644 --- a/harness/src/turn-orchestrator/get-state.ts +++ b/harness/src/turn-orchestrator/get-state.ts @@ -6,16 +6,16 @@ */ import type { ISdk } from '../runtime/iii.js'; -import * as persistence from './persistence.js'; import { GetStatePayloadSchema, type GetStatePayload, type GetStateResult, toView, } from './schemas.js'; +import { createTurnStore } from './state-runtime/store.js'; export async function execute(iii: ISdk, payload: GetStatePayload): Promise { - const rec = await persistence.loadRecord(iii, payload.session_id); + const rec = await createTurnStore(iii).loadRecord(payload.session_id); return rec ? toView(rec) : null; } diff --git a/harness/src/turn-orchestrator/hook.ts b/harness/src/turn-orchestrator/hook.ts index d8edb7e8..416d14fb 100644 --- a/harness/src/turn-orchestrator/hook.ts +++ b/harness/src/turn-orchestrator/hook.ts @@ -2,12 +2,6 @@ * Approval consultation. Calls `policy::check_permissions` directly and maps * the reply to allow / deny / pending. Fail-closed on transport errors: * unreachable policy → deny with `gate_unavailable`. - * - * `publishAfter` goes through hook-fanout only when a durable subscriber is - * registered for the after-hook topic. With no subscriber the publish/collect - * would just block until its deadline and return an empty merge the caller - * discards, so it is skipped. The after-hook stays a pluggable merge point for - * any registered consumer (see subscriber-presence.ts). 
*/ import { permissionsDenyEnvelope } from '../approval-gate/denial.js'; @@ -20,11 +14,7 @@ import type { ISdk } from '../runtime/iii.js'; export type { DenialEnvelope } from '../approval-gate/schemas.js'; import { logger } from '../runtime/otel.js'; import type { FunctionCall } from '../types/function.js'; -import { hasDurableSubscriber } from './subscriber-presence.js'; -export const TOPIC_AFTER = 'agent::after_function_call'; - -export const HOOK_TIMEOUT_MS = 500; /** Fail-closed budget for the synchronous policy consult before a call. */ export const POLICY_TIMEOUT_MS = 5_000; @@ -83,30 +73,3 @@ export async function consultBefore(iii: ISdk, function_call: FunctionCall): Pro }; } } - -export async function publishAfter( - iii: ISdk, - function_call: FunctionCall, - result: unknown, -): Promise { - // No subscriber on the after-hook topic → publish_collect would just block - // until its deadline and return an empty merge that the caller discards. - // Skip the dead wait; callers treat `undefined` as "keep the original result". 
- if (!(await hasDurableSubscriber(iii, TOPIC_AFTER))) { - return undefined; - } - try { - const resp = await iii.trigger({ - function_id: 'hook-fanout::publish_collect', - payload: { - topic: TOPIC_AFTER, - payload: { function_call, result }, - merge_rule: 'field_merge', - timeout_ms: HOOK_TIMEOUT_MS, - }, - }); - return resp.merged; - } catch { - return null; - } -} diff --git a/harness/src/turn-orchestrator/iii.worker.yaml b/harness/src/turn-orchestrator/iii.worker.yaml index 31086458..4d9be949 100644 --- a/harness/src/turn-orchestrator/iii.worker.yaml +++ b/harness/src/turn-orchestrator/iii.worker.yaml @@ -15,6 +15,5 @@ scripts: dependencies: session: "^0.2.0" - hook-fanout: "^0.2.0" provider-anthropic: "^0.2.0" provider-openai: "^0.2.0" diff --git a/harness/src/turn-orchestrator/on-approval.ts b/harness/src/turn-orchestrator/on-approval.ts index b67ce1d8..d6e0e21b 100644 --- a/harness/src/turn-orchestrator/on-approval.ts +++ b/harness/src/turn-orchestrator/on-approval.ts @@ -10,9 +10,8 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; -import { listAgentTurnStateRecords } from './persistence.js'; import { ApprovalDecisionEventSchema, type ParsedApprovalDecisionWrite } from './schemas.js'; -import { wakeFromRecord } from './wake.js'; +import { createTurnStore } from './state-runtime/store.js'; export function parseApprovalDecisionWrite(event: unknown): ParsedApprovalDecisionWrite | null { const result = ApprovalDecisionEventSchema.safeParse(event); @@ -24,8 +23,9 @@ export function isApprovalDecisionWrite(event: unknown): boolean { } export async function execute(iii: ISdk, write: ParsedApprovalDecisionWrite): Promise { + const store = createTurnStore(iii); try { - await wakeFromRecord(iii, write.session_id); + await store.wakeFromRecord(write.session_id); } catch (err) { logger.warn('turn::on_approval: wake failed', { session_id: write.session_id, @@ -42,11 +42,12 @@ export async function 
handleApprovalDecisionWrite(iii: ISdk, event: unknown): Pr /** Wake sessions still parked on approval (e.g. a decision arrived during downtime). */ export async function recoverParkedApprovals(iii: ISdk): Promise { - const records = await listAgentTurnStateRecords(iii); + const store = createTurnStore(iii); + const records = await store.listTurnStateRecords(); for (const rec of records) { if (rec.state !== 'function_awaiting_approval') continue; try { - await wakeFromRecord(iii, rec.session_id); + await store.wakeFromRecord(rec.session_id); } catch (err) { logger.warn('recoverParkedApprovals: wake failed', { session_id: rec.session_id, diff --git a/harness/src/turn-orchestrator/persistence.ts b/harness/src/turn-orchestrator/persistence.ts deleted file mode 100644 index 5e88c6ad..00000000 --- a/harness/src/turn-orchestrator/persistence.ts +++ /dev/null @@ -1,114 +0,0 @@ -/** - * State load/save helpers. All `state::*` I/O goes through - * `../runtime/state.js` (agent scope). - */ - -import { stateGet, stateListValues, stateSet } from '../runtime/state.js'; -import type { ISdk } from '../runtime/iii.js'; -import type { AgentMessage } from '../types/agent-message.js'; -import { parseFlatMessages } from './flat-messages.js'; -import { type RunRequest, parseRunRequest } from './run-request.js'; -import { - AGENT_SCOPE, - SESSION_INDEX_SCOPE, - type TurnStateRecord, - messagesKey, - parseTurnStateRecord, - runRequestKey, - turnStateKey, -} from './state.js'; -import { toView } from './schemas.js'; -import { mirrorMessagesToSessionTree } from './session-tree-mirror.js'; -import { emitTurnStateChanged } from './turn-state-write.js'; -import { shouldWakeStep, wakeState } from './wake.js'; - -const agentGet = (iii: ISdk, key: string) => stateGet(iii, AGENT_SCOPE, key); -const agentSet = (iii: ISdk, key: string, value: unknown) => - stateSet(iii, AGENT_SCOPE, key, value); - -// --- turn_state --- - -export async function loadRecord(iii: ISdk, session_id: string): Promise { - 
return parseTurnStateRecord(await agentGet(iii, turnStateKey(session_id))); -} - -/** All turn_state values in agent scope; non-records are dropped by shape parse. */ -export async function listAgentTurnStateRecords(iii: ISdk): Promise { - const values = await stateListValues(iii, { scope: AGENT_SCOPE }); - return values - .map((value) => parseTurnStateRecord(value)) - .filter((rec): rec is TurnStateRecord => rec !== null); -} - -/** Silent checkpoint — no UI event, no FSM wake. */ -export async function writeRecord(iii: ISdk, rec: TurnStateRecord): Promise { - await agentSet(iii, turnStateKey(rec.session_id), rec); -} - -async function persistRecord( - iii: ISdk, - rec: TurnStateRecord, - previous?: TurnStateRecord | null, -): Promise { - const result = await agentSet(iii, turnStateKey(rec.session_id), rec); - const prev = - previous !== undefined ? previous : parseTurnStateRecord(result?.old_value ?? null); - - if (prev == null) { - // First persist for this session → mark it in the session index. The - // create-fanout trigger watches that dedicated scope, so it matches in - // engine by scope alone — no per-write condition predicate. - await stateSet(iii, SESSION_INDEX_SCOPE, rec.session_id, { created_at_ms: Date.now() }); - } - - await emitTurnStateChanged( - iii, - rec.session_id, - prev == null ? 'state:created' : 'state:updated', - toView(rec), - prev != null ? toView(prev) : undefined, - ); - - return prev; -} - -export async function saveRecord( - iii: ISdk, - rec: TurnStateRecord, - previous?: TurnStateRecord | null, -): Promise { - const prev = await persistRecord(iii, rec, previous); - - if (shouldWakeStep(prev?.state ?? 
null, rec.state)) { - await wakeState(iii, rec.session_id, rec.state); - } -} - -// --- messages --- - -export async function loadMessages(iii: ISdk, session_id: string): Promise { - return parseFlatMessages(await agentGet(iii, messagesKey(session_id))); -} - -export async function saveMessages( - iii: ISdk, - session_id: string, - messages: AgentMessage[], -): Promise { - await agentSet(iii, messagesKey(session_id), messages); - await mirrorMessagesToSessionTree(iii, session_id, messages); -} - -// --- run_request --- - -export async function saveRunRequest( - iii: ISdk, - session_id: string, - request: RunRequest, -): Promise { - await agentSet(iii, runRequestKey(session_id), request); -} - -export async function loadRunRequest(iii: ISdk, session_id: string): Promise { - return parseRunRequest(await agentGet(iii, runRequestKey(session_id))); -} diff --git a/harness/src/turn-orchestrator/preflight.ts b/harness/src/turn-orchestrator/preflight.ts index 17b0032c..5b5124f8 100644 --- a/harness/src/turn-orchestrator/preflight.ts +++ b/harness/src/turn-orchestrator/preflight.ts @@ -12,7 +12,13 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentMessage } from '../types/agent-message.js'; import { CompactionBusyError, ContextOverflowError } from './errors.js'; -import { estimateMessages } from './estimate.js'; + +/** Cheap chars/4 token estimate — same heuristic as context-compaction's estimateTokenCount. 
*/ +function estimateMessages(messages: AgentMessage[]): number { + let chars = 0; + for (const m of messages) chars += JSON.stringify(m).length; + return Math.floor(chars / 4); +} function findLastUserEntryId( entries: Array<{ entry_id?: string; message?: { role?: string } }>, diff --git a/harness/src/turn-orchestrator/provider-stream.ts b/harness/src/turn-orchestrator/provider-stream.ts index 30ad42c2..95677c3a 100644 --- a/harness/src/turn-orchestrator/provider-stream.ts +++ b/harness/src/turn-orchestrator/provider-stream.ts @@ -123,7 +123,10 @@ export async function streamProviderTurn( timeoutMs: PROVIDER_STREAM_TIMEOUT_MS, }) .catch((err) => { - logger.warn('provider stream trigger failed', { targetFn: params.targetFn, err: String(err) }); + logger.warn('provider stream trigger failed', { + targetFn: params.targetFn, + err: String(err), + }); error = formatProviderError(err); pump.end(); return null; diff --git a/harness/src/turn-orchestrator/provisioning/load-skills.ts b/harness/src/turn-orchestrator/provisioning/load-skills.ts new file mode 100644 index 00000000..1b0fe51b --- /dev/null +++ b/harness/src/turn-orchestrator/provisioning/load-skills.ts @@ -0,0 +1,18 @@ +/** + * Load default skill bodies via provisioning ports. 
+ */ + +import { defaultSkillBody, skillIdFromUri, type DefaultSkillBody } from '../system-prompt.js'; +import type { ProvisioningPorts } from './ports.js'; + +export async function loadDefaultSkillBodies( + ports: Pick, + uris: readonly string[], +): Promise { + const bodies: DefaultSkillBody[] = []; + for (const uri of uris) { + const body = await ports.fetchSkillBody(skillIdFromUri(uri)); + bodies.push(defaultSkillBody(uri, body)); + } + return bodies; +} diff --git a/harness/src/turn-orchestrator/provisioning/ports.ts b/harness/src/turn-orchestrator/provisioning/ports.ts new file mode 100644 index 00000000..e9edd7d5 --- /dev/null +++ b/harness/src/turn-orchestrator/provisioning/ports.ts @@ -0,0 +1,74 @@ +/** + * Typed dependency ports for provisioning. + */ + +import { logger } from '../../runtime/otel.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { TurnOrchestratorConfig } from '../config.js'; +import type { RunRequest } from '../run-request.js'; +import { createTurnStore } from '../state-runtime/store.js'; + +const FETCH_TIMEOUT_MS = 10_000; + +/** Decode directory skill responses from iii trigger payloads. 
*/ +export function parseDirectoryBody(resp: unknown): string | null { + if (typeof resp === 'string') return resp; + if (resp && typeof resp === 'object') { + const body = (resp as { body?: unknown }).body; + if (typeof body === 'string') return body; + } + return null; +} + +export type ProvisioningPorts = { + defaultSkillUris: readonly string[]; + loadRunRequest(session_id: string): Promise; + saveRunRequest(session_id: string, request: RunRequest): Promise; + fetchSkillsIndex(): Promise; + fetchSkillBody(id: string): Promise; +}; + +export function createProvisioningPorts(iii: ISdk, cfg: TurnOrchestratorConfig): ProvisioningPorts { + const store = createTurnStore(iii); + + return { + defaultSkillUris: cfg.system_default_skills, + + loadRunRequest(session_id) { + return store.loadRunRequest(session_id); + }, + + saveRunRequest(session_id, request) { + return store.saveRunRequest(session_id, request); + }, + + async fetchSkillsIndex() { + try { + const resp = await iii.trigger({ + function_id: 'directory::skills::index', + payload: {}, + timeoutMs: FETCH_TIMEOUT_MS, + }); + const body = parseDirectoryBody(resp); + return body && body.length > 0 ? body : null; + } catch (err) { + logger.warn('directory::skills::index failed', { err: String(err) }); + return null; + } + }, + + async fetchSkillBody(id) { + try { + const resp = await iii.trigger({ + function_id: 'directory::skills::get', + payload: { id }, + timeoutMs: FETCH_TIMEOUT_MS, + }); + return parseDirectoryBody(resp); + } catch (err) { + logger.warn('directory::skills::get failed', { id, err: String(err) }); + return null; + } + }, + }; +} diff --git a/harness/src/turn-orchestrator/provisioning/process.ts b/harness/src/turn-orchestrator/provisioning/process.ts new file mode 100644 index 00000000..566e6f70 --- /dev/null +++ b/harness/src/turn-orchestrator/provisioning/process.ts @@ -0,0 +1,88 @@ +/** + * Load run request, fetch skills, build the provisioned RunRequest, and register the FSM step. 
+ */ + +import type { ISdk } from '../../runtime/iii.js'; +import { agentTriggerTool } from '../agent-trigger.js'; +import type { TurnOrchestratorConfig } from '../config.js'; +import { runTransition } from '../run-transition.js'; +import type { RunRequest } from '../run-request.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; +import { buildSystemPrompt } from '../system-prompt.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import { loadDefaultSkillBodies } from './load-skills.js'; +import { createProvisioningPorts, type ProvisioningPorts } from './ports.js'; + +export type ProvisioningOutcome = { + kind: 'ready'; + runRequest: RunRequest; +}; + +export async function processProvisioning( + ports: ProvisioningPorts, + rec: TurnStateRecord, +): Promise { + const request = await ports.loadRunRequest(rec.session_id); + + const override = request.system_prompt.length > 0 ? request.system_prompt : null; + + const [skillsIndex, bodies] = await Promise.all([ + ports.fetchSkillsIndex(), + loadDefaultSkillBodies(ports, ports.defaultSkillUris), + ]); + const prompt = buildSystemPrompt(bodies, { override, mode: request.mode, skillsIndex }); + + return { + kind: 'ready', + runRequest: { + ...request, + system_prompt: prompt, + function_schemas: [agentTriggerTool()], + }, + }; +} + +export async function applyProvisioningOutcome( + ports: ProvisioningPorts, + rec: TurnStateRecord, + outcome: ProvisioningOutcome, +): Promise { + await ports.saveRunRequest(rec.session_id, outcome.runRequest); + transitionTo(rec, 'assistant_streaming'); +} + +export async function runProvisioning( + ports: ProvisioningPorts, + rec: TurnStateRecord, +): Promise { + const outcome = await processProvisioning(ports, rec); + await applyProvisioningOutcome(ports, rec, outcome); +} + +export async function handleProvisioning( + iii: ISdk, + cfg: TurnOrchestratorConfig, + rec: TurnStateRecord, +): Promise { + const ports = 
createProvisioningPorts(iii, cfg); + await runProvisioning(ports, rec); +} + +export function register(iii: ISdk, cfg: TurnOrchestratorConfig): void { + iii.registerFunction( + 'turn::provisioning', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition( + iii, + 'provisioning', + (i, rec) => handleProvisioning(i, cfg, rec), + parsed, + ); + }, + { + description: + 'Run one durable FSM transition for session in state provisioning: build the system prompt, attach the agent_trigger function schema, advance to assistant_streaming.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 808464f4..2c5740e8 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -2,16 +2,14 @@ import { loadConfig } from '../runtime/config.js'; import type { ISdk } from '../runtime/iii.js'; import * as bootstrap from './bootstrap.js'; import { loadOrchestratorConfig } from './config.js'; +import { register as registerAssistantStreaming } from './assistant-streaming/process.js'; +import { register as registerFunctionAwaitingApproval } from './function-awaiting-approval/process.js'; +import { register as registerFunctionExecute } from './function-execute/process.js'; import { register as registerGetState } from './get-state.js'; import { register as registerRunStart } from './run-start.js'; import { recoverParkedApprovals, register as registerOnApproval } from './on-approval.js'; -import { - registerAssistantStreaming, - registerFunctionAwaitingApproval, - registerFunctionExecute, - registerProvisioning, - registerSteeringCheck, -} from './states/index.js'; +import { register as registerProvisioning } from './provisioning/process.js'; +import { register as registerSteeringCheck } from './steering-check/process.js'; export async function register(iii: ISdk, ctx: { configPath: string }): Promise { const cfg = await 
loadConfig(ctx.configPath); diff --git a/harness/src/turn-orchestrator/run-request.ts b/harness/src/turn-orchestrator/run-request.ts index 85307222..bcc820c2 100644 --- a/harness/src/turn-orchestrator/run-request.ts +++ b/harness/src/turn-orchestrator/run-request.ts @@ -1,6 +1,6 @@ /** * The persisted run request and its single typed parser. `loadRunRequest` - * (persistence) parses the raw `session//run_request` value through + * (persistence) parses the raw scope `run_request` value through * `parseRunRequest` once, so every consumer reads a fully-typed `RunRequest` * instead of re-guarding `unknown` fields. */ @@ -8,14 +8,12 @@ import { z } from 'zod'; import type { Mode } from './system-prompt.js'; -export const RunRequestSchema = z.object({ +const RunRequestSchema = z.object({ provider: z.string().catch(''), model: z.string().catch(''), mode: z .unknown() - .transform((v): Mode | null => - v === 'plan' || v === 'ask' || v === 'agent' ? v : null, - ), + .transform((v): Mode | null => (v === 'plan' || v === 'ask' || v === 'agent' ? 
v : null)), system_prompt: z.string().catch(''), function_schemas: z.array(z.unknown()).catch([]), }); diff --git a/harness/src/turn-orchestrator/run-start.ts b/harness/src/turn-orchestrator/run-start.ts index 99f232e1..59d9a621 100644 --- a/harness/src/turn-orchestrator/run-start.ts +++ b/harness/src/turn-orchestrator/run-start.ts @@ -10,22 +10,23 @@ */ import type { ISdk } from '../runtime/iii.js'; -import * as persistence from './persistence.js'; import { RunStartPayloadSchema, type RunStartPayload, type RunStartResult } from './schemas.js'; +import { createTurnStore } from './state-runtime/store.js'; import { newRecord } from './state.js'; export async function execute(iii: ISdk, payload: RunStartPayload): Promise { + const store = createTurnStore(iii); const { session_id, messages, max_turns, message_id: _message_id, ...run } = payload; - await persistence.saveRunRequest(iii, session_id, { + await store.saveRunRequest(session_id, { ...run, mode: run.mode ?? null, function_schemas: [], }); - await persistence.saveMessages(iii, session_id, messages); + await store.saveMessages(session_id, messages); const record = newRecord(session_id, max_turns); - await persistence.saveRecord(iii, record); + await store.saveRecord(record); return { session_id }; } diff --git a/harness/src/turn-orchestrator/run-transition.ts b/harness/src/turn-orchestrator/run-transition.ts index 890ae4e6..c9827a02 100644 --- a/harness/src/turn-orchestrator/run-transition.ts +++ b/harness/src/turn-orchestrator/run-transition.ts @@ -13,8 +13,8 @@ import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import { TransientError } from './errors.js'; import { emit } from './events.js'; -import * as persistence from './persistence.js'; import { type TurnStepPayload, type TurnStepResult } from './schemas.js'; +import { createTurnStore } from './state-runtime/store.js'; import { type TurnState, type TurnStateRecord, transitionTo } from './state.js'; import { 
syntheticAssistant } from './synthetic-assistant.js'; @@ -38,19 +38,24 @@ async function failTransition( from_state: TurnState, err: unknown, ): Promise { + const store = createTurnStore(iii); const message = err instanceof Error ? err.message : String(err); rec.error = { kind: 'transition_error', message: `from ${from_state}: ${message}` }; transitionTo(rec, 'failed'); - await persistence.saveRecord(iii, rec, previous); + await store.saveRecord(rec, previous); // Surface the failure to the live UI (mirrors the graceful error path): // message_complete{stop_reason:'error'} → the translator emits a `stop-reason` // event so the user sees WHY; a bare agent_end renders as a silent end. // (The UI translator reads stop_reason, not error_kind.) const failed = syntheticAssistant({ stop_reason: 'error', text: rec.error.message }); - await emit(iii, rec.session_id, { type: 'message_complete', message: failed, body_streamed: false }); + await emit(iii, rec.session_id, { + type: 'message_complete', + message: failed, + body_streamed: false, + }); - const messages = await persistence.loadMessages(iii, rec.session_id); + const messages = await store.loadMessages(rec.session_id); await emit(iii, rec.session_id, { type: 'agent_end', messages }); logger.error('transition failed; session marked failed', { session_id: rec.session_id, @@ -66,7 +71,8 @@ export async function runTransition( handle: TransitionHandler, payload: TurnStepPayload, ): Promise { - const rec = await persistence.loadRecord(iii, payload.session_id); + const store = createTurnStore(iii); + const rec = await store.loadRecord(payload.session_id); if (!rec) { throw new Error(`turn::${state} invariant: missing session ${payload.session_id}`); } @@ -82,6 +88,6 @@ export async function runTransition( if (err instanceof TransientError) throw err; return failTransition(iii, rec, previous, from_state, err); } - await persistence.saveRecord(iii, rec, previous); + await store.saveRecord(rec, previous); return { ok: true, 
from_state, to_state: rec.state }; } diff --git a/harness/src/turn-orchestrator/session-tree-mirror.ts b/harness/src/turn-orchestrator/session-tree-mirror.ts index e44fa409..26c34ee3 100644 --- a/harness/src/turn-orchestrator/session-tree-mirror.ts +++ b/harness/src/turn-orchestrator/session-tree-mirror.ts @@ -7,7 +7,8 @@ import { stateGet, stateSet } from '../runtime/state.js'; import type { ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import type { AgentMessage } from '../types/agent-message.js'; -import { AGENT_SCOPE, lastSessionTreeLenKey } from './state.js'; + +const SESSION_TREE_MIRROR_LEN_SCOPE = 'session_tree_mirror_len'; const MirrorLenSchema = z.coerce.number().int().nonnegative().catch(0); @@ -20,8 +21,9 @@ export async function mirrorMessagesToSessionTree( session_id: string, messages: AgentMessage[], ): Promise { - const lastKey = lastSessionTreeLenKey(session_id); - const alreadyMirrored = parseMirrorLen(await stateGet(iii, AGENT_SCOPE, lastKey)); + const alreadyMirrored = parseMirrorLen( + await stateGet(iii, SESSION_TREE_MIRROR_LEN_SCOPE, session_id), + ); if (messages.length <= alreadyMirrored) return; if (alreadyMirrored === 0) { @@ -42,16 +44,16 @@ export async function mirrorMessagesToSessionTree( } for (const msg of messages.slice(alreadyMirrored)) { - const resp = await triggerSessionTree<{ entry_id?: string }>( - iii, - 'session-tree::append', - { session_id, parent_id: lastAppended, message: msg }, - ); + const resp = await triggerSessionTree<{ entry_id?: string }>(iii, 'session-tree::append', { + session_id, + parent_id: lastAppended, + message: msg, + }); if (!resp) return; lastAppended = resp.entry_id ?? 
lastAppended; } - await stateSet(iii, AGENT_SCOPE, lastKey, messages.length); + await stateSet(iii, SESSION_TREE_MIRROR_LEN_SCOPE, session_id, messages.length); } async function triggerSessionTree( diff --git a/harness/src/turn-orchestrator/state-runtime/ports.ts b/harness/src/turn-orchestrator/state-runtime/ports.ts new file mode 100644 index 00000000..3b47bab2 --- /dev/null +++ b/harness/src/turn-orchestrator/state-runtime/ports.ts @@ -0,0 +1,60 @@ +/** + * Shared dependency ports for turn FSM state handlers. + */ + +import { emit } from '../events.js'; +import type { RunRequest } from '../run-request.js'; +import type { ISdk } from '../../runtime/iii.js'; +import type { AgentMessage, FunctionResultMessage } from '../../types/agent-message.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import { createTurnStore, type TurnStore } from './store.js'; + +export type TurnStatePorts = { + loadMessages(session_id: string): Promise; + appendMessages(session_id: string, msgs: AgentMessage[]): Promise; + checkpoint(rec: TurnStateRecord): Promise; + loadRunRequest(session_id: string): Promise; + saveRunRequest(session_id: string, request: RunRequest): Promise; + emitTurnEnd( + session_id: string, + message: AgentMessage, + function_results: FunctionResultMessage[], + ): Promise; + finishSession(rec: TurnStateRecord): Promise; +}; + +export function createTurnStatePorts(iii: ISdk, store?: TurnStore): TurnStatePorts { + const s = store ?? 
createTurnStore(iii); + + return { + loadMessages(session_id) { + return s.loadMessages(session_id); + }, + + appendMessages(session_id, msgs) { + return s.appendMessages(session_id, msgs); + }, + + checkpoint(rec) { + return s.writeRecord(rec); + }, + + loadRunRequest(session_id) { + return s.loadRunRequest(session_id); + }, + + saveRunRequest(session_id, request) { + return s.saveRunRequest(session_id, request); + }, + + async emitTurnEnd(session_id, message, function_results) { + await emit(iii, session_id, { type: 'turn_end', message, function_results }); + }, + + async finishSession(rec) { + const messages = await s.loadMessages(rec.session_id); + await emit(iii, rec.session_id, { type: 'agent_end', messages }); + transitionTo(rec, 'stopped'); + }, + }; +} diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts new file mode 100644 index 00000000..ff6907f8 --- /dev/null +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -0,0 +1,167 @@ +/** + * Agent-scope turn FSM store. All `state::*` I/O for turn-orchestrator goes + * through `createTurnStore`. 
+ */ + +import { z } from 'zod'; +import { TriggerAction, type ISdk } from '../../runtime/iii.js'; +import { stateGet, stateListValues, stateSet } from '../../runtime/state.js'; +import { logger } from '../../runtime/otel.js'; +import type { AgentMessage } from '../../types/agent-message.js'; +import { MESSAGES_SCOPE, RUN_REQUEST_SCOPE, TURN_STATE_SCOPE } from '../state.js'; +import { emit } from '../events.js'; +import { type RunRequest, parseRunRequest } from '../run-request.js'; +import { toView, type TurnStateView } from '../schemas.js'; +import { mirrorMessagesToSessionTree } from '../session-tree-mirror.js'; +import { type TurnState, type TurnStateRecord, parseTurnStateRecord } from '../state.js'; + +export const TURN_STEP_QUEUE = 'turn-step'; + +const NON_STEPABLE_STATES = new Set(['stopped', 'failed', 'function_awaiting_approval']); + +/** True when a persisted turn_state transition should enqueue `turn::{newState}`. */ +export function shouldWakeStep(previousState: TurnState | null, newState: TurnState): boolean { + if (NON_STEPABLE_STATES.has(newState)) return false; + if (previousState !== null && previousState === newState) return false; + return true; +} + +async function enqueueTurnStep(iii: ISdk, session_id: string, state: TurnState): Promise { + try { + await iii.trigger({ + function_id: `turn::${state}`, + payload: { session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); + } catch (err) { + logger.warn('wakeStep failed', { session_id, state, err: String(err) }); + } +} + +export type TurnStore = { + loadRecord(session_id: string): Promise; + saveRecord(rec: TurnStateRecord, previous?: TurnStateRecord | null): Promise; + writeRecord(rec: TurnStateRecord): Promise; + loadMessages(session_id: string): Promise; + saveMessages(session_id: string, messages: AgentMessage[]): Promise; + appendMessages(session_id: string, msgs: AgentMessage[]): Promise; + loadRunRequest(session_id: string): Promise; + saveRunRequest(session_id: 
string, request: RunRequest): Promise; + listTurnStateRecords(): Promise; + wakeStep(session_id: string, state: TurnState): Promise; + wakeFromRecord(session_id: string): Promise; +}; + +const FlatMessagesSchema = z + .array(z.custom((v) => v != null && typeof v === 'object')) + .catch([]); + +/** @internal Exported for unit tests. */ +export function parseFlatMessages(raw: unknown): AgentMessage[] { + return FlatMessagesSchema.parse(raw ?? []); +} + +const scopedGet = (iii: ISdk, scope: string, session_id: string) => + stateGet(iii, scope, session_id); +const scopedSet = (iii: ISdk, scope: string, session_id: string, value: unknown) => + stateSet(iii, scope, session_id, value); + +async function emitTurnStateChanged( + iii: ISdk, + session_id: string, + event_type: 'state:created' | 'state:updated', + new_value: TurnStateView, + old_value?: TurnStateView, +): Promise { + try { + await emit(iii, session_id, { + type: 'turn_state_changed', + event_type, + new_value, + ...(old_value !== undefined && { old_value }), + }); + } catch (err) { + logger.warn('emitTurnStateChanged failed', { + session_id, + err: String(err), + }); + } +} + +async function persistRecord( + iii: ISdk, + rec: TurnStateRecord, + previous?: TurnStateRecord | null, +): Promise { + const result = await scopedSet(iii, TURN_STATE_SCOPE, rec.session_id, rec); + const prev = previous !== undefined ? previous : parseTurnStateRecord(result?.old_value ?? null); + + await emitTurnStateChanged( + iii, + rec.session_id, + prev == null ? 'state:created' : 'state:updated', + toView(rec), + prev != null ? 
toView(prev) : undefined, + ); + + return prev; +} + +export function createTurnStore(iii: ISdk): TurnStore { + return { + async loadRecord(session_id) { + return parseTurnStateRecord(await scopedGet(iii, TURN_STATE_SCOPE, session_id)); + }, + + async listTurnStateRecords() { + const values = await stateListValues(iii, { scope: TURN_STATE_SCOPE }); + return values + .map((value) => parseTurnStateRecord(value)) + .filter((rec): rec is TurnStateRecord => rec !== null); + }, + + async writeRecord(rec) { + await scopedSet(iii, TURN_STATE_SCOPE, rec.session_id, rec); + }, + + async saveRecord(rec, previous) { + const prev = await persistRecord(iii, rec, previous); + if (shouldWakeStep(prev?.state ?? null, rec.state)) { + await enqueueTurnStep(iii, rec.session_id, rec.state); + } + }, + + wakeStep(session_id, state) { + return enqueueTurnStep(iii, session_id, state); + }, + + async wakeFromRecord(session_id) { + const rec = parseTurnStateRecord(await scopedGet(iii, TURN_STATE_SCOPE, session_id)); + if (!rec || rec.state === 'stopped' || rec.state === 'failed') return; + await enqueueTurnStep(iii, session_id, rec.state); + }, + + async loadMessages(session_id) { + return parseFlatMessages(await scopedGet(iii, MESSAGES_SCOPE, session_id)); + }, + + async saveMessages(session_id, messages) { + await scopedSet(iii, MESSAGES_SCOPE, session_id, messages); + await mirrorMessagesToSessionTree(iii, session_id, messages); + }, + + async appendMessages(session_id, msgs) { + const messages = parseFlatMessages(await scopedGet(iii, MESSAGES_SCOPE, session_id)); + await scopedSet(iii, MESSAGES_SCOPE, session_id, [...messages, ...msgs]); + await mirrorMessagesToSessionTree(iii, session_id, [...messages, ...msgs]); + }, + + async saveRunRequest(session_id, request) { + await scopedSet(iii, RUN_REQUEST_SCOPE, session_id, request); + }, + + async loadRunRequest(session_id) { + return parseRunRequest(await scopedGet(iii, RUN_REQUEST_SCOPE, session_id)); + }, + }; +} diff --git 
a/harness/src/turn-orchestrator/state-runtime/transcript.ts b/harness/src/turn-orchestrator/state-runtime/transcript.ts new file mode 100644 index 00000000..bd6b6e1b --- /dev/null +++ b/harness/src/turn-orchestrator/state-runtime/transcript.ts @@ -0,0 +1,33 @@ +/** + * Shared transcript idempotency helpers for turn FSM handlers. + */ + +import type { AgentMessage, AssistantMessage } from '../../types/agent-message.js'; + +/** + * Function_call_ids already persisted for the current turn. Results are appended + * right after the assistant that requested them, so they form the trailing run + * of `function_result` messages; the first non-result from the tail is the turn + * boundary. + */ +export function persistedTrailingResultIds(messages: AgentMessage[]): Set { + const ids = new Set(); + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i]; + if (m?.role === 'function_result') ids.add(m.function_call_id); + else break; + } + return ids; +} + +/** True when the trailing assistant message matches the candidate (re-entry dup). */ +export function isDuplicateAssistant(messages: AgentMessage[], asst: AssistantMessage): boolean { + const last = messages[messages.length - 1]; + return ( + last !== undefined && + last.role === 'assistant' && + last.timestamp === asst.timestamp && + last.model === asst.model && + last.provider === asst.provider + ); +} diff --git a/harness/src/turn-orchestrator/state-runtime/turn-end.ts b/harness/src/turn-orchestrator/state-runtime/turn-end.ts new file mode 100644 index 00000000..d2be3af2 --- /dev/null +++ b/harness/src/turn-orchestrator/state-runtime/turn-end.ts @@ -0,0 +1,35 @@ +/** + * Shared turn-end and FSM resume helpers for step outcome application. 
+ */ + +import { + emptyAssistant, + type AssistantMessage, + type FunctionResultMessage, +} from '../../types/agent-message.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; + +export type TurnEndEmitter = { + emitTurnEnd( + session_id: string, + message: AssistantMessage, + function_results: FunctionResultMessage[], + ): Promise; +}; + +export async function emitTurnEndOnce( + ports: TurnEndEmitter, + rec: TurnStateRecord, + message?: AssistantMessage, + function_results: FunctionResultMessage[] = [], +): Promise { + if (rec.turn_end_emitted) return; + const last = message ?? rec.last_assistant ?? emptyAssistant(); + await ports.emitTurnEnd(rec.session_id, last, function_results); + rec.turn_end_emitted = true; +} + +export function resumeToAssistantStreaming(rec: TurnStateRecord): void { + rec.function_results = []; + transitionTo(rec, 'assistant_streaming'); +} diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index cfca4f86..33dba8de 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -1,26 +1,18 @@ /** - * TurnState + TurnStateRecord + agent-scope key helpers. + * TurnState + TurnStateRecord types and parsers. * - * All turn-orchestrator persistence uses iii scope {@link AGENT_SCOPE} with - * keys from the helpers below (`session//turn_state`, etc.). Because - * `state::list` returns values without keys, recovery paths filter listed - * values with {@link parseTurnStateRecord} rather than key-prefix matching. + * Persistence uses semantic iii scopes (`turn_state`, `messages`, `run_request`, …) + * keyed by `session_id`. Recovery lists scope `turn_state` via {@link parseTurnStateRecord}. */ -/** iii-state scope for turn FSM records, flat messages, run_request, etc. */ -export const AGENT_SCOPE = 'agent' as const; - -/** - * Dedicated iii-state scope indexing created sessions, keyed by `session_id`. 
- * A one-time marker is written here when a session's `turn_state` is first - * persisted, so the session-create fanout trigger matches in-engine by `scope` - * alone — no `condition_function_id` RPC per agent-scope `turn_state` write. - */ -export const SESSION_INDEX_SCOPE = 'session_index' as const; - import { z } from 'zod'; import type { AssistantMessage, FunctionResultMessage } from '../types/agent-message.js'; -import type { FunctionCall, FunctionResult } from '../types/function.js'; +import type { ExecutedCall, FunctionBatchWork, PreparedCall } from './function-execute/types.js'; + +/** Shared iii scope names for turn-orchestrator persistence (key = session_id). */ +export const TURN_STATE_SCOPE = 'turn_state'; +export const MESSAGES_SCOPE = 'messages'; +export const RUN_REQUEST_SCOPE = 'run_request'; export type TurnState = | 'provisioning' @@ -37,23 +29,10 @@ export type AwaitingApprovalEntry = { args: unknown; }; -export type PreparedEntry = { - function_call: FunctionCall; - blocked: FunctionResult | null; - pre_approved?: boolean; -}; +/** Durable mid-batch work for function_execute. 
*/ +export type TurnWork = FunctionBatchWork; -export type ExecutedEntry = { - function_call: FunctionCall; - result: FunctionResult; - is_error: boolean; - duration_ms: number; -}; - -export type TurnWork = { - batch: PreparedEntry[]; - results: ExecutedEntry[]; -}; +export type { ExecutedCall, FunctionBatchWork, PreparedCall }; export type TurnStateRecord = { session_id: string; @@ -119,9 +98,3 @@ export function transitionTo(rec: TurnStateRecord, next: TurnState): void { rec.state = next; rec.updated_at_ms = Date.now(); } - -export const messagesKey = (sid: string) => `session/${sid}/messages`; -export const turnStateKey = (sid: string) => `session/${sid}/turn_state`; -export const runRequestKey = (sid: string) => `session/${sid}/run_request`; -export const lastSessionTreeLenKey = (sid: string) => `session/${sid}/session_tree_mirror_len`; -export const eventCounterKey = (sid: string) => `session/${sid}/event_counter`; diff --git a/harness/src/turn-orchestrator/states/assistant-streaming.ts b/harness/src/turn-orchestrator/states/assistant-streaming.ts deleted file mode 100644 index 34494119..00000000 --- a/harness/src/turn-orchestrator/states/assistant-streaming.ts +++ /dev/null @@ -1,149 +0,0 @@ -/** - * `turn::assistant_streaming`. Stream one provider turn, persist the assistant - * message, and route onward. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
- */ - -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import type { AssistantMessage } from '../../types/agent-message.js'; -import type { AgentFunction } from '../../types/function.js'; -import { emit } from '../events.js'; -import { finishSession } from '../finish.js'; -import * as persistence from '../persistence.js'; -import { runPreflight } from '../preflight.js'; -import { buildInput, decide, targetFunctionId } from '../provider-router.js'; -import { streamProviderTurn } from '../provider-stream.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { syntheticAssistant } from '../synthetic-assistant.js'; - -function isErrorOrAborted(asst: AssistantMessage): boolean { - return asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; -} - -/** Append the assistant message unless a re-entry already persisted it. 
*/ -async function persistAssistantOnce( - iii: ISdk, - rec: TurnStateRecord, - asst: AssistantMessage, -): Promise { - const messages = await persistence.loadMessages(iii, rec.session_id); - const last = messages[messages.length - 1]; - const dup = - last && - last.role === 'assistant' && - last.timestamp === asst.timestamp && - last.model === asst.model && - last.provider === asst.provider; - if (dup) { - logger.warn('finalizeAssistant: skipping duplicate assistant push (re-entry detected)', { - session_id: rec.session_id, - timestamp: asst.timestamp, - }); - return; - } - messages.push(asst); - await persistence.saveMessages(iii, rec.session_id, messages); -} - -async function finalizeAssistant(iii: ISdk, rec: TurnStateRecord): Promise { - const asst = rec.last_assistant; - if (!asst) throw new Error('assistant_streaming finalize without last_assistant'); - - await emit(iii, rec.session_id, { - type: 'message_complete', - message: asst, - body_streamed: rec.assistant_body_streamed === true, - }); - - if (isErrorOrAborted(asst)) { - await emit(iii, rec.session_id, { type: 'turn_end', message: asst, function_results: [] }); - rec.turn_end_emitted = true; - await finishSession(iii, rec); - return; - } - - await persistAssistantOnce(iii, rec, asst); - - const hasCalls = asst.content.some((b) => b.type === 'function_call'); - if (!hasCalls) { - transitionTo(rec, 'steering_check'); - return; - } - rec.function_results = []; - rec.work = undefined; // function_execute builds the batch from last_assistant - transitionTo(rec, 'function_execute'); -} - -export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { - rec.turn_count++; - rec.turn_end_emitted = false; - rec.assistant_body_streamed = false; - - const request = await persistence.loadRunRequest(iii, rec.session_id); - let messages = await persistence.loadMessages(iii, rec.session_id); - const { provider, model, system_prompt } = request; - const tools = ( - 
Array.isArray(request.function_schemas) ? request.function_schemas : [] - ) as AgentFunction[]; - const decision = decide({ provider, model }); - - if ( - (await runPreflight(iii, rec.session_id, messages, decision.provider, model)) === 'compacted' - ) { - messages = await persistence.loadMessages(iii, rec.session_id); - } - - const { final, error } = await streamProviderTurn(iii, { - session_id: rec.session_id, - targetFn: targetFunctionId(decision), - buildInput: (writerRef) => buildInput(decision, writerRef, system_prompt, messages, tools), - onDelta: async (partial, event) => { - await emit(iii, rec.session_id, { - type: 'message_update', - message: partial, - llm_event: event, - }); - if (event.type === 'text_delta' || event.type === 'thinking_delta') { - rec.assistant_body_streamed = true; - } - }, - }); - - if (final) { - rec.last_assistant = final; - } else { - const reason = error ?? 'provider channel closed without final'; - const synthetic = syntheticAssistant({ - stop_reason: 'error', - text: reason, - provider: decision.provider, - model: decision.model, - }); - await emit(iii, rec.session_id, { - type: 'message_update', - message: synthetic, - llm_event: { type: 'text_delta', partial: synthetic, delta: reason }, - }); - rec.last_assistant = synthetic; - } - await finalizeAssistant(iii, rec); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::assistant_streaming', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'assistant_streaming', handleStreaming, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state assistant_streaming: start turn, stream provider response, finalize, and route onward.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/function-awaiting-approval.ts b/harness/src/turn-orchestrator/states/function-awaiting-approval.ts deleted file mode 100644 index bdda1d40..00000000 --- 
a/harness/src/turn-orchestrator/states/function-awaiting-approval.ts +++ /dev/null @@ -1,107 +0,0 @@ -/** - * `turn::function_awaiting_approval`. Read approval decisions and resume execute. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. - */ - -import { ApprovalResumePayloadSchema, STATE_SCOPE } from '../../approval-gate/schemas.js'; -import type { z } from 'zod'; -import type { ISdk } from '../../runtime/iii.js'; -import type { FunctionResult } from '../../types/function.js'; -import { text } from '../../types/content.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -export type ApprovalDecision = z.infer; - -/** Decode stored approval decision from `state::get` (scope `approvals`). */ -export function parseApprovalDecision(value: unknown): ApprovalDecision | null { - const parsed = ApprovalResumePayloadSchema.safeParse(value); - return parsed.success ? parsed.data : null; -} - -async function readDecision( - iii: ISdk, - session_id: string, - function_call_id: string, -): Promise { - const key = `${session_id}/${function_call_id}`; - const raw = await iii.trigger({ - function_id: 'state::get', - payload: { scope: STATE_SCOPE, key }, - }); - return parseApprovalDecision(raw); -} - -function denialResultFromDecision(decision: ApprovalDecision): FunctionResult { - const reason = - decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); - const message = - decision.decision === 'aborted' - ? 
`Function call aborted: ${reason}` - : `Permission denied by user: ${reason}`; - return { - content: [text(message)], - details: { - approval_denied: true, - decision: decision.decision, - reason, - }, - terminate: false, - }; -} - -export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { - const awaiting = rec.awaiting_approval ?? []; - if (awaiting.length === 0) { - transitionTo(rec, 'function_execute'); - return; - } - - const decisions = await Promise.all( - awaiting.map((entry) => readDecision(iii, rec.session_id, entry.function_call_id)), - ); - - if (decisions.some((decision) => decision === null)) { - return; - } - - const batch = rec.work?.batch ?? []; - for (let i = 0; i < awaiting.length; i++) { - const entry = awaiting[i]; - const decision = decisions[i]; - if (!entry || !decision) continue; - const idx = batch.findIndex((pe) => pe.function_call.id === entry.function_call_id); - if (idx < 0) continue; - const current = batch[idx]; - if (!current) continue; - if (decision.decision === 'allow') { - batch[idx] = { ...current, pre_approved: true, blocked: null }; - } else { - batch[idx] = { - ...current, - pre_approved: false, - blocked: denialResultFromDecision(decision), - }; - } - } - - rec.awaiting_approval = []; - transitionTo(rec, 'function_execute'); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::function_awaiting_approval', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'function_awaiting_approval', handleAwaitingApproval, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state function_awaiting_approval: read approval decisions and resume.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/function-execute.ts b/harness/src/turn-orchestrator/states/function-execute.ts deleted file mode 100644 index 0224b9f5..00000000 --- 
a/harness/src/turn-orchestrator/states/function-execute.ts +++ /dev/null @@ -1,281 +0,0 @@ -/** - * `turn::function_execute`. Run prepared function calls, finalize results, route onward. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. - */ - -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import type { AgentEvent } from '../../types/agent-event.js'; -import type { - AgentMessage, - AssistantMessage, - FunctionResultMessage, -} from '../../types/agent-message.js'; -import type { FunctionCall, FunctionResult } from '../../types/function.js'; -import { - dispatchWithHook, - isErrorResult, - missingFunctionResult, - triggerFunctionCall, - unwrapAgentTrigger, -} from '../agent-trigger.js'; -import { emit } from '../events.js'; -import { finishSession } from '../finish.js'; -import { publishAfter } from '../hook.js'; -import * as persistence from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { - type ExecutedEntry, - type PreparedEntry, - type TurnWork, - type TurnStateRecord, - transitionTo, -} from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; - -function buildFunctionExecutionEnd( - fc: FunctionCall, - result: FunctionResult, - is_error: boolean, - duration_ms: number, -): AgentEvent { - return { - type: 'function_execution_end', - function_call_id: fc.id, - function_id: fc.function_id, - result, - is_error, - duration_ms, - }; -} - -/** - * Attach the call's identity + session to its arguments so the target function - * receives the routing context. Pure: never mutates `fc` or its arguments. - */ -function augmentFunctionCall(fc: FunctionCall, session_id: string): FunctionCall { - const baseArgs = - fc.arguments && typeof fc.arguments === 'object' && !Array.isArray(fc.arguments) - ? 
(fc.arguments as Record) - : { arguments: fc.arguments }; - const augmented = { - ...baseArgs, - session_id, - function_call_id: fc.id, - function_id: fc.function_id, - function_call: { id: fc.id, function_id: fc.function_id, arguments: fc.arguments }, - }; - return { id: fc.id, function_id: fc.function_id, arguments: augmented }; -} - -function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { - const out: FunctionCall[] = []; - for (const b of msg.content) { - if (b.type === 'function_call') { - out.push({ id: b.id, function_id: b.function_id, arguments: b.arguments }); - } - } - return out; -} - -function buildBatch(asst: AssistantMessage): PreparedEntry[] { - return extractFunctionCalls(asst).map((raw) => { - const function_call = unwrapAgentTrigger(raw); - if (!function_call.function_id) { - return { function_call, blocked: missingFunctionResult() }; - } - return { function_call, blocked: null }; - }); -} - -function upsertExecutedCall(executed: ExecutedEntry[], entry: ExecutedEntry): void { - const idx = executed.findIndex((e) => e.function_call.id === entry.function_call.id); - if (idx >= 0) executed[idx] = entry; - else executed.push(entry); -} - -function ensureWork(rec: TurnStateRecord): TurnWork { - if (!rec.work) { - const asst = rec.last_assistant; - if (!asst) throw new Error('function_execute without last_assistant'); - rec.work = { batch: buildBatch(asst), results: [] }; - } - return rec.work; -} - -async function commitExecutedCall( - iii: ISdk, - rec: TurnStateRecord, - work: TurnWork, - fc: FunctionCall, - result: FunctionResult, - startedAt: number, - is_error?: boolean, -): Promise { - const duration_ms = Date.now() - startedAt; - const error = is_error ?? 
isErrorResult(result); - upsertExecutedCall(work.results, { - function_call: fc, - result, - is_error: error, - duration_ms, - }); - await persistence.writeRecord(iii, rec); - await emit(iii, rec.session_id, buildFunctionExecutionEnd(fc, result, error, duration_ms)); -} - -/** Run the registered after-hook and adopt its merged result when it returns one. */ -async function applyAfterHook(iii: ISdk, entry: ExecutedEntry): Promise { - const merged = await publishAfter(iii, entry.function_call, entry.result); - if ( - merged && - typeof merged === 'object' && - Array.isArray((merged as Record).content) - ) { - return merged as FunctionResult; - } - return entry.result; -} - -function toFunctionResultMessage( - entry: ExecutedEntry, - result: FunctionResult, -): FunctionResultMessage { - return { - role: 'function_result', - function_call_id: entry.function_call.id, - function_id: entry.function_call.function_id, - content: result.content, - details: result.details, - is_error: entry.is_error, - timestamp: Date.now(), - }; -} - -/** - * Function_call_ids already persisted for the current turn. Results are appended - * right after the assistant that requested them, so they form the trailing run - * of `function_result` messages; the first non-result from the tail is the turn - * boundary. - */ -function persistedResultIds(messages: AgentMessage[]): Set { - const ids = new Set(); - for (let i = messages.length - 1; i >= 0; i--) { - const m = messages[i]; - if (m?.role === 'function_result') ids.add(m.function_call_id); - else break; - } - return ids; -} - -async function finalizeExecutedCalls(iii: ISdk, rec: TurnStateRecord): Promise { - const executed = rec.work?.results ?? 
[]; - const function_results: FunctionResultMessage[] = []; - let allTerminate = executed.length > 0; - for (const entry of executed) { - const result = await applyAfterHook(iii, entry); - if (!result.terminate) allTerminate = false; - function_results.push(toFunctionResultMessage(entry, result)); - } - - // Idempotency: a durable retry / step-fanout race can replay finalize with the - // same work after the results were appended but before the transition - // persisted. Re-appending duplicate function_result blocks makes providers - // reject the turn ("multiple tool_result blocks with id ..."), so drop any id - // already present in this turn's trailing results. - const messages = await persistence.loadMessages(iii, rec.session_id); - const alreadyPersisted = persistedResultIds(messages); - const fresh = function_results.filter((r) => !alreadyPersisted.has(r.function_call_id)); - if (fresh.length < function_results.length) { - logger.warn('finalizeExecutedCalls: skipped duplicate function_results (re-entry detected)', { - session_id: rec.session_id, - total: function_results.length, - skipped: function_results.length - fresh.length, - }); - } - await persistence.saveMessages(iii, rec.session_id, [...messages, ...fresh]); - - const asst = rec.last_assistant; - rec.function_results = function_results; - rec.work = undefined; - - if (asst) { - await emit(iii, rec.session_id, { type: 'turn_end', message: asst, function_results }); - rec.turn_end_emitted = true; - } - if (allTerminate) { - await finishSession(iii, rec); - } else { - transitionTo(rec, 'steering_check'); - } -} - -export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { - const work = ensureWork(rec); - - for (const entry of work.batch) { - const fc = entry.function_call; - - // Re-entry (approval resume / crash mid-batch): a call already in - // work.results replays its end event only. Emitting the start first - // would make the UI show a phantom restart of a completed function. 
- const existing = work.results.find((e) => e.function_call.id === fc.id); - if (existing) { - await emit( - iii, - rec.session_id, - buildFunctionExecutionEnd(fc, existing.result, existing.is_error, existing.duration_ms), - ); - continue; - } - - await emit(iii, rec.session_id, { - type: 'function_execution_start', - function_call_id: fc.id, - function_id: fc.function_id, - args: fc.arguments, - }); - const startedAt = Date.now(); - - if (entry.pre_approved === true) { - await commitExecutedCall(iii, rec, work, fc, await triggerFunctionCall(iii, fc), startedAt); - continue; - } - - if (entry.blocked) { - await commitExecutedCall(iii, rec, work, fc, entry.blocked, startedAt, true); - continue; - } - - const out = await dispatchWithHook(iii, augmentFunctionCall(fc, rec.session_id)); - if (out.kind === 'pending') { - rec.awaiting_approval = rec.awaiting_approval ?? []; - rec.awaiting_approval.push({ - function_call_id: fc.id, - function_id: fc.function_id, - args: fc.arguments, - }); - transitionTo(rec, 'function_awaiting_approval'); - return; - } - - await commitExecutedCall(iii, rec, work, fc, out.result, startedAt); - } - await finalizeExecutedCalls(iii, rec); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::function_execute', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'function_execute', handleExecute, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state function_execute: dispatch prepared calls and finalize results.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/index.ts b/harness/src/turn-orchestrator/states/index.ts deleted file mode 100644 index b56ef19b..00000000 --- a/harness/src/turn-orchestrator/states/index.ts +++ /dev/null @@ -1,9 +0,0 @@ -/** - * Re-export per-state register functions. Each `turn::{state}` lives in its own file. 
- */ - -export { register as registerProvisioning } from './provisioning.js'; -export { register as registerAssistantStreaming } from './assistant-streaming.js'; -export { register as registerFunctionExecute } from './function-execute.js'; -export { register as registerFunctionAwaitingApproval } from './function-awaiting-approval.js'; -export { register as registerSteeringCheck } from './steering-check.js'; diff --git a/harness/src/turn-orchestrator/states/provisioning.ts b/harness/src/turn-orchestrator/states/provisioning.ts deleted file mode 100644 index e33a8ed4..00000000 --- a/harness/src/turn-orchestrator/states/provisioning.ts +++ /dev/null @@ -1,113 +0,0 @@ -/** - * `turn::provisioning`. First FSM step after `run::start`: assemble the system - * prompt, attach the agent_trigger function schema to the run request, then - * advance to assistant_streaming. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
- */ - -import type { ISdk } from '../../runtime/iii.js'; -import { logger } from '../../runtime/otel.js'; -import { agentTriggerTool } from '../agent-trigger.js'; -import type { TurnOrchestratorConfig } from '../config.js'; -import * as persistence from '../persistence.js'; -import { type RunRequest } from '../run-request.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { - type DefaultSkillBody, - buildSystemPrompt, - defaultSkillBody, - skillIdFromUri, -} from '../system-prompt.js'; - -const FETCH_TIMEOUT_MS = 10_000; - -export function parseDirectoryBody(resp: unknown): string | null { - if (typeof resp === 'string') return resp; - if (resp && typeof resp === 'object') { - const body = (resp as { body?: unknown }).body; - if (typeof body === 'string') return body; - } - return null; -} - -async function fetchSkill(iii: ISdk, id: string): Promise { - try { - const resp = await iii.trigger({ - function_id: 'directory::skills::get', - payload: { id }, - timeoutMs: FETCH_TIMEOUT_MS, - }); - return parseDirectoryBody(resp); - } catch (err) { - logger.warn('directory::skills::get failed', { id, err: String(err) }); - return null; - } -} - -async function fetchDefaultSkills(iii: ISdk, uris: readonly string[]): Promise { - const bodies: DefaultSkillBody[] = []; - for (const uri of uris) { - const body = await fetchSkill(iii, skillIdFromUri(uri)); - bodies.push(defaultSkillBody(uri, body)); - } - return bodies; -} - -async function fetchSkillsIndex(iii: ISdk): Promise { - try { - const resp = await iii.trigger({ - function_id: 'directory::skills::index', - payload: {}, - timeoutMs: FETCH_TIMEOUT_MS, - }); - const body = parseDirectoryBody(resp); - return body && body.length > 0 ? 
body : null; - } catch (err) { - logger.warn('directory::skills::index failed', { err: String(err) }); - return null; - } -} - -export async function handleProvisioning( - iii: ISdk, - cfg: TurnOrchestratorConfig, - rec: TurnStateRecord, -): Promise { - const request = await persistence.loadRunRequest(iii, rec.session_id); - - const override = request.system_prompt.length > 0 ? request.system_prompt : null; - - const [skillsIndex, bodies] = await Promise.all([ - fetchSkillsIndex(iii), - fetchDefaultSkills(iii, cfg.system_default_skills), - ]); - const prompt = buildSystemPrompt(bodies, { override, mode: request.mode, skillsIndex }); - - const updated: RunRequest = { ...request, system_prompt: prompt, function_schemas: [agentTriggerTool()] }; - await persistence.saveRunRequest(iii, rec.session_id, updated); - - transitionTo(rec, 'assistant_streaming'); -} - -export function register(iii: ISdk, cfg: TurnOrchestratorConfig): void { - iii.registerFunction( - 'turn::provisioning', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition( - iii, - 'provisioning', - (i, rec) => handleProvisioning(i, cfg, rec), - parsed, - ); - }, - { - description: - 'Run one durable FSM transition for session in state provisioning: build the system prompt, attach the agent_trigger function schema, advance to assistant_streaming.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/states/steering-check.ts b/harness/src/turn-orchestrator/states/steering-check.ts deleted file mode 100644 index be53f5cd..00000000 --- a/harness/src/turn-orchestrator/states/steering-check.ts +++ /dev/null @@ -1,129 +0,0 @@ -/** - * `turn::steering_check`. Drains steering / followup inboxes, then routes onward. - * - * **Incoming**: flat `{ session_id }` via FIFO enqueue on `turn-step`. - * **Outgoing**: `{ ok, from_state, to_state }` on success; stale skip when state drifted. 
- */ - -import type { ISdk } from '../../runtime/iii.js'; -import { type AgentMessage, emptyAssistant } from '../../types/agent-message.js'; -import { emit } from '../events.js'; -import { finishSession } from '../finish.js'; -import * as persistence from '../persistence.js'; -import { runTransition } from '../run-transition.js'; -import { type TurnStateRecord, transitionTo } from '../state.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { syntheticAssistant } from '../synthetic-assistant.js'; - -export type SteeringRoute = 'steering' | 'followup' | 'continue_after_function' | 'end_turn'; - -/** Pure priority router — no I/O. */ -export function route( - has_steering: boolean, - has_followup: boolean, - has_function_results: boolean, -): SteeringRoute { - if (has_steering) return 'steering'; - if (has_followup) return 'followup'; - if (has_function_results) return 'continue_after_function'; - return 'end_turn'; -} - -async function drainQueue(iii: ISdk, name: string, session_id: string): Promise { - try { - const resp = await iii.trigger({ - function_id: 'session-inbox::drain', - payload: { name, session_id }, - }); - if (Array.isArray(resp?.items)) return resp.items as AgentMessage[]; - } catch { - // ignore - } - return []; -} - -function maxTurnsReached(rec: TurnStateRecord): boolean { - return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; -} - -async function endForMaxTurns(iii: ISdk, rec: TurnStateRecord): Promise { - const msg = syntheticAssistant({ - stop_reason: 'end', - text: `loop stopped: max_turns (${rec.max_turns ?? 
0}) reached`, - }); - rec.last_assistant = msg; - const messages = await persistence.loadMessages(iii, rec.session_id); - messages.push(msg); - await persistence.saveMessages(iii, rec.session_id, messages); - await emit(iii, rec.session_id, { type: 'message_complete', message: msg, body_streamed: false }); - await emit(iii, rec.session_id, { type: 'turn_end', message: msg, function_results: [] }); - rec.turn_end_emitted = true; - await finishSession(iii, rec); -} - -async function emitTurnEndOnce(iii: ISdk, rec: TurnStateRecord): Promise { - if (rec.turn_end_emitted) return; - const last = rec.last_assistant ?? emptyAssistant(); - await emit(iii, rec.session_id, { - type: 'turn_end', - message: last, - function_results: [], - }); - rec.turn_end_emitted = true; -} - -export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { - const steering = await drainQueue(iii, 'steering', rec.session_id); - const followup = steering.length > 0 ? [] : await drainQueue(iii, 'followup', rec.session_id); - - const decision = route( - steering.length > 0, - followup.length > 0, - rec.function_results.length > 0, - ); - switch (decision) { - case 'steering': - case 'followup': { - if (maxTurnsReached(rec)) { - await endForMaxTurns(iii, rec); - break; - } - const inbox = decision === 'steering' ? 
steering : followup; - await emitTurnEndOnce(iii, rec); - const messages = await persistence.loadMessages(iii, rec.session_id); - messages.push(...inbox); - await persistence.saveMessages(iii, rec.session_id, messages); - rec.function_results = []; - transitionTo(rec, 'assistant_streaming'); - break; - } - case 'continue_after_function': { - if (maxTurnsReached(rec)) { - await endForMaxTurns(iii, rec); - break; - } - rec.function_results = []; - transitionTo(rec, 'assistant_streaming'); - break; - } - case 'end_turn': { - await emitTurnEndOnce(iii, rec); - await finishSession(iii, rec); - break; - } - } -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::steering_check', - async (payload: TurnStepPayload) => { - const parsed = TurnStepPayloadSchema.parse(payload); - return runTransition(iii, 'steering_check', handleSteering, parsed); - }, - { - description: - 'Run one durable FSM transition for session in state steering_check: drain inboxes and route onward.', - }, - ); -} diff --git a/harness/src/turn-orchestrator/steering-check/ports.ts b/harness/src/turn-orchestrator/steering-check/ports.ts new file mode 100644 index 00000000..a4bca3f7 --- /dev/null +++ b/harness/src/turn-orchestrator/steering-check/ports.ts @@ -0,0 +1,46 @@ +/** + * Typed dependency ports for steering_check. + */ + +import type { ISdk } from '../../runtime/iii.js'; +import type { AgentEvent } from '../../types/agent-event.js'; +import type { AgentMessage } from '../../types/agent-message.js'; +import { emit } from '../events.js'; +import { createTurnStatePorts, type TurnStatePorts } from '../state-runtime/ports.js'; + +/** Decode session-inbox drain responses. 
*/ +export function parseDrainItems(resp: unknown): AgentMessage[] { + if (resp && typeof resp === 'object' && Array.isArray((resp as { items?: unknown }).items)) { + return (resp as { items: AgentMessage[] }).items; + } + return []; +} + +export type SteeringCheckPorts = TurnStatePorts & { + drainInbox(name: 'steering' | 'followup', session_id: string): Promise; + emit(session_id: string, event: AgentEvent): Promise; +}; + +export function createSteeringCheckPorts(iii: ISdk): SteeringCheckPorts { + const base = createTurnStatePorts(iii); + + return { + ...base, + + async drainInbox(name, session_id) { + try { + const resp = await iii.trigger({ + function_id: 'session-inbox::drain', + payload: { name, session_id }, + }); + return parseDrainItems(resp); + } catch { + return []; + } + }, + + emit(session_id, event) { + return emit(iii, session_id, event); + }, + }; +} diff --git a/harness/src/turn-orchestrator/steering-check/process.ts b/harness/src/turn-orchestrator/steering-check/process.ts new file mode 100644 index 00000000..5b95b22b --- /dev/null +++ b/harness/src/turn-orchestrator/steering-check/process.ts @@ -0,0 +1,133 @@ +/** + * Drain inboxes, route, apply steering_check outcomes, and register the FSM step. 
+ */ + +import type { ISdk } from '../../runtime/iii.js'; +import type { AgentMessage } from '../../types/agent-message.js'; +import { runTransition } from '../run-transition.js'; +import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; +import { emitTurnEndOnce, resumeToAssistantStreaming } from '../state-runtime/turn-end.js'; +import { type TurnStateRecord } from '../state.js'; +import { createSteeringCheckPorts, type SteeringCheckPorts } from './ports.js'; + +export type SteeringRoute = 'steering' | 'followup' | 'continue_after_function' | 'end_turn'; + +export type SteeringCheckOutcome = + | { kind: 'max_turns_reached' } + | { kind: 'resume_with_inbox'; inbox: AgentMessage[] } + | { kind: 'continue_after_function' } + | { kind: 'end_turn' }; + +export function route( + has_steering: boolean, + has_followup: boolean, + has_function_results: boolean, +): SteeringRoute { + if (has_steering) return 'steering'; + if (has_followup) return 'followup'; + if (has_function_results) return 'continue_after_function'; + return 'end_turn'; +} + +function maxTurnsReached(rec: TurnStateRecord): boolean { + return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; +} + +async function endForMaxTurns(ports: SteeringCheckPorts, rec: TurnStateRecord): Promise { + const msg = syntheticAssistant({ + stop_reason: 'end', + text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, + }); + rec.last_assistant = msg; + await ports.appendMessages(rec.session_id, [msg]); + await ports.emit(rec.session_id, { + type: 'message_complete', + message: msg, + body_streamed: false, + }); + await emitTurnEndOnce(ports, rec, msg); + await ports.finishSession(rec); +} + +export async function processSteeringCheck( + ports: SteeringCheckPorts, + rec: TurnStateRecord, +): Promise { + const steering = await ports.drainInbox('steering', rec.session_id); + const followup = steering.length > 0 ? 
[] : await ports.drainInbox('followup', rec.session_id); + + const decision = route(steering.length > 0, followup.length > 0, rec.function_results.length > 0); + + if ( + (decision === 'steering' || + decision === 'followup' || + decision === 'continue_after_function') && + maxTurnsReached(rec) + ) { + return { kind: 'max_turns_reached' }; + } + + switch (decision) { + case 'steering': + return { kind: 'resume_with_inbox', inbox: steering }; + case 'followup': + return { kind: 'resume_with_inbox', inbox: followup }; + case 'continue_after_function': + return { kind: 'continue_after_function' }; + case 'end_turn': + return { kind: 'end_turn' }; + } +} + +export async function applySteeringCheckOutcome( + ports: SteeringCheckPorts, + rec: TurnStateRecord, + outcome: SteeringCheckOutcome, +): Promise { + switch (outcome.kind) { + case 'max_turns_reached': + await endForMaxTurns(ports, rec); + return; + case 'resume_with_inbox': { + await emitTurnEndOnce(ports, rec); + await ports.appendMessages(rec.session_id, outcome.inbox); + resumeToAssistantStreaming(rec); + return; + } + case 'continue_after_function': + resumeToAssistantStreaming(rec); + return; + case 'end_turn': + await emitTurnEndOnce(ports, rec); + await ports.finishSession(rec); + return; + } +} + +export async function runSteeringCheck( + ports: SteeringCheckPorts, + rec: TurnStateRecord, +): Promise { + const outcome = await processSteeringCheck(ports, rec); + await applySteeringCheckOutcome(ports, rec, outcome); +} + +export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { + const ports = createSteeringCheckPorts(iii); + await runSteeringCheck(ports, rec); +} + +export function register(iii: ISdk): void { + iii.registerFunction( + 'turn::steering_check', + async (payload: TurnStepPayload) => { + const parsed = TurnStepPayloadSchema.parse(payload); + return runTransition(iii, 'steering_check', handleSteering, parsed); + }, + { + description: + 'Run one durable FSM transition for 
session in state steering_check: drain inboxes and route onward.', + }, + ); +} diff --git a/harness/src/turn-orchestrator/subscriber-presence.ts b/harness/src/turn-orchestrator/subscriber-presence.ts deleted file mode 100644 index cbb014ac..00000000 --- a/harness/src/turn-orchestrator/subscriber-presence.ts +++ /dev/null @@ -1,79 +0,0 @@ -/** - * Subscriber-presence cache for durable pub/sub topics. - * - * The after-function-call hook publishes to a durable topic and then blocks - * waiting for subscriber replies. When no worker subscribes to that topic the - * wait is pure dead time on the turn's critical path (the collector only exits - * on its deadline). This module answers "does anyone subscribe to ?" by - * querying `engine::triggers::list` for a `durable:subscriber` trigger bound to - * the topic (the registration shape consumers use — see iii queue worker). - * - * The answer is cached with a short TTL so the engine isn't queried per call. - * Hook subscribers are a deploy-time concern (workers register at startup), so - * a coarse TTL is fine; a newly-registered subscriber is picked up within one - * TTL window, which is consistent with the hook's existing "late arrivals may - * be dropped" contract. TTL is preferred over an `engine::functions-available` - * invalidation trigger to avoid adding another always-on trigger. - */ - -import { TriggerInfo } from 'iii-sdk'; -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; - -/** How long a subscriber-presence answer stays fresh before re-querying. */ -export const SUBSCRIBER_CACHE_TTL_MS = 30_000; - -const DURABLE_SUBSCRIBER_TYPE = 'durable:subscriber'; - -type CacheEntry = { has: boolean; at: number }; -const cache = new Map(); - -/** Clear the presence cache. Test seam; not used in production. 
*/ -export function resetSubscriberCache(): void { - cache.clear(); -} - -function topicOf(config: unknown): unknown { - if (config && typeof config === 'object') { - return (config as Record).topic; - } - return undefined; -} - -/** - * True if any worker subscribes to `topic` via a durable (queue) subscriber. - * - * Result is cached per topic for `SUBSCRIBER_CACHE_TTL_MS`. On query failure - * this returns `true` (fail-safe): callers fall back to their normal - * publish/collect behavior rather than silently dropping a hook. - * - * `now` is injectable for testing the TTL; production callers omit it. - */ -export async function hasDurableSubscriber( - iii: ISdk, - topic: string, - now: number = Date.now(), -): Promise { - const cached = cache.get(topic); - if (cached && now - cached.at < SUBSCRIBER_CACHE_TTL_MS) { - return cached.has; - } - - try { - const resp = await iii.trigger({ - function_id: 'engine::triggers::list', - payload: {}, - }); - const has = (resp.triggers ?? []).some( - (t) => t.trigger_type === DURABLE_SUBSCRIBER_TYPE && topicOf(t.config) === topic, - ); - cache.set(topic, { has, at: now }); - return has; - } catch (err) { - logger.warn('subscriber presence check failed; assuming subscribers exist', { - topic, - err: String(err), - }); - return true; - } -} diff --git a/harness/src/turn-orchestrator/turn-state-write.ts b/harness/src/turn-orchestrator/turn-state-write.ts deleted file mode 100644 index 18e2b855..00000000 --- a/harness/src/turn-orchestrator/turn-state-write.ts +++ /dev/null @@ -1,31 +0,0 @@ -/** - * UI notification when agent-scope turn_state is persisted via `saveRecord` / - * `persistRecord`. 
- */ - -import type { ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import { emit } from './events.js'; -import type { TurnStateView } from './schemas.js'; - -export async function emitTurnStateChanged( - iii: ISdk, - session_id: string, - event_type: 'state:created' | 'state:updated', - new_value: TurnStateView, - old_value?: TurnStateView, -): Promise { - try { - await emit(iii, session_id, { - type: 'turn_state_changed', - event_type, - new_value, - ...(old_value !== undefined && { old_value }), - }); - } catch (err) { - logger.warn('emitTurnStateChanged failed', { - session_id, - err: String(err), - }); - } -} diff --git a/harness/src/turn-orchestrator/wake.ts b/harness/src/turn-orchestrator/wake.ts deleted file mode 100644 index 258192ec..00000000 --- a/harness/src/turn-orchestrator/wake.ts +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Durable FSM wake via iii-queue FIFO `turn-step`. Enqueues `turn::{state}` per - * persisted turn_state, not a generic dispatcher. - */ - -import { TriggerAction, type ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import * as persistence from './persistence.js'; -import { type TurnState } from './state.js'; - -export const TURN_STEP_QUEUE = 'turn-step'; - -const NON_STEPABLE_STATES = new Set(['stopped', 'failed', 'function_awaiting_approval']); - -/** True when a persisted turn_state transition should enqueue `turn::{newState}`. 
*/ -export function shouldWakeStep(previousState: TurnState | null, newState: TurnState): boolean { - if (NON_STEPABLE_STATES.has(newState)) return false; - if (previousState !== null && previousState === newState) return false; - return true; -} - - -export async function wakeState(iii: ISdk, session_id: string, state: TurnState): Promise { - try { - await iii.trigger({ - function_id: `turn::${state}`, - payload: { session_id }, - action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), - }); - } catch (err) { - logger.warn('wakeState failed', { session_id, state, err: String(err) }); - } -} - -/** Enqueue the handler for the session's current persisted state (approval/abort). */ -export async function wakeFromRecord(iii: ISdk, session_id: string): Promise { - const rec = await persistence.loadRecord(iii, session_id); - if (!rec || rec.state === 'stopped' || rec.state === 'failed') return; - await wakeState(iii, session_id, rec.state); -} diff --git a/harness/src/types/agent-event.ts b/harness/src/types/agent-event.ts index 83076cf8..32705310 100644 --- a/harness/src/types/agent-event.ts +++ b/harness/src/types/agent-event.ts @@ -41,7 +41,7 @@ export type AgentEvent = result: FunctionResult; is_error: boolean; /** Wall-clock ms between the matching function_execution_start and end. - * Reused from persisted ExecutedEntry on resumed runs so replayed + * Reused from persisted ExecutedCall on resumed runs so replayed * calls keep their original timing. */ duration_ms: number; } diff --git a/harness/src/types/function.ts b/harness/src/types/function.ts index a061df03..55336a74 100644 --- a/harness/src/types/function.ts +++ b/harness/src/types/function.ts @@ -45,15 +45,3 @@ export type FunctionResult = { details: unknown; terminate?: boolean; }; - -/** Prepared call entry used during FSM function execution. 
*/ -export type PreparedFunctionCall = - | { kind: 'prepared'; function_call: FunctionCall } - | { kind: 'immediate'; result: FunctionResult; is_error: boolean }; - -/** Finalized call entry after function execution completes. */ -export type FinalizedFunctionCall = { - function_call: FunctionCall; - result: FunctionResult; - is_error: boolean; -}; diff --git a/harness/tests/_helpers/stateStoreKey.ts b/harness/tests/_helpers/stateStoreKey.ts new file mode 100644 index 00000000..351c3a02 --- /dev/null +++ b/harness/tests/_helpers/stateStoreKey.ts @@ -0,0 +1,8 @@ +/** Composite key used by test mocks mirroring `${scope}/${key}` iii state storage. */ +export function stateStoreKey(scope: string, key: string): string { + return `${scope}/${key}`; +} + +export function payloadStoreKey(payload: { scope?: string; key?: string }): string { + return stateStoreKey(payload.scope ?? '', payload.key ?? ''); +} diff --git a/harness/tests/approval-gate/_helpers/fakeIii.ts b/harness/tests/approval-gate/_helpers/fakeIii.ts index b37b19ff..22c159c4 100644 --- a/harness/tests/approval-gate/_helpers/fakeIii.ts +++ b/harness/tests/approval-gate/_helpers/fakeIii.ts @@ -12,26 +12,22 @@ export type TriggerCall = { function_id: string; payload: unknown }; export type FakeIii = { iii: ISdk; calls: TriggerCall[]; - resumeCalls: TriggerCall[]; streamSets: unknown[]; }; export function fakeIii(): FakeIii { const calls: TriggerCall[] = []; - const resumeCalls: TriggerCall[] = []; const streamSets: unknown[] = []; const iii = { trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { calls.push({ function_id, payload }); - if (function_id.startsWith('turn::approval_resume::')) { - resumeCalls.push({ function_id, payload }); - } else if (function_id === 'stream::set') { + if (function_id === 'stream::set') { streamSets.push(payload); } return null; }), } as unknown as ISdk; - return { iii, calls, resumeCalls, streamSets }; + return { iii, calls, streamSets 
}; } diff --git a/harness/tests/approval-gate/resolve.test.ts b/harness/tests/approval-gate/resolve.test.ts index 3f689054..ae74e3ca 100644 --- a/harness/tests/approval-gate/resolve.test.ts +++ b/harness/tests/approval-gate/resolve.test.ts @@ -11,7 +11,7 @@ import { fakeIii } from './_helpers/fakeIii.js'; describe('handleResolveRequest — writing the decision', () => { it('writes the decision to approvals// with a normalized payload', async () => { - const { iii, calls, resumeCalls } = fakeIii(); + const { iii, calls } = fakeIii(); const out = await handleResolveRequest(iii, { session_id: 's1', function_call_id: 'fc-1', @@ -29,7 +29,6 @@ describe('handleResolveRequest — writing the decision', () => { }, }, ]); - expect(resumeCalls).toHaveLength(0); }); it('never emits to the agent::events stream (denial flows via execution_end)', async () => { @@ -94,7 +93,7 @@ describe('handleResolveRequest — hostile / malformed input is rejected, not cr }); describe('handleResolveRequest — downstream failure is surfaced as resume_failed', () => { - it('returns resume_failed when the resume trigger rejects', async () => { + it('returns resume_failed when the state::set write rejects', async () => { const { iii } = fakeIii(); (iii.trigger as ReturnType).mockRejectedValue(new Error('boom')); const out = await handleResolveRequest(iii, { diff --git a/harness/tests/approval-gate/schemas.test.ts b/harness/tests/approval-gate/schemas.test.ts index 66aac23a..1ba24949 100644 --- a/harness/tests/approval-gate/schemas.test.ts +++ b/harness/tests/approval-gate/schemas.test.ts @@ -7,6 +7,7 @@ */ import { describe, expect, it } from 'vitest'; import { + ApprovalDecisionSchema, ApprovalResumePayloadSchema, ResolvePayloadSchema, parsePolicyReply, @@ -97,10 +98,10 @@ describe('state-key derivation — separator integrity', () => { }); }); -describe('ApprovalResumePayloadSchema', () => { +describe('ApprovalDecisionSchema', () => { it('accepts the three terminal decisions with an explicit reason', () 
=> { for (const decision of ['allow', 'deny', 'aborted'] as const) { - expect(ApprovalResumePayloadSchema.parse({ decision, reason: null })).toEqual({ + expect(ApprovalDecisionSchema.parse({ decision, reason: null })).toEqual({ decision, reason: null, }); @@ -108,10 +109,14 @@ describe('ApprovalResumePayloadSchema', () => { }); it('rejects a missing reason and an unknown decision', () => { - expect(ApprovalResumePayloadSchema.safeParse({ decision: 'allow' }).success).toBe(false); - expect( - ApprovalResumePayloadSchema.safeParse({ decision: 'paused', reason: null }).success, - ).toBe(false); + expect(ApprovalDecisionSchema.safeParse({ decision: 'allow' }).success).toBe(false); + expect(ApprovalDecisionSchema.safeParse({ decision: 'paused', reason: null }).success).toBe( + false, + ); + }); + + it('keeps ApprovalResumePayloadSchema as a deprecated alias', () => { + expect(ApprovalResumePayloadSchema).toBe(ApprovalDecisionSchema); }); }); diff --git a/harness/tests/context-compaction/e2e/full-session.test.ts b/harness/tests/context-compaction/e2e/full-session.test.ts index 756607b1..5a65f107 100644 --- a/harness/tests/context-compaction/e2e/full-session.test.ts +++ b/harness/tests/context-compaction/e2e/full-session.test.ts @@ -6,7 +6,7 @@ * up to an InMemoryStore. Verifies the three structural guarantees that * the unit/integration tests cannot observe in isolation: * - * 1. The flat state at scope `agent`, key `session//messages` is + * 1. The flat state at scope `messages`, key `` is * rewritten to a reduced array: [summary-as-asst-msg, ...tail, replay]. * 2. 
The session tree's active path stays connected — the Compaction * entry, replayed user message, and synthetic continue-prompt are @@ -16,9 +16,10 @@ */ import { describe, expect, it, vi } from 'vitest'; -import { flatMessagesKey } from '../../../src/context-compaction/flat-state.js'; +import { payloadStoreKey, stateStoreKey } from '../../_helpers/stateStoreKey.js'; import { handleSync } from '../../../src/context-compaction/handler-sync.js'; import type { ISdk } from '../../../src/runtime/iii.js'; +import { MESSAGES_SCOPE } from '../../../src/turn-orchestrator/state.js'; import { registerTree } from '../../../src/session/tree/register.js'; import { InMemoryStore } from '../../../src/session/tree/store.js'; import type { SessionEntry } from '../../../src/session/tree/types.js'; @@ -87,7 +88,7 @@ function buildTestSdk(opts: { const store = new InMemoryStore(); // Pre-seed flat state with the overflowing transcript. - stateStore.set(flatMessagesKey(opts.session_id), opts.flatMessages); + stateStore.set(stateStoreKey(MESSAGES_SCOPE, opts.session_id), opts.flatMessages); // Stub channel writer so streamAndCollect can deliver a synthetic done event. let channelCb: ((raw: string) => void) | null = null; @@ -107,25 +108,31 @@ function buildTestSdk(opts: { // 1) state::* — back the lease / flat-state rewrite with stateStore. if (fn === 'state::get') { - const p = (payload ?? {}) as { key: string }; - const v = stateStore.get(p.key); + const p = (payload ?? {}) as { scope: string; key: string }; + const v = stateStore.get(payloadStoreKey(p)); return v !== undefined ? v : null; } if (fn === 'state::set') { - const p = (payload ?? {}) as { key: string; value: unknown }; - if (p.value === null || p.value === undefined) stateStore.delete(p.key); - else stateStore.set(p.key, p.value); + const p = (payload ?? 
{}) as { scope: string; key: string; value: unknown }; + const storeKey = payloadStoreKey(p); + if (p.value === null || p.value === undefined) stateStore.delete(storeKey); + else stateStore.set(storeKey, p.value); return { ok: true }; } if (fn === 'state::update') { - const p = (payload ?? {}) as { key: string; ops: Array<{ type: string; value?: unknown }> }; - const oldValue = stateStore.has(p.key) ? stateStore.get(p.key) : null; + const p = (payload ?? {}) as { + scope: string; + key: string; + ops: Array<{ type: string; value?: unknown }>; + }; + const storeKey = payloadStoreKey(p); + const oldValue = stateStore.has(storeKey) ? stateStore.get(storeKey) : null; let newValue: unknown = oldValue; for (const op of p.ops ?? []) { if (op.type === 'set') newValue = op.value; } - if (newValue === null || newValue === undefined) stateStore.delete(p.key); - else stateStore.set(p.key, newValue); + if (newValue === null || newValue === undefined) stateStore.delete(storeKey); + else stateStore.set(storeKey, newValue); return { old_value: oldValue ?? null, new_value: newValue ?? null }; } @@ -264,7 +271,7 @@ describe('e2e full-session compaction', () => { const lastUserId = entryIds[entryIds.length - 1] ?? ''; // the final user msg // Sanity check: the pre-compaction flat state matches the seed. - const beforeFlat = stateStore.get(flatMessagesKey(SESSION_ID)) as AgentMessage[]; + const beforeFlat = stateStore.get(stateStoreKey(MESSAGES_SCOPE, SESSION_ID)) as AgentMessage[]; expect(beforeFlat.length).toBe(overflowing.length); // Run preflight. The 30-turn fixture should overflow the 8k usable budget. @@ -272,7 +279,7 @@ describe('e2e full-session compaction', () => { expect(result).toBe('compacted'); // --- Assertion 1: flat state is reduced and shaped correctly. 
--- - const afterFlat = stateStore.get(flatMessagesKey(SESSION_ID)) as AgentMessage[]; + const afterFlat = stateStore.get(stateStoreKey(MESSAGES_SCOPE, SESSION_ID)) as AgentMessage[]; expect(afterFlat.length).toBeLessThan(overflowing.length); // First message must be the summary-as-assistant-msg containing SUMMARY. @@ -367,7 +374,7 @@ describe('e2e full-session compaction', () => { } // Flat state is untouched. - const after = stateStore.get(flatMessagesKey(SID)) as AgentMessage[]; + const after = stateStore.get(stateStoreKey(MESSAGES_SCOPE, SID)) as AgentMessage[]; expect(after.length).toBe(tinyMessages.length); // Summariser was never invoked. diff --git a/harness/tests/context-compaction/flat-state-key.test.ts b/harness/tests/context-compaction/flat-state-key.test.ts deleted file mode 100644 index 64ef249e..00000000 --- a/harness/tests/context-compaction/flat-state-key.test.ts +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Drift-guard: the context-compaction worker writes flat session messages - * to a key it composes itself in `flat-state.ts::flatMessagesKey`, because - * importing `turn-orchestrator/state.ts::messagesKey` directly would - * create a package-layer dependency (the orchestrator depends on - * context-compaction via preflight). The two functions MUST agree forever - * — otherwise compaction silently writes the rewritten history to a - * shadow key the orchestrator never reads from. - * - * If this test fails, fix the key shape in flat-state.ts to match - * messagesKey, then take a hard look at whether the duplication should - * be lifted into a shared `runtime/keys.ts` module. 
- */ - -import { describe, expect, it } from 'vitest'; -import { flatMessagesKey } from '../../src/context-compaction/flat-state.js'; -import { messagesKey } from '../../src/turn-orchestrator/state.js'; - -describe('flatMessagesKey ↔ turn-orchestrator messagesKey', () => { - it('produces the identical key for any session id', () => { - for (const sid of ['s', 'console-abc', 'session-with-dashes-12345', 'x']) { - expect(flatMessagesKey(sid)).toBe(messagesKey(sid)); - } - }); -}); diff --git a/harness/tests/context-compaction/integration/flow-sync.test.ts b/harness/tests/context-compaction/integration/flow-sync.test.ts index 8ccd3d39..56f9cb06 100644 --- a/harness/tests/context-compaction/integration/flow-sync.test.ts +++ b/harness/tests/context-compaction/integration/flow-sync.test.ts @@ -8,6 +8,7 @@ * 3. Lease held → status === 'busy'. */ import { describe, expect, it, vi } from 'vitest'; +import { payloadStoreKey, stateStoreKey } from '../../_helpers/stateStoreKey.js'; import { handleSync } from '../../../src/context-compaction/handler-sync.js'; import type { ISdk } from '../../../src/runtime/iii.js'; import { loadFixture } from '../../fixtures/load.js'; @@ -91,29 +92,35 @@ function buildSyncMock(opts: { return { ok: true }; } if (function_id === 'state::get') { - const v = stateStore.get((payload as { key: string }).key); + const v = stateStore.get(payloadStoreKey(payload as { scope?: string; key?: string })); return v !== undefined ? 
v : null; } if (function_id === 'state::set') { - const p = payload as { key: string; value: unknown }; + const p = payload as { key: string; value: unknown; scope?: string }; + const storeKey = payloadStoreKey(p); if (p.value === null || p.value === undefined) { - stateStore.delete(p.key); + stateStore.delete(storeKey); } else { - stateStore.set(p.key, p.value); + stateStore.set(storeKey, p.value); } return { ok: true }; } if (function_id === 'state::update') { - const p = payload as { key: string; ops: Array<{ type: string; value?: unknown }> }; - const oldValue = stateStore.has(p.key) ? stateStore.get(p.key) : null; + const p = payload as { + key: string; + scope?: string; + ops: Array<{ type: string; value?: unknown }>; + }; + const storeKey = payloadStoreKey(p); + const oldValue = stateStore.has(storeKey) ? stateStore.get(storeKey) : null; let newValue: unknown = oldValue; for (const op of p.ops ?? []) { if (op.type === 'set') newValue = op.value; } if (newValue === null || newValue === undefined) { - stateStore.delete(p.key); + stateStore.delete(storeKey); } else { - stateStore.set(p.key, newValue); + stateStore.set(storeKey, newValue); } return { old_value: oldValue ?? null, new_value: newValue ?? 
null }; } @@ -215,8 +222,8 @@ describe('flow-sync: lease held → busy', () => { // Pre-populate the state store with an active lease for this session const sessionId = `${mediumFixture.session_id}-busy`; const stateStore = new Map(); - const leaseKey = `session/${sessionId}/compaction_lease`; - stateStore.set(leaseKey, { nonce: 'held-by-another', ts: Date.now() - 1000 }); + const leaseStoreKey = stateStoreKey('compaction_lease', sessionId); + stateStore.set(leaseStoreKey, { nonce: 'held-by-another', ts: Date.now() - 1000 }); const { iii } = buildSyncMock({ fixtureMessages, stateStore }); diff --git a/harness/tests/context-compaction/lease.test.ts b/harness/tests/context-compaction/lease.test.ts index 612e2e9b..7fb40a80 100644 --- a/harness/tests/context-compaction/lease.test.ts +++ b/harness/tests/context-compaction/lease.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it, vi } from 'vitest'; +import { payloadStoreKey, stateStoreKey } from '../_helpers/stateStoreKey.js'; import { LEASE_TTL_SECS, - leaseKey, mintLeaseNonce, readLeaseTimestampSecs, acquireLease, @@ -9,10 +9,9 @@ import { } from '../../src/context-compaction/lease.js'; describe('lease helpers', () => { - it('leaseKey namespaces by session', () => { - const k = leaseKey('s9'); - expect(k).toContain('s9'); - expect(k).toContain('compaction_lease'); + it('stateStoreKey namespaces compaction lease by session', () => { + const k = stateStoreKey('compaction_lease', 's9'); + expect(k).toBe('compaction_lease/s9'); }); it('mintLeaseNonce produces unique values across rapid calls', () => { @@ -49,20 +48,21 @@ function makeStateIii() { trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { const p = payload as Record; if (function_id === 'state::get') { - const v = store.get(p['key'] as string); + const v = store.get(payloadStoreKey(p as { scope?: string; key?: string })); return v !== undefined ? 
v : null; } if (function_id === 'state::set') { const v = p['value']; + const key = payloadStoreKey(p as { scope?: string; key?: string }); if (v === null || v === undefined) { - store.delete(p['key'] as string); + store.delete(key); } else { - store.set(p['key'] as string, v); + store.set(key, v); } return { ok: true }; } if (function_id === 'state::update') { - const key = p['key'] as string; + const key = payloadStoreKey(p as { scope?: string; key?: string }); const ops = (p['ops'] ?? []) as Array<{ type: string; value?: unknown }>; const oldValue = store.has(key) ? store.get(key) : null; let newValue: unknown = oldValue; @@ -94,11 +94,11 @@ describe('lease kinds', () => { expect(nonce1).not.toBe(nonce2); }); - it('leaseKey produces different keys for compaction vs prune', () => { - const k1 = leaseKey('sess1', 'compaction'); - const k2 = leaseKey('sess1', 'prune'); - expect(k1).toContain('compaction_lease'); - expect(k2).toContain('prune_lease'); + it('stateStoreKey produces different keys for compaction vs prune', () => { + const k1 = stateStoreKey('compaction_lease', 'sess1'); + const k2 = stateStoreKey('prune_lease', 'sess1'); + expect(k1).toBe('compaction_lease/sess1'); + expect(k2).toBe('prune_lease/sess1'); expect(k1).not.toBe(k2); }); }); @@ -126,22 +126,23 @@ function makeRacyStateIii(writeLatencyMs: number) { trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { const p = payload as Record; if (function_id === 'state::get') { - const v = store.get(p['key'] as string); + const v = store.get(payloadStoreKey(p as { scope?: string; key?: string })); return v !== undefined ? 
v : null; } if (function_id === 'state::set') { await new Promise((r) => setTimeout(r, writeLatencyMs)); const v = p['value']; + const key = payloadStoreKey(p as { scope?: string; key?: string }); if (v === null || v === undefined) { - store.delete(p['key'] as string); + store.delete(key); } else { - store.set(p['key'] as string, v); + store.set(key, v); } return { ok: true }; } if (function_id === 'state::update') { await new Promise((r) => setTimeout(r, writeLatencyMs)); - const key = p['key'] as string; + const key = payloadStoreKey(p as { scope?: string; key?: string }); const ops = (p['ops'] ?? []) as Array<{ type: string; value?: unknown }>; const oldValue = store.has(key) ? store.get(key) : null; let newValue: unknown = oldValue; @@ -171,15 +172,16 @@ function makeFailingUpdateIii() { trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { const p = payload as Record; if (function_id === 'state::get') { - const v = store.get(p['key'] as string); + const v = store.get(payloadStoreKey(p as { scope?: string; key?: string })); return v !== undefined ? 
v : null; } if (function_id === 'state::set') { const v = p['value']; + const key = payloadStoreKey(p as { scope?: string; key?: string }); if (v === null || v === undefined) { - store.delete(p['key'] as string); + store.delete(key); } else { - store.set(p['key'] as string, v); + store.set(key, v); } return { ok: true }; } diff --git a/harness/tests/context-compaction/turn-end-subscription.test.ts b/harness/tests/context-compaction/turn-end-subscription.test.ts index 9cf86e80..7d2db72e 100644 --- a/harness/tests/context-compaction/turn-end-subscription.test.ts +++ b/harness/tests/context-compaction/turn-end-subscription.test.ts @@ -14,7 +14,9 @@ describe('context-compaction stream subscription', () => { await register(iii); const streamTriggers = registerTrigger.mock.calls - .map((c) => c[0] as { type?: string; function_id?: string; config?: { stream_name?: string } }) + .map( + (c) => c[0] as { type?: string; function_id?: string; config?: { stream_name?: string } }, + ) .filter( (t) => t?.type === 'stream' && t?.function_id === 'context-compaction::on_agent_event', ); diff --git a/harness/tests/harness/fanout/sessions-poll.test.ts b/harness/tests/harness/fanout/sessions-poll.test.ts index 71a64f4f..08ecf367 100644 --- a/harness/tests/harness/fanout/sessions-poll.test.ts +++ b/harness/tests/harness/fanout/sessions-poll.test.ts @@ -1,14 +1,15 @@ import { describe, expect, it, vi } from 'vitest'; import { spawnSessionsPoll } from '../../../src/harness/fanout/sessions-poll.js'; import { FanoutState } from '../../../src/harness/ui-subscribe.js'; +import { TURN_STATE_SCOPE } from '../../../src/turn-orchestrator/state.js'; import type { ISdk } from '../../../src/runtime/iii.js'; type Handler = (event: unknown) => Promise; -// Session-create fanout now rides a dedicated `session_index` scope (marker -// written once at creation). 
The state trigger has NO condition_function_id, so -// the engine hands every write on that scope to the handler — the handler is -// the sole gate. These tests hammer that gate and the registration shape. +// Session-create fanout watches scope `turn_state`. The state trigger has NO +// condition_function_id, so the engine hands every write on that scope to the +// handler — the handler is the sole gate. These tests hammer that gate and +// the registration shape. function setup(subscribers: string[] = []) { const handlers = new Map(); const triggers: Array<{ type?: string; function_id?: string; config?: Record }> = @@ -37,10 +38,10 @@ function setup(subscribers: string[] = []) { const createEvent = (over: Record = {}) => ({ event_type: 'state:created' as const, - scope: 'session_index' as const, + scope: TURN_STATE_SCOPE, key: 'sess-1', old_value: null, - new_value: { created_at_ms: 1 }, + new_value: { session_id: 'sess-1', state: 'provisioning' }, message_type: 'state', ...over, }); @@ -50,7 +51,7 @@ function changedCalls(sent: Array<{ function_id: string; payload: unknown }>) { } describe('spawnSessionsPoll registration (eliminates the per-write predicate RPC)', () => { - it('registers a scope-only session_index trigger with NO condition_function_id and no predicate fn', () => { + it('registers a scope-only turn_state trigger with NO condition_function_id and no predicate fn', () => { const { handlers, triggers } = setup(); expect([...handlers.keys()]).not.toContain('harness::session::is_create_event'); @@ -58,7 +59,7 @@ describe('spawnSessionsPoll registration (eliminates the per-write predicate RPC const t = triggers.find((x) => x.function_id === 'harness::fanout::session_created'); expect(t?.type).toBe('state'); - expect(t?.config?.scope).toBe('session_index'); + expect(t?.config?.scope).toBe(TURN_STATE_SCOPE); expect(t?.config?.condition_function_id).toBeUndefined(); }); }); diff --git a/harness/tests/integration/approval-resume.e2e.test.ts 
b/harness/tests/integration/approval-resume.e2e.test.ts index 75a9e550..bcdc0f6c 100644 --- a/harness/tests/integration/approval-resume.e2e.test.ts +++ b/harness/tests/integration/approval-resume.e2e.test.ts @@ -5,7 +5,8 @@ import { isApprovalDecisionWrite, } from '../../src/turn-orchestrator/on-approval.js'; import type { ISdk } from '../../src/runtime/iii.js'; -import { newRecord, turnStateKey } from '../../src/turn-orchestrator/state.js'; +import { TURN_STATE_SCOPE } from '../../src/turn-orchestrator/state.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; async function flushMicrotasks(): Promise { await Promise.resolve(); @@ -79,7 +80,7 @@ describe('approval reactive trigger', () => { const { iii, wakeTriggers, stateStore } = fakeIii(); const rec = newRecord('sess-x'); rec.state = 'function_awaiting_approval'; - stateStore.set(`agent/${turnStateKey('sess-x')}`, rec); + stateStore.set(`${TURN_STATE_SCOPE}/sess-x`, rec); const out = await handleResolveRequest(iii, { session_id: 'sess-x', @@ -97,5 +98,4 @@ describe('approval reactive trigger', () => { function_id: 'turn::function_awaiting_approval', }); }); - }); diff --git a/harness/tests/integration/on-record-written.e2e.test.ts b/harness/tests/integration/on-record-written.e2e.test.ts index 3ca6c402..b0552ab2 100644 --- a/harness/tests/integration/on-record-written.e2e.test.ts +++ b/harness/tests/integration/on-record-written.e2e.test.ts @@ -1,8 +1,9 @@ import { describe, expect, it, vi } from 'vitest'; import { TriggerAction } from '../../src/runtime/iii.js'; import type { ISdk } from '../../src/runtime/iii.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { newRecord, turnStateKey } from '../../src/turn-orchestrator/state.js'; +import { createTurnStore } from '../../src/turn-orchestrator/state-runtime/store.js'; +import { TURN_STATE_SCOPE } from '../../src/turn-orchestrator/state.js'; +import { newRecord } from 
'../../src/turn-orchestrator/state.js'; function fakeIii(): { iii: ISdk; @@ -60,10 +61,11 @@ function fakeIii(): { describe('saveRecord wake integration', () => { it('writing a new stepable turn_state enqueues turn::provisioning', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-a'); rec.state = 'provisioning'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([ { @@ -76,12 +78,13 @@ describe('saveRecord wake integration', () => { it('subsequent transitions enqueue turn::{newState}', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-b'); rec.state = 'provisioning'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); rec.state = 'assistant_streaming'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([ { @@ -99,32 +102,35 @@ describe('saveRecord wake integration', () => { it('parking in function_awaiting_approval does NOT wake', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-c'); rec.state = 'function_awaiting_approval'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([]); }); it('terminal stopped state does NOT wake', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-d'); rec.state = 'stopped'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([]); }); it('same-state re-save does NOT wake', async () => { const { iii, wakeInvocations } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-e'); rec.state = 'function_execute'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); 
wakeInvocations.length = 0; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(wakeInvocations).toEqual([]); }); @@ -132,67 +138,79 @@ describe('saveRecord wake integration', () => { function turnStateGets(iii: ISdk, session_id: string): number { const trigger = iii.trigger as unknown as { - mock: { calls: Array<[{ function_id: string; payload?: { key?: string } }]> }; + mock: { + calls: Array<[{ function_id: string; payload?: { scope?: string; key?: string } }]>; + }; }; return trigger.mock.calls.filter( - ([arg]) => arg.function_id === 'state::get' && arg.payload?.key === turnStateKey(session_id), + ([arg]) => + arg.function_id === 'state::get' && + arg.payload?.scope === TURN_STATE_SCOPE && + arg.payload?.key === session_id, ).length; } -describe('session index marker (create-fanout source)', () => { - it('a newly-created session writes a session_index marker keyed by session id', async () => { +describe('turn_state persistence (create-fanout source)', () => { + it('a newly-created session persists turn_state keyed by session id', async () => { const { iii, stateStore } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-new'); rec.state = 'provisioning'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); - expect(stateStore.has('session_index/sess-new')).toBe(true); + expect(stateStore.has(`${TURN_STATE_SCOPE}/sess-new`)).toBe(true); }); - it('a transition on an existing session writes NO new session_index marker', async () => { + it('a transition on an existing session updates the same turn_state key', async () => { const { iii, stateStore } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-x'); rec.state = 'provisioning'; - await persistence.saveRecord(iii, rec); // create → marker written - stateStore.delete('session_index/sess-x'); // clear so a re-write would be detectable + await store.saveRecord(rec); rec.state = 'assistant_streaming'; - await 
persistence.saveRecord(iii, rec); // transition → must NOT re-mark + await store.saveRecord(rec); - expect(stateStore.has('session_index/sess-x')).toBe(false); + expect(stateStore.has(`${TURN_STATE_SCOPE}/sess-x`)).toBe(true); + expect((stateStore.get(`${TURN_STATE_SCOPE}/sess-x`) as { state: string }).state).toBe( + 'assistant_streaming', + ); }); - it('a threaded previous record (transition) writes no marker', async () => { + it('a threaded previous record (transition) keeps one turn_state entry', async () => { const { iii, stateStore } = fakeIii(); + const store = createTurnStore(iii); const previous = newRecord('sess-y'); previous.state = 'provisioning'; const next = { ...previous, state: 'assistant_streaming' as const }; - await persistence.saveRecord(iii, next, previous); + await store.saveRecord(next, previous); - expect(stateStore.has('session_index/sess-y')).toBe(false); + expect(stateStore.has(`${TURN_STATE_SCOPE}/sess-y`)).toBe(true); }); }); describe('saveRecord read elimination (#5)', () => { it('2-arg saveRecord does not pre-read turn_state (uses state::set old_value)', async () => { const { iii } = fakeIii(); + const store = createTurnStore(iii); const rec = newRecord('sess-r1'); rec.state = 'provisioning'; - await persistence.saveRecord(iii, rec); + await store.saveRecord(rec); expect(turnStateGets(iii, 'sess-r1')).toBe(0); }); it('saveRecord with a threaded previous reads turn_state zero times', async () => { const { iii } = fakeIii(); + const store = createTurnStore(iii); const previous = newRecord('sess-r2'); previous.state = 'provisioning'; const next = { ...previous, state: 'assistant_streaming' as const }; - await persistence.saveRecord(iii, next, previous); + await store.saveRecord(next, previous); expect(turnStateGets(iii, 'sess-r2')).toBe(0); }); diff --git a/harness/tests/runtime/state-client.test.ts b/harness/tests/runtime/state-client.test.ts index 7aafc63a..b1eab48d 100644 --- a/harness/tests/runtime/state-client.test.ts +++ 
b/harness/tests/runtime/state-client.test.ts @@ -20,9 +20,9 @@ describe('createState', () => { const iii = makeIii(() => { throw new Error('backend down'); }); - await expect(createState(iii, { tolerant: false }).get({ scope: 's', key: 'k' })).rejects.toThrow( - 'backend down', - ); + await expect( + createState(iii, { tolerant: false }).get({ scope: 's', key: 'k' }), + ).rejects.toThrow('backend down'); }); it('tolerant list returns [] on trigger failure', async () => { diff --git a/harness/tests/session/inbox.test.ts b/harness/tests/session/inbox.test.ts index bc7a2d9c..a11280af 100644 --- a/harness/tests/session/inbox.test.ts +++ b/harness/tests/session/inbox.test.ts @@ -3,6 +3,6 @@ import { inboxKey } from '../../src/session/inbox/key.js'; describe('inboxKey', () => { it('namespaces by session and name', () => { - expect(inboxKey('steering', 's1')).toBe('session/s1/steering'); + expect(inboxKey('steering', 's1')).toBe('s1/steering'); }); }); diff --git a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts new file mode 100644 index 00000000..f4635b25 --- /dev/null +++ b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts @@ -0,0 +1,42 @@ +import { vi, type Mock } from 'vitest'; +import type { RunRequest } from '../../../src/turn-orchestrator/run-request.js'; +import * as storeModule from '../../../src/turn-orchestrator/state-runtime/store.js'; +import type { TurnStore } from '../../../src/turn-orchestrator/state-runtime/store.js'; + +export const defaultRunRequest: RunRequest = { + provider: 'openai', + model: 'gpt-4', + mode: 'agent', + system_prompt: '', + function_schemas: [], +}; + +export type MockTurnStore = { + [K in keyof TurnStore]: TurnStore[K] extends (...args: infer A) => infer R + ? 
Mock<(...args: A) => R> + : TurnStore[K]; +}; + +export function mockTurnStore(overrides: Partial = {}): MockTurnStore { + return { + loadRecord: vi.fn(async () => null), + saveRecord: vi.fn(async () => {}), + writeRecord: vi.fn(async () => {}), + loadMessages: vi.fn(async () => []), + saveMessages: vi.fn(async () => {}), + appendMessages: vi.fn(async () => {}), + loadRunRequest: vi.fn(async () => defaultRunRequest), + saveRunRequest: vi.fn(async () => {}), + listTurnStateRecords: vi.fn(async () => []), + wakeStep: vi.fn(async () => {}), + wakeFromRecord: vi.fn(async () => {}), + ...overrides, + } as MockTurnStore; +} + +/** Mock `createTurnStore` and return the store instance for assertions. */ +export function installMockTurnStore(overrides: Partial = {}): MockTurnStore { + const store = mockTurnStore(overrides); + vi.spyOn(storeModule, 'createTurnStore').mockReturnValue(store); + return store; +} diff --git a/harness/tests/turn-orchestrator/agent-trigger.test.ts b/harness/tests/turn-orchestrator/agent-trigger.test.ts index 47e57782..5f76ef3c 100644 --- a/harness/tests/turn-orchestrator/agent-trigger.test.ts +++ b/harness/tests/turn-orchestrator/agent-trigger.test.ts @@ -157,7 +157,7 @@ describe('dispatchWithHook returns DispatchResult', () => { expect(out.kind).toBe('pending'); }); - it('returns kind:deny on hard deny', async () => { + it('returns kind:result with denied details on hard deny', async () => { vi.spyOn(hookModule, 'consultBefore').mockResolvedValue({ kind: 'deny', denial: { @@ -174,8 +174,8 @@ describe('dispatchWithHook returns DispatchResult', () => { function_id: 'shell::run', arguments: {}, }); - expect(out.kind).toBe('deny'); - if (out.kind === 'deny') { + expect(out.kind).toBe('result'); + if (out.kind === 'result') { expect(out.result.details).toMatchObject({ status: 'denied' }); } }); diff --git a/harness/tests/turn-orchestrator/assistant-streaming.test.ts b/harness/tests/turn-orchestrator/assistant-streaming.test.ts new file mode 100644 
index 00000000..2783e36e --- /dev/null +++ b/harness/tests/turn-orchestrator/assistant-streaming.test.ts @@ -0,0 +1,167 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + finalizeAssistantTurn, + prepareStreamContext, + resolveAssistantMessage, + routeAssistantTurn, + syntheticStreamReason, +} from '../../src/turn-orchestrator/assistant-streaming/process.js'; +import { + parseFunctionSchemas, + type AssistantStreamingPorts, +} from '../../src/turn-orchestrator/assistant-streaming/ports.js'; +import { isDuplicateAssistant } from '../../src/turn-orchestrator/state-runtime/transcript.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; + +function assistant(overrides: Partial = {}): AssistantMessage { + return { + role: 'assistant', + content: [{ type: 'text', text: 'hello' }], + stop_reason: 'end', + error_message: null, + error_kind: null, + usage: null, + model: 'gpt-4o', + provider: 'openai', + timestamp: 1, + ...overrides, + }; +} + +function stubStreamingPorts( + overrides: Partial = {}, +): AssistantStreamingPorts { + return { + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + checkpoint: vi.fn(async () => {}), + emitTurnEnd: vi.fn(async () => {}), + finishSession: vi.fn(async (rec) => { + rec.state = 'stopped'; + }), + loadRunRequest: vi.fn(async () => ({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: 'sys', + function_schemas: [{ name: 'agent_trigger', description: 'd', parameters: {} }], + })), + runPreflight: vi.fn(async () => 'ok' as const), + streamTurn: vi.fn(async () => ({ final: null, error: null })), + emitMessageUpdate: vi.fn(async () => {}), + emitMessageComplete: vi.fn(async () => {}), + persistAssistantIfNew: vi.fn(async () => {}), + ...overrides, + }; +} + +describe('parseFunctionSchemas', () => { + it('parses valid function schemas via AgentFunctionSchema', () => { + const tools = 
parseFunctionSchemas([ + { name: 'agent_trigger', description: 'trigger', parameters: { type: 'object' } }, + ]); + expect(tools).toHaveLength(1); + expect(tools[0]?.name).toBe('agent_trigger'); + }); +}); + +describe('prepareStreamContext', () => { + it('reloads messages when preflight compacts', async () => { + const loadMessages = vi + .fn() + .mockResolvedValueOnce([{ role: 'user', content: [], timestamp: 1 }]) + .mockResolvedValueOnce([{ role: 'user', content: [], timestamp: 2 }]); + const ports = stubStreamingPorts({ + loadMessages, + runPreflight: vi.fn(async () => 'compacted'), + }); + const rec = newRecord('s1'); + + const ctx = await prepareStreamContext(ports, rec); + + expect(loadMessages).toHaveBeenCalledTimes(2); + expect(ctx.messages).toEqual([{ role: 'user', content: [], timestamp: 2 }]); + expect(ctx.tools[0]?.name).toBe('agent_trigger'); + }); +}); + +describe('resolveAssistantMessage', () => { + it('returns the provider final message when present', () => { + const final = assistant({ content: [{ type: 'text', text: 'done' }] }); + const msg = resolveAssistantMessage( + { final, error: null, body_streamed: false }, + { provider: 'openai', model: 'gpt-4o' }, + ); + expect(msg).toEqual(final); + expect(syntheticStreamReason({ final, error: null, body_streamed: false })).toBeNull(); + }); + + it('builds a synthetic error when the stream ends without a final', () => { + const msg = resolveAssistantMessage( + { final: null, error: 'channel unavailable', body_streamed: false }, + { provider: 'openai', model: 'gpt-4o' }, + ); + expect(msg.stop_reason).toBe('error'); + expect(msg.error_message).toContain('channel unavailable'); + }); +}); + +describe('routeAssistantTurn', () => { + it('routes error assistants to stopped', () => { + expect(routeAssistantTurn(assistant({ stop_reason: 'error' })).kind).toBe('stopped'); + }); + + it('routes function_call content to function_execute', () => { + expect( + routeAssistantTurn( + assistant({ + content: [ + { type: 
'function_call', id: 'fc-1', function_id: 'shell::run', arguments: {} }, + ], + }), + ).kind, + ).toBe('function_execute'); + }); + + it('routes text-only assistants to steering_check', () => { + expect(routeAssistantTurn(assistant()).kind).toBe('steering_check'); + }); +}); + +describe('finalizeAssistantTurn', () => { + it('stops without persisting on error assistant', async () => { + const ports = stubStreamingPorts(); + const rec = newRecord('s1'); + rec.last_assistant = assistant({ stop_reason: 'error', error_message: 'auth failed' }); + + await finalizeAssistantTurn(ports, rec); + + expect(rec.state).toBe('stopped'); + expect(rec.turn_end_emitted).toBe(true); + expect(ports.persistAssistantIfNew).not.toHaveBeenCalled(); + }); + + it('persists and routes to function_execute when calls exist', async () => { + const ports = stubStreamingPorts(); + const rec = newRecord('s1'); + rec.last_assistant = assistant({ + content: [{ type: 'function_call', id: 'fc-1', function_id: 'shell::run', arguments: {} }], + }); + + await finalizeAssistantTurn(ports, rec); + + expect(ports.persistAssistantIfNew).toHaveBeenCalledOnce(); + expect(rec.state).toBe('function_execute'); + expect(rec.work).toBeUndefined(); + expect(rec.function_results).toEqual([]); + }); +}); + +describe('isDuplicateAssistant', () => { + it('detects trailing assistant dup for re-entry', () => { + const asst = assistant({ timestamp: 42, model: 'm', provider: 'p' }); + expect(isDuplicateAssistant([asst], asst)).toBe(true); + expect(isDuplicateAssistant([], asst)).toBe(false); + }); +}); diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index 1df99865..b2e5cd21 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -1,10 +1,10 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import type { AssistantMessage } 
from '../../src/types/agent-message.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; import * as preflightModule from '../../src/turn-orchestrator/preflight.js'; import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; -import { handleStreaming } from '../../src/turn-orchestrator/states/assistant-streaming.js'; +import { handleStreaming } from '../../src/turn-orchestrator/assistant-streaming/process.js'; type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; @@ -69,6 +69,20 @@ afterEach(() => { vi.restoreAllMocks(); }); +function mockStreamingStore(overrides: Parameters[0] = {}) { + return installMockTurnStore({ + loadRunRequest: vi.fn(async () => ({ + provider: 'openai', + model: 'gpt-4o', + mode: null, + system_prompt: '', + function_schemas: [], + })), + loadMessages: vi.fn(async () => []), + ...overrides, + }); +} + describe('handleStreaming turn start', () => { it('starts a normal assistant turn without approval::consume resurrection', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; @@ -77,14 +91,7 @@ describe('handleStreaming turn start', () => { throw new Error('channel unavailable'); }, }); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - function_schemas: [], - }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); await handleStreaming(iii, rec); @@ -106,14 +113,7 @@ describe('handleStreaming', () => { throw new Error('channel unavailable'); }, }); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - function_schemas: [], - }); - vi.spyOn(persistence, 
'loadMessages').mockResolvedValue([]); + mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); await handleStreaming(iii, rec); @@ -137,23 +137,16 @@ describe('handleStreaming', () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii, calls } = fakeIiiWithDone(finalMsg); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - function_schemas: [], - }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + const store = mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const appendSpy = store.appendMessages; await handleStreaming(iii, rec); // emitted message_complete via stream::set trigger expect(calls.some((c) => c.function_id === 'stream::set')).toBe(true); // assistant persisted - expect(saveSpy).toHaveBeenCalledOnce(); + expect(appendSpy).toHaveBeenCalledOnce(); // routed to function_execute (NOT assistant_finished) expect(rec.state).toBe('function_execute'); expect(rec.last_assistant).toEqual(finalMsg); @@ -166,16 +159,8 @@ describe('handleStreaming', () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIiiWithDone(finalMsg); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - function_schemas: [], - }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); await handleStreaming(iii, rec); @@ -209,16 +194,8 @@ describe('handleStreaming', () => { }), }); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 
'gpt-4o', - mode: null, - system_prompt: '', - function_schemas: [], - }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); await handleStreaming(iii, rec); @@ -231,22 +208,15 @@ describe('handleStreaming', () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIiiWithDone(finalMsg); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - function_schemas: [], - }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + const store = mockStreamingStore(); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const appendSpy = store.appendMessages; await handleStreaming(iii, rec); expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); - expect(saveSpy).not.toHaveBeenCalled(); + expect(appendSpy).not.toHaveBeenCalled(); }); it('does NOT duplicate the assistant message on re-entry', async () => { @@ -266,16 +236,11 @@ describe('handleStreaming', () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; const { iii } = fakeIiiWithDone(finalMsg); - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4o', - mode: null, - system_prompt: '', - function_schemas: [], - }); - vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); - vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { - storedMessages = msgs as never; + mockStreamingStore({ + loadMessages: vi.fn(async () => storedMessages as never), + appendMessages: vi.fn(async (_sid, msgs) => { + storedMessages = [...storedMessages, 
...msgs]; + }), }); vi.spyOn(preflightModule, 'runPreflight').mockResolvedValue('ok'); diff --git a/harness/tests/turn-orchestrator/awaiting-approval.test.ts b/harness/tests/turn-orchestrator/awaiting-approval.test.ts index 1d09384b..7aeee31c 100644 --- a/harness/tests/turn-orchestrator/awaiting-approval.test.ts +++ b/harness/tests/turn-orchestrator/awaiting-approval.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; -import type { PreparedEntry, TurnStateRecord, TurnWork } from '../../src/turn-orchestrator/state.js'; -import { handleAwaitingApproval } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; +import type { PreparedCall, TurnStateRecord, TurnWork } from '../../src/turn-orchestrator/state.js'; +import { handleAwaitingApproval } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; function fakeIii(stateGetImpl: (scope: string, key: string) => unknown): ISdk { return { @@ -35,8 +35,8 @@ function recordWith( }; } -function workWith(batch: PreparedEntry[]): TurnWork { - return { batch, results: [] }; +function workWith(prepared: PreparedCall[]): TurnWork { + return { prepared, executed: {} }; } describe('handleAwaitingApproval', () => { @@ -52,17 +52,16 @@ describe('handleAwaitingApproval', () => { const rec = recordWith( [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], workWith([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + { route: 'dispatch', call: { id: 'fc-1', function_id: 'shell::run', arguments: {} } }, ]), ); await handleAwaitingApproval(iii, rec); expect(rec.state).toBe('function_awaiting_approval'); expect(rec.awaiting_approval).toHaveLength(1); - // batch unchanged — no decision folded in - expect(rec.work?.batch[0]?.pre_approved).toBeUndefined(); + expect(rec.work?.prepared[0]?.route).toBe('dispatch'); }); - it('folds pre_approved into work.batch on allow and 
transitions to function_execute', async () => { + it('folds pre_approved route into work.prepared on allow and transitions to function_execute', async () => { const iii = fakeIii((_scope, key) => { if (key === 's1/fc-1') return { decision: 'allow', reason: null }; return null; @@ -71,8 +70,8 @@ describe('handleAwaitingApproval', () => { [{ function_call_id: 'fc-1', function_id: 'shell::run', args: { command: 'ls' } }], workWith([ { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, - blocked: null, + route: 'dispatch', + call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, }, ]), ); @@ -81,11 +80,10 @@ describe('handleAwaitingApproval', () => { expect(rec.state).toBe('function_execute'); expect(rec.awaiting_approval).toEqual([]); - expect(rec.work?.batch[0]?.pre_approved).toBe(true); - expect(rec.work?.batch[0]?.blocked).toBeNull(); + expect(rec.work?.prepared[0]?.route).toBe('pre_approved'); }); - it('sets blocked denial result in work.batch on deny and transitions to function_execute', async () => { + it('sets synthetic denial result in work.prepared on deny and transitions to function_execute', async () => { const iii = fakeIii((_scope, key) => { if (key === 's1/fc-1') return { decision: 'deny', reason: 'policy' }; return null; @@ -93,24 +91,25 @@ describe('handleAwaitingApproval', () => { const rec = recordWith( [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], workWith([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + { route: 'dispatch', call: { id: 'fc-1', function_id: 'shell::run', arguments: {} } }, ]), ); await handleAwaitingApproval(iii, rec); expect(rec.state).toBe('function_execute'); - expect(rec.work?.batch[0]?.pre_approved).toBeFalsy(); - expect(rec.work?.batch[0]?.blocked).toMatchObject({ - details: expect.objectContaining({ + const entry = rec.work?.prepared[0]; + expect(entry?.route).toBe('synthetic'); + if 
(entry?.route === 'synthetic') { + expect(entry.result.details).toMatchObject({ approval_denied: true, decision: 'deny', reason: 'policy', - }), - }); + }); + } }); - it('handles aborted decision like deny (folded into work.batch)', async () => { + it('handles aborted decision like deny (folded into work.prepared)', async () => { const iii = fakeIii((_scope, key) => { if (key === 's1/fc-1') return { decision: 'aborted', reason: 'session_aborted' }; return null; @@ -118,15 +117,18 @@ describe('handleAwaitingApproval', () => { const rec = recordWith( [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], workWith([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + { route: 'dispatch', call: { id: 'fc-1', function_id: 'shell::run', arguments: {} } }, ]), ); await handleAwaitingApproval(iii, rec); expect(rec.state).toBe('function_execute'); - expect(rec.work?.batch[0]?.pre_approved).toBeFalsy(); - expect(rec.work?.batch[0]?.blocked?.details).toMatchObject({ decision: 'aborted' }); + const entry = rec.work?.prepared[0]; + expect(entry?.route).toBe('synthetic'); + if (entry?.route === 'synthetic') { + expect(entry.result.details).toMatchObject({ decision: 'aborted' }); + } }); it('folds independent decisions across a multi-call batch', async () => { @@ -141,10 +143,10 @@ describe('handleAwaitingApproval', () => { { function_call_id: 'fc-2', function_id: 'shell::fs::write', args: {} }, ], workWith([ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, + { route: 'dispatch', call: { id: 'fc-1', function_id: 'shell::run', arguments: {} } }, { - function_call: { id: 'fc-2', function_id: 'shell::fs::write', arguments: {} }, - blocked: null, + route: 'dispatch', + call: { id: 'fc-2', function_id: 'shell::fs::write', arguments: {} }, }, ]), ); @@ -152,9 +154,7 @@ describe('handleAwaitingApproval', () => { await handleAwaitingApproval(iii, rec); 
expect(rec.state).toBe('function_execute'); - expect(rec.work?.batch[0]?.pre_approved).toBe(true); - expect(rec.work?.batch[0]?.blocked).toBeNull(); - expect(rec.work?.batch[1]?.pre_approved).toBeFalsy(); - expect(rec.work?.batch[1]?.blocked?.details).toMatchObject({ decision: 'deny' }); + expect(rec.work?.prepared[0]?.route).toBe('pre_approved'); + expect(rec.work?.prepared[1]?.route).toBe('synthetic'); }); }); diff --git a/harness/tests/turn-orchestrator/estimate.test.ts b/harness/tests/turn-orchestrator/estimate.test.ts deleted file mode 100644 index 00cdccc4..00000000 --- a/harness/tests/turn-orchestrator/estimate.test.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { estimateMessages } from '../../src/turn-orchestrator/estimate.js'; - -describe('estimateMessages', () => { - it('returns positive count for non-empty messages', () => { - expect( - estimateMessages([ - { role: 'user', content: [{ type: 'text', text: 'x'.repeat(100) }], timestamp: 0 }, - ]), - ).toBeGreaterThan(0); - }); - - it('returns 0 for empty array', () => { - expect(estimateMessages([])).toBe(0); - }); - - it('uses chars/4 heuristic', () => { - const msg = { role: 'user' as const, content: [] as never[], timestamp: 0 }; - const serialized = JSON.stringify(msg); - const expected = Math.floor(serialized.length / 4); - expect(estimateMessages([msg])).toBe(expected); - }); - - it('accumulates across multiple messages', () => { - const msgs = [ - { role: 'user' as const, content: [{ type: 'text' as const, text: 'hello' }], timestamp: 1 }, - { role: 'user' as const, content: [{ type: 'text' as const, text: 'world' }], timestamp: 2 }, - ]; - const single = estimateMessages(msgs.slice(0, 1)); - const both = estimateMessages(msgs); - expect(both).toBeGreaterThan(single); - }); -}); diff --git a/harness/tests/turn-orchestrator/finish.test.ts b/harness/tests/turn-orchestrator/finish.test.ts index f08606c7..3775f750 100644 --- 
a/harness/tests/turn-orchestrator/finish.test.ts +++ b/harness/tests/turn-orchestrator/finish.test.ts @@ -1,15 +1,15 @@ import { describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; -import { finishSession } from '../../src/turn-orchestrator/finish.js'; +import { createTurnStatePorts } from '../../src/turn-orchestrator/state-runtime/ports.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; -describe('finishSession', () => { +describe('TurnStatePorts.finishSession', () => { it('emits agent_end with the transcript and sets state to stopped', async () => { const messages = [ { role: 'user' as const, content: [{ type: 'text' as const, text: 'hi' }], timestamp: 1 }, ]; - vi.spyOn(persistence, 'loadMessages').mockResolvedValue(messages as never); + installMockTurnStore({ loadMessages: vi.fn(async () => messages) }); const emitted: Array<{ type: string; messages?: unknown }> = []; const iii = { trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { @@ -22,7 +22,7 @@ describe('finishSession', () => { const rec = newRecord('s1'); rec.state = 'steering_check'; - await finishSession(iii, rec); + await createTurnStatePorts(iii).finishSession(rec); expect(rec.state).toBe('stopped'); const agentEnd = emitted.find((e) => e.type === 'agent_end'); diff --git a/harness/tests/turn-orchestrator/flat-messages.test.ts b/harness/tests/turn-orchestrator/flat-messages.test.ts deleted file mode 100644 index 61236a99..00000000 --- a/harness/tests/turn-orchestrator/flat-messages.test.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { parseFlatMessages } from '../../src/turn-orchestrator/flat-messages.js'; - -describe('parseFlatMessages', () => { - it('returns the array when messages are objects', () => { - const messages = [{ 
role: 'user', content: [], timestamp: 1 }]; - expect(parseFlatMessages(messages)).toEqual(messages); - }); - - it('returns [] for null, undefined, and non-arrays', () => { - expect(parseFlatMessages(null)).toEqual([]); - expect(parseFlatMessages(undefined)).toEqual([]); - expect(parseFlatMessages('bad')).toEqual([]); - expect(parseFlatMessages({})).toEqual([]); - }); -}); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts new file mode 100644 index 00000000..1f487608 --- /dev/null +++ b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts @@ -0,0 +1,175 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + applyAwaitingApprovalOutcome, + applyDecisionToPrepared, + foldDecisionsIntoPrepared, + processAwaitingApproval, +} from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; +import type { AwaitingApprovalPorts } from '../../src/turn-orchestrator/function-awaiting-approval/ports.js'; +import type { PreparedCall, TurnStateRecord, TurnWork } from '../../src/turn-orchestrator/state.js'; + +const dispatchCall = { + route: 'dispatch' as const, + call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, +}; + +function recordWith( + awaiting: { function_call_id: string; function_id: string; args: unknown }[], + work?: TurnWork, +): TurnStateRecord { + return { + session_id: 's1', + state: 'function_awaiting_approval', + turn_count: 0, + max_turns: undefined, + last_assistant: null, + function_results: [], + turn_end_emitted: false, + started_at_ms: 0, + updated_at_ms: 0, + awaiting_approval: awaiting, + work, + }; +} + +function stubPorts( + decisions: Record, +): AwaitingApprovalPorts { + return { + readDecision: vi.fn(async (_session_id, function_call_id) => { + const decision = decisions[function_call_id]; + return decision ?? 
null; + }), + }; +} + +describe('applyDecisionToPrepared', () => { + it('maps allow to pre_approved', () => { + const next = applyDecisionToPrepared(dispatchCall, { decision: 'allow', reason: null }); + expect(next).toEqual({ + route: 'pre_approved', + call: dispatchCall.call, + }); + }); + + it('maps deny to synthetic with denial result', () => { + const next = applyDecisionToPrepared(dispatchCall, { decision: 'deny', reason: 'policy' }); + expect(next.route).toBe('synthetic'); + if (next.route === 'synthetic') { + expect(next.result.details).toMatchObject({ + approval_denied: true, + decision: 'deny', + reason: 'policy', + }); + } + }); + + it('maps aborted to synthetic with aborted details', () => { + const next = applyDecisionToPrepared(dispatchCall, { + decision: 'aborted', + reason: 'session_aborted', + }); + expect(next.route).toBe('synthetic'); + if (next.route === 'synthetic') { + expect(next.result.details).toMatchObject({ decision: 'aborted' }); + } + }); +}); + +describe('foldDecisionsIntoPrepared', () => { + it('folds each awaiting entry by function_call_id', () => { + const prepared: PreparedCall[] = [ + dispatchCall, + { + route: 'dispatch', + call: { id: 'fc-2', function_id: 'shell::fs::write', arguments: {} }, + }, + ]; + const awaiting = [ + { function_call_id: 'fc-1', function_id: 'shell::run', args: {} }, + { function_call_id: 'fc-2', function_id: 'shell::fs::write', args: {} }, + ]; + const decisions = [ + { decision: 'allow' as const, reason: null }, + { decision: 'deny' as const, reason: 'policy' }, + ]; + + const folded = foldDecisionsIntoPrepared(prepared, awaiting, decisions); + + expect(folded[0]?.route).toBe('pre_approved'); + expect(folded[1]?.route).toBe('synthetic'); + }); + + it('skips awaiting entries not found in prepared', () => { + const prepared: PreparedCall[] = [dispatchCall]; + const awaiting = [{ function_call_id: 'fc-missing', function_id: 'x', args: {} }]; + const folded = foldDecisionsIntoPrepared(prepared, awaiting, [ + 
{ decision: 'allow', reason: null }, + ]); + expect(folded).toEqual(prepared); + }); +}); + +describe('processAwaitingApproval', () => { + it('returns resume_empty when awaiting is empty', async () => { + const rec = recordWith([]); + const outcome = await processAwaitingApproval(stubPorts({}), rec); + expect(outcome).toEqual({ kind: 'resume_empty' }); + }); + + it('returns parked when any decision is missing', async () => { + const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], { + prepared: [dispatchCall], + executed: {}, + }); + const outcome = await processAwaitingApproval(stubPorts({}), rec); + expect(outcome).toEqual({ kind: 'parked' }); + }); + + it('returns resume with folded prepared when all decisions present', async () => { + const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], { + prepared: [dispatchCall], + executed: {}, + }); + const outcome = await processAwaitingApproval( + stubPorts({ 'fc-1': { decision: 'allow', reason: null } }), + rec, + ); + expect(outcome.kind).toBe('resume'); + if (outcome.kind === 'resume') { + expect(outcome.prepared[0]?.route).toBe('pre_approved'); + } + }); +}); + +describe('applyAwaitingApprovalOutcome', () => { + it('no-ops when parked', () => { + const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], { + prepared: [dispatchCall], + executed: {}, + }); + applyAwaitingApprovalOutcome(rec, { kind: 'parked' }); + expect(rec.state).toBe('function_awaiting_approval'); + expect(rec.awaiting_approval).toHaveLength(1); + expect(rec.work?.prepared[0]?.route).toBe('dispatch'); + }); + + it('clears awaiting and transitions on resume_empty', () => { + const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }]); + applyAwaitingApprovalOutcome(rec, { kind: 'resume_empty' }); + expect(rec.state).toBe('function_execute'); + expect(rec.awaiting_approval).toEqual([]); + }); + + it('updates 
prepared and transitions on resume', () => { + const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], { + prepared: [dispatchCall], + executed: {}, + }); + const prepared: PreparedCall[] = [{ route: 'pre_approved', call: dispatchCall.call }]; + applyAwaitingApprovalOutcome(rec, { kind: 'resume', prepared }); + expect(rec.state).toBe('function_execute'); + expect(rec.awaiting_approval).toEqual([]); + expect(rec.work?.prepared).toEqual(prepared); + }); +}); diff --git a/harness/tests/turn-orchestrator/function-execute.test.ts b/harness/tests/turn-orchestrator/function-execute.test.ts new file mode 100644 index 00000000..dddd75c3 --- /dev/null +++ b/harness/tests/turn-orchestrator/function-execute.test.ts @@ -0,0 +1,220 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + missingFunctionResult, + unwrapAgentTrigger, +} from '../../src/turn-orchestrator/agent-trigger.js'; +import { + finalizeBatch, + FunctionExecuteInvariantError, + loadOrPlanWork, + planBatchFromAssistant, + runOneCall, +} from '../../src/turn-orchestrator/function-execute/run.js'; +import { withRoutingEnvelope } from '../../src/turn-orchestrator/function-execute/ports.js'; +import type { FunctionExecutePorts } from '../../src/turn-orchestrator/function-execute/ports.js'; +import type { ExecutedCall } from '../../src/turn-orchestrator/function-execute/types.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; + +function makeAssistant( + calls: Array<{ id: string; function_id: string; arguments?: unknown }>, +): AssistantMessage { + return { + role: 'assistant', + content: calls.map((c) => ({ + type: 'function_call' as const, + id: c.id, + function_id: c.function_id, + arguments: c.arguments ?? 
{}, + })), + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; +} + +function stubPorts(overrides: Partial = {}): FunctionExecutePorts { + return { + emitStart: vi.fn(async () => {}), + emitEnd: vi.fn(async () => {}), + checkpoint: vi.fn(async () => {}), + dispatch: vi.fn(async () => ({ + kind: 'result' as const, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + })), + triggerPreApproved: vi.fn(async () => ({ + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + })), + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + emitTurnEnd: vi.fn(async () => {}), + finishSession: vi.fn(async (rec) => { + rec.state = 'stopped'; + }), + ...overrides, + }; +} + +describe('planBatchFromAssistant', () => { + it('unwraps agent_trigger and maps empty function_id to synthetic', () => { + const batch = planBatchFromAssistant( + makeAssistant([ + { + id: 'fc-1', + function_id: 'agent_trigger', + arguments: { function: 'shell::run', payload: { x: 1 } }, + }, + { id: 'fc-2', function_id: 'agent_trigger', arguments: {} }, + ]), + ); + + expect(batch[0]).toEqual({ + route: 'dispatch', + call: unwrapAgentTrigger({ + id: 'fc-1', + function_id: 'agent_trigger', + arguments: { function: 'shell::run', payload: { x: 1 } }, + }), + }); + expect(batch[1]).toMatchObject({ + route: 'synthetic', + result: missingFunctionResult(), + }); + }); + + it('maps non-agent_trigger function_id to synthetic error', () => { + const batch = planBatchFromAssistant( + makeAssistant([{ id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }]), + ); + expect(batch[0]).toMatchObject({ + route: 'synthetic', + result: missingFunctionResult(), + }); + }); +}); + +describe('withRoutingEnvelope', () => { + it('merges routing fields without mutating the original call object', () => { + const call = { id: 'fc-1', function_id: 'shell::run', 
arguments: { command: 'ls' } }; + const augmented = withRoutingEnvelope(call, 'sess-1'); + expect(augmented.arguments).toMatchObject({ + command: 'ls', + session_id: 'sess-1', + function_call_id: 'fc-1', + function_id: 'shell::run', + }); + expect(call.arguments).toEqual({ command: 'ls' }); + }); +}); + +describe('loadOrPlanWork', () => { + it('throws when work and last_assistant are both missing', () => { + const rec = newRecord('s1'); + rec.state = 'function_execute'; + expect(() => loadOrPlanWork(rec)).toThrow(FunctionExecuteInvariantError); + }); +}); + +describe('runOneCall', () => { + it('replays end event only when call id is already executed', async () => { + const ports = stubPorts(); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const executed: Record = { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'cached' }], details: {} }, + is_error: false, + duration_ms: 10, + }, + }; + + const outcome = await runOneCall(ports, 's1', { route: 'dispatch', call: fc }, executed); + + expect(outcome.kind).toBe('skipped'); + expect(ports.emitStart).not.toHaveBeenCalled(); + expect(ports.emitEnd).toHaveBeenCalledOnce(); + expect(ports.dispatch).not.toHaveBeenCalled(); + }); + + it('returns pending without mutating executed map', async () => { + const ports = stubPorts({ + dispatch: vi.fn(async () => ({ kind: 'pending' as const })), + }); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const executed: Record = {}; + + const outcome = await runOneCall(ports, 's1', { route: 'dispatch', call: fc }, executed); + + expect(outcome.kind).toBe('pending'); + expect(executed).toEqual({}); + }); +}); + +describe('finalizeBatch', () => { + it('routes to stopped when every result terminates', async () => { + const ports = stubPorts(); + const rec = newRecord('s1'); + rec.state = 'function_execute'; + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + + await finalizeBatch(ports, rec, 
{ + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { + content: [{ type: 'text' as const, text: 'bye' }], + details: {}, + terminate: true, + }, + is_error: false, + duration_ms: 1, + }, + }, + }); + + expect(rec.state).toBe('stopped'); + expect(ports.finishSession).toHaveBeenCalledOnce(); + }); + + it('skips duplicate function_result ids on re-entry', async () => { + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const appendMessages = vi.fn(async () => {}); + const ports = stubPorts({ + loadMessages: vi.fn(async () => [ + { + role: 'function_result' as const, + function_call_id: 'fc-1', + function_id: 'shell::run', + content: [{ type: 'text' as const, text: 'existing' }], + details: {}, + is_error: false, + timestamp: 1, + }, + ]), + appendMessages, + }); + const rec = newRecord('s1'); + rec.state = 'function_execute'; + + await finalizeBatch(ports, rec, { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }); + + expect(appendMessages).not.toHaveBeenCalled(); + expect(rec.state).toBe('steering_check'); + }); +}); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 55f7a3f6..07f0dfb2 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -2,12 +2,12 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import * as events from '../../src/turn-orchestrator/events.js'; import * as hookModule from '../../src/turn-orchestrator/hook.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; import type { TurnStateRecord } from 
'../../src/turn-orchestrator/state.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.js'; -import { parseApprovalDecision } from '../../src/turn-orchestrator/states/function-awaiting-approval.js'; -import { handleExecute } from '../../src/turn-orchestrator/states/function-execute.js'; +import { parseApprovalDecision } from '../../src/turn-orchestrator/function-awaiting-approval/ports.js'; +import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; afterEach(() => { @@ -15,9 +15,10 @@ afterEach(() => { }); function mockFinalizePersistence(): void { - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); + installMockTurnStore({ + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + }); } /** Build a minimal AssistantMessage with the given function_call content blocks. */ @@ -75,8 +76,17 @@ describe('parseApprovalDecision', () => { }); }); +/** Wrap a target function id in the agent_trigger envelope (production shape). 
*/ +function agentTriggerCall( + id: string, + functionId: string, + payload: unknown = {}, +): { id: string; function_id: string; arguments: unknown } { + return { id, function_id: 'agent_trigger', arguments: { function: functionId, payload } }; +} + describe('handleExecute new flow', () => { - it('builds work.batch from last_assistant when work is absent', async () => { + it('builds work.prepared from last_assistant when work is absent', async () => { vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'result', result: { @@ -88,14 +98,11 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; - rec.last_assistant = makeAssistant([ - { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, - ]); + rec.last_assistant = makeAssistant([agentTriggerCall('fc-1', 'shell::run', { command: 'ls' })]); mockFinalizePersistence(); await handleExecute(iii, rec); - // work should be cleared after finalize expect(rec.work).toBeUndefined(); expect(rec.state).toBe('steering_check'); expect(rec.function_results).toHaveLength(1); @@ -106,13 +113,12 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; rec.work = { - batch: [ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, - ], - results: [ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, result: { content: [{ type: 'text' as const, text: 'bye' }], details: {}, @@ -121,7 +127,7 @@ describe('handleExecute new flow', () => { is_error: false, duration_ms: 1, }, - ], + }, }; 
mockFinalizePersistence(); @@ -142,15 +148,16 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; - // Re-entry: fc-1 already in results (executed before park), fc-2 still pending. + const fc1 = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const fc2 = { id: 'fc-2', function_id: 'shell::run', arguments: {} }; rec.work = { - batch: [ - { function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, blocked: null }, - { function_call: { id: 'fc-2', function_id: 'shell::run', arguments: {} }, blocked: null }, + prepared: [ + { route: 'dispatch', call: fc1 }, + { route: 'dispatch', call: fc2 }, ], - results: [ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + executed: { + 'fc-1': { + call: fc1, result: { content: [{ type: 'text' as const, text: 'done' }], details: {}, @@ -159,7 +166,7 @@ describe('handleExecute new flow', () => { is_error: false, duration_ms: 5, }, - ], + }, }; mockFinalizePersistence(); @@ -168,31 +175,27 @@ describe('handleExecute new flow', () => { const starts = emitted .filter((e) => e.type === 'function_execution_start') .map((e) => e.function_call_id); - expect(starts).toEqual(['fc-2']); // fc-1 NOT restarted on re-entry + expect(starts).toEqual(['fc-2']); const fc1Ends = emitted.filter( (e) => e.type === 'function_execution_end' && e.function_call_id === 'fc-1', ); - expect(fc1Ends).toHaveLength(1); // fc-1 end replayed exactly once + expect(fc1Ends).toHaveLength(1); }); it('pushes the call onto awaiting_approval and transitions to function_awaiting_approval on pending', async () => { - const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); - dispatchSpy.mockResolvedValueOnce({ kind: 'pending' }); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); const iii = { trigger: 
vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; - rec.last_assistant = makeAssistant([ - { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, - ]); + rec.last_assistant = makeAssistant([agentTriggerCall('fc-1', 'shell::run', { command: 'ls' })]); await handleExecute(iii, rec); expect(rec.state).toBe('function_awaiting_approval'); expect(rec.awaiting_approval).toHaveLength(1); expect(rec.awaiting_approval?.[0]?.function_call_id).toBe('fc-1'); - // work.batch should still be populated (re-entry will continue from it) - expect(rec.work?.batch).toHaveLength(1); + expect(rec.work?.prepared).toHaveLength(1); }); it('skips consultBefore on pre_approved entries and uses triggerFunctionCall', async () => { @@ -200,20 +203,14 @@ describe('handleExecute new flow', () => { const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; - // Supply via rec.work (simulates re-entry after approval was granted) rec.work = { - batch: [ + prepared: [ { - function_call: { - id: 'fc-1', - function_id: 'shell::run', - arguments: { command: 'ls' }, - }, - blocked: null, - pre_approved: true, + route: 'pre_approved', + call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, }, ], - results: [], + executed: {}, }; const consultBeforeSpy = vi.spyOn(hookModule, 'consultBefore'); mockFinalizePersistence(); @@ -238,25 +235,23 @@ describe('handleExecute new flow', () => { const rec: TurnStateRecord = newRecord('s1'); rec.state = 'function_execute'; rec.work = { - batch: [ + prepared: [ { - function_call: { + route: 'pre_approved', + call: { id: 'fc-1', function_id: 'shell::fs::write', arguments: { content: 'Tue May 19 08:17:10 -03 2026\n' }, }, - blocked: null, - pre_approved: true, }, ], - results: [], + executed: {}, }; mockFinalizePersistence(); await expect(handleExecute(iii, 
rec)).resolves.toBeUndefined(); expect(rec.state).toBe('steering_check'); - // The result should be an error with denied details expect(rec.function_results).toHaveLength(1); expect(rec.function_results[0]?.is_error).toBe(true); const details = rec.function_results[0]?.details as Record; @@ -266,7 +261,7 @@ describe('handleExecute new flow', () => { expect(String(details?.reason)).toContain('S210'); }); - it('emits denial result without dispatching when blocked is set', async () => { + it('emits denial result without dispatching when route is synthetic', async () => { const triggerSpy = vi.fn().mockResolvedValue(null); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); @@ -278,14 +273,14 @@ describe('handleExecute new flow', () => { terminate: false, }; rec.work = { - batch: [ + prepared: [ { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: denial, - pre_approved: false, + route: 'synthetic', + call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + result: denial, }, ], - results: [], + executed: {}, }; mockFinalizePersistence(); await handleExecute(iii, rec); @@ -297,7 +292,7 @@ describe('handleExecute new flow', () => { expect(rec.state).toBe('steering_check'); }); - it('replays persisted executed calls without re-dispatching (re-entry with pre-populated work.results)', async () => { + it('replays persisted executed calls without re-dispatching (re-entry with pre-populated work.executed)', async () => { const dispatchSpy = vi.spyOn(agentTriggerModule, 'dispatchWithHook'); const triggerSpy = vi.fn().mockResolvedValue(null); const iii = { trigger: triggerSpy } as unknown as ISdk; @@ -309,22 +304,17 @@ describe('handleExecute new flow', () => { details: {}, terminate: false, }; - // Pre-populate rec.work with batch + already-executed result + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; rec.work = { - batch: [ - { - function_call: { id: 'fc-1', 
function_id: 'shell::run', arguments: {} }, - blocked: null, - }, - ], - results: [ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, result: existingResult, is_error: false, duration_ms: 42, }, - ], + }, }; mockFinalizePersistence(); @@ -346,7 +336,7 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; - rec.last_assistant = makeAssistant([{ id: 'fc-1', function_id: 'shell::run', arguments: {} }]); + rec.last_assistant = makeAssistant([agentTriggerCall('fc-1', 'shell::run')]); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -360,17 +350,12 @@ describe('handleExecute new flow', () => { rec.state = 'function_execute'; rec.last_assistant = null; - // Supply pre-populated work so ensureWork doesn't throw on null last_assistant + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; rec.work = { - batch: [ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, - blocked: null, - }, - ], - results: [ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, @@ -379,18 +364,18 @@ describe('handleExecute new flow', () => { is_error: false, duration_ms: 1, }, - ], + }, }; - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + installMockTurnStore({ + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + }); const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleExecute(iii, rec); 
expect(rec.state).toBe('steering_check'); expect(rec.function_results).toHaveLength(1); - // No turn_end emitted when last_assistant is null expect(emitSpy.mock.calls.some((call) => call[2]?.type === 'turn_end')).toBe(false); }); @@ -410,12 +395,12 @@ describe('handleExecute new flow', () => { timestamp: 1, }; - // Supply pre-populated work (last_assistant has no function_call blocks here) + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; rec.work = { - batch: [], - results: [ - { - function_call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, @@ -424,11 +409,12 @@ describe('handleExecute new flow', () => { is_error: false, duration_ms: 1, }, - ], + }, }; - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + installMockTurnStore({ + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + }); const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleExecute(iii, rec); @@ -439,10 +425,6 @@ describe('handleExecute new flow', () => { }); it('does NOT duplicate function_results in flat-state when handleExecute re-enters', async () => { - // Idempotency guard: a durable retry / step-fanout race can replay the - // finalize path with the same work. Re-pushing the same function_result - // blocks makes Anthropic reject with "each tool_use must have a single - // result. Found multiple tool_result blocks with id". 
const existingResult = { content: [{ type: 'text' as const, text: 'ok' }], details: {}, @@ -454,15 +436,16 @@ describe('handleExecute new flow', () => { const rec = newRecord('s1'); rec.state = 'function_execute'; rec.last_assistant = makeAssistant([ - { id: 'toolu_01', function_id: 'shell::run', arguments: { command: 'ls' } }, + agentTriggerCall('toolu_01', 'shell::run', { command: 'ls' }), ]); let storedMessages: unknown[] = []; - vi.spyOn(persistence, 'loadMessages').mockImplementation(async () => storedMessages as never); - vi.spyOn(persistence, 'saveMessages').mockImplementation(async (_iii, _sid, msgs) => { - storedMessages = msgs as never; + installMockTurnStore({ + loadMessages: vi.fn(async () => storedMessages as never), + appendMessages: vi.fn(async (_sid, msgs) => { + storedMessages = [...storedMessages, ...msgs]; + }), }); - vi.spyOn(hookModule, 'publishAfter').mockResolvedValue(undefined); vi.spyOn(events, 'emit').mockResolvedValue(undefined); vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValue({ kind: 'result', @@ -471,14 +454,18 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); - // Re-entry: simulate state was reset before durable confirmation rec.state = 'function_execute'; rec.turn_end_emitted = false; - // work.results already has the executed call after first run cleared rec.work, - // so we need to re-populate work for re-entry simulation rec.work = { - batch: [{ function_call: fc, blocked: null }], - results: [{ function_call: fc, result: existingResult, is_error: false, duration_ms: 5 }], + prepared: [{ route: 'dispatch', call: fc }], + executed: { + toolu_01: { + call: fc, + result: existingResult, + is_error: false, + duration_ms: 5, + }, + }, }; await handleExecute(iii, rec); diff --git a/harness/tests/turn-orchestrator/get-state.test.ts b/harness/tests/turn-orchestrator/get-state.test.ts index f7c9f742..362f98da 100644 --- a/harness/tests/turn-orchestrator/get-state.test.ts +++ 
b/harness/tests/turn-orchestrator/get-state.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it, vi } from 'vitest'; +import { TURN_STATE_SCOPE } from '../../src/turn-orchestrator/state.js'; import type { ISdk } from '../../src/runtime/iii.js'; import { execute } from '../../src/turn-orchestrator/get-state.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; @@ -64,7 +65,8 @@ describe('turn::get_state execute', () => { trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { if ( req.function_id === 'state::get' && - (req.payload as Record).key === 'session/sess-abc/turn_state' + (req.payload as Record).scope === TURN_STATE_SCOPE && + (req.payload as Record).key === 'sess-abc' ) { return rec; } diff --git a/harness/tests/turn-orchestrator/hook.test.ts b/harness/tests/turn-orchestrator/hook.test.ts index 914e4231..9829dd5d 100644 --- a/harness/tests/turn-orchestrator/hook.test.ts +++ b/harness/tests/turn-orchestrator/hook.test.ts @@ -1,11 +1,10 @@ -import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { describe, expect, it, vi } from 'vitest'; import type { CheckPermissionsPayload, PolicyCheckReply, } from '../../src/harness/policy/check-permissions.js'; import type { ISdk } from '../../src/runtime/iii.js'; -import { TOPIC_AFTER, consultBefore, publishAfter } from '../../src/turn-orchestrator/hook.js'; -import { resetSubscriberCache } from '../../src/turn-orchestrator/subscriber-presence.js'; +import { consultBefore } from '../../src/turn-orchestrator/hook.js'; function fakeIii( triggerImpl: (req: { @@ -72,45 +71,3 @@ describe('consultBefore (direct policy call)', () => { expect(trigger.mock.calls[0][0].function_id).toBe('policy::check_permissions'); }); }); - -describe('publishAfter (subscriber-aware after-hook)', () => { - beforeEach(() => resetSubscriberCache()); - - const fc = { id: 'fc-1', function_id: 'shell::fs::write', arguments: { path: '/tmp/x' } }; - const result = { content: [{ type: 'text', text: 
'ok' }] }; - - it('skips publish_collect when no subscriber is registered for the topic', async () => { - const trigger = vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'engine::triggers::list') return { triggers: [] }; - throw new Error(`should not call ${req.function_id}`); - }); - const iii = { trigger } as unknown as ISdk; - - const merged = await publishAfter(iii, fc, result); - - expect(merged).toBeUndefined(); - const fns = trigger.mock.calls.map((c) => c[0].function_id); - expect(fns).not.toContain('hook-fanout::publish_collect'); - }); - - it('calls publish_collect when a subscriber is registered for the topic', async () => { - const trigger = vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'engine::triggers::list') { - return { - triggers: [{ trigger_type: 'durable:subscriber', config: { topic: TOPIC_AFTER } }], - }; - } - if (req.function_id === 'hook-fanout::publish_collect') return { merged: { rewritten: true } }; - throw new Error(`unexpected ${req.function_id}`); - }); - const iii = { trigger } as unknown as ISdk; - - const merged = await publishAfter(iii, fc, result); - - expect(merged).toEqual({ rewritten: true }); - const fns = trigger.mock.calls.map((c) => c[0].function_id); - // Proves the subscriber gate ran before delegating to the primitive. 
- expect(fns).toContain('engine::triggers::list'); - expect(fns).toContain('hook-fanout::publish_collect'); - }); -}); diff --git a/harness/tests/turn-orchestrator/provisioning-layer.test.ts b/harness/tests/turn-orchestrator/provisioning-layer.test.ts new file mode 100644 index 00000000..a711c183 --- /dev/null +++ b/harness/tests/turn-orchestrator/provisioning-layer.test.ts @@ -0,0 +1,107 @@ +import { describe, expect, it, vi } from 'vitest'; +import { applyProvisioningOutcome } from '../../src/turn-orchestrator/provisioning/process.js'; +import { loadDefaultSkillBodies } from '../../src/turn-orchestrator/provisioning/load-skills.js'; +import type { ProvisioningPorts } from '../../src/turn-orchestrator/provisioning/ports.js'; +import { processProvisioning } from '../../src/turn-orchestrator/provisioning/process.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; + +function stubPorts(overrides: Partial = {}): ProvisioningPorts { + return { + defaultSkillUris: [], + loadRunRequest: vi.fn(async () => ({ + provider: '', + model: '', + mode: null, + system_prompt: '', + function_schemas: [], + })), + saveRunRequest: vi.fn(async () => {}), + fetchSkillsIndex: vi.fn(async () => null), + fetchSkillBody: vi.fn(async () => null), + ...overrides, + }; +} + +describe('loadDefaultSkillBodies', () => { + it('fetches each URI and maps to DefaultSkillBody', async () => { + const fetchSkillBody = vi.fn(async (id: string) => + id === 'iii-directory/index' ? 
'BODY' : null, + ); + const bodies = await loadDefaultSkillBodies({ fetchSkillBody }, ['iii://iii-directory/index']); + + expect(fetchSkillBody).toHaveBeenCalledWith('iii-directory/index'); + expect(bodies).toEqual([ + { uri: 'iii://iii-directory/index', id: 'iii-directory/index', body: 'BODY' }, + ]); + }); + + it('preserves null bodies for unavailable skills', async () => { + const bodies = await loadDefaultSkillBodies({ fetchSkillBody: vi.fn(async () => null) }, [ + 'iii://missing', + ]); + expect(bodies[0]?.body).toBeNull(); + }); +}); + +describe('processProvisioning', () => { + it('builds prompt with mode and attaches agent_trigger schema', async () => { + const ports = stubPorts({ + defaultSkillUris: [], + loadRunRequest: vi.fn(async () => ({ + provider: 'openai', + model: 'gpt-4', + mode: 'agent', + system_prompt: '', + function_schemas: [], + })), + fetchSkillsIndex: vi.fn(async () => 'INDEX'), + }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + + const outcome = await processProvisioning(ports, rec); + + expect(outcome.kind).toBe('ready'); + expect(outcome.runRequest.system_prompt).toContain('operating in agent mode'); + expect(outcome.runRequest.system_prompt).toContain('INDEX'); + expect(outcome.runRequest.function_schemas).toEqual([ + expect.objectContaining({ name: 'agent_trigger' }), + ]); + }); + + it('preserves a non-empty caller override verbatim', async () => { + const ports = stubPorts({ + loadRunRequest: vi.fn(async () => ({ + provider: '', + model: '', + mode: null, + system_prompt: 'custom override', + function_schemas: [], + })), + }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + + const outcome = await processProvisioning(ports, rec); + + expect(outcome.runRequest.system_prompt).toBe('custom override'); + }); +}); + +describe('applyProvisioningOutcome', () => { + it('saves run request and transitions to assistant_streaming', async () => { + const saveRunRequest = vi.fn(async () => {}); + 
const ports = stubPorts({ saveRunRequest }); + const rec = { ...newRecord('s1'), state: 'provisioning' as const }; + const runRequest = { + provider: 'openai', + model: 'gpt-4', + mode: 'agent' as const, + system_prompt: 'prompt', + function_schemas: [], + }; + + await applyProvisioningOutcome(ports, rec, { kind: 'ready', runRequest }); + + expect(saveRunRequest).toHaveBeenCalledWith('s1', runRequest); + expect(rec.state).toBe('assistant_streaming'); + }); +}); diff --git a/harness/tests/turn-orchestrator/provisioning.test.ts b/harness/tests/turn-orchestrator/provisioning.test.ts index 18bd297c..11ccb7b1 100644 --- a/harness/tests/turn-orchestrator/provisioning.test.ts +++ b/harness/tests/turn-orchestrator/provisioning.test.ts @@ -1,14 +1,11 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import type { TurnOrchestratorConfig } from '../../src/turn-orchestrator/config.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { defaultRunRequest, installMockTurnStore } from './_helpers/mockTurnStore.js'; import { type TurnStateRecord, newRecord } from '../../src/turn-orchestrator/state.js'; import { TurnStepPayloadSchema } from '../../src/turn-orchestrator/schemas.js'; -import { - handleProvisioning, - parseDirectoryBody, - register, -} from '../../src/turn-orchestrator/states/provisioning.js'; +import { parseDirectoryBody } from '../../src/turn-orchestrator/provisioning/ports.js'; +import { handleProvisioning, register } from '../../src/turn-orchestrator/provisioning/process.js'; type TriggerCall = { function_id: string; payload: unknown; timeoutMs?: number }; @@ -57,20 +54,18 @@ describe('handleProvisioning', () => { }); const cfg = { system_default_skills: ['iii://iii-directory/index'] }; - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4', - mode: 'agent', - system_prompt: '', - function_schemas: [], + const store 
= installMockTurnStore({ + loadRunRequest: vi.fn(async () => ({ + ...defaultRunRequest, + mode: 'agent', + })), }); - const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + const saveRunRequest = store.saveRunRequest; await handleProvisioning(iii, cfg, rec); expect(rec.state).toBe('assistant_streaming'); expect(saveRunRequest).toHaveBeenCalledWith( - iii, 's1', expect.objectContaining({ provider: 'openai', @@ -88,19 +83,18 @@ describe('handleProvisioning', () => { const { iii } = fakeIii(); const cfg = { system_default_skills: [] as string[] }; - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: 'openai', - model: 'gpt-4', - mode: null, - system_prompt: 'custom override', - function_schemas: [], + const store = installMockTurnStore({ + loadRunRequest: vi.fn(async () => ({ + ...defaultRunRequest, + mode: null, + system_prompt: 'custom override', + })), }); - const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + const saveRunRequest = store.saveRunRequest; await handleProvisioning(iii, cfg, rec); expect(saveRunRequest).toHaveBeenCalledWith( - iii, 's1', expect.objectContaining({ system_prompt: 'custom override' }), ); @@ -111,20 +105,20 @@ describe('handleProvisioning', () => { const { iii } = fakeIii(); const cfg = { system_default_skills: ['iii://missing'] }; - vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: '', - model: '', - mode: null, - system_prompt: '', - function_schemas: [], + const store = installMockTurnStore({ + loadRunRequest: vi.fn(async () => ({ + ...defaultRunRequest, + provider: '', + model: '', + mode: null, + })), }); - const saveRunRequest = vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + const saveRunRequest = store.saveRunRequest; await handleProvisioning(iii, cfg, rec); expect(rec.state).toBe('assistant_streaming'); expect(saveRunRequest).toHaveBeenCalledWith( - iii, 's1', expect.objectContaining({ system_prompt: 
expect.stringContaining('You are an iii agent worker'), @@ -167,16 +161,17 @@ describe('register', () => { it('registers turn::provisioning, threads cfg into the runner, and returns metadata', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); - const loadRunRequest = vi.spyOn(persistence, 'loadRunRequest').mockResolvedValue({ - provider: '', - model: '', - mode: null, - system_prompt: '', - function_schemas: [], + const store = installMockTurnStore({ + loadRecord: vi.fn(async () => rec), + loadRunRequest: vi.fn(async () => ({ + ...defaultRunRequest, + provider: '', + model: '', + mode: null, + })), }); - vi.spyOn(persistence, 'saveRunRequest').mockResolvedValue(); + const saveRecord = store.saveRecord; + const loadRunRequest = store.loadRunRequest; const { iii, getHandler, getId } = captureHandler(); register(iii, cfg); @@ -186,9 +181,8 @@ describe('register', () => { // cfg flows through to handleProvisioning (which reads the run request), // and the runner threads the pre-mutation snapshot into saveRecord. 
- expect(loadRunRequest).toHaveBeenCalledWith(iii, 's1'); + expect(loadRunRequest).toHaveBeenCalledWith('s1'); expect(saveRecord).toHaveBeenCalledWith( - iii, rec, expect.objectContaining({ state: 'provisioning' }), ); diff --git a/harness/tests/turn-orchestrator/run-start.test.ts b/harness/tests/turn-orchestrator/run-start.test.ts index 7e08a4ad..937344ce 100644 --- a/harness/tests/turn-orchestrator/run-start.test.ts +++ b/harness/tests/turn-orchestrator/run-start.test.ts @@ -151,8 +151,8 @@ describe('execute', () => { const turnStateSet = calls.find( (c) => c.function_id === 'state::set' && - (c.payload as { scope?: string; key?: string }).scope === 'agent' && - (c.payload as { scope?: string; key?: string }).key === 'session/sess-1/turn_state', + (c.payload as { scope?: string; key?: string }).scope === 'turn_state' && + (c.payload as { scope?: string; key?: string }).key === 'sess-1', ); expect(turnStateSet).toBeDefined(); expect((turnStateSet?.payload as { value: { state: string } }).value.state).toBe( diff --git a/harness/tests/turn-orchestrator/run-transition.test.ts b/harness/tests/turn-orchestrator/run-transition.test.ts index 7f13321a..b3c61e92 100644 --- a/harness/tests/turn-orchestrator/run-transition.test.ts +++ b/harness/tests/turn-orchestrator/run-transition.test.ts @@ -1,13 +1,14 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; import { TransientError } from '../../src/turn-orchestrator/errors.js'; +import { TURN_STATE_SCOPE } from '../../src/turn-orchestrator/state.js'; import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; import { type TurnStateRecord, newRecord, transitionTo, } from '../../src/turn-orchestrator/state.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; afterEach(() => { vi.restoreAllMocks(); @@ -15,7 +16,7 @@ afterEach(() => { 
describe('runTransition', () => { it('throws when the session record is missing, without running the handler', async () => { - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(null); + installMockTurnStore({ loadRecord: vi.fn(async () => null) }); const handle = vi.fn(); await expect( @@ -26,22 +27,20 @@ describe('runTransition', () => { it('returns a stale skip without running the handler or saving', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'assistant_streaming' }; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const store = installMockTurnStore({ loadRecord: vi.fn(async () => rec) }); const handle = vi.fn(); const result = await runTransition({} as ISdk, 'provisioning', handle, { session_id: 's1' }); expect(result).toEqual({ ok: true, skipped: true, reason: 'stale' }); expect(handle).not.toHaveBeenCalled(); - expect(saveRecord).not.toHaveBeenCalled(); + expect(store.saveRecord).not.toHaveBeenCalled(); }); it('runs the handler and threads the pre-mutation snapshot into saveRecord', async () => { const iii = {} as ISdk; const rec: TurnStateRecord = { ...newRecord('s1'), state: 'provisioning' }; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); + const store = installMockTurnStore({ loadRecord: vi.fn(async () => rec) }); const handle = vi.fn(async (_iii: ISdk, r: TurnStateRecord) => { transitionTo(r, 'assistant_streaming'); }); @@ -49,8 +48,7 @@ describe('runTransition', () => { const result = await runTransition(iii, 'provisioning', handle, { session_id: 's1' }); expect(handle).toHaveBeenCalledWith(iii, rec); - expect(saveRecord).toHaveBeenCalledWith( - iii, + expect(store.saveRecord).toHaveBeenCalledWith( rec, expect.objectContaining({ state: 'provisioning' }), ); @@ -65,10 +63,12 @@ describe('runTransition', () => { const iii = {} as ISdk; const 
rec: TurnStateRecord = { ...newRecord('s1'), state: 'function_execute' }; rec.awaiting_approval = []; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); let captured: TurnStateRecord | null | undefined; - vi.spyOn(persistence, 'saveRecord').mockImplementation(async (_i, _r, previous) => { - captured = previous; + installMockTurnStore({ + loadRecord: vi.fn(async () => rec), + saveRecord: vi.fn(async (_r, previous) => { + captured = previous; + }), }); const handle = vi.fn(async (_iii: ISdk, r: TurnStateRecord) => { r.awaiting_approval?.push({ function_call_id: 'fc-1', function_id: 'f', args: {} }); @@ -77,25 +77,23 @@ describe('runTransition', () => { await runTransition(iii, 'function_execute', handle, { session_id: 's1' }); - // The snapshot reflects state BEFORE the handler ran, even though the - // handler mutated rec.awaiting_approval in place. expect(captured?.state).toBe('function_execute'); expect(captured?.awaiting_approval).toEqual([]); }); it('routes an unexpected handler throw to failed without re-throwing', async () => { const rec: TurnStateRecord = { ...newRecord('s1'), state: 'steering_check' }; - vi.spyOn(persistence, 'loadRecord').mockResolvedValue(rec); - const saveRecord = vi.spyOn(persistence, 'saveRecord').mockResolvedValue(); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); + const store = installMockTurnStore({ + loadRecord: vi.fn(async () => rec), + loadMessages: vi.fn(async () => []), + }); const handle = vi.fn(async () => { throw new Error('boom'); }); - // Should NOT re-throw — returns { ok: true, to_state: 'failed' } const result = await runTransition({} as ISdk, 'steering_check', handle, { session_id: 's1' }); expect(result).toMatchObject({ ok: true, to_state: 'failed' }); - expect(saveRecord).toHaveBeenCalled(); + expect(store.saveRecord).toHaveBeenCalled(); }); }); @@ -104,7 +102,13 @@ function fakeIii(record: unknown) { const iii = { trigger: vi.fn(async ({ function_id, payload }: any) => { writes.push({ 
function_id, payload }); - if (function_id === 'state::get' && payload.key.endsWith('/turn_state')) return record; + if ( + function_id === 'state::get' && + payload.scope === TURN_STATE_SCOPE && + payload.key === 's1' + ) { + return record; + } return null; }), } as any; @@ -113,35 +117,58 @@ function fakeIii(record: unknown) { describe('runTransition error model', () => { const base = { - session_id: 's1', state: 'function_execute', turn_count: 1, - function_results: [], turn_end_emitted: false, - started_at_ms: 1, updated_at_ms: 1, + session_id: 's1', + state: 'function_execute', + turn_count: 1, + function_results: [], + turn_end_emitted: false, + started_at_ms: 1, + updated_at_ms: 1, }; it('routes an unexpected throw to failed and does not re-throw', async () => { const { iii, writes } = fakeIii({ ...base }); - const res = await runTransition(iii, 'function_execute', async () => { - throw new Error('boom'); - }, { session_id: 's1' }); + const res = await runTransition( + iii, + 'function_execute', + async () => { + throw new Error('boom'); + }, + { session_id: 's1' }, + ); expect(res).toMatchObject({ ok: true, to_state: 'failed' }); - const saved = writes.find((w) => w.function_id === 'state::set' && w.payload.key.endsWith('/turn_state')); + const saved = writes.find( + (w) => + w.function_id === 'state::set' && + w.payload.scope === TURN_STATE_SCOPE && + w.payload.key === 's1', + ); expect(saved?.payload.value.state).toBe('failed'); expect(saved?.payload.value.error.message).toContain('boom'); - const surfaced = writes.some((w) => - w.function_id === 'stream::set' - && w.payload.data?.type === 'message_complete' - && w.payload.data?.message?.stop_reason === 'error'); + const surfaced = writes.some( + (w) => + w.function_id === 'stream::set' && + w.payload.data?.type === 'message_complete' && + w.payload.data?.message?.stop_reason === 'error', + ); expect(surfaced).toBe(true); - const ended = writes.some((w) => w.function_id === 'stream::set' && 
w.payload.data?.type === 'agent_end'); + const ended = writes.some( + (w) => w.function_id === 'stream::set' && w.payload.data?.type === 'agent_end', + ); expect(ended).toBe(true); }); it('re-throws TransientError so the queue retries', async () => { const { iii } = fakeIii({ ...base }); await expect( - runTransition(iii, 'function_execute', async () => { - throw new TransientError('retry me'); - }, { session_id: 's1' }), + runTransition( + iii, + 'function_execute', + async () => { + throw new TransientError('retry me'); + }, + { session_id: 's1' }, + ), ).rejects.toThrow('retry me'); }); }); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index 3079faa9..1ad0315b 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -1,11 +1,8 @@ import { describe, expect, it } from 'vitest'; -import type { TurnStateRecord } from '../../src/turn-orchestrator/state.js'; import { - AGENT_SCOPE, - messagesKey, + type TurnStateRecord, newRecord, transitionTo, - turnStateKey, } from '../../src/turn-orchestrator/state.js'; describe('TurnStateRecord', () => { @@ -28,11 +25,3 @@ describe('TurnStateRecord', () => { expect(rec.awaiting_approval).toBeUndefined(); }); }); - -describe('state keys', () => { - it('namespace by session under agent scope', () => { - expect(AGENT_SCOPE).toBe('agent'); - expect(turnStateKey('abc')).toBe('session/abc/turn_state'); - expect(messagesKey('abc')).toBe('session/abc/messages'); - }); -}); diff --git a/harness/tests/turn-orchestrator/steering-check-layer.test.ts b/harness/tests/turn-orchestrator/steering-check-layer.test.ts new file mode 100644 index 00000000..e93acb6d --- /dev/null +++ b/harness/tests/turn-orchestrator/steering-check-layer.test.ts @@ -0,0 +1,174 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { AgentMessage } from '../../src/types/agent-message.js'; +import { applySteeringCheckOutcome } from 
'../../src/turn-orchestrator/steering-check/process.js'; +import { parseDrainItems } from '../../src/turn-orchestrator/steering-check/ports.js'; +import type { SteeringCheckPorts } from '../../src/turn-orchestrator/steering-check/ports.js'; +import { processSteeringCheck } from '../../src/turn-orchestrator/steering-check/process.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; + +function userMessage(text: string): AgentMessage { + return { role: 'user', content: [{ type: 'text', text }] }; +} + +function stubPorts(overrides: Partial = {}): SteeringCheckPorts { + return { + drainInbox: vi.fn(async () => []), + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + checkpoint: vi.fn(async () => {}), + loadRunRequest: vi.fn(async () => ({ + provider: 'openai', + model: 'gpt-4', + mode: null, + system_prompt: '', + function_schemas: [], + })), + saveRunRequest: vi.fn(async () => {}), + emitTurnEnd: vi.fn(async () => {}), + finishSession: vi.fn(async (rec) => { + rec.state = 'stopped'; + }), + emit: vi.fn(async () => {}), + ...overrides, + }; +} + +describe('parseDrainItems', () => { + it('returns items array when present', () => { + const items = [userMessage('hello')]; + expect(parseDrainItems({ items })).toEqual(items); + }); + + it('returns empty array for invalid shapes', () => { + expect(parseDrainItems(null)).toEqual([]); + expect(parseDrainItems({})).toEqual([]); + expect(parseDrainItems({ items: 'bad' })).toEqual([]); + }); +}); + +describe('processSteeringCheck', () => { + it('returns resume_with_inbox for steering messages', async () => { + const steeringItems = [userMessage('steer')]; + const ports = stubPorts({ + drainInbox: vi.fn(async (name) => (name === 'steering' ? 
steeringItems : [])), + }); + const rec = { ...newRecord('s1'), state: 'steering_check' as const }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'resume_with_inbox', inbox: steeringItems }); + expect(ports.drainInbox).toHaveBeenCalledTimes(1); + }); + + it('drains followup only when steering is empty', async () => { + const followupItems = [userMessage('follow')]; + const drainInbox = vi.fn(async (name: 'steering' | 'followup') => + name === 'followup' ? followupItems : [], + ); + const ports = stubPorts({ drainInbox }); + const rec = { ...newRecord('s1'), state: 'steering_check' as const }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'resume_with_inbox', inbox: followupItems }); + expect(drainInbox).toHaveBeenCalledWith('steering', 's1'); + expect(drainInbox).toHaveBeenCalledWith('followup', 's1'); + }); + + it('returns continue_after_function when function_results present', async () => { + const ports = stubPorts(); + const rec = { + ...newRecord('s1'), + state: 'steering_check' as const, + function_results: [{ role: 'function_result', content: [] }] as never, + }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'continue_after_function' }); + }); + + it('returns max_turns_reached when cap hit on continue path', async () => { + const ports = stubPorts(); + const rec = { + ...newRecord('s1'), + state: 'steering_check' as const, + max_turns: 2, + turn_count: 2, + function_results: [{ role: 'function_result', content: [] }] as never, + }; + + const outcome = await processSteeringCheck(ports, rec); + + expect(outcome).toEqual({ kind: 'max_turns_reached' }); + }); + + it('returns end_turn when no steering, followup, or function results', async () => { + const ports = stubPorts(); + const rec = { ...newRecord('s1'), state: 'steering_check' as const }; + + const outcome = await processSteeringCheck(ports, rec); + + 
expect(outcome).toEqual({ kind: 'end_turn' }); + }); +}); + +describe('applySteeringCheckOutcome', () => { + it('resume_with_inbox: emits turn_end, saves messages, clears function_results', async () => { + const inbox = [userMessage('new')]; + const emitTurnEnd = vi.fn(async () => {}); + const appendMessages = vi.fn(async () => {}); + const ports = stubPorts({ + emitTurnEnd, + appendMessages, + }); + const rec = { + ...newRecord('s1'), + state: 'steering_check' as const, + function_results: [{ role: 'function_result', content: [] }] as never, + }; + + await applySteeringCheckOutcome(ports, rec, { kind: 'resume_with_inbox', inbox }); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + expect(rec.turn_end_emitted).toBe(true); + expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), []); + expect(appendMessages).toHaveBeenCalledWith('s1', inbox); + }); + + it('continue_after_function: transitions without loading messages', async () => { + const loadMessages = vi.fn(async () => []); + const emitTurnEnd = vi.fn(async () => {}); + const ports = stubPorts({ loadMessages, emitTurnEnd }); + const rec = { + ...newRecord('s1'), + state: 'steering_check' as const, + function_results: [{ role: 'function_result', content: [] }] as never, + turn_end_emitted: true, + }; + + await applySteeringCheckOutcome(ports, rec, { kind: 'continue_after_function' }); + + expect(rec.state).toBe('assistant_streaming'); + expect(rec.function_results).toEqual([]); + expect(loadMessages).not.toHaveBeenCalled(); + expect(emitTurnEnd).not.toHaveBeenCalled(); + }); + + it('end_turn: emits turn_end and finishes session', async () => { + const emitTurnEnd = vi.fn(async () => {}); + const finishSession = vi.fn(async (rec) => { + rec.state = 'stopped'; + }); + const ports = stubPorts({ emitTurnEnd, finishSession }); + const rec = { ...newRecord('s1'), state: 'steering_check' as const }; + + await applySteeringCheckOutcome(ports, rec, { kind: 
'end_turn' }); + + expect(rec.state).toBe('stopped'); + expect(rec.turn_end_emitted).toBe(true); + expect(emitTurnEnd).toHaveBeenCalledWith('s1', expect.anything(), []); + expect(finishSession).toHaveBeenCalled(); + }); +}); diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index c153a65b..22a76b3e 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -2,9 +2,9 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ISdk } from '../../src/runtime/iii.js'; import type { AgentMessage } from '../../src/types/agent-message.js'; import * as events from '../../src/turn-orchestrator/events.js'; -import * as persistence from '../../src/turn-orchestrator/persistence.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; -import { handleSteering, route } from '../../src/turn-orchestrator/states/steering-check.js'; +import { handleSteering, route } from '../../src/turn-orchestrator/steering-check/process.js'; afterEach(() => { vi.restoreAllMocks(); @@ -27,9 +27,7 @@ function userMessage(text: string): AgentMessage { return { role: 'user', content: [{ type: 'text', text }] }; } -function makeIii( - opts: { steeringItems?: AgentMessage[]; followupItems?: AgentMessage[] } = {}, -) { +function makeIii(opts: { steeringItems?: AgentMessage[]; followupItems?: AgentMessage[] } = {}) { const { steeringItems = [], followupItems = [] } = opts; const drainCalls: Array<{ name: string; session_id: string }> = []; @@ -68,8 +66,8 @@ describe('handleSteering', () => { const rec = steeringRec('s1', { function_results: [{ role: 'function_result', content: [] }] as never, }); - const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([userMessage('prior')]); - const saveSpy = vi.spyOn(persistence, 
'saveMessages').mockResolvedValue(undefined); + const store = installMockTurnStore(); + const appendSpy = store.appendMessages; vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); @@ -77,23 +75,23 @@ describe('handleSteering', () => { expect(rec.state).toBe('assistant_streaming'); expect(rec.function_results).toEqual([]); expect(rec.turn_end_emitted).toBe(true); - expect(saveSpy).toHaveBeenCalledWith(iii, 's1', [userMessage('prior'), ...steeringItems]); - expect(loadSpy).toHaveBeenCalled(); + expect(appendSpy).toHaveBeenCalledWith('s1', steeringItems); + expect(store.loadMessages).not.toHaveBeenCalled(); }); it('followup: drains followup when steering queue is empty', async () => { const followupItems = [userMessage('follow-up')]; const { iii, drainCalls } = makeIii({ followupItems }); const rec = steeringRec('s1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const store = installMockTurnStore(); + const appendSpy = store.appendMessages; vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); expect(rec.state).toBe('assistant_streaming'); expect(drainCalls.map((c) => c.name)).toEqual(['steering', 'followup']); - expect(saveSpy).toHaveBeenCalledWith(iii, 's1', followupItems); + expect(appendSpy).toHaveBeenCalledWith('s1', followupItems); }); it('followup: skipped when steering queue has items', async () => { @@ -102,8 +100,7 @@ describe('handleSteering', () => { followupItems: [userMessage('follow')], }); const rec = steeringRec('s1'); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + installMockTurnStore(); vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); @@ -118,20 +115,21 @@ describe('handleSteering', () => { function_results: [{ role: 'function_result', content: [] }] as 
never, turn_end_emitted: true, }); - const loadSpy = vi.spyOn(persistence, 'loadMessages'); + const store = installMockTurnStore(); const emitSpy = vi.spyOn(events, 'emit'); await handleSteering(iii, rec); expect(rec.state).toBe('assistant_streaming'); expect(rec.function_results).toEqual([]); - expect(loadSpy).not.toHaveBeenCalled(); + expect(store.loadMessages).not.toHaveBeenCalled(); expect(emitSpy).not.toHaveBeenCalled(); }); it('end_turn: emits turn_end then finishes the session (agent_end + stopped)', async () => { const { iii } = makeIii(); const rec = steeringRec('s1'); + installMockTurnStore({ loadMessages: vi.fn(async () => []) }); const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); @@ -149,8 +147,8 @@ describe('handleSteering', () => { turn_count: 2, function_results: [{ role: 'function_result', content: [] }] as never, }); - const loadSpy = vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - const saveSpy = vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + const store = installMockTurnStore({ loadMessages: vi.fn(async () => []) }); + const appendSpy = store.appendMessages; const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); @@ -166,18 +164,14 @@ describe('handleSteering', () => { expect.objectContaining({ type: 'message_complete' }), ); expect(emitSpy).toHaveBeenCalledWith(iii, 's1', expect.objectContaining({ type: 'turn_end' })); - expect(loadSpy).toHaveBeenCalledWith(iii, 's1'); - expect(saveSpy).toHaveBeenCalledWith( - iii, - 's1', - expect.arrayContaining([ - expect.objectContaining({ - content: expect.arrayContaining([ - expect.objectContaining({ text: expect.stringContaining('max_turns') }), - ]), - }), - ]), - ); + expect(store.loadMessages).toHaveBeenCalledWith('s1'); + expect(appendSpy).toHaveBeenCalledWith('s1', [ + expect.objectContaining({ + content: expect.arrayContaining([ + expect.objectContaining({ text: 
expect.stringContaining('max_turns') }), + ]), + }), + ]); }); it('caps at max_turns via steering route: tears down instead of continuing to assistant_streaming', async () => { @@ -186,8 +180,7 @@ describe('handleSteering', () => { max_turns: 3, turn_count: 3, }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + installMockTurnStore({ loadMessages: vi.fn(async () => []) }); vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); @@ -206,8 +199,7 @@ describe('handleSteering', () => { turn_count: 2, function_results: [{ role: 'function_result', content: [] }] as never, }); - vi.spyOn(persistence, 'loadMessages').mockResolvedValue([]); - vi.spyOn(persistence, 'saveMessages').mockResolvedValue(undefined); + installMockTurnStore(); vi.spyOn(events, 'emit').mockResolvedValue(undefined); await handleSteering(iii, rec); diff --git a/harness/tests/turn-orchestrator/store.test.ts b/harness/tests/turn-orchestrator/store.test.ts new file mode 100644 index 00000000..970c8b4d --- /dev/null +++ b/harness/tests/turn-orchestrator/store.test.ts @@ -0,0 +1,198 @@ +import { describe, expect, it, vi } from 'vitest'; +import { TriggerAction } from '../../src/runtime/iii.js'; +import type { ISdk } from '../../src/runtime/iii.js'; +import { + createTurnStore, + parseFlatMessages, + shouldWakeStep, +} from '../../src/turn-orchestrator/state-runtime/store.js'; +import { newRecord } from '../../src/turn-orchestrator/state.js'; + +describe('parseFlatMessages', () => { + it('returns the array when messages are objects', () => { + const messages = [{ role: 'user', content: [], timestamp: 1 }]; + expect(parseFlatMessages(messages)).toEqual(messages); + }); + + it('returns [] for null, undefined, and non-arrays', () => { + expect(parseFlatMessages(null)).toEqual([]); + expect(parseFlatMessages(undefined)).toEqual([]); + expect(parseFlatMessages('bad')).toEqual([]); + 
expect(parseFlatMessages({})).toEqual([]); + }); +}); + +function fakeIii(): { iii: ISdk; emits: Array<{ session_id: string; event: unknown }> } { + const emits: Array<{ session_id: string; event: unknown }> = []; + const iii = { + trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { + if (function_id === 'stream::set') { + const p = payload as { group_id: string; data: unknown }; + emits.push({ session_id: p.group_id, event: p.data }); + return null; + } + if (function_id === 'state::set') { + return { old_value: null, new_value: (payload as { value: unknown }).value }; + } + if (function_id === 'state::update') { + return { old_value: 0 }; + } + return null; + }), + } as unknown as ISdk; + return { iii, emits }; +} + +describe('saveRecord turn_state_changed emission', () => { + it('emits turn_state_changed on agent::events with group_id = session_id', async () => { + const { iii, emits } = fakeIii(); + const store = createTurnStore(iii); + const rec = newRecord('sess-a'); + rec.state = 'function_awaiting_approval'; + const previous = { ...rec, state: 'function_execute' as const }; + + await store.saveRecord(rec, previous); + + expect(emits).toHaveLength(1); + expect(emits[0]?.session_id).toBe('sess-a'); + expect(emits[0]?.event).toMatchObject({ + type: 'turn_state_changed', + event_type: 'state:updated', + new_value: { state: 'function_awaiting_approval' }, + old_value: { state: 'function_execute' }, + }); + }); + + it('swallows emit failures (logs only, never rethrows)', async () => { + const iii = { + trigger: vi.fn(async () => { + throw new Error('stream::set down'); + }), + } as unknown as ISdk; + const store = createTurnStore(iii); + const rec = newRecord('sess-a'); + await expect(store.saveRecord(rec)).resolves.toBeUndefined(); + }); + + it('omits old_value from the emitted event when state:created', async () => { + const { iii, emits } = fakeIii(); + const store = createTurnStore(iii); + const rec = 
newRecord('sess-a'); + rec.state = 'provisioning'; + + await store.saveRecord(rec); + + expect(emits).toHaveLength(1); + const event = emits[0]?.event as Record; + expect(event.type).toBe('turn_state_changed'); + expect(event.event_type).toBe('state:created'); + expect('old_value' in event).toBe(false); + }); +}); + +describe('shouldWakeStep', () => { + it('accepts first write to a stepable state', () => { + expect(shouldWakeStep(null, 'provisioning')).toBe(true); + }); + + it('accepts transitions to another stepable state', () => { + expect(shouldWakeStep('provisioning', 'assistant_streaming')).toBe(true); + expect(shouldWakeStep('assistant_streaming', 'function_execute')).toBe(true); + }); + + it('rejects terminal state (stopped)', () => { + expect(shouldWakeStep('steering_check', 'stopped')).toBe(false); + }); + + it('rejects function_awaiting_approval (orchestrator parks here)', () => { + expect(shouldWakeStep('function_execute', 'function_awaiting_approval')).toBe(false); + }); + + it('rejects same-state writes', () => { + expect(shouldWakeStep('function_execute', 'function_execute')).toBe(false); + }); +}); + +describe('TurnStore.wakeStep', () => { + it('enqueues turn::{state} on the turn-step FIFO queue', async () => { + const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { + triggers.push(req); + return null; + }), + } as unknown as ISdk; + + await createTurnStore(iii).wakeStep('sess-abc', 'assistant_streaming'); + + expect(triggers).toHaveLength(1); + expect(triggers[0]?.function_id).toBe('turn::assistant_streaming'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); + expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); + }); + + it('swallows enqueue failures (logs only, never rethrows)', async () => { + const iii = { + trigger: vi.fn(async () => { + throw new 
Error('queue down'); + }), + } as unknown as ISdk; + + await expect( + createTurnStore(iii).wakeStep('sess-abc', 'provisioning'), + ).resolves.toBeUndefined(); + }); +}); + +describe('TurnStore.wakeFromRecord', () => { + it('enqueues turn::{currentState} from persisted record', async () => { + const rec = newRecord('sess-x'); + rec.state = 'function_awaiting_approval'; + const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { + if (req.function_id === 'state::get') return rec; + triggers.push(req); + return null; + }), + } as unknown as ISdk; + + await createTurnStore(iii).wakeFromRecord('sess-x'); + + expect(triggers).toHaveLength(1); + expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-x' }); + }); + + it('no-ops when session is stopped', async () => { + const rec = newRecord('sess-y'); + rec.state = 'stopped'; + const turnTriggers: string[] = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') return rec; + if (req.function_id.startsWith('turn::')) turnTriggers.push(req.function_id); + return null; + }), + } as unknown as ISdk; + + await createTurnStore(iii).wakeFromRecord('sess-y'); + expect(turnTriggers).toHaveLength(0); + }); + + it('no-ops when session is failed (no turn::failed handler exists)', async () => { + const rec = newRecord('sess-z'); + rec.state = 'failed'; + const turnTriggers: string[] = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string }) => { + if (req.function_id === 'state::get') return rec; + if (req.function_id.startsWith('turn::')) turnTriggers.push(req.function_id); + return null; + }), + } as unknown as ISdk; + + await createTurnStore(iii).wakeFromRecord('sess-z'); + expect(turnTriggers).toHaveLength(0); + }); +}); diff --git 
a/harness/tests/turn-orchestrator/subscriber-presence.test.ts b/harness/tests/turn-orchestrator/subscriber-presence.test.ts deleted file mode 100644 index e1741332..00000000 --- a/harness/tests/turn-orchestrator/subscriber-presence.test.ts +++ /dev/null @@ -1,69 +0,0 @@ -import { beforeEach, describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { - hasDurableSubscriber, - resetSubscriberCache, -} from '../../src/turn-orchestrator/subscriber-presence.js'; - -function fakeIii( - triggers: unknown[], - opts?: { onList?: () => void; throwOnList?: boolean }, -) { - const trigger = vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'engine::triggers::list') { - opts?.onList?.(); - if (opts?.throwOnList) throw new Error('engine unreachable'); - return { triggers }; - } - throw new Error(`unexpected function_id: ${req.function_id}`); - }); - return { iii: { trigger } as unknown as ISdk, trigger }; -} - -describe('hasDurableSubscriber', () => { - beforeEach(() => resetSubscriberCache()); - - it('returns true when a durable:subscriber trigger is bound to the topic', async () => { - const { iii } = fakeIii([ - { trigger_type: 'durable:subscriber', config: { topic: 'agent::after_function_call' } }, - ]); - expect(await hasDurableSubscriber(iii, 'agent::after_function_call')).toBe(true); - }); - - it('returns false when no trigger subscribes to the topic', async () => { - const { iii } = fakeIii([ - { trigger_type: 'stream', config: { stream_name: 'agent::hook_reply' } }, - { trigger_type: 'durable:subscriber', config: { topic: 'some::other::topic' } }, - ]); - expect(await hasDurableSubscriber(iii, 'agent::after_function_call')).toBe(false); - }); - - it('caches the result within the TTL (one engine query for repeated calls)', async () => { - let listCalls = 0; - const { iii } = fakeIii([], { - onList: () => { - listCalls++; - }, - }); - await hasDurableSubscriber(iii, 'agent::after_function_call', 
1000); - await hasDurableSubscriber(iii, 'agent::after_function_call', 2000); - expect(listCalls).toBe(1); - }); - - it('re-queries after the TTL expires', async () => { - let listCalls = 0; - const { iii } = fakeIii([], { - onList: () => { - listCalls++; - }, - }); - await hasDurableSubscriber(iii, 'agent::after_function_call', 1000); - await hasDurableSubscriber(iii, 'agent::after_function_call', 1000 + 30_001); - expect(listCalls).toBe(2); - }); - - it('fails safe (returns true) when the engine query throws', async () => { - const { iii } = fakeIii([], { throwOnList: true }); - expect(await hasDurableSubscriber(iii, 'agent::after_function_call')).toBe(true); - }); -}); diff --git a/harness/tests/turn-orchestrator/turn-state-write.test.ts b/harness/tests/turn-orchestrator/turn-state-write.test.ts deleted file mode 100644 index 468500c1..00000000 --- a/harness/tests/turn-orchestrator/turn-state-write.test.ts +++ /dev/null @@ -1,62 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { emitTurnStateChanged } from '../../src/turn-orchestrator/turn-state-write.js'; - -function fakeIii(): { iii: ISdk; emits: Array<{ session_id: string; event: unknown }> } { - const emits: Array<{ session_id: string; event: unknown }> = []; - const iii = { - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - if (function_id === 'stream::set') { - const p = payload as { group_id: string; data: unknown }; - emits.push({ session_id: p.group_id, event: p.data }); - return null; - } - if (function_id === 'state::update') { - return { old_value: 0 }; - } - return null; - }), - } as unknown as ISdk; - return { iii, emits }; -} - -describe('emitTurnStateChanged', () => { - it('emits turn_state_changed on agent::events with group_id = session_id', async () => { - const { iii, emits } = fakeIii(); - await emitTurnStateChanged( - iii, - 'sess-a', - 'state:updated', - { state: 
'function_awaiting_approval', awaiting_approval: [] }, - { state: 'function_execute', awaiting_approval: null }, - ); - expect(emits).toHaveLength(1); - expect(emits[0]?.session_id).toBe('sess-a'); - expect(emits[0]?.event).toMatchObject({ - type: 'turn_state_changed', - event_type: 'state:updated', - new_value: { state: 'function_awaiting_approval' }, - old_value: { state: 'function_execute' }, - }); - }); - - it('swallows emit failures (logs only, never rethrows)', async () => { - const iii = { - trigger: vi.fn(async () => { - throw new Error('stream::set down'); - }), - } as unknown as ISdk; - await expect( - emitTurnStateChanged(iii, 'sess-a', 'state:created', { state: 'provisioning' }), - ).resolves.toBeUndefined(); - }); - - it('omits old_value from the emitted event when state:created', async () => { - const { iii, emits } = fakeIii(); - await emitTurnStateChanged(iii, 'sess-a', 'state:created', { state: 'provisioning' }); - expect(emits).toHaveLength(1); - const event = emits[0]?.event as Record; - expect(event.type).toBe('turn_state_changed'); - expect('old_value' in event).toBe(false); - }); -}); diff --git a/harness/tests/turn-orchestrator/wake.test.ts b/harness/tests/turn-orchestrator/wake.test.ts deleted file mode 100644 index dd732781..00000000 --- a/harness/tests/turn-orchestrator/wake.test.ts +++ /dev/null @@ -1,112 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { TriggerAction } from '../../src/runtime/iii.js'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; -import { shouldWakeStep, wakeFromRecord, wakeState } from '../../src/turn-orchestrator/wake.js'; - -describe('shouldWakeStep', () => { - it('accepts first write to a stepable state', () => { - expect(shouldWakeStep(null, 'provisioning')).toBe(true); - }); - - it('accepts transitions to another stepable state', () => { - expect(shouldWakeStep('provisioning', 'assistant_streaming')).toBe(true); - 
expect(shouldWakeStep('assistant_streaming', 'function_execute')).toBe(true); - }); - - it('rejects terminal state (stopped)', () => { - expect(shouldWakeStep('steering_check', 'stopped')).toBe(false); - }); - - it('rejects function_awaiting_approval (orchestrator parks here)', () => { - expect(shouldWakeStep('function_execute', 'function_awaiting_approval')).toBe(false); - }); - - it('rejects same-state writes', () => { - expect(shouldWakeStep('function_execute', 'function_execute')).toBe(false); - }); -}); - -describe('wakeState', () => { - it('enqueues turn::{state} on the turn-step FIFO queue', async () => { - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - - await wakeState(iii, 'sess-abc', 'assistant_streaming'); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::assistant_streaming'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); - - it('swallows enqueue failures (logs only, never rethrows)', async () => { - const iii = { - trigger: vi.fn(async () => { - throw new Error('queue down'); - }), - } as unknown as ISdk; - - await expect(wakeState(iii, 'sess-abc', 'provisioning')).resolves.toBeUndefined(); - }); -}); - -describe('wakeFromRecord', () => { - it('enqueues turn::{currentState} from persisted record', async () => { - const rec = newRecord('sess-x'); - rec.state = 'function_awaiting_approval'; - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - if (req.function_id === 'state::get') { - return rec; - } - triggers.push(req); - return null; - }), 
- } as unknown as ISdk; - - await wakeFromRecord(iii, 'sess-x'); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-x' }); - }); - - it('no-ops when session is stopped', async () => { - const rec = newRecord('sess-y'); - rec.state = 'stopped'; - const iii = { - trigger: vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'state::get') return rec; - return null; - }), - } as unknown as ISdk; - - await wakeFromRecord(iii, 'sess-y'); - expect(iii.trigger).toHaveBeenCalledTimes(1); - }); - - it('no-ops when session is failed (no turn::failed handler exists)', async () => { - const rec = newRecord('sess-z'); - rec.state = 'failed'; - const triggers: Array<{ function_id: string }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'state::get') return rec; - triggers.push(req); - return null; - }), - } as unknown as ISdk; - - await wakeFromRecord(iii, 'sess-z'); - // only the state::get read — no enqueue of an unregistered turn::failed - expect(iii.trigger).toHaveBeenCalledTimes(1); - expect(triggers).toHaveLength(0); - }); -}); From 35589ea8b8d54c13a39151d4dbada35b7a325d00 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 26 May 2026 12:45:14 -0300 Subject: [PATCH 35/41] refactor(turn-orchestrator): enhance function-awaiting-approval handling - Update the function-awaiting-approval state to execute resolved calls immediately as decisions arrive, improving responsiveness. - Modify the function-execute state to allow concurrent handling of pending approvals, ensuring that multiple calls can await approval without blocking. - Refactor related logic to streamline the transition between states, including clearer handling of batch completion and decision processing. 
- Remove outdated tests and add new integration tests to validate parallel approval flows and decision handling. This refactor aims to improve the efficiency and clarity of the approval process within the turn orchestrator, ensuring a more responsive and robust system. --- harness/docs/architecture.md | 19 +- harness/docs/workers/approval-gate.md | 2 +- harness/docs/workers/turn-orchestrator.md | 12 +- .../function-awaiting-approval/ports.ts | 8 - .../function-awaiting-approval/process.ts | 117 +++++----- .../function-execute/process.ts | 36 +++- .../turn-orchestrator/function-execute/run.ts | 43 +++- .../function-execute/types.ts | 7 +- .../integration/approval-resume.e2e.test.ts | 101 --------- .../integration/parallel-approval-harness.ts | 204 ++++++++++++++++++ .../integration/parallel-approval.e2e.test.ts | 176 +++++++++++++++ .../awaiting-approval.test.ts | 160 -------------- .../function-awaiting-approval.test.ts | 175 --------------- .../tests/turn-orchestrator/functions.test.ts | 18 +- 14 files changed, 531 insertions(+), 547 deletions(-) delete mode 100644 harness/tests/integration/approval-resume.e2e.test.ts create mode 100644 harness/tests/integration/parallel-approval-harness.ts create mode 100644 harness/tests/integration/parallel-approval.e2e.test.ts delete mode 100644 harness/tests/turn-orchestrator/awaiting-approval.test.ts delete mode 100644 harness/tests/turn-orchestrator/function-awaiting-approval.test.ts diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index be938120..02d2f505 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -111,7 +111,8 @@ stateDiagram-v2 function_execute --> function_awaiting_approval: any call needs approval function_execute --> steering_check: batch complete function_execute --> stopped: all calls terminate session via finishSession - function_awaiting_approval --> function_execute: all decisions written + function_awaiting_approval --> function_execute: awaiting 
empty, batch incomplete + function_awaiting_approval --> steering_check: awaiting empty, batch complete steering_check --> assistant_streaming: continue turn steering_check --> stopped: stop or max turns via finishSession stopped --> [*] @@ -126,9 +127,9 @@ unexpectedly (unless it opts into queue retry via `TransientError`). The orchestrator consults `policy::check_permissions` directly inside `consultBefore` — `allow`, `deny`, or `pending`. There is no hook fanout on the before path. The orchestrator parks the turn in `function_awaiting_approval` -and waits until `approval::resolve` writes the decision to scope `approvals`, -which fires `turn::on_approval` and calls `wakeFromRecord` to re-enqueue the -current state handler. +and waits until each parked call receives `approval::resolve` (decisions may +arrive independently). Each write to scope `approvals` fires `turn::on_approval` +and calls `wakeFromRecord` to re-enqueue the current state handler. ```mermaid sequenceDiagram @@ -149,8 +150,14 @@ sequenceDiagram User->>Gate: approval::resolve(decision, reason) Gate->>Bus: state::set approvals// = {decision, reason} Bus-->>Turn: turn::on_approval state trigger - Turn->>Turn: wakeFromRecord → function_awaiting_approval reads
approvals// for each pending entry - Turn->>Turn: fold decisions into work.prepared,
transition back to function_execute + Turn->>Turn: wakeFromRecord → function_awaiting_approval executes
that call immediately, removes it from awaiting_approval[] + alt more calls still awaiting + Turn->>Turn: stay in function_awaiting_approval + else awaiting empty and batch incomplete + Turn->>Turn: transition to function_execute + else awaiting empty and batch complete + Turn->>Turn: finalizeBatch → steering_check / stopped + end end ``` diff --git a/harness/docs/workers/approval-gate.md b/harness/docs/workers/approval-gate.md index b4e6c178..188aa3a9 100644 --- a/harness/docs/workers/approval-gate.md +++ b/harness/docs/workers/approval-gate.md @@ -23,7 +23,7 @@ and persist the decision where the orchestrator can read it. 2. The console calls `approval::resolve` with `{ session_id, function_call_id, decision, reason? }`. 3. `approval::resolve` writes `approvals//` via `state::set`. 4. The `turn::on_approval` state trigger (scope `approvals`) fires and calls `wakeFromRecord`. -5. `function_awaiting_approval` reads all decisions, folds them into the prepared snapshot, and returns to `function_execute`. +5. `function_awaiting_approval` executes each resolved call immediately, removes it from `awaiting_approval[]`, and stays parked until none remain; then finalizes the batch or returns to `function_execute`. ## Registered functions diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index dc2e231c..bf0dc8b7 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -37,8 +37,8 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. `provisioning`, and wake the FSM via `saveRecord`. - `turn::provisioning` — FSM step: build system prompt + single `agent_trigger` schema, write enriched `run_request`, advance to `assistant_streaming`. 
- `turn::assistant_streaming` — FSM step: stream the turn over a provider channel; on completion emit `message_complete`, persist the assistant message (dup-guarded), route to `function_execute` / `steering_check` / `stopped` (via `finishSession`). -- `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call, checkpoint per-call via `writeRecord`, park to `function_awaiting_approval` on a `pending` gate reply, finalize results into messages + emit `turn_end`, route to `steering_check` / `stopped` (via `finishSession`). -- `turn::function_awaiting_approval` — FSM step: read decisions for `awaiting_approval[]`; fold them into `rec.work.prepared` (`allow` → `pre_approved`, `deny`/`aborted` → `synthetic`); clear `awaiting_approval`, advance to `function_execute`. +- `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call (skip already-executed and awaiting-approval ids), checkpoint per-call via `writeRecord`; if `pending` → append to `awaiting_approval` and continue the batch in parallel; park to `function_awaiting_approval` when any call awaits approval; finalize results into messages + emit `turn_end` when the batch completes → `steering_check` / `stopped` (via `finishSession`). +- `turn::function_awaiting_approval` — FSM step: on each wake, read decisions for individual `awaiting_approval[]` entries; execute each resolved call immediately (`allow` → dispatch pre-approved; `deny`/`aborted` → synthetic denial); remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. - `turn::steering_check` — FSM step: drain `steering`/`followup` inboxes, enforce `max_turns` cap (emits synthetic `max_turns` message + `turn_end` → `stopped` via `finishSession`), route to `assistant_streaming` / `stopped`. 
- `turn::get_state` — One-shot reader returning a lean `TurnStateView` (from `schemas.ts:toView`) for a session. UI clients call this on reload to recover in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. Returns `null` for unknown sessions. @@ -58,8 +58,8 @@ The 7 states from [state.ts](harness/src/turn-orchestrator/state.ts): |---|---|---| | `provisioning` | [provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | Fetch skills index + default-skill bodies, build system prompt, write enriched `run_request` (with `function_schemas: [agentTriggerTool()]`), → `assistant_streaming`. | | `assistant_streaming` | [assistant-streaming/process.ts](harness/src/turn-orchestrator/assistant-streaming/process.ts) | Increment `turn_count`; create channel; trigger provider stream; relay `message_update` deltas; on completion call `finalizeAssistantTurn` which emits `message_complete`, persists the assistant message (dup-guarded), then routes → `function_execute` (has calls) / `steering_check` (no calls) / `stopped` via `finishSession` (error/aborted). | -| `function_execute` | [function-execute/process.ts](harness/src/turn-orchestrator/function-execute/process.ts) | Build batch from `rec.last_assistant` (or reuse existing `rec.work`); for each call: emit `function_execution_start`, skip if already executed, dispatch via `dispatchWithHook`; if `pending` → append to `awaiting_approval`, → `function_awaiting_approval`; otherwise commit result (silent `writeRecord` checkpoint) + emit `function_execution_end`; after batch: fold results into messages + emit `turn_end` → `steering_check` / `stopped` via `finishSession`. 
| -| `function_awaiting_approval` | [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | Read decision for each `awaiting_approval[]` entry; if any is still missing → return (park); when all present, fold into `rec.work.prepared` (`allow` → `pre_approved`; `deny`/`aborted` → `synthetic` with denial result); clear `awaiting_approval` → `function_execute`. | +| `function_execute` | [function-execute/process.ts](harness/src/turn-orchestrator/function-execute/process.ts) | Build batch from `rec.last_assistant` (or reuse existing `rec.work`); for each call: emit `function_execution_start`, skip if already executed or awaiting approval, dispatch via `dispatchWithHook`; if `pending` → append to `awaiting_approval` and continue other calls; park to `function_awaiting_approval` when any call awaits; otherwise commit result (silent `writeRecord` checkpoint) + emit `function_execution_end`; after batch: fold results into messages + emit `turn_end` → `steering_check` / `stopped` via `finishSession`. | +| `function_awaiting_approval` | [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | On each wake: for each `awaiting_approval[]` entry with a decision, execute immediately (`allow` → pre-approved dispatch; `deny`/`aborted` → synthetic denial); remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. | | `steering_check` | [steering-check/process.ts](harness/src/turn-orchestrator/steering-check/process.ts) | Priority route: steering msg → `assistant_streaming` (unless `max_turns` reached); followup msg → `assistant_streaming` (unless `max_turns` reached); function results present → `assistant_streaming` (unless `max_turns` reached); else emit `turn_end` once → `stopped` via `finishSession`. `max_turns` path emits a synthetic `message_complete` + `turn_end`. 
| | `stopped` | (no handler) | Terminal. Idempotent. Session teardown (`agent_end`) happens inline via `TurnStatePorts.finishSession` before entering this state. | | `failed` | (set by `runTransition` on unexpected throw) | Terminal. Carries `error: {kind, message}` on the record. Emits `message_complete{stop_reason:'error'}` + `agent_end` so the UI sees the reason. A handler may throw `TransientError` to use the queue's retry/DLQ instead. | @@ -70,7 +70,9 @@ enqueue a handler for these. `dispatchWithHook` returns `{ kind: 'result', result }` or `{ kind: 'pending' }`. Policy denies are returned as `{ kind: 'result' }` with a denied `FunctionResult`. -`pending` triggers the `function_awaiting_approval` park. +`pending` triggers the `function_awaiting_approval` park. Multiple calls may +await approval concurrently; each is executed individually as its decision +arrives. ## State scopes diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts b/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts index fd8de255..350d3877 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/ports.ts @@ -5,16 +5,8 @@ import { ApprovalDecisionSchema, STATE_SCOPE } from '../../approval-gate/schemas.js'; import type { ISdk } from '../../runtime/iii.js'; import type { z } from 'zod'; -import type { PreparedCall } from '../function-execute/types.js'; - export type ApprovalDecision = z.infer; -/** Explicit control flow — replaces void + early return. */ -export type AwaitingApprovalOutcome = - | { kind: 'resume_empty' } - | { kind: 'parked' } - | { kind: 'resume'; prepared: PreparedCall[] }; - /** Decode stored approval decision from `state::get` (scope `approvals`). 
*/ export function parseApprovalDecision(value: unknown): ApprovalDecision | null { const parsed = ApprovalDecisionSchema.safeParse(value); diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts index a2e301f3..fe17054f 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts @@ -1,18 +1,20 @@ /** - * Read approval decisions, compute resume or park outcome, and register the FSM step. + * Read approval decisions, execute resolved calls individually, and register the FSM step. */ import type { ISdk } from '../../runtime/iii.js'; import { text } from '../../types/content.js'; import type { FunctionResult } from '../../types/function.js'; +import { createPorts } from '../function-execute/ports.js'; +import { finalizeBatch, runOneCall } from '../function-execute/run.js'; import type { PreparedCall } from '../function-execute/types.js'; +import { isBatchComplete } from '../function-execute/types.js'; import { runTransition } from '../run-transition.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; import { transitionTo, type AwaitingApprovalEntry, type TurnStateRecord } from '../state.js'; import { createAwaitingApprovalPorts, type ApprovalDecision, - type AwaitingApprovalOutcome, type AwaitingApprovalPorts, } from './ports.js'; @@ -48,78 +50,83 @@ export function applyDecisionToPrepared( }; } -export function foldDecisionsIntoPrepared( +function findPreparedCall( prepared: readonly PreparedCall[], + function_call_id: string, +): PreparedCall | undefined { + return prepared.find((entry) => entry.call.id === function_call_id); +} + +function withoutAwaitingEntry( awaiting: AwaitingApprovalEntry[], - decisions: ApprovalDecision[], -): PreparedCall[] { - const next = [...prepared]; - for (let i = 0; i < awaiting.length; i++) { - const entry = 
awaiting[i]; - const decision = decisions[i]; - if (!entry || !decision) continue; - const idx = next.findIndex((pe) => pe.call.id === entry.function_call_id); - if (idx < 0) continue; - const current = next[idx]; - if (!current) continue; - next[idx] = applyDecisionToPrepared(current, decision); - } - return next; + function_call_id: string, +): AwaitingApprovalEntry[] { + return awaiting.filter((entry) => entry.function_call_id !== function_call_id); } -export async function processAwaitingApproval( - ports: AwaitingApprovalPorts, +export async function processResolvedApprovals( + readPorts: AwaitingApprovalPorts, + executePorts: ReturnType, rec: TurnStateRecord, -): Promise { - const awaiting = rec.awaiting_approval ?? []; - if (awaiting.length === 0) { - return { kind: 'resume_empty' }; - } +): Promise { + if (!rec.work) return; - const decisions = await Promise.all( - awaiting.map((entry) => ports.readDecision(rec.session_id, entry.function_call_id)), - ); + let awaiting = [...(rec.awaiting_approval ?? [])]; + const executed = { ...rec.work.executed }; - if (decisions.some((decision) => decision === null)) { - return { kind: 'parked' }; - } + for (const entry of [...awaiting]) { + const callId = entry.function_call_id; - const prepared = foldDecisionsIntoPrepared( - rec.work?.prepared ?? 
[], - awaiting, - decisions as NonNullable<(typeof decisions)[number]>[], - ); + if (executed[callId]) { + awaiting = withoutAwaitingEntry(awaiting, callId); + continue; + } - return { kind: 'resume', prepared }; -} + const decision = await readPorts.readDecision(rec.session_id, callId); + if (!decision) continue; -export function applyAwaitingApprovalOutcome( - rec: TurnStateRecord, - outcome: AwaitingApprovalOutcome, -): void { - if (outcome.kind === 'parked') { - return; - } + const current = findPreparedCall(rec.work.prepared, callId); + if (!current) { + awaiting = withoutAwaitingEntry(awaiting, callId); + continue; + } + + const resolved = applyDecisionToPrepared(current, decision); + await runOneCall(executePorts, rec.session_id, resolved, executed, { skipStart: true }); - if (outcome.kind === 'resume' && rec.work) { - rec.work = { ...rec.work, prepared: outcome.prepared }; + awaiting = withoutAwaitingEntry(awaiting, callId); + rec.work = { prepared: rec.work.prepared, executed }; + await executePorts.checkpoint(rec); } - rec.awaiting_approval = []; - transitionTo(rec, 'function_execute'); + rec.awaiting_approval = awaiting; } -export async function runAwaitingApproval( - ports: AwaitingApprovalPorts, +export async function routeAfterApprovalProcessing( + executePorts: ReturnType, rec: TurnStateRecord, ): Promise { - const outcome = await processAwaitingApproval(ports, rec); - applyAwaitingApprovalOutcome(rec, outcome); + if ((rec.awaiting_approval?.length ?? 
0) > 0) { + return; + } + + if (!rec.work) { + transitionTo(rec, 'function_execute'); + return; + } + + if (isBatchComplete(rec.work)) { + await finalizeBatch(executePorts, rec, rec.work); + } else { + transitionTo(rec, 'function_execute'); + } } export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { - const ports = createAwaitingApprovalPorts(iii); - await runAwaitingApproval(ports, rec); + const executePorts = createPorts(iii); + const readPorts = createAwaitingApprovalPorts(iii); + await processResolvedApprovals(readPorts, executePorts, rec); + await routeAfterApprovalProcessing(executePorts, rec); } export function register(iii: ISdk): void { @@ -131,7 +138,7 @@ export function register(iii: ISdk): void { }, { description: - 'Run one durable FSM transition for session in state function_awaiting_approval: read approval decisions and resume.', + 'Run one durable FSM transition for session in state function_awaiting_approval: execute each call as its approval decision arrives.', }, ); } diff --git a/harness/src/turn-orchestrator/function-execute/process.ts b/harness/src/turn-orchestrator/function-execute/process.ts index d6e2e3ec..dfe5598e 100644 --- a/harness/src/turn-orchestrator/function-execute/process.ts +++ b/harness/src/turn-orchestrator/function-execute/process.ts @@ -5,24 +5,46 @@ import type { ISdk } from '../../runtime/iii.js'; import { runTransition } from '../run-transition.js'; import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { transitionTo, type TurnStateRecord } from '../state.js'; +import { transitionTo, type AwaitingApprovalEntry, type TurnStateRecord } from '../state.js'; +import type { PendingApproval } from './types.js'; +import { isBatchComplete } from './types.js'; import { finalizeBatch, loadOrPlanWork, runBatch } from './run.js'; import { createPorts } from './ports.js'; +function mergeAwaitingApproval( + existing: AwaitingApprovalEntry[] | undefined, + newPending: 
PendingApproval[], +): AwaitingApprovalEntry[] { + const ids = new Set(existing?.map((entry) => entry.function_call_id) ?? []); + const merged = [...(existing ?? [])]; + for (const pending of newPending) { + if (ids.has(pending.function_call_id)) continue; + ids.add(pending.function_call_id); + merged.push(pending); + } + return merged; +} + export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { const ports = createPorts(iii); const work = loadOrPlanWork(rec); const outcome = await runBatch(ports, rec, work); + rec.work = outcome.work; - if (outcome.kind === 'parked') { - rec.work = outcome.work; - rec.awaiting_approval = [...(rec.awaiting_approval ?? []), outcome.pending]; - transitionTo(rec, 'function_awaiting_approval'); - return; + if (outcome.kind === 'incomplete') { + rec.awaiting_approval = mergeAwaitingApproval(rec.awaiting_approval, outcome.newPending); + if ((rec.awaiting_approval?.length ?? 0) > 0) { + transitionTo(rec, 'function_awaiting_approval'); + return; + } } - await finalizeBatch(ports, rec, outcome.work); + if (isBatchComplete(outcome.work)) { + await finalizeBatch(ports, rec, outcome.work); + } else { + transitionTo(rec, 'function_execute'); + } } export function register(iii: ISdk): void { diff --git a/harness/src/turn-orchestrator/function-execute/run.ts b/harness/src/turn-orchestrator/function-execute/run.ts index 481648ab..ea623c2e 100644 --- a/harness/src/turn-orchestrator/function-execute/run.ts +++ b/harness/src/turn-orchestrator/function-execute/run.ts @@ -18,15 +18,19 @@ import { transitionTo, type TurnStateRecord } from '../state.js'; import type { FunctionExecutePorts } from './ports.js'; import { emptyBatchWork, + isBatchComplete, preparedCallId, type BatchOutcome, type ExecutedCall, type FunctionBatchWork, + type PendingApproval, type PreparedCall, type ResolveCallResult, type RunOneCallResult, } from './types.js'; +export { isBatchComplete }; + export class FunctionExecuteInvariantError extends Error { 
constructor(message: string) { super(message); @@ -98,11 +102,17 @@ async function resolvePreparedCall( } } +export type RunOneCallOptions = { + /** Skip `function_execution_start` — used when resuming after approval (start already emitted). */ + skipStart?: boolean; +}; + export async function runOneCall( ports: FunctionExecutePorts, session_id: string, prepared: PreparedCall, executed: Record, + opts?: RunOneCallOptions, ): Promise { const call: FunctionCall = prepared.call; @@ -112,7 +122,9 @@ export async function runOneCall( return { kind: 'skipped' }; } - await ports.emitStart(session_id, call); + if (!opts?.skipStart) { + await ports.emitStart(session_id, call); + } const startedAt = Date.now(); const resolved = await resolvePreparedCall(ports, prepared, session_id); @@ -137,20 +149,25 @@ export async function runBatch( work: FunctionBatchWork, ): Promise { const executed = { ...work.executed }; + const awaitingIds = new Set( + (rec.awaiting_approval ?? []).map((entry) => entry.function_call_id), + ); + const newPending: PendingApproval[] = []; for (const prepared of work.prepared) { + const callId = preparedCallId(prepared); + if (executed[callId]) continue; + if (awaitingIds.has(callId)) continue; + const outcome = await runOneCall(ports, rec.session_id, prepared, executed); if (outcome.kind === 'pending') { - return { - kind: 'parked', - work: { prepared: work.prepared, executed }, - pending: { - function_call_id: outcome.call.id, - function_id: outcome.call.function_id, - args: outcome.call.arguments, - }, - }; + newPending.push({ + function_call_id: outcome.call.id, + function_id: outcome.call.function_id, + args: outcome.call.arguments, + }); + continue; } if (outcome.kind === 'executed') { @@ -159,7 +176,11 @@ export async function runBatch( } } - return { kind: 'completed', work: { prepared: work.prepared, executed } }; + const batchWork = { prepared: work.prepared, executed }; + if (newPending.length > 0 || awaitingIds.size > 0) { + return { kind: 
'incomplete', work: batchWork, newPending }; + } + return { kind: 'completed', work: batchWork }; } function toFunctionResultMessage( diff --git a/harness/src/turn-orchestrator/function-execute/types.ts b/harness/src/turn-orchestrator/function-execute/types.ts index fda48005..e6b67143 100644 --- a/harness/src/turn-orchestrator/function-execute/types.ts +++ b/harness/src/turn-orchestrator/function-execute/types.ts @@ -32,7 +32,7 @@ export type PendingApproval = { /** Batch loop outcome — explicit control flow instead of early return + void. */ export type BatchOutcome = | { kind: 'completed'; work: FunctionBatchWork } - | { kind: 'parked'; work: FunctionBatchWork; pending: PendingApproval }; + | { kind: 'incomplete'; work: FunctionBatchWork; newPending: PendingApproval[] }; export type RunOneCallResult = | { kind: 'skipped' } @@ -52,3 +52,8 @@ export function preparedCallId(prepared: PreparedCall): string { export function emptyBatchWork(prepared: readonly PreparedCall[]): FunctionBatchWork { return { prepared, executed: {} }; } + +/** True when every prepared call has a committed entry in `executed`. 
*/ +export function isBatchComplete(work: FunctionBatchWork): boolean { + return work.prepared.every((prepared) => work.executed[preparedCallId(prepared)] !== undefined); +} diff --git a/harness/tests/integration/approval-resume.e2e.test.ts b/harness/tests/integration/approval-resume.e2e.test.ts deleted file mode 100644 index bcdc0f6c..00000000 --- a/harness/tests/integration/approval-resume.e2e.test.ts +++ /dev/null @@ -1,101 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; -import { - handleApprovalDecisionWrite, - isApprovalDecisionWrite, -} from '../../src/turn-orchestrator/on-approval.js'; -import type { ISdk } from '../../src/runtime/iii.js'; -import { TURN_STATE_SCOPE } from '../../src/turn-orchestrator/state.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -async function flushMicrotasks(): Promise { - await Promise.resolve(); - await Promise.resolve(); -} - -/** - * Fake iii where `state::set` re-emits a state event and feeds it to the - * approval reactive trigger on the `approvals` scope — exercising the - * producer → trigger → wake path. - */ -function fakeIii(): { - iii: ISdk; - wakeTriggers: Array<{ session_id: string; function_id: string }>; - stateStore: Map; -} { - const stateStore = new Map(); - const wakeTriggers: Array<{ session_id: string; function_id: string }> = []; - - const iii = { - trigger: vi.fn( - async ({ - function_id, - payload, - action, - }: { - function_id: string; - payload: unknown; - action?: unknown; - }) => { - if (function_id === 'state::set') { - const p = payload as { scope: string; key: string; value: unknown }; - const fullKey = `${p.scope}/${p.key}`; - const old_value = stateStore.get(fullKey) ?? null; - stateStore.set(fullKey, p.value); - const event = { - event_type: old_value == null ? 
'state:created' : 'state:updated', - scope: p.scope, - key: p.key, - old_value, - new_value: p.value, - message_type: 'state', - }; - if (p.scope === 'approvals' && isApprovalDecisionWrite(event)) { - await handleApprovalDecisionWrite(iii as unknown as ISdk, event); - } - return null; - } - - if (function_id === 'state::get') { - const p = payload as { scope: string; key: string }; - return stateStore.get(`${p.scope}/${p.key}`) ?? null; - } - - if (function_id.startsWith('turn::') && action != null) { - const p = payload as { session_id: string }; - wakeTriggers.push({ session_id: p.session_id, function_id }); - return null; - } - - return null; - }, - ), - }; - - return { iii: iii as unknown as ISdk, wakeTriggers, stateStore }; -} - -describe('approval reactive trigger', () => { - it('approval::resolve persists the decision and the trigger enqueues turn::{state}', async () => { - const { iii, wakeTriggers, stateStore } = fakeIii(); - const rec = newRecord('sess-x'); - rec.state = 'function_awaiting_approval'; - stateStore.set(`${TURN_STATE_SCOPE}/sess-x`, rec); - - const out = await handleResolveRequest(iii, { - session_id: 'sess-x', - function_call_id: 'fc-1', - decision: 'allow', - }); - expect(out).toEqual({ ok: true }); - - await flushMicrotasks(); - - expect(stateStore.get('approvals/sess-x/fc-1')).toEqual({ decision: 'allow', reason: null }); - expect(wakeTriggers).toHaveLength(1); - expect(wakeTriggers[0]).toMatchObject({ - session_id: 'sess-x', - function_id: 'turn::function_awaiting_approval', - }); - }); -}); diff --git a/harness/tests/integration/parallel-approval-harness.ts b/harness/tests/integration/parallel-approval-harness.ts new file mode 100644 index 00000000..da04849a --- /dev/null +++ b/harness/tests/integration/parallel-approval-harness.ts @@ -0,0 +1,204 @@ +/** + * Integration harness for parallel approval flows: real TurnStore + runTransition, + * simulated iii state/streams, and dispatchWithHook routing. 
+ */ + +import { vi } from 'vitest'; +import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; +import { handleAwaitingApproval } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; +import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; +import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; +import { + handleApprovalDecisionWrite, + isApprovalDecisionWrite, +} from '../../src/turn-orchestrator/on-approval.js'; +import { TURN_STATE_SCOPE, newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; +import type { ISdk } from '../../src/runtime/iii.js'; +import type { AgentEvent } from '../../src/types/agent-event.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; + +export type ParallelApprovalHarness = { + iii: ISdk; + stateStore: Map; + emitted: AgentEvent[]; + loadTurnRecord(session_id: string): TurnStateRecord | null; + seedExecute(session_id: string, assistant: AssistantMessage): TurnStateRecord; + runExecute(session_id: string): Promise; + resolveApproval( + session_id: string, + function_call_id: string, + decision: 'allow' | 'deny', + reason?: string | null, + ): Promise; +}; + +function makeAgentTriggerCall( + id: string, + functionId: string, + payload: unknown = {}, +): { type: 'function_call'; id: string; function_id: string; arguments: unknown } { + return { + type: 'function_call', + id, + function_id: 'agent_trigger', + arguments: { function: functionId, payload }, + }; +} + +export function makeAssistantWithCalls( + calls: Array<{ id: string; functionId: string; payload?: unknown }>, +): AssistantMessage { + return { + role: 'assistant', + content: calls.map((c) => makeAgentTriggerCall(c.id, c.functionId, c.payload ?? 
{})), + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; +} + +async function flushMicrotasks(): Promise { + await Promise.resolve(); + await Promise.resolve(); +} + +async function runTurnStep(iii: ISdk, function_id: string, session_id: string): Promise { + const payload = { session_id }; + if (function_id === 'turn::function_execute') { + await runTransition(iii, 'function_execute', handleExecute, payload); + return; + } + if (function_id === 'turn::function_awaiting_approval') { + await runTransition(iii, 'function_awaiting_approval', handleAwaitingApproval, payload); + } +} + +export function createParallelApprovalHarness(): ParallelApprovalHarness { + const stateStore = new Map(); + const emitted: AgentEvent[] = []; + let eventSeq = 0; + + const iii = { + trigger: vi.fn( + async ({ + function_id, + payload, + action, + }: { + function_id: string; + payload: unknown; + action?: unknown; + }) => { + if (function_id === 'state::get') { + const p = payload as { scope: string; key: string }; + const v = stateStore.get(`${p.scope}/${p.key}`); + return v === undefined ? null : structuredClone(v); + } + + if (function_id === 'state::set') { + const p = payload as { scope: string; key: string; value: unknown }; + const storeKey = `${p.scope}/${p.key}`; + const old_value = stateStore.has(storeKey) + ? structuredClone(stateStore.get(storeKey)) + : null; + const new_value = structuredClone(p.value); + stateStore.set(storeKey, new_value); + const event = { + event_type: old_value == null ? 
'state:created' : 'state:updated', + scope: p.scope, + key: p.key, + old_value, + new_value, + message_type: 'state', + }; + if (p.scope === 'approvals' && isApprovalDecisionWrite(event)) { + await handleApprovalDecisionWrite(iii as unknown as ISdk, event); + } + return { old_value, new_value }; + } + + if (function_id === 'state::update') { + eventSeq += 1; + return { old_value: eventSeq - 1 }; + } + + if (function_id === 'stream::set') { + const p = payload as { data: AgentEvent }; + emitted.push(p.data); + return null; + } + + if (function_id === 'shell::run') { + return { + content: [{ type: 'text', text: 'ok' }], + details: {}, + terminate: false, + }; + } + + if (function_id.startsWith('turn::') && action != null) { + const p = payload as { session_id: string }; + await runTurnStep(iii as unknown as ISdk, function_id, p.session_id); + return null; + } + + return null; + }, + ), + } as unknown as ISdk; + + return { + iii, + stateStore, + emitted, + + loadTurnRecord(session_id: string): TurnStateRecord | null { + const raw = stateStore.get(`${TURN_STATE_SCOPE}/${session_id}`); + return raw ? 
(structuredClone(raw) as TurnStateRecord) : null; + }, + + seedExecute(session_id: string, assistant: AssistantMessage): TurnStateRecord { + const rec = newRecord(session_id); + rec.state = 'function_execute'; + rec.last_assistant = assistant; + stateStore.set(`${TURN_STATE_SCOPE}/${session_id}`, structuredClone(rec)); + return rec; + }, + + async runExecute(session_id: string): Promise { + await runTurnStep(iii, 'turn::function_execute', session_id); + }, + + async resolveApproval( + session_id: string, + function_call_id: string, + decision: 'allow' | 'deny', + reason: null | string = null, + ): Promise { + const out = await handleResolveRequest(iii, { + session_id, + function_call_id, + decision, + reason, + }); + if (!out.ok) throw new Error(`approval::resolve failed: ${out.error}`); + await flushMicrotasks(); + }, + }; +} + +export function executionEvents( + emitted: AgentEvent[], + type: 'function_execution_start' | 'function_execution_end', + function_call_id?: string, +): AgentEvent[] { + return emitted.filter((event) => { + if (event.type !== type) return false; + if (!function_call_id) return true; + return 'function_call_id' in event && event.function_call_id === function_call_id; + }); +} diff --git a/harness/tests/integration/parallel-approval.e2e.test.ts b/harness/tests/integration/parallel-approval.e2e.test.ts new file mode 100644 index 00000000..33da3ba6 --- /dev/null +++ b/harness/tests/integration/parallel-approval.e2e.test.ts @@ -0,0 +1,176 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.js'; +import { + createParallelApprovalHarness, + executionEvents, + makeAssistantWithCalls, +} from './parallel-approval-harness.js'; + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('parallel approval e2e', () => { + it('dispatches later calls while earlier ones park without blocking the batch', async () => { + const h = 
createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook') + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ + kind: 'result', + result: { + content: [{ type: 'text' as const, text: 'fc-2-ok' }], + details: {}, + terminate: false, + }, + }) + .mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-parallel', + makeAssistantWithCalls([ + { id: 'fc-1', functionId: 'shell::run' }, + { id: 'fc-2', functionId: 'shell::run' }, + { id: 'fc-3', functionId: 'shell::run' }, + ]), + ); + + await h.runExecute('sess-parallel'); + const rec = h.loadTurnRecord('sess-parallel'); + + expect(rec?.state).toBe('function_awaiting_approval'); + expect(rec?.awaiting_approval?.map((e) => e.function_call_id).sort()).toEqual(['fc-1', 'fc-3']); + expect(rec?.work?.executed['fc-2']?.result.content[0]).toMatchObject({ text: 'fc-2-ok' }); + expect(rec?.work?.executed['fc-1']).toBeUndefined(); + expect(rec?.work?.executed['fc-3']).toBeUndefined(); + }); + + it('executes one approved call immediately while a sibling stays pending', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook') + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ + kind: 'result', + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {}, terminate: false }, + }) + .mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-partial', + makeAssistantWithCalls([ + { id: 'fc-1', functionId: 'shell::run' }, + { id: 'fc-2', functionId: 'shell::run' }, + { id: 'fc-3', functionId: 'shell::run' }, + ]), + ); + await h.runExecute('sess-partial'); + + const fc1StartsBefore = executionEvents(h.emitted, 'function_execution_start', 'fc-1'); + expect(fc1StartsBefore).toHaveLength(1); + await h.resolveApproval('sess-partial', 'fc-1', 'allow'); + + const rec = h.loadTurnRecord('sess-partial'); + expect(rec?.state).toBe('function_awaiting_approval'); + 
expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-3']); + expect(rec?.work?.executed['fc-1']).toBeDefined(); + expect(rec?.work?.executed['fc-3']).toBeUndefined(); + + expect(executionEvents(h.emitted, 'function_execution_start', 'fc-1')).toHaveLength(1); + expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(1); + }); + + it('resolves approvals out of order without waiting for batch order', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook') + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-order', + makeAssistantWithCalls([ + { id: 'fc-1', functionId: 'shell::run' }, + { id: 'fc-2', functionId: 'shell::run' }, + ]), + ); + await h.runExecute('sess-order'); + + await h.resolveApproval('sess-order', 'fc-2', 'allow'); + let rec = h.loadTurnRecord('sess-order'); + expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-1']); + expect(rec?.work?.executed['fc-2']).toBeDefined(); + expect(rec?.state).toBe('function_awaiting_approval'); + + await h.resolveApproval('sess-order', 'fc-1', 'allow'); + rec = h.loadTurnRecord('sess-order'); + expect(rec?.awaiting_approval).toEqual([]); + expect(rec?.state).toBe('steering_check'); + expect(rec?.work).toBeUndefined(); + }); + + it('denies one pending call without affecting an unresolved sibling', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook') + .mockResolvedValueOnce({ kind: 'pending' }) + .mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-deny', + makeAssistantWithCalls([ + { id: 'fc-1', functionId: 'shell::run' }, + { id: 'fc-2', functionId: 'shell::run' }, + ]), + ); + await h.runExecute('sess-deny'); + + await h.resolveApproval('sess-deny', 'fc-1', 'deny', 'operator rejected'); + + const rec = h.loadTurnRecord('sess-deny'); + 
expect(rec?.state).toBe('function_awaiting_approval'); + expect(rec?.awaiting_approval?.map((e) => e.function_call_id)).toEqual(['fc-2']); + expect(rec?.work?.executed['fc-1']?.is_error).toBe(true); + expect(rec?.work?.executed['fc-1']?.result.details).toMatchObject({ + approval_denied: true, + decision: 'deny', + reason: 'operator rejected', + }); + expect(rec?.work?.executed['fc-2']).toBeUndefined(); + }); + + it('is idempotent when the same decision wake is delivered twice', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute( + 'sess-dup', + makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }]), + ); + await h.runExecute('sess-dup'); + + await h.resolveApproval('sess-dup', 'fc-1', 'allow'); + const endsAfterFirst = executionEvents(h.emitted, 'function_execution_end', 'fc-1').length; + + await h.resolveApproval('sess-dup', 'fc-1', 'allow'); + const rec = h.loadTurnRecord('sess-dup'); + + expect(rec?.awaiting_approval).toEqual([]); + expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(endsAfterFirst); + }); + + it('persists the decision and wakes function_awaiting_approval via the reactive trigger', async () => { + const h = createParallelApprovalHarness(); + vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); + + h.seedExecute('sess-wake', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); + await h.runExecute('sess-wake'); + + expect(h.loadTurnRecord('sess-wake')?.state).toBe('function_awaiting_approval'); + + await h.resolveApproval('sess-wake', 'fc-1', 'allow'); + + expect(h.stateStore.get('approvals/sess-wake/fc-1')).toEqual({ + decision: 'allow', + reason: null, + }); + expect(h.loadTurnRecord('sess-wake')?.state).toBe('steering_check'); + expect(h.loadTurnRecord('sess-wake')?.work).toBeUndefined(); + }); +}); diff --git 
a/harness/tests/turn-orchestrator/awaiting-approval.test.ts b/harness/tests/turn-orchestrator/awaiting-approval.test.ts deleted file mode 100644 index 7aeee31c..00000000 --- a/harness/tests/turn-orchestrator/awaiting-approval.test.ts +++ /dev/null @@ -1,160 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; -import type { PreparedCall, TurnStateRecord, TurnWork } from '../../src/turn-orchestrator/state.js'; -import { handleAwaitingApproval } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; - -function fakeIii(stateGetImpl: (scope: string, key: string) => unknown): ISdk { - return { - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - if (function_id === 'state::get') { - const p = payload as { scope: string; key: string }; - return stateGetImpl(p.scope, p.key); - } - if (function_id === 'state::set') return null; - return null; - }), - } as unknown as ISdk; -} - -function recordWith( - awaiting: { function_call_id: string; function_id: string; args: unknown }[], - work?: TurnWork, -): TurnStateRecord { - return { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - max_turns: undefined, - last_assistant: null, - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - awaiting_approval: awaiting, - work, - }; -} - -function workWith(prepared: PreparedCall[]): TurnWork { - return { prepared, executed: {} }; -} - -describe('handleAwaitingApproval', () => { - it('transitions straight to function_execute when awaiting is empty', async () => { - const iii = fakeIii((_scope, _key) => null); - const rec = recordWith([]); - await handleAwaitingApproval(iii, rec); - expect(rec.state).toBe('function_execute'); - }); - - it('no-ops when any decision is missing', async () => { - const iii = fakeIii((_scope, _key) => null); - const rec = recordWith( - [{ function_call_id: 'fc-1', 
function_id: 'shell::run', args: {} }], - workWith([ - { route: 'dispatch', call: { id: 'fc-1', function_id: 'shell::run', arguments: {} } }, - ]), - ); - await handleAwaitingApproval(iii, rec); - expect(rec.state).toBe('function_awaiting_approval'); - expect(rec.awaiting_approval).toHaveLength(1); - expect(rec.work?.prepared[0]?.route).toBe('dispatch'); - }); - - it('folds pre_approved route into work.prepared on allow and transitions to function_execute', async () => { - const iii = fakeIii((_scope, key) => { - if (key === 's1/fc-1') return { decision: 'allow', reason: null }; - return null; - }); - const rec = recordWith( - [{ function_call_id: 'fc-1', function_id: 'shell::run', args: { command: 'ls' } }], - workWith([ - { - route: 'dispatch', - call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, - }, - ]), - ); - - await handleAwaitingApproval(iii, rec); - - expect(rec.state).toBe('function_execute'); - expect(rec.awaiting_approval).toEqual([]); - expect(rec.work?.prepared[0]?.route).toBe('pre_approved'); - }); - - it('sets synthetic denial result in work.prepared on deny and transitions to function_execute', async () => { - const iii = fakeIii((_scope, key) => { - if (key === 's1/fc-1') return { decision: 'deny', reason: 'policy' }; - return null; - }); - const rec = recordWith( - [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], - workWith([ - { route: 'dispatch', call: { id: 'fc-1', function_id: 'shell::run', arguments: {} } }, - ]), - ); - - await handleAwaitingApproval(iii, rec); - - expect(rec.state).toBe('function_execute'); - const entry = rec.work?.prepared[0]; - expect(entry?.route).toBe('synthetic'); - if (entry?.route === 'synthetic') { - expect(entry.result.details).toMatchObject({ - approval_denied: true, - decision: 'deny', - reason: 'policy', - }); - } - }); - - it('handles aborted decision like deny (folded into work.prepared)', async () => { - const iii = fakeIii((_scope, key) => { - if (key === 
's1/fc-1') return { decision: 'aborted', reason: 'session_aborted' }; - return null; - }); - const rec = recordWith( - [{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], - workWith([ - { route: 'dispatch', call: { id: 'fc-1', function_id: 'shell::run', arguments: {} } }, - ]), - ); - - await handleAwaitingApproval(iii, rec); - - expect(rec.state).toBe('function_execute'); - const entry = rec.work?.prepared[0]; - expect(entry?.route).toBe('synthetic'); - if (entry?.route === 'synthetic') { - expect(entry.result.details).toMatchObject({ decision: 'aborted' }); - } - }); - - it('folds independent decisions across a multi-call batch', async () => { - const iii = fakeIii((_scope, key) => { - if (key === 's1/fc-1') return { decision: 'allow', reason: null }; - if (key === 's1/fc-2') return { decision: 'deny', reason: 'policy' }; - return null; - }); - const rec = recordWith( - [ - { function_call_id: 'fc-1', function_id: 'shell::run', args: {} }, - { function_call_id: 'fc-2', function_id: 'shell::fs::write', args: {} }, - ], - workWith([ - { route: 'dispatch', call: { id: 'fc-1', function_id: 'shell::run', arguments: {} } }, - { - route: 'dispatch', - call: { id: 'fc-2', function_id: 'shell::fs::write', arguments: {} }, - }, - ]), - ); - - await handleAwaitingApproval(iii, rec); - - expect(rec.state).toBe('function_execute'); - expect(rec.work?.prepared[0]?.route).toBe('pre_approved'); - expect(rec.work?.prepared[1]?.route).toBe('synthetic'); - }); -}); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts deleted file mode 100644 index 1f487608..00000000 --- a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts +++ /dev/null @@ -1,175 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { - applyAwaitingApprovalOutcome, - applyDecisionToPrepared, - foldDecisionsIntoPrepared, - processAwaitingApproval, -} from 
'../../src/turn-orchestrator/function-awaiting-approval/process.js'; -import type { AwaitingApprovalPorts } from '../../src/turn-orchestrator/function-awaiting-approval/ports.js'; -import type { PreparedCall, TurnStateRecord, TurnWork } from '../../src/turn-orchestrator/state.js'; - -const dispatchCall = { - route: 'dispatch' as const, - call: { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }, -}; - -function recordWith( - awaiting: { function_call_id: string; function_id: string; args: unknown }[], - work?: TurnWork, -): TurnStateRecord { - return { - session_id: 's1', - state: 'function_awaiting_approval', - turn_count: 0, - max_turns: undefined, - last_assistant: null, - function_results: [], - turn_end_emitted: false, - started_at_ms: 0, - updated_at_ms: 0, - awaiting_approval: awaiting, - work, - }; -} - -function stubPorts( - decisions: Record, -): AwaitingApprovalPorts { - return { - readDecision: vi.fn(async (_session_id, function_call_id) => { - const decision = decisions[function_call_id]; - return decision ?? 
null; - }), - }; -} - -describe('applyDecisionToPrepared', () => { - it('maps allow to pre_approved', () => { - const next = applyDecisionToPrepared(dispatchCall, { decision: 'allow', reason: null }); - expect(next).toEqual({ - route: 'pre_approved', - call: dispatchCall.call, - }); - }); - - it('maps deny to synthetic with denial result', () => { - const next = applyDecisionToPrepared(dispatchCall, { decision: 'deny', reason: 'policy' }); - expect(next.route).toBe('synthetic'); - if (next.route === 'synthetic') { - expect(next.result.details).toMatchObject({ - approval_denied: true, - decision: 'deny', - reason: 'policy', - }); - } - }); - - it('maps aborted to synthetic with aborted details', () => { - const next = applyDecisionToPrepared(dispatchCall, { - decision: 'aborted', - reason: 'session_aborted', - }); - expect(next.route).toBe('synthetic'); - if (next.route === 'synthetic') { - expect(next.result.details).toMatchObject({ decision: 'aborted' }); - } - }); -}); - -describe('foldDecisionsIntoPrepared', () => { - it('folds each awaiting entry by function_call_id', () => { - const prepared: PreparedCall[] = [ - dispatchCall, - { - route: 'dispatch', - call: { id: 'fc-2', function_id: 'shell::fs::write', arguments: {} }, - }, - ]; - const awaiting = [ - { function_call_id: 'fc-1', function_id: 'shell::run', args: {} }, - { function_call_id: 'fc-2', function_id: 'shell::fs::write', args: {} }, - ]; - const decisions = [ - { decision: 'allow' as const, reason: null }, - { decision: 'deny' as const, reason: 'policy' }, - ]; - - const folded = foldDecisionsIntoPrepared(prepared, awaiting, decisions); - - expect(folded[0]?.route).toBe('pre_approved'); - expect(folded[1]?.route).toBe('synthetic'); - }); - - it('skips awaiting entries not found in prepared', () => { - const prepared: PreparedCall[] = [dispatchCall]; - const awaiting = [{ function_call_id: 'fc-missing', function_id: 'x', args: {} }]; - const folded = foldDecisionsIntoPrepared(prepared, awaiting, [ - 
{ decision: 'allow', reason: null }, - ]); - expect(folded).toEqual(prepared); - }); -}); - -describe('processAwaitingApproval', () => { - it('returns resume_empty when awaiting is empty', async () => { - const rec = recordWith([]); - const outcome = await processAwaitingApproval(stubPorts({}), rec); - expect(outcome).toEqual({ kind: 'resume_empty' }); - }); - - it('returns parked when any decision is missing', async () => { - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], { - prepared: [dispatchCall], - executed: {}, - }); - const outcome = await processAwaitingApproval(stubPorts({}), rec); - expect(outcome).toEqual({ kind: 'parked' }); - }); - - it('returns resume with folded prepared when all decisions present', async () => { - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], { - prepared: [dispatchCall], - executed: {}, - }); - const outcome = await processAwaitingApproval( - stubPorts({ 'fc-1': { decision: 'allow', reason: null } }), - rec, - ); - expect(outcome.kind).toBe('resume'); - if (outcome.kind === 'resume') { - expect(outcome.prepared[0]?.route).toBe('pre_approved'); - } - }); -}); - -describe('applyAwaitingApprovalOutcome', () => { - it('no-ops when parked', () => { - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], { - prepared: [dispatchCall], - executed: {}, - }); - applyAwaitingApprovalOutcome(rec, { kind: 'parked' }); - expect(rec.state).toBe('function_awaiting_approval'); - expect(rec.awaiting_approval).toHaveLength(1); - expect(rec.work?.prepared[0]?.route).toBe('dispatch'); - }); - - it('clears awaiting and transitions on resume_empty', () => { - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }]); - applyAwaitingApprovalOutcome(rec, { kind: 'resume_empty' }); - expect(rec.state).toBe('function_execute'); - expect(rec.awaiting_approval).toEqual([]); - }); - - it('updates 
prepared and transitions on resume', () => { - const rec = recordWith([{ function_call_id: 'fc-1', function_id: 'shell::run', args: {} }], { - prepared: [dispatchCall], - executed: {}, - }); - const prepared: PreparedCall[] = [{ route: 'pre_approved', call: dispatchCall.call }]; - applyAwaitingApprovalOutcome(rec, { kind: 'resume', prepared }); - expect(rec.state).toBe('function_execute'); - expect(rec.awaiting_approval).toEqual([]); - expect(rec.work?.prepared).toEqual(prepared); - }); -}); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 07f0dfb2..7ff99fa6 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -179,23 +179,7 @@ describe('handleExecute new flow', () => { const fc1Ends = emitted.filter( (e) => e.type === 'function_execution_end' && e.function_call_id === 'fc-1', ); - expect(fc1Ends).toHaveLength(1); - }); - - it('pushes the call onto awaiting_approval and transitions to function_awaiting_approval on pending', async () => { - vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); - - const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; - const rec: TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; - rec.last_assistant = makeAssistant([agentTriggerCall('fc-1', 'shell::run', { command: 'ls' })]); - - await handleExecute(iii, rec); - - expect(rec.state).toBe('function_awaiting_approval'); - expect(rec.awaiting_approval).toHaveLength(1); - expect(rec.awaiting_approval?.[0]?.function_call_id).toBe('fc-1'); - expect(rec.work?.prepared).toHaveLength(1); + expect(fc1Ends).toHaveLength(0); }); it('skips consultBefore on pre_approved entries and uses triggerFunctionCall', async () => { From 23f956ad435f3e6d26608d2eb7e9a345e8047f32 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Tue, 26 May 2026 15:18:14 -0300 Subject: [PATCH 36/41] 
refactor(turn-orchestrator): streamline approval handling and state transitions - Remove the wakeStep and wakeFromRecord methods from TurnStore, replacing them with inline enqueuing of state transitions directly from saveRecord. - Update the on-approval trigger to enqueue function_awaiting_approval, enhancing the responsiveness of the approval process. - Refactor the function-awaiting-approval logic to ensure that parked calls are handled more efficiently, allowing for better management of concurrent approvals. - Clean up related tests and remove outdated code to improve maintainability and clarity. These changes aim to enhance the efficiency and clarity of the turn orchestrator's approval handling, ensuring a more robust and responsive system. --- harness/docs/architecture.md | 64 +++++++++++--- harness/docs/workers/approval-gate.md | 2 +- harness/docs/workers/turn-orchestrator.md | 16 ++-- .../function-awaiting-approval/process.ts | 14 +-- .../function-execute/process.ts | 6 +- harness/src/turn-orchestrator/on-approval.ts | 35 +++----- harness/src/turn-orchestrator/register.ts | 3 +- .../turn-orchestrator/state-runtime/store.ts | 22 +---- .../_helpers/mockTurnStore.ts | 3 - harness/tests/turn-orchestrator/store.test.ts | 85 ------------------- 10 files changed, 85 insertions(+), 165 deletions(-) diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 02d2f505..247e05eb 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -84,7 +84,6 @@ flowchart LR state -- "agent::events stream" --> harness state -- "agent::events stream" --> compact - state -- "state trigger (scope=approvals)" --> turnOrch state -- "state trigger (scope=turn_state)" --> harness harness -- "ui::session::event::" --> client compact -- "session-tree::compact" --> session @@ -95,11 +94,11 @@ flowchart LR [src/turn-orchestrator/state.ts](harness/src/turn-orchestrator/state.ts) defines a 7-state durable FSM. 
Each state is a registered `turn::{state}` function executed via `runTransition` and enqueued onto the `turn-step` FIFO -queue by `TurnStore.wakeStep` ([store.ts](harness/src/turn-orchestrator/state-runtime/store.ts)). -`saveRecord` calls `shouldWakeStep` then `wakeStep` when the persisted state +queue from `saveRecord` ([store.ts](harness/src/turn-orchestrator/state-runtime/store.ts)). +`saveRecord` calls `shouldWakeStep` then enqueues `turn::{newState}` when the persisted state transitions to a stepable state. Paused sessions are also woken by the approval-decision state trigger (`turn::on_approval` on scope `approvals`) -via `TurnStore.wakeFromRecord`. +by enqueuing `turn::function_awaiting_approval`. ```mermaid stateDiagram-v2 @@ -127,9 +126,51 @@ unexpectedly (unless it opts into queue retry via `TransientError`). The orchestrator consults `policy::check_permissions` directly inside `consultBefore` — `allow`, `deny`, or `pending`. There is no hook fanout on the before path. The orchestrator parks the turn in `function_awaiting_approval` -and waits until each parked call receives `approval::resolve` (decisions may -arrive independently). Each write to scope `approvals` fires `turn::on_approval` -and calls `wakeFromRecord` to re-enqueue the current state handler. +when any call in the batch needs approval, then resumes as each parked call +receives `approval::resolve` (decisions may arrive independently and out of +batch order). Each write to scope `approvals` fires `turn::on_approval` and +enqueues `turn::function_awaiting_approval`. + +### Parallel batch during `function_execute` + +When the assistant message contains multiple tool calls, `runBatch` does not +stop at the first `pending`. 
For each call in assistant tool order: + +- already in `work.executed` or listed in `awaiting_approval[]` → skip +- policy `allow` (or immediate policy `deny`) → dispatch, checkpoint, emit + `function_execution_end` +- policy `needs_approval` → emit `function_execution_start`, append the call + to `awaiting_approval[]`, **continue** remaining siblings + +After the loop: if any call is still awaiting approval, transition to +`function_awaiting_approval`; otherwise finalize the batch or re-enter +`function_execute` when the batch is incomplete but nothing is parked. + +Example batch A, B, C: A → pending, B → allow (executes immediately), C → +pending → `awaiting_approval = [A, C]`, B recorded in `work.executed`, turn +parked until A and C are resolved. + +### Durability and reload + +| Surface | Location | Role | +|---|---|---| +| Open approvals | `turn_state/<session_id>` → `awaiting_approval[]` | Which calls are parked and their args | +| Decisions | `approvals/<session_id>/<function_call_id>` | Written by `approval::resolve`; read on each wake | +| UI mirror | `turn_state_changed` on `agent::events` | Console shows pending modals from `TurnStateView.awaiting_approval` | +| Reload | `turn::get_state` | One-shot lean view after refresh (no direct iii state reads) | + +A page refresh does not lose pending approvals as long as iii state persists. +Operators can still approve from the console after reload; each `approval::resolve` +write fires `turn::on_approval` while the worker is running. + +### Resume semantics + +- Decisions may arrive in any order (e.g. resolve call C before call A). +- On `allow`, the parked call executes with `skipStart: true` — the + `function_execution_start` event was already emitted when the call first + returned `pending`. +- A duplicate `approval::resolve` for the same call re-wakes the handler; + resolved entries are pruned idempotently so execution is not doubled.
```mermaid sequenceDiagram @@ -139,18 +180,20 @@ sequenceDiagram participant Gate as approval-gate participant User + Note over Turn: function_execute: runBatch walks all tool calls.
pending calls append to awaiting_approval[];
allowed siblings execute in the same pass. + Turn->>Harness: policy::check_permissions(function_id, args) [5s timeout] alt rule.action == allow Harness-->>Turn: allow → dispatch the call else rule.action == deny Harness-->>Turn: deny + DenialEnvelope → error FunctionResult else no rule (needs_approval) - Harness-->>Turn: needs_approval → park in function_awaiting_approval - Note over Turn,Bus: saveRecord does not wake stepable handlers for
function_awaiting_approval. awaiting_approval pins open calls. + Harness-->>Turn: needs_approval → append to awaiting_approval[], continue batch + Note over Turn,Bus: When the batch pass finishes with any awaiting calls,
saveRecord parks in function_awaiting_approval (no wake on park). User->>Gate: approval::resolve(decision, reason) Gate->>Bus: state::set approvals// = {decision, reason} Bus-->>Turn: turn::on_approval state trigger - Turn->>Turn: wakeFromRecord → function_awaiting_approval executes
that call immediately, removes it from awaiting_approval[] + Turn->>Turn: turn::function_awaiting_approval executes
that call immediately (skipStart), removes it from awaiting_approval[] alt more calls still awaiting Turn->>Turn: stay in function_awaiting_approval else awaiting empty and batch incomplete @@ -232,7 +275,6 @@ flowchart TD provOAI --> turnOrch provKimi --> turnOrch provLms --> turnOrch - hook[hook-fanout] --> approval session --> compact[context-compaction] provAnth --> compact provOAI --> compact diff --git a/harness/docs/workers/approval-gate.md b/harness/docs/workers/approval-gate.md index 188aa3a9..7695e14e 100644 --- a/harness/docs/workers/approval-gate.md +++ b/harness/docs/workers/approval-gate.md @@ -22,7 +22,7 @@ and persist the decision where the orchestrator can read it. 1. While parked, the orchestrator keeps pending calls in `awaiting_approval[]` on the turn record. 2. The console calls `approval::resolve` with `{ session_id, function_call_id, decision, reason? }`. 3. `approval::resolve` writes `approvals//` via `state::set`. -4. The `turn::on_approval` state trigger (scope `approvals`) fires and calls `wakeFromRecord`. +4. The `turn::on_approval` state trigger (scope `approvals`) enqueues `turn::function_awaiting_approval`. 5. `function_awaiting_approval` executes each resolved call immediately, removes it from `awaiting_approval[]`, and stays parked until none remain; then finalizes the batch or returns to `function_execute`. ## Registered functions diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index bf0dc8b7..b37c9d83 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -8,10 +8,10 @@ provisioning, assistant, function-execute, steering, and session finish. This is the heart of the bundle. `run::start` opens a session and returns immediately; the rest of the work happens inside per-state durable functions (`turn::provisioning`, `turn::assistant_streaming`, …), each enqueued onto -the `turn-step` FIFO queue via `TurnStore.wakeStep`. 
+the `turn-step` FIFO queue inline from `saveRecord`. Saving the record with a new non-terminal, non-parking state automatically enqueues the next handler (`saveRecord` in -[state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) calls `shouldWakeStep` then `wakeStep`). +[state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) calls `shouldWakeStep` then enqueues on the `turn-step` FIFO). Every per-state handler is wrapped by `runTransition` ([run-transition.ts](harness/src/turn-orchestrator/run-transition.ts)): @@ -44,14 +44,14 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. ## Triggers -The record-written wake is now inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state calls `wakeStep` directly. Similarly, `turn_state_changed` events are emitted inline from `persistRecord` inside `TurnStore` — there is no separate `on-turn-state-changed` state trigger. +The record-written wake is inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state enqueues `turn::{newState}` on the `turn-step` FIFO. Similarly, `turn_state_changed` events are emitted inline from `persistRecord` inside `TurnStore` — there is no separate `on-turn-state-changed` state trigger. -Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` writes a decision to scope `approvals`, which fires the reactive `turn::on_approval` state trigger (see [on-approval.ts](harness/src/turn-orchestrator/on-approval.ts) and [workers/approval-gate.md](workers/approval-gate.md)). `recoverParkedApprovals` re-wakes parked sessions at worker startup. 
+Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` writes a decision to scope `approvals`, which fires the reactive `turn::on_approval` state trigger (see [on-approval.ts](harness/src/turn-orchestrator/on-approval.ts) and [workers/approval-gate.md](workers/approval-gate.md)). ## Turn FSM Each state is a registered `turn::{state}` function executed via -`runTransition` and enqueued onto the `turn-step` FIFO queue by `TurnStore.wakeStep`. +`runTransition` and enqueued onto the `turn-step` FIFO queue from `saveRecord` when `shouldWakeStep` allows. The 7 states from [state.ts](harness/src/turn-orchestrator/state.ts): | State | Handler file | Role | @@ -116,7 +116,7 @@ Unchanged from prior design: `dispatchWithHook` → `consultBefore` → reply returns `{ kind: 'pending' }` from `dispatchWithHook`, which parks the session to `function_awaiting_approval`. `approval::resolve` writes the decision to scope `approvals`, which fires `turn::on_approval` and calls -`TurnStore.wakeFromRecord` to re-enqueue the session's current state handler. +`iii.trigger` to enqueue `turn::function_awaiting_approval` on the `turn-step` queue. ## Configuration @@ -142,12 +142,12 @@ From | [src/turn-orchestrator/register.ts](harness/src/turn-orchestrator/register.ts) | Composes all registered functions: `run::start`, per-state `turn::{state}` handlers, `turn::on_approval`, `turn::get_state`. | | [src/turn-orchestrator/run-start.ts](harness/src/turn-orchestrator/run-start.ts) | `run::start` handler — persists run config and messages, seeds `turn_state` to `provisioning` via `saveRecord` (which wakes the FSM). | | [src/turn-orchestrator/run-transition.ts](harness/src/turn-orchestrator/run-transition.ts) | Shared FSM transition runner: load → null-check → stale-skip → handle → save. Routes to `failed` on unexpected throw; re-throws `TransientError` for queue retry.
| -| [src/turn-orchestrator/state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) | `TurnStore` / `createTurnStore` — agent-scope load/save, `shouldWakeStep`, `wakeStep`, `wakeFromRecord`. | +| [src/turn-orchestrator/state-runtime/store.ts](harness/src/turn-orchestrator/state-runtime/store.ts) | `TurnStore` / `createTurnStore` — agent-scope load/save, `shouldWakeStep`, inline FIFO enqueue from `saveRecord`. | | [src/turn-orchestrator/run-request.ts](harness/src/turn-orchestrator/run-request.ts) | `RunRequest` type and `parseRunRequest` — the typed, parsed form of scope `run_request` (includes `function_schemas`). | | [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader returning `TurnStateView \| null`. | | [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | Dispatcher chokepoint: `dispatchWithHook` (consult + trigger), `triggerFunctionCall` (trigger/decode/error), `agentTriggerTool` (schema), `unwrapAgentTrigger`. | | [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — `policy::check_permissions` (5 s, fail-closed) → `allow` / `pending` / `deny`. | -| [src/turn-orchestrator/on-approval.ts](harness/src/turn-orchestrator/on-approval.ts) | Reactive `turn::on_approval` state trigger on scope `approvals`; `recoverParkedApprovals` re-wakes parked sessions at startup. | +| [src/turn-orchestrator/on-approval.ts](harness/src/turn-orchestrator/on-approval.ts) | Reactive `turn::on_approval` state trigger on scope `approvals`. | | [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `ApprovalDecisionEventSchema`. 
| | [src/turn-orchestrator/state-runtime/ports.ts](harness/src/turn-orchestrator/state-runtime/ports.ts) | `TurnStatePorts` / `createTurnStatePorts` — shared dependency ports for per-state handlers (incl. `finishSession`). | | [src/turn-orchestrator/provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | `turn::provisioning` handler and provisioning pipeline. | diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts index fe17054f..4fc34abb 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts @@ -6,7 +6,7 @@ import type { ISdk } from '../../runtime/iii.js'; import { text } from '../../types/content.js'; import type { FunctionResult } from '../../types/function.js'; import { createPorts } from '../function-execute/ports.js'; -import { finalizeBatch, runOneCall } from '../function-execute/run.js'; +import { finalizeBatch, FunctionExecuteInvariantError, runOneCall } from '../function-execute/run.js'; import type { PreparedCall } from '../function-execute/types.js'; import { isBatchComplete } from '../function-execute/types.js'; import { runTransition } from '../run-transition.js'; @@ -110,13 +110,15 @@ export async function routeAfterApprovalProcessing( return; } - if (!rec.work) { - transitionTo(rec, 'function_execute'); - return; + const work = rec.work; + if (!work) { + throw new FunctionExecuteInvariantError( + 'function_awaiting_approval with empty awaiting_approval requires work', + ); } - if (isBatchComplete(rec.work)) { - await finalizeBatch(executePorts, rec, rec.work); + if (isBatchComplete(work)) { + await finalizeBatch(executePorts, rec, work); } else { transitionTo(rec, 'function_execute'); } diff --git a/harness/src/turn-orchestrator/function-execute/process.ts b/harness/src/turn-orchestrator/function-execute/process.ts index 
dfe5598e..29278784 100644 --- a/harness/src/turn-orchestrator/function-execute/process.ts +++ b/harness/src/turn-orchestrator/function-execute/process.ts @@ -34,10 +34,8 @@ export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise 0) { - transitionTo(rec, 'function_awaiting_approval'); - return; - } + transitionTo(rec, 'function_awaiting_approval'); + return; } if (isBatchComplete(outcome.work)) { diff --git a/harness/src/turn-orchestrator/on-approval.ts b/harness/src/turn-orchestrator/on-approval.ts index d6e0e21b..63c7208c 100644 --- a/harness/src/turn-orchestrator/on-approval.ts +++ b/harness/src/turn-orchestrator/on-approval.ts @@ -8,10 +8,10 @@ * `state::set` `approvals// = { decision, reason }`. */ -import type { ISdk } from '../runtime/iii.js'; +import { TriggerAction, type ISdk } from '../runtime/iii.js'; import { logger } from '../runtime/otel.js'; import { ApprovalDecisionEventSchema, type ParsedApprovalDecisionWrite } from './schemas.js'; -import { createTurnStore } from './state-runtime/store.js'; +import { TURN_STEP_QUEUE } from './state-runtime/store.js'; export function parseApprovalDecisionWrite(event: unknown): ParsedApprovalDecisionWrite | null { const result = ApprovalDecisionEventSchema.safeParse(event); @@ -23,9 +23,12 @@ export function isApprovalDecisionWrite(event: unknown): boolean { } export async function execute(iii: ISdk, write: ParsedApprovalDecisionWrite): Promise { - const store = createTurnStore(iii); try { - await store.wakeFromRecord(write.session_id); + await iii.trigger({ + function_id: `turn::function_awaiting_approval`, + payload: { session_id: write.session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); } catch (err) { logger.warn('turn::on_approval: wake failed', { session_id: write.session_id, @@ -35,26 +38,9 @@ export async function execute(iii: ISdk, write: ParsedApprovalDecisionWrite): Pr } export async function handleApprovalDecisionWrite(iii: ISdk, event: unknown): Promise 
{ - const write = parseApprovalDecisionWrite(event); - if (!write) return; - await execute(iii, write); -} - -/** Wake sessions still parked on approval (e.g. a decision arrived during downtime). */ -export async function recoverParkedApprovals(iii: ISdk): Promise { - const store = createTurnStore(iii); - const records = await store.listTurnStateRecords(); - for (const rec of records) { - if (rec.state !== 'function_awaiting_approval') continue; - try { - await store.wakeFromRecord(rec.session_id); - } catch (err) { - logger.warn('recoverParkedApprovals: wake failed', { - session_id: rec.session_id, - err: String(err), - }); - } - } + const result = ApprovalDecisionEventSchema.safeParse(event); + if (!result.success) return; + await execute(iii, result.data); } export function register(iii: ISdk): void { @@ -85,3 +71,4 @@ export function register(iii: ISdk): void { }, }); } +"" \ No newline at end of file diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 2c5740e8..40deab3d 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -7,7 +7,7 @@ import { register as registerFunctionAwaitingApproval } from './function-awaitin import { register as registerFunctionExecute } from './function-execute/process.js'; import { register as registerGetState } from './get-state.js'; import { register as registerRunStart } from './run-start.js'; -import { recoverParkedApprovals, register as registerOnApproval } from './on-approval.js'; +import { register as registerOnApproval } from './on-approval.js'; import { register as registerProvisioning } from './provisioning/process.js'; import { register as registerSteeringCheck } from './steering-check/process.js'; @@ -22,7 +22,6 @@ export async function register(iii: ISdk, ctx: { configPath: string }): Promise< registerSteeringCheck(iii); registerGetState(iii); registerOnApproval(iii); - await recoverParkedApprovals(iii); void 
bootstrap.run(iii, orchestratorCfg); } diff --git a/harness/src/turn-orchestrator/state-runtime/store.ts b/harness/src/turn-orchestrator/state-runtime/store.ts index ff6907f8..f924c7f2 100644 --- a/harness/src/turn-orchestrator/state-runtime/store.ts +++ b/harness/src/turn-orchestrator/state-runtime/store.ts @@ -5,7 +5,7 @@ import { z } from 'zod'; import { TriggerAction, type ISdk } from '../../runtime/iii.js'; -import { stateGet, stateListValues, stateSet } from '../../runtime/state.js'; +import { stateGet, stateSet } from '../../runtime/state.js'; import { logger } from '../../runtime/otel.js'; import type { AgentMessage } from '../../types/agent-message.js'; import { MESSAGES_SCOPE, RUN_REQUEST_SCOPE, TURN_STATE_SCOPE } from '../state.js'; @@ -47,9 +47,6 @@ export type TurnStore = { appendMessages(session_id: string, msgs: AgentMessage[]): Promise; loadRunRequest(session_id: string): Promise; saveRunRequest(session_id: string, request: RunRequest): Promise; - listTurnStateRecords(): Promise; - wakeStep(session_id: string, state: TurnState): Promise; - wakeFromRecord(session_id: string): Promise; }; const FlatMessagesSchema = z @@ -113,13 +110,6 @@ export function createTurnStore(iii: ISdk): TurnStore { return parseTurnStateRecord(await scopedGet(iii, TURN_STATE_SCOPE, session_id)); }, - async listTurnStateRecords() { - const values = await stateListValues(iii, { scope: TURN_STATE_SCOPE }); - return values - .map((value) => parseTurnStateRecord(value)) - .filter((rec): rec is TurnStateRecord => rec !== null); - }, - async writeRecord(rec) { await scopedSet(iii, TURN_STATE_SCOPE, rec.session_id, rec); }, @@ -131,16 +121,6 @@ export function createTurnStore(iii: ISdk): TurnStore { } }, - wakeStep(session_id, state) { - return enqueueTurnStep(iii, session_id, state); - }, - - async wakeFromRecord(session_id) { - const rec = parseTurnStateRecord(await scopedGet(iii, TURN_STATE_SCOPE, session_id)); - if (!rec || rec.state === 'stopped' || rec.state === 'failed') 
return; - await enqueueTurnStep(iii, session_id, rec.state); - }, - async loadMessages(session_id) { return parseFlatMessages(await scopedGet(iii, MESSAGES_SCOPE, session_id)); }, diff --git a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts index f4635b25..e1ff6b0f 100644 --- a/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts +++ b/harness/tests/turn-orchestrator/_helpers/mockTurnStore.ts @@ -27,9 +27,6 @@ export function mockTurnStore(overrides: Partial = {}): MockTurnStore appendMessages: vi.fn(async () => {}), loadRunRequest: vi.fn(async () => defaultRunRequest), saveRunRequest: vi.fn(async () => {}), - listTurnStateRecords: vi.fn(async () => []), - wakeStep: vi.fn(async () => {}), - wakeFromRecord: vi.fn(async () => {}), ...overrides, } as MockTurnStore; } diff --git a/harness/tests/turn-orchestrator/store.test.ts b/harness/tests/turn-orchestrator/store.test.ts index 970c8b4d..6712c714 100644 --- a/harness/tests/turn-orchestrator/store.test.ts +++ b/harness/tests/turn-orchestrator/store.test.ts @@ -1,5 +1,4 @@ import { describe, expect, it, vi } from 'vitest'; -import { TriggerAction } from '../../src/runtime/iii.js'; import type { ISdk } from '../../src/runtime/iii.js'; import { createTurnStore, @@ -112,87 +111,3 @@ describe('shouldWakeStep', () => { expect(shouldWakeStep('function_execute', 'function_execute')).toBe(false); }); }); - -describe('TurnStore.wakeStep', () => { - it('enqueues turn::{state} on the turn-step FIFO queue', async () => { - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - triggers.push(req); - return null; - }), - } as unknown as ISdk; - - await createTurnStore(iii).wakeStep('sess-abc', 'assistant_streaming'); - - expect(triggers).toHaveLength(1); - 
expect(triggers[0]?.function_id).toBe('turn::assistant_streaming'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); - - it('swallows enqueue failures (logs only, never rethrows)', async () => { - const iii = { - trigger: vi.fn(async () => { - throw new Error('queue down'); - }), - } as unknown as ISdk; - - await expect( - createTurnStore(iii).wakeStep('sess-abc', 'provisioning'), - ).resolves.toBeUndefined(); - }); -}); - -describe('TurnStore.wakeFromRecord', () => { - it('enqueues turn::{currentState} from persisted record', async () => { - const rec = newRecord('sess-x'); - rec.state = 'function_awaiting_approval'; - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - if (req.function_id === 'state::get') return rec; - triggers.push(req); - return null; - }), - } as unknown as ISdk; - - await createTurnStore(iii).wakeFromRecord('sess-x'); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-x' }); - }); - - it('no-ops when session is stopped', async () => { - const rec = newRecord('sess-y'); - rec.state = 'stopped'; - const turnTriggers: string[] = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'state::get') return rec; - if (req.function_id.startsWith('turn::')) turnTriggers.push(req.function_id); - return null; - }), - } as unknown as ISdk; - - await createTurnStore(iii).wakeFromRecord('sess-y'); - expect(turnTriggers).toHaveLength(0); - }); - - it('no-ops when session is failed (no turn::failed handler exists)', async () => { - const rec = newRecord('sess-z'); - rec.state = 'failed'; - const turnTriggers: string[] = []; - const 
iii = { - trigger: vi.fn(async (req: { function_id: string }) => { - if (req.function_id === 'state::get') return rec; - if (req.function_id.startsWith('turn::')) turnTriggers.push(req.function_id); - return null; - }), - } as unknown as ISdk; - - await createTurnStore(iii).wakeFromRecord('sess-z'); - expect(turnTriggers).toHaveLength(0); - }); -}); From 0f6f945b968fc2f0f24a36da27be4b782c13198f Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Wed, 27 May 2026 07:16:40 -0300 Subject: [PATCH 37/41] refactor(turn-orchestrator): enhance approval handling and state management - Streamline the approval process by updating the `approval-gate` description to clarify its role in persisting decisions and enqueuing `turn::function_awaiting_approval`. - Refactor the handling of approval decisions to ensure that state transitions are triggered correctly when decisions are written to the `approvals` scope. - Remove the obsolete `on-approval.ts` file and integrate its functionality into the existing structure, improving maintainability. - Introduce a new `TurnStateInvariantError` class to handle validation errors related to turn state records. These changes aim to improve the clarity and efficiency of the turn orchestrator's approval handling, ensuring a more robust and responsive system. 
--- harness/docs/architecture.md | 19 +- harness/docs/workers/turn-orchestrator.md | 7 +- harness/src/index.ts | 2 +- .../assistant-streaming/ports.ts | 6 - .../assistant-streaming/process.ts | 158 +-------------- .../assistant-streaming/run.ts | 151 +++++++++++++++ harness/src/turn-orchestrator/errors.ts | 8 + .../function-awaiting-approval/process.ts | 162 +++++----------- .../function-awaiting-approval/run.ts | 91 +++++++++ .../function-execute/process.ts | 49 ++--- .../turn-orchestrator/function-execute/run.ts | 118 +++++------- harness/src/turn-orchestrator/on-approval.ts | 74 ------- harness/src/turn-orchestrator/register.ts | 2 - harness/src/turn-orchestrator/schemas.ts | 115 ++++++++++- harness/src/turn-orchestrator/state.ts | 46 ++++- .../steering-check/process.ts | 115 +---------- .../turn-orchestrator/steering-check/run.ts | 111 +++++++++++ .../tests/approval-gate/_helpers/fakeIii.ts | 15 +- .../integration/parallel-approval-harness.ts | 32 ++-- .../integration/parallel-approval.e2e.test.ts | 2 +- .../assistant-streaming.test.ts | 15 +- .../tests/turn-orchestrator/assistant.test.ts | 2 +- ...on-awaiting-approval-state-trigger.test.ts | 44 +++++ .../function-awaiting-approval.test.ts | 180 ++++++++++++++++++ .../function-execute.test.ts | 38 ++-- .../tests/turn-orchestrator/functions.test.ts | 149 ++++++--------- .../turn-orchestrator/on-approval.test.ts | 126 ------------ harness/tests/turn-orchestrator/state.test.ts | 77 +++++++- .../steering-check-layer.test.ts | 3 +- .../tests/turn-orchestrator/steering.test.ts | 3 +- 30 files changed, 1072 insertions(+), 848 deletions(-) create mode 100644 harness/src/turn-orchestrator/assistant-streaming/run.ts create mode 100644 harness/src/turn-orchestrator/function-awaiting-approval/run.ts delete mode 100644 harness/src/turn-orchestrator/on-approval.ts create mode 100644 harness/src/turn-orchestrator/steering-check/run.ts create mode 100644 
harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts create mode 100644 harness/tests/turn-orchestrator/function-awaiting-approval.test.ts delete mode 100644 harness/tests/turn-orchestrator/on-approval.test.ts diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 247e05eb..821665d7 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -20,7 +20,7 @@ workers. |---|---|---|---| | harness | [src/harness/](harness/src/harness/) | Meta-worker; loads `iii-permissions.yaml`, exposes `harness::trigger` (WS ingestion bridge — see [Telemetry & trace correlation](#telemetry--trace-correlation)) / `policy::check_permissions` / `ui::*`, spins up `agent::events` fan-out. | [workers/harness.md](harness/docs/workers/harness.md) | | turn-orchestrator | [src/turn-orchestrator/](harness/src/turn-orchestrator/) | Durable FSM driving each agent turn; `dispatchWithHook` approval chokepoint. | [workers/turn-orchestrator.md](harness/docs/workers/turn-orchestrator.md) | -| approval-gate | [src/approval-gate/](harness/src/approval-gate/) | Registers `approval::resolve` and shared approval wire schemas; persists decisions to scope `approvals` (turn-orchestrator reacts via `turn::on_approval`). | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | +| approval-gate | [src/approval-gate/](harness/src/approval-gate/) | Registers `approval::resolve`; persists decisions to scope `approvals`. Wake via `turn::on_approval` state trigger. | [workers/approval-gate.md](harness/docs/workers/approval-gate.md) | | session | [src/session/](harness/src/session/) | Branching session storage (`session-tree::*`) plus per-session inbox queues (`session-inbox::*`). | [workers/session.md](harness/docs/workers/session.md) | | llm-budget | [src/llm-budget/](harness/src/llm-budget/) | Workspace + agent LLM spend caps with alerts, forecast, period rollover. 
| [workers/llm-budget.md](harness/docs/workers/llm-budget.md) | | hook-fanout | [src/hook-fanout/](harness/src/hook-fanout/) | Generic publish-and-collect primitive over a stream topic. | [workers/hook-fanout.md](harness/docs/workers/hook-fanout.md) | @@ -96,9 +96,8 @@ defines a 7-state durable FSM. Each state is a registered `turn::{state}` function executed via `runTransition` and enqueued onto the `turn-step` FIFO queue from `saveRecord` ([store.ts](harness/src/turn-orchestrator/state-runtime/store.ts)). `saveRecord` calls `shouldWakeStep` then enqueues `turn::{newState}` when the persisted state -transitions to a stepable state. Paused sessions are also woken by -the approval-decision state trigger (`turn::on_approval` on scope `approvals`) -by enqueuing `turn::function_awaiting_approval`. +transitions to a stepable state. Paused sessions are woken when `approval::resolve` writes +scope `approvals`, which fires `turn::on_approval` to enqueue `turn::function_awaiting_approval`. ```mermaid stateDiagram-v2 @@ -128,8 +127,8 @@ The orchestrator consults `policy::check_permissions` directly inside the before path. The orchestrator parks the turn in `function_awaiting_approval` when any call in the batch needs approval, then resumes as each parked call receives `approval::resolve` (decisions may arrive independently and out of -batch order). Each write to scope `approvals` fires `turn::on_approval` and -enqueues `turn::function_awaiting_approval`. +batch order). Each `approval::resolve` persists the decision; the `turn::on_approval` +state trigger enqueues `turn::function_awaiting_approval`. ### Parallel batch during `function_execute` @@ -160,8 +159,8 @@ parked until A and C are resolved. | Reload | `turn::get_state` | One-shot lean view after refresh (no direct iii state reads) | A page refresh does not lose pending approvals as long as iii state persists. 
-Operators can still approve from the console after reload; each `approval::resolve` -write fires `turn::on_approval` while the worker is running. +Operators can still approve from the console after reload; each decision write +fires `turn::on_approval` to enqueue the parked turn step while the worker is running. ### Resume semantics @@ -192,8 +191,8 @@ sequenceDiagram Note over Turn,Bus: When the batch pass finishes with any awaiting calls,
saveRecord parks in function_awaiting_approval (no wake on park). User->>Gate: approval::resolve(decision, reason) Gate->>Bus: state::set approvals// = {decision, reason} - Bus-->>Turn: turn::on_approval state trigger - Turn->>Turn: turn::function_awaiting_approval executes
that call immediately (skipStart), removes it from awaiting_approval[] + Gate->>Turn: enqueue turn::function_awaiting_approval + Turn->>Turn: function_awaiting_approval executes
that call immediately (skipStart), removes it from awaiting_approval[] alt more calls still awaiting Turn->>Turn: stay in function_awaiting_approval else awaiting empty and batch incomplete diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index b37c9d83..1ae3665a 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -46,7 +46,7 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. The record-written wake is inline in `saveRecord` (no separate `on-record-written` adapter): every `saveRecord` call that transitions to a non-terminal, non-parking state enqueues `turn::{newState}` on the `turn-step` FIFO. Similarly, `turn_state_changed` events are emitted inline from `persistRecord` inside `TurnStore` — there is no separate `on-turn-state-changed` state trigger. -Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` writes a decision to scope `approvals`, which fires the reactive `turn::on_approval` state trigger (see [on-approval.ts](harness/src/turn-orchestrator/on-approval.ts) and [workers/approval-gate.md](workers/approval-gate.md)). +Paused turns (`function_awaiting_approval`) are woken when `approval::resolve` writes scope `approvals`, which fires `turn::on_approval` (registered in [function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts); see [workers/approval-gate.md](workers/approval-gate.md)). ## Turn FSM @@ -115,8 +115,7 @@ Unchanged from prior design: `dispatchWithHook` → `consultBefore` → `policy::check_permissions` (5 s timeout, fail-closed). A `needs_approval` reply returns `{ kind: 'pending' }` from `dispatchWithHook`, which parks the session to `function_awaiting_approval`. `approval::resolve` writes the -decision to scope `approvals`, which fires `turn::on_approval` and calls -`turn::on_approval` enqueues `turn::function_awaiting_approval` on the `turn-step` queue. 
+decision to scope `approvals`, which fires `turn::on_approval` to enqueue `turn::function_awaiting_approval` on the `turn-step` queue. ## Configuration @@ -147,7 +146,7 @@ From | [src/turn-orchestrator/get-state.ts](harness/src/turn-orchestrator/get-state.ts) | `turn::get_state` — one-shot reader returning `TurnStateView \| null`. | | [src/turn-orchestrator/agent-trigger.ts](harness/src/turn-orchestrator/agent-trigger.ts) | Dispatcher chokepoint: `dispatchWithHook` (consult + trigger), `triggerFunctionCall` (trigger/decode/error), `agentTriggerTool` (schema), `unwrapAgentTrigger`. | | [src/turn-orchestrator/hook.ts](harness/src/turn-orchestrator/hook.ts) | `consultBefore` — `policy::check_permissions` (5 s, fail-closed) → `allow` / `pending` / `deny`. | -| [src/turn-orchestrator/on-approval.ts](harness/src/turn-orchestrator/on-approval.ts) | Reactive `turn::on_approval` state trigger on scope `approvals`. | +| [src/turn-orchestrator/function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) | `turn::function_awaiting_approval` FSM step + `turn::on_approval` state trigger on scope `approvals`. | | [src/turn-orchestrator/schemas.ts](harness/src/turn-orchestrator/schemas.ts) | All registered-function I/O schemas and types: `RunStartPayloadSchema`, `TurnStepPayloadSchema`, `TurnStateView`, `toView`, `ApprovalDecisionEventSchema`. | | [src/turn-orchestrator/state-runtime/ports.ts](harness/src/turn-orchestrator/state-runtime/ports.ts) | `TurnStatePorts` / `createTurnStatePorts` — shared dependency ports for per-state handlers (incl. `finishSession`). | | [src/turn-orchestrator/provisioning/process.ts](harness/src/turn-orchestrator/provisioning/process.ts) | `turn::provisioning` handler and provisioning pipeline. 
| diff --git a/harness/src/index.ts b/harness/src/index.ts index 7e834461..fed2b395 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -49,7 +49,7 @@ const WORKERS: readonly WorkerDefinition[] = [ { name: 'approval-gate', description: - 'Registers approval::resolve; persists human decisions to the approvals scope (turn-orchestrator reacts via turn::on_approval).', + 'Registers approval::resolve; persists human decisions to the approvals scope and enqueues turn::function_awaiting_approval.', register: (iii) => registerApprovalGate(iii), }, { diff --git a/harness/src/turn-orchestrator/assistant-streaming/ports.ts b/harness/src/turn-orchestrator/assistant-streaming/ports.ts index 29cc1817..66bb329b 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/ports.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/ports.ts @@ -48,12 +48,6 @@ export function isErrorOrAborted(asst: AssistantMessage): boolean { return asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; } -export class AssistantStreamingInvariantError extends Error { - constructor(message: string) { - super(message); - this.name = 'AssistantStreamingInvariantError'; - } -} export type DeltaHandler = ( partial: AssistantMessage, diff --git a/harness/src/turn-orchestrator/assistant-streaming/process.ts b/harness/src/turn-orchestrator/assistant-streaming/process.ts index 2f4b45df..74d29880 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/process.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/process.ts @@ -1,160 +1,22 @@ /** - * Stream one provider turn, persist the assistant message, route onward, and register the FSM step. + * Register the assistant_streaming FSM step and run one durable transition. 
*/ import type { ISdk } from '../../runtime/iii.js'; -import type { AssistantMessage } from '../../types/agent-message.js'; -import { decide } from '../provider-router.js'; import { runTransition } from '../run-transition.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { syntheticAssistant } from '../synthetic-assistant.js'; -import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; -import { transitionTo, type TurnStateRecord } from '../state.js'; import { - AssistantStreamingInvariantError, - createStreamingPorts, - hasFunctionCalls, - isErrorOrAborted, - parseFunctionSchemas, - type AssistantRoute, - type AssistantStreamingPorts, - type StreamContext, - type StreamTurnOutcome, -} from './ports.js'; - -export function beginTurn(rec: TurnStateRecord): void { - rec.turn_count++; - rec.turn_end_emitted = false; - rec.assistant_body_streamed = false; -} - -export async function prepareStreamContext( - ports: AssistantStreamingPorts, - rec: TurnStateRecord, -): Promise { - const request = await ports.loadRunRequest(rec.session_id); - let messages = await ports.loadMessages(rec.session_id); - const { provider, model, system_prompt, function_schemas } = request; - const decision = decide({ provider, model }); - const tools = parseFunctionSchemas(function_schemas); - - if ( - (await ports.runPreflight(rec.session_id, messages, decision.provider, model)) === 'compacted' - ) { - messages = await ports.loadMessages(rec.session_id); - } - - return { - session_id: rec.session_id, - decision, - system_prompt, - tools, - messages, - }; -} - -export async function runStreamTurn( - ports: AssistantStreamingPorts, - session_id: string, - ctx: StreamContext, -): Promise { - let body_streamed = false; - - const { final, error } = await ports.streamTurn(ctx, async (partial, event) => { - await ports.emitMessageUpdate(session_id, partial, event); - if (event.type === 'text_delta' || event.type === 'thinking_delta') { - body_streamed = 
true; - } - }); - - return { final, error, body_streamed }; -} - -export function resolveAssistantMessage( - outcome: StreamTurnOutcome, - decision: StreamContext['decision'], -): AssistantMessage { - if (outcome.final) return outcome.final; - - const reason = outcome.error ?? 'provider channel closed without final'; - return syntheticAssistant({ - stop_reason: 'error', - text: reason, - provider: decision.provider, - model: decision.model, - }); -} - -/** Reason text for a synthetic error update when the provider did not return a final message. */ -export function syntheticStreamReason(outcome: StreamTurnOutcome): string | null { - if (outcome.final) return null; - return outcome.error ?? 'provider channel closed without final'; -} - -export function routeAssistantTurn(asst: AssistantMessage): AssistantRoute { - if (isErrorOrAborted(asst)) { - return { - kind: 'stopped', - reason: asst.stop_reason === 'aborted' ? 'aborted' : 'error', - }; - } - if (hasFunctionCalls(asst)) { - return { kind: 'function_execute' }; - } - return { kind: 'steering_check' }; -} - -export async function finalizeAssistantTurn( - ports: AssistantStreamingPorts, - rec: TurnStateRecord, -): Promise { - const asst = rec.last_assistant; - if (!asst) { - throw new AssistantStreamingInvariantError( - 'assistant_streaming finalize without last_assistant', - ); - } - - await ports.emitMessageComplete(rec.session_id, asst, rec.assistant_body_streamed === true); - - const route = routeAssistantTurn(asst); - - if (route.kind === 'stopped') { - await emitTurnEndOnce(ports, rec, asst); - await ports.finishSession(rec); - return; - } - - await ports.persistAssistantIfNew(rec.session_id, asst); - - if (route.kind === 'function_execute') { - rec.function_results = []; - rec.work = undefined; - transitionTo(rec, 'function_execute'); - return; - } - - transitionTo(rec, 'steering_check'); -} + TurnStepPayloadSchema, + parseAssistantStreamingRecord, + type TurnStepPayload, +} from '../schemas.js'; +import 
type { TurnStateRecord } from '../state.js'; +import { createStreamingPorts } from './ports.js'; +import { runAssistantStreaming } from './run.js'; export async function handleStreaming(iii: ISdk, rec: TurnStateRecord): Promise { + const streaming = parseAssistantStreamingRecord(rec); const ports = createStreamingPorts(iii); - beginTurn(rec); - const ctx = await prepareStreamContext(ports, rec); - const outcome = await runStreamTurn(ports, rec.session_id, ctx); - rec.last_assistant = resolveAssistantMessage(outcome, ctx.decision); - rec.assistant_body_streamed = outcome.body_streamed; - - const syntheticReason = syntheticStreamReason(outcome); - if (syntheticReason) { - await ports.emitMessageUpdate(rec.session_id, rec.last_assistant, { - type: 'text_delta', - partial: rec.last_assistant, - delta: syntheticReason, - }); - } - - await finalizeAssistantTurn(ports, rec); + await runAssistantStreaming(ports, streaming); } export function register(iii: ISdk): void { diff --git a/harness/src/turn-orchestrator/assistant-streaming/run.ts b/harness/src/turn-orchestrator/assistant-streaming/run.ts new file mode 100644 index 00000000..6532263b --- /dev/null +++ b/harness/src/turn-orchestrator/assistant-streaming/run.ts @@ -0,0 +1,151 @@ +/** + * Stream one provider turn, persist the assistant message, and route onward. 
+ */ + +import type { AssistantMessage } from '../../types/agent-message.js'; +import { decide } from '../provider-router.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; +import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; +import { enterFunctionExecute } from '../function-execute/run.js'; +import { transitionTo, type AssistantStreamingTurnRecord } from '../state.js'; +import { + hasFunctionCalls, + isErrorOrAborted, + parseFunctionSchemas, + type AssistantRoute, + type AssistantStreamingPorts, + type StreamContext, + type StreamTurnOutcome, +} from './ports.js'; + +export function beginTurn(rec: AssistantStreamingTurnRecord): void { + rec.turn_count++; + rec.turn_end_emitted = false; + rec.assistant_body_streamed = false; +} + +export async function prepareStreamContext( + ports: AssistantStreamingPorts, + rec: AssistantStreamingTurnRecord, +): Promise { + const request = await ports.loadRunRequest(rec.session_id); + let messages = await ports.loadMessages(rec.session_id); + const { provider, model, system_prompt, function_schemas } = request; + const decision = decide({ provider, model }); + const tools = parseFunctionSchemas(function_schemas); + + if ( + (await ports.runPreflight(rec.session_id, messages, decision.provider, model)) === 'compacted' + ) { + messages = await ports.loadMessages(rec.session_id); + } + + return { + session_id: rec.session_id, + decision, + system_prompt, + tools, + messages, + }; +} + +export async function runStreamTurn( + ports: AssistantStreamingPorts, + session_id: string, + ctx: StreamContext, +): Promise { + let body_streamed = false; + + const { final, error } = await ports.streamTurn(ctx, async (partial, event) => { + await ports.emitMessageUpdate(session_id, partial, event); + if (event.type === 'text_delta' || event.type === 'thinking_delta') { + body_streamed = true; + } + }); + + return { final, error, body_streamed }; +} + +export function resolveAssistantMessage( + outcome: 
StreamTurnOutcome, + decision: StreamContext['decision'], +): AssistantMessage { + if (outcome.final) return outcome.final; + + const reason = outcome.error ?? 'provider channel closed without final'; + return syntheticAssistant({ + stop_reason: 'error', + text: reason, + provider: decision.provider, + model: decision.model, + }); +} + +/** Reason text for a synthetic error update when the provider did not return a final message. */ +export function syntheticStreamReason(outcome: StreamTurnOutcome): string | null { + if (outcome.final) return null; + return outcome.error ?? 'provider channel closed without final'; +} + +export function routeAssistantTurn(asst: AssistantMessage): AssistantRoute { + if (isErrorOrAborted(asst)) { + return { + kind: 'stopped', + reason: asst.stop_reason === 'aborted' ? 'aborted' : 'error', + }; + } + if (hasFunctionCalls(asst)) { + return { kind: 'function_execute' }; + } + return { kind: 'steering_check' }; +} + +export async function finalizeAssistantTurn( + ports: AssistantStreamingPorts, + rec: AssistantStreamingTurnRecord, + asst: AssistantMessage, +): Promise { + await ports.emitMessageComplete(rec.session_id, asst, rec.assistant_body_streamed === true); + + const route = routeAssistantTurn(asst); + + if (route.kind === 'stopped') { + await emitTurnEndOnce(ports, rec, asst); + await ports.finishSession(rec); + return; + } + + await ports.persistAssistantIfNew(rec.session_id, asst); + + if (route.kind === 'function_execute') { + rec.function_results = []; + enterFunctionExecute(rec, asst); + transitionTo(rec, 'function_execute'); + return; + } + + transitionTo(rec, 'steering_check'); +} + +export async function runAssistantStreaming( + ports: AssistantStreamingPorts, + rec: AssistantStreamingTurnRecord, +): Promise { + beginTurn(rec); + const ctx = await prepareStreamContext(ports, rec); + const outcome = await runStreamTurn(ports, rec.session_id, ctx); + const asst = resolveAssistantMessage(outcome, ctx.decision); + 
rec.last_assistant = asst; + rec.assistant_body_streamed = outcome.body_streamed; + + const syntheticReason = syntheticStreamReason(outcome); + if (syntheticReason) { + await ports.emitMessageUpdate(rec.session_id, asst, { + type: 'text_delta', + partial: asst, + delta: syntheticReason, + }); + } + + await finalizeAssistantTurn(ports, rec, asst); +} diff --git a/harness/src/turn-orchestrator/errors.ts b/harness/src/turn-orchestrator/errors.ts index c8361f72..3a3a494e 100644 --- a/harness/src/turn-orchestrator/errors.ts +++ b/harness/src/turn-orchestrator/errors.ts @@ -26,3 +26,11 @@ export class TransientError extends Error { this.name = 'TransientError'; } } + +/** Persisted turn_state is missing fields required for the current FSM step. */ +export class TurnStateInvariantError extends Error { + constructor(message: string) { + super(message); + this.name = 'TurnStateInvariantError'; + } +} diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts index 4fc34abb..0081769b 100644 --- a/harness/src/turn-orchestrator/function-awaiting-approval/process.ts +++ b/harness/src/turn-orchestrator/function-awaiting-approval/process.ts @@ -2,133 +2,44 @@ * Read approval decisions, execute resolved calls individually, and register the FSM step. 
*/ -import type { ISdk } from '../../runtime/iii.js'; -import { text } from '../../types/content.js'; -import type { FunctionResult } from '../../types/function.js'; +import { TriggerAction, type ISdk } from '../../runtime/iii.js'; +import { logger } from '../../runtime/otel.js'; import { createPorts } from '../function-execute/ports.js'; -import { finalizeBatch, FunctionExecuteInvariantError, runOneCall } from '../function-execute/run.js'; -import type { PreparedCall } from '../function-execute/types.js'; -import { isBatchComplete } from '../function-execute/types.js'; import { runTransition } from '../run-transition.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { transitionTo, type AwaitingApprovalEntry, type TurnStateRecord } from '../state.js'; import { - createAwaitingApprovalPorts, - type ApprovalDecision, - type AwaitingApprovalPorts, -} from './ports.js'; - -export function denialResultFromDecision(decision: ApprovalDecision): FunctionResult { - const reason = - decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); - const message = - decision.decision === 'aborted' - ? 
`Function call aborted: ${reason}` - : `Permission denied by user: ${reason}`; - return { - content: [text(message)], - details: { - approval_denied: true, - decision: decision.decision, - reason, - }, - terminate: false, - }; -} - -export function applyDecisionToPrepared( - current: PreparedCall, - decision: ApprovalDecision, -): PreparedCall { - if (decision.decision === 'allow') { - return { route: 'pre_approved', call: current.call }; - } - return { - route: 'synthetic', - call: current.call, - result: denialResultFromDecision(decision), - }; -} - -function findPreparedCall( - prepared: readonly PreparedCall[], - function_call_id: string, -): PreparedCall | undefined { - return prepared.find((entry) => entry.call.id === function_call_id); -} - -function withoutAwaitingEntry( - awaiting: AwaitingApprovalEntry[], - function_call_id: string, -): AwaitingApprovalEntry[] { - return awaiting.filter((entry) => entry.function_call_id !== function_call_id); -} - -export async function processResolvedApprovals( - readPorts: AwaitingApprovalPorts, - executePorts: ReturnType, - rec: TurnStateRecord, -): Promise { - if (!rec.work) return; - - let awaiting = [...(rec.awaiting_approval ?? 
[])]; - const executed = { ...rec.work.executed }; - - for (const entry of [...awaiting]) { - const callId = entry.function_call_id; - - if (executed[callId]) { - awaiting = withoutAwaitingEntry(awaiting, callId); - continue; - } - - const decision = await readPorts.readDecision(rec.session_id, callId); - if (!decision) continue; - - const current = findPreparedCall(rec.work.prepared, callId); - if (!current) { - awaiting = withoutAwaitingEntry(awaiting, callId); - continue; - } - - const resolved = applyDecisionToPrepared(current, decision); - await runOneCall(executePorts, rec.session_id, resolved, executed, { skipStart: true }); - - awaiting = withoutAwaitingEntry(awaiting, callId); - rec.work = { prepared: rec.work.prepared, executed }; - await executePorts.checkpoint(rec); - } - - rec.awaiting_approval = awaiting; -} - -export async function routeAfterApprovalProcessing( - executePorts: ReturnType, - rec: TurnStateRecord, -): Promise { - if ((rec.awaiting_approval?.length ?? 0) > 0) { - return; - } - - const work = rec.work; - if (!work) { - throw new FunctionExecuteInvariantError( - 'function_awaiting_approval with empty awaiting_approval requires work', - ); - } - - if (isBatchComplete(work)) { - await finalizeBatch(executePorts, rec, work); - } else { - transitionTo(rec, 'function_execute'); + ApprovalDecisionEventSchema, + TurnStepPayloadSchema, + parseFunctionBatchRecord, + type TurnStepPayload, +} from '../schemas.js'; +import { TURN_STEP_QUEUE } from '../state-runtime/store.js'; +import type { TurnStateRecord } from '../state.js'; +import { createAwaitingApprovalPorts } from './ports.js'; +import { processResolvedApprovals, routeAfterApprovalProcessing } from './run.js'; + +export async function handleApprovalStateWrite(iii: ISdk, event: unknown): Promise { + const parsed = ApprovalDecisionEventSchema.safeParse(event); + if (!parsed.success) return; + try { + await iii.trigger({ + function_id: 'turn::function_awaiting_approval', + payload: { session_id: 
parsed.data.session_id }, + action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), + }); + } catch (err) { + logger.warn('turn::on_approval: wake failed', { + session_id: parsed.data.session_id, + err: String(err), + }); } } export async function handleAwaitingApproval(iii: ISdk, rec: TurnStateRecord): Promise { + const batch = parseFunctionBatchRecord(rec); const executePorts = createPorts(iii); const readPorts = createAwaitingApprovalPorts(iii); - await processResolvedApprovals(readPorts, executePorts, rec); - await routeAfterApprovalProcessing(executePorts, rec); + await processResolvedApprovals(readPorts, executePorts, batch); + await routeAfterApprovalProcessing(executePorts, batch); } export function register(iii: ISdk): void { @@ -143,4 +54,19 @@ export function register(iii: ISdk): void { 'Run one durable FSM transition for session in state function_awaiting_approval: execute each call as its approval decision arrives.', }, ); + + iii.registerFunction( + 'turn::on_approval', + async (event: unknown) => handleApprovalStateWrite(iii, event), + { + description: + 'State trigger on scope=approvals; enqueues turn::function_awaiting_approval when a decision is written.', + }, + ); + + iii.registerTrigger({ + type: 'state', + function_id: 'turn::on_approval', + config: { scope: 'approvals' }, + }); } diff --git a/harness/src/turn-orchestrator/function-awaiting-approval/run.ts b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts new file mode 100644 index 00000000..ebd5aabc --- /dev/null +++ b/harness/src/turn-orchestrator/function-awaiting-approval/run.ts @@ -0,0 +1,91 @@ +/** + * Resolve approval decisions and route the batch after each decision. 
+ */ + +import { text } from '../../types/content.js'; +import type { FunctionResult } from '../../types/function.js'; +import { finalizeBatch, runOneCall } from '../function-execute/run.js'; +import type { FunctionExecutePorts } from '../function-execute/ports.js'; +import type { PreparedCall } from '../function-execute/types.js'; +import { isBatchComplete } from '../function-execute/types.js'; +import { transitionTo, type FunctionBatchTurnRecord } from '../state.js'; +import type { ApprovalDecision, AwaitingApprovalPorts } from './ports.js'; + +export function denialResultFromDecision(decision: ApprovalDecision): FunctionResult { + const reason = + decision.reason ?? (decision.decision === 'aborted' ? 'session_aborted' : 'denied'); + const message = + decision.decision === 'aborted' + ? `Function call aborted: ${reason}` + : `Permission denied by user: ${reason}`; + return { + content: [text(message)], + details: { + approval_denied: true, + decision: decision.decision, + reason, + }, + terminate: false, + }; +} + +export function applyDecisionToPrepared( + current: PreparedCall, + decision: ApprovalDecision, +): PreparedCall { + if (decision.decision === 'allow') { + return { route: 'pre_approved', call: current.call }; + } + return { + route: 'synthetic', + call: current.call, + result: denialResultFromDecision(decision), + }; +} + +export async function processResolvedApprovals( + readPorts: AwaitingApprovalPorts, + executePorts: FunctionExecutePorts, + rec: FunctionBatchTurnRecord, +): Promise { + const work = rec.work; + let awaiting = [...rec.awaiting_approval]; + const executed = { ...work.executed }; + + for (const entry of [...awaiting]) { + const callId = entry.function_call_id; + + if (executed[callId]) { + awaiting = awaiting.filter((e) => e.function_call_id !== callId); + continue; + } + + const decision = await readPorts.readDecision(rec.session_id, callId); + if (!decision) continue; + + const current = work.prepared.find((p) => p.call.id === 
callId)!; + const resolved = applyDecisionToPrepared(current, decision); + await runOneCall(executePorts, rec.session_id, resolved, executed, { skipStart: true }); + + awaiting = awaiting.filter((e) => e.function_call_id !== callId); + rec.work = { prepared: work.prepared, executed }; + await executePorts.checkpoint(rec); + } + + rec.awaiting_approval = awaiting; +} + +export async function routeAfterApprovalProcessing( + executePorts: FunctionExecutePorts, + rec: FunctionBatchTurnRecord, +): Promise { + if (rec.awaiting_approval.length > 0) { + return; + } + + if (isBatchComplete(rec.work)) { + await finalizeBatch(executePorts, rec); + } else { + transitionTo(rec, 'function_execute'); + } +} diff --git a/harness/src/turn-orchestrator/function-execute/process.ts b/harness/src/turn-orchestrator/function-execute/process.ts index 29278784..dbec00d2 100644 --- a/harness/src/turn-orchestrator/function-execute/process.ts +++ b/harness/src/turn-orchestrator/function-execute/process.ts @@ -4,45 +4,36 @@ import type { ISdk } from '../../runtime/iii.js'; import { runTransition } from '../run-transition.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { transitionTo, type AwaitingApprovalEntry, type TurnStateRecord } from '../state.js'; -import type { PendingApproval } from './types.js'; -import { isBatchComplete } from './types.js'; -import { finalizeBatch, loadOrPlanWork, runBatch } from './run.js'; +import { + TurnStepPayloadSchema, + parseFunctionBatchRecord, + type TurnStepPayload, +} from '../schemas.js'; +import { transitionTo, type TurnStateRecord } from '../state.js'; +import { finalizeBatch, runBatch } from './run.js'; import { createPorts } from './ports.js'; -function mergeAwaitingApproval( - existing: AwaitingApprovalEntry[] | undefined, - newPending: PendingApproval[], -): AwaitingApprovalEntry[] { - const ids = new Set(existing?.map((entry) => entry.function_call_id) ?? []); - const merged = [...(existing ?? 
[])]; - for (const pending of newPending) { - if (ids.has(pending.function_call_id)) continue; - ids.add(pending.function_call_id); - merged.push(pending); - } - return merged; -} - export async function handleExecute(iii: ISdk, rec: TurnStateRecord): Promise { + const batch = parseFunctionBatchRecord(rec); const ports = createPorts(iii); - const work = loadOrPlanWork(rec); - const outcome = await runBatch(ports, rec, work); - rec.work = outcome.work; + const outcome = await runBatch(ports, batch); + batch.work = outcome.work; if (outcome.kind === 'incomplete') { - rec.awaiting_approval = mergeAwaitingApproval(rec.awaiting_approval, outcome.newPending); - transitionTo(rec, 'function_awaiting_approval'); + const ids = new Set(batch.awaiting_approval.map((entry) => entry.function_call_id)); + const merged = [...batch.awaiting_approval]; + for (const pending of outcome.newPending) { + if (ids.has(pending.function_call_id)) continue; + ids.add(pending.function_call_id); + merged.push(pending); + } + batch.awaiting_approval = merged; + transitionTo(batch, 'function_awaiting_approval'); return; } - if (isBatchComplete(outcome.work)) { - await finalizeBatch(ports, rec, outcome.work); - } else { - transitionTo(rec, 'function_execute'); - } + await finalizeBatch(ports, batch); } export function register(iii: ISdk): void { diff --git a/harness/src/turn-orchestrator/function-execute/run.ts b/harness/src/turn-orchestrator/function-execute/run.ts index ea623c2e..9ab75b54 100644 --- a/harness/src/turn-orchestrator/function-execute/run.ts +++ b/harness/src/turn-orchestrator/function-execute/run.ts @@ -14,11 +14,14 @@ import { } from '../agent-trigger.js'; import { emitTurnEndOnce } from '../state-runtime/turn-end.js'; import { persistedTrailingResultIds } from '../state-runtime/transcript.js'; -import { transitionTo, type TurnStateRecord } from '../state.js'; +import { + transitionTo, + type AwaitingApprovalEntry, + type FunctionBatchTurnRecord, + type TurnStateRecord, +} from 
'../state.js'; import type { FunctionExecutePorts } from './ports.js'; import { - emptyBatchWork, - isBatchComplete, preparedCallId, type BatchOutcome, type ExecutedCall, @@ -29,55 +32,41 @@ import { type RunOneCallResult, } from './types.js'; -export { isBatchComplete }; - -export class FunctionExecuteInvariantError extends Error { - constructor(message: string) { - super(message); - this.name = 'FunctionExecuteInvariantError'; - } -} - function isFunctionCallBlock( block: AssistantMessage['content'][number], ): block is FunctionCallContent { return block.type === 'function_call'; } -function extractFunctionCalls(msg: AssistantMessage): FunctionCall[] { - return msg.content.filter(isFunctionCallBlock).map((b) => ({ - id: b.id, - function_id: b.function_id, - arguments: b.arguments, - })); -} - -function toPreparedCall(raw: FunctionCall): PreparedCall { - if (raw.function_id !== TOOL_NAME) { - return { route: 'synthetic', call: raw, result: missingFunctionResult() }; - } - const call = unwrapAgentTrigger(raw); - if (!call.function_id) { +function toPreparedCall(block: FunctionCallContent): PreparedCall { + const call: FunctionCall = { + id: block.id, + function_id: block.function_id, + arguments: block.arguments, + }; + if (block.function_id !== TOOL_NAME) { return { route: 'synthetic', call, result: missingFunctionResult() }; } - return { route: 'dispatch', call }; -} - -/** Build prepared calls from the assistant message that requested them. */ -export function planBatchFromAssistant(asst: AssistantMessage): PreparedCall[] { - return extractFunctionCalls(asst).map(toPreparedCall); + const unwrapped = unwrapAgentTrigger(call); + if (!unwrapped.function_id) { + return { route: 'synthetic', call: unwrapped, result: missingFunctionResult() }; + } + return { route: 'dispatch', call: unwrapped }; } -/** Use existing work or plan a new batch from last_assistant. 
*/ -export function loadOrPlanWork(rec: TurnStateRecord): FunctionBatchWork { - if (rec.work) { - return rec.work; - } - const asst = rec.last_assistant; - if (!asst) { - throw new FunctionExecuteInvariantError('function_execute without last_assistant or work'); - } - return emptyBatchWork(planBatchFromAssistant(asst)); +/** Set fields expected when entering `function_execute` (mirrors assistant_streaming finalize). */ +export function enterFunctionExecute(rec: TurnStateRecord, asst: AssistantMessage): void { + const batch = rec as TurnStateRecord & { + awaiting_approval: AwaitingApprovalEntry[]; + last_assistant: AssistantMessage; + work: FunctionBatchWork; + }; + batch.awaiting_approval = []; + batch.last_assistant = asst; + batch.work = { + prepared: asst.content.filter(isFunctionCallBlock).map(toPreparedCall), + executed: {}, + }; } async function resolvePreparedCall( @@ -145,21 +134,19 @@ export async function runOneCall( export async function runBatch( ports: FunctionExecutePorts, - rec: TurnStateRecord, - work: FunctionBatchWork, + rec: FunctionBatchTurnRecord, ): Promise { - const executed = { ...work.executed }; - const awaitingIds = new Set( - (rec.awaiting_approval ?? 
[]).map((entry) => entry.function_call_id), - ); + const { prepared } = rec.work; + const executed = { ...rec.work.executed }; + const awaitingIds = new Set(rec.awaiting_approval.map((entry) => entry.function_call_id)); const newPending: PendingApproval[] = []; - for (const prepared of work.prepared) { - const callId = preparedCallId(prepared); + for (const item of prepared) { + const callId = preparedCallId(item); if (executed[callId]) continue; if (awaitingIds.has(callId)) continue; - const outcome = await runOneCall(ports, rec.session_id, prepared, executed); + const outcome = await runOneCall(ports, rec.session_id, item, executed); if (outcome.kind === 'pending') { newPending.push({ @@ -171,12 +158,12 @@ export async function runBatch( } if (outcome.kind === 'executed') { - rec.work = { prepared: work.prepared, executed }; + rec.work = { prepared, executed }; await ports.checkpoint(rec); } } - const batchWork = { prepared: work.prepared, executed }; + const batchWork = { prepared, executed }; if (newPending.length > 0 || awaitingIds.size > 0) { return { kind: 'incomplete', work: batchWork, newPending }; } @@ -198,24 +185,19 @@ function toFunctionResultMessage( }; } -/** Collect executed entries in batch order (assistant tool order). */ +/** Collect executed entries in batch order (caller must only invoke when batch is complete). 
*/ function executedInBatchOrder(work: FunctionBatchWork): ExecutedCall[] { - const ordered: ExecutedCall[] = []; - for (const prepared of work.prepared) { - const entry = work.executed[preparedCallId(prepared)]; - if (entry) ordered.push(entry); - } - return ordered; + return work.prepared.map((item) => work.executed[preparedCallId(item)]!); } export async function finalizeBatch( ports: FunctionExecutePorts, - rec: TurnStateRecord, - work: FunctionBatchWork, + rec: FunctionBatchTurnRecord, ): Promise { - const executed = executedInBatchOrder(work); + const executed = executedInBatchOrder(rec.work); const function_results: FunctionResultMessage[] = []; - let allTerminate = executed.length > 0; + let allTerminate = true; + const lastAssistant = rec.last_assistant; for (const entry of executed) { const result = entry.result; @@ -237,17 +219,15 @@ export async function finalizeBatch( await ports.appendMessages(rec.session_id, fresh); } - const asst = rec.last_assistant; rec.function_results = function_results; - rec.work = undefined; - if (asst) { - await emitTurnEndOnce(ports, rec, asst, function_results); - } + await emitTurnEndOnce(ports, rec, lastAssistant, function_results); if (allTerminate) { await ports.finishSession(rec); } else { transitionTo(rec, 'steering_check'); } + + (rec as TurnStateRecord).work = undefined; } diff --git a/harness/src/turn-orchestrator/on-approval.ts b/harness/src/turn-orchestrator/on-approval.ts deleted file mode 100644 index 63c7208c..00000000 --- a/harness/src/turn-orchestrator/on-approval.ts +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Reactive approval wake. A `state` trigger on `scope: 'approvals'` filtered by - * the `/` decision key fires this adapter, which enqueues - * `turn::{state}` on the durable FIFO queue so the parked session re-reads its - * decisions in `function_awaiting_approval`. - * - * The decision write is produced by `approval::resolve` (approval-gate) — - * `state::set` `approvals// = { decision, reason }`. 
- */ - -import { TriggerAction, type ISdk } from '../runtime/iii.js'; -import { logger } from '../runtime/otel.js'; -import { ApprovalDecisionEventSchema, type ParsedApprovalDecisionWrite } from './schemas.js'; -import { TURN_STEP_QUEUE } from './state-runtime/store.js'; - -export function parseApprovalDecisionWrite(event: unknown): ParsedApprovalDecisionWrite | null { - const result = ApprovalDecisionEventSchema.safeParse(event); - return result.success ? result.data : null; -} - -export function isApprovalDecisionWrite(event: unknown): boolean { - return parseApprovalDecisionWrite(event) !== null; -} - -export async function execute(iii: ISdk, write: ParsedApprovalDecisionWrite): Promise { - try { - await iii.trigger({ - function_id: `turn::function_awaiting_approval`, - payload: { session_id: write.session_id }, - action: TriggerAction.Enqueue({ queue: TURN_STEP_QUEUE }), - }); - } catch (err) { - logger.warn('turn::on_approval: wake failed', { - session_id: write.session_id, - err: String(err), - }); - } -} - -export async function handleApprovalDecisionWrite(iii: ISdk, event: unknown): Promise { - const result = ApprovalDecisionEventSchema.safeParse(event); - if (!result.success) return; - await execute(iii, result.data); -} - -export function register(iii: ISdk): void { - iii.registerFunction( - 'turn::is_approval_decision', - async (event: unknown) => isApprovalDecisionWrite(event), - { - description: - 'Condition: state event writes a decision to approvals// (state:created or state:updated).', - }, - ); - - iii.registerFunction( - 'turn::on_approval', - async (event: unknown) => handleApprovalDecisionWrite(iii, event), - { - description: - 'State trigger adapter on scope=approvals for decision writes; enqueues turn::{state} so the parked session reads its decision.', - }, - ); - - iii.registerTrigger({ - type: 'state', - function_id: 'turn::on_approval', - config: { - scope: 'approvals', - condition_function_id: 'turn::is_approval_decision', - }, - }); -} 
-"" \ No newline at end of file diff --git a/harness/src/turn-orchestrator/register.ts b/harness/src/turn-orchestrator/register.ts index 40deab3d..1d6a4f48 100644 --- a/harness/src/turn-orchestrator/register.ts +++ b/harness/src/turn-orchestrator/register.ts @@ -7,7 +7,6 @@ import { register as registerFunctionAwaitingApproval } from './function-awaitin import { register as registerFunctionExecute } from './function-execute/process.js'; import { register as registerGetState } from './get-state.js'; import { register as registerRunStart } from './run-start.js'; -import { register as registerOnApproval } from './on-approval.js'; import { register as registerProvisioning } from './provisioning/process.js'; import { register as registerSteeringCheck } from './steering-check/process.js'; @@ -21,7 +20,6 @@ export async function register(iii: ISdk, ctx: { configPath: string }): Promise< registerFunctionAwaitingApproval(iii); registerSteeringCheck(iii); registerGetState(iii); - registerOnApproval(iii); void bootstrap.run(iii, orchestratorCfg); } diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index 87bd7b96..1721f80b 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -6,8 +6,18 @@ */ import { z } from 'zod'; -import type { AgentMessage } from '../types/agent-message.js'; -import type { AwaitingApprovalEntry, TurnState, TurnStateRecord } from './state.js'; +import type { AssistantMessage, AgentMessage } from '../types/agent-message.js'; +import { TurnStateInvariantError } from './errors.js'; +import type { FunctionBatchWork } from './function-execute/types.js'; +import { + FUNCTION_BATCH_STATES, + type AssistantStreamingTurnRecord, + type AwaitingApprovalEntry, + type FunctionBatchTurnRecord, + type SteeringCheckTurnRecord, + type TurnState, + type TurnStateRecord, +} from './state.js'; import type { Mode } from './system-prompt.js'; /** Shared `{ session_id }` payload — 
`turn::{state}` steps and `turn::get_state`. */ @@ -35,6 +45,104 @@ export type TurnStepResult = | { ok: true; from_state: TurnState; to_state: TurnState } | { ok: true; skipped: true; reason: 'stale' }; +// --- function_execute / function_awaiting_approval persisted record --- +const AwaitingApprovalEntrySchema = z.object({ + function_call_id: z.string().min(1), + function_id: z.string().min(1), + args: z.unknown(), +}); + +const FunctionBatchWorkSchema = z.custom( + (v) => + v != null && + typeof v === 'object' && + Array.isArray((v as FunctionBatchWork).prepared) && + typeof (v as FunctionBatchWork).executed === 'object' && + (v as FunctionBatchWork).executed !== null, + { message: 'work must include prepared and executed' }, +); + +const AssistantMessageSchema = z.custom( + (v) => v != null && typeof v === 'object' && (v as AssistantMessage).role === 'assistant', + { message: 'last_assistant is required' }, +); + +/** Fields required before function_execute / function_awaiting_approval handlers run. */ +export const FunctionBatchTurnRecordSchema = z + .object({ + session_id: z.string().min(1), + state: z.enum(FUNCTION_BATCH_STATES), + turn_count: z.number(), + function_results: z.array(z.unknown()), + turn_end_emitted: z.boolean(), + started_at_ms: z.number(), + updated_at_ms: z.number(), + last_assistant: AssistantMessageSchema, + work: FunctionBatchWorkSchema, + awaiting_approval: z.array(AwaitingApprovalEntrySchema), + }) + .passthrough(); + +function formatZodIssues(error: z.ZodError): string { + return error.issues.map((issue) => `${issue.path.join('.')}: ${issue.message}`).join('; '); +} + +/** Validate persisted turn_state for function-batch handlers; throws {@link TurnStateInvariantError}. 
*/ +export function parseFunctionBatchRecord(rec: TurnStateRecord): FunctionBatchTurnRecord { + const result = FunctionBatchTurnRecordSchema.safeParse(rec); + if (!result.success) { + throw new TurnStateInvariantError(`invalid function batch turn record: ${formatZodIssues(result.error)}`); + } + // Return the same object — handlers mutate turn_state in place before saveRecord. + return rec as FunctionBatchTurnRecord; +} + +/** Fields required before assistant_streaming handlers run. */ +export const AssistantStreamingTurnRecordSchema = z + .object({ + session_id: z.string().min(1), + state: z.literal('assistant_streaming'), + turn_count: z.number(), + function_results: z.array(z.unknown()), + turn_end_emitted: z.boolean(), + started_at_ms: z.number(), + updated_at_ms: z.number(), + }) + .passthrough(); + +/** Validate persisted turn_state for assistant_streaming; throws {@link TurnStateInvariantError}. */ +export function parseAssistantStreamingRecord(rec: TurnStateRecord): AssistantStreamingTurnRecord { + const result = AssistantStreamingTurnRecordSchema.safeParse(rec); + if (!result.success) { + throw new TurnStateInvariantError( + `invalid assistant_streaming turn record: ${formatZodIssues(result.error)}`, + ); + } + return rec as AssistantStreamingTurnRecord; +} + +/** Fields required before steering_check handlers run. */ +export const SteeringCheckTurnRecordSchema = z + .object({ + session_id: z.string().min(1), + state: z.literal('steering_check'), + turn_count: z.number(), + function_results: z.array(z.unknown()), + turn_end_emitted: z.boolean(), + started_at_ms: z.number(), + updated_at_ms: z.number(), + }) + .passthrough(); + +/** Validate persisted turn_state for steering_check; throws {@link TurnStateInvariantError}. 
*/ +export function parseSteeringCheckRecord(rec: TurnStateRecord): SteeringCheckTurnRecord { + const result = SteeringCheckTurnRecordSchema.safeParse(rec); + if (!result.success) { + throw new TurnStateInvariantError(`invalid steering_check turn record: ${formatZodIssues(result.error)}`); + } + return rec as SteeringCheckTurnRecord; +} + // --- turn::get_state --- export const GetStatePayloadSchema = SessionIdPayloadSchema; export type GetStatePayload = z.infer; @@ -63,7 +171,7 @@ export function toView(rec: TurnStateRecord): TurnStateView { export type GetStateResult = TurnStateView | null; -// --- turn::is_approval_decision / turn::on_approval (approvals-scope state event) --- +// --- turn::on_approval (approvals-scope state event) --- const ApprovalDecisionWriteEventSchema = z.object({ type: z.literal('state').optional(), scope: z.literal('approvals').optional(), @@ -77,4 +185,3 @@ export const ApprovalDecisionEventSchema = ApprovalDecisionWriteEventSchema.tran const session_id = data.key.slice(0, data.key.indexOf('/')); return { session_id }; }); -export type ParsedApprovalDecisionWrite = z.infer; diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index 33dba8de..b4551f79 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -23,6 +23,9 @@ export type TurnState = | 'stopped' | 'failed'; +export const FUNCTION_BATCH_STATES = ['function_execute', 'function_awaiting_approval'] as const; +export type FunctionBatchState = (typeof FUNCTION_BATCH_STATES)[number]; + export type AwaitingApprovalEntry = { function_call_id: string; function_id: string; @@ -34,23 +37,56 @@ export type TurnWork = FunctionBatchWork; export type { ExecutedCall, FunctionBatchWork, PreparedCall }; -export type TurnStateRecord = { +type TurnStateRecordCore = { session_id: string; - state: TurnState; turn_count: number; max_turns?: number; - last_assistant?: AssistantMessage | null; function_results: 
FunctionResultMessage[]; turn_end_emitted: boolean; started_at_ms: number; updated_at_ms: number; - awaiting_approval?: AwaitingApprovalEntry[]; /** Set during assistant_streaming when message_update deltas were emitted. */ assistant_body_streamed?: boolean; - work?: TurnWork; error?: { kind: string; message: string }; }; +/** Required fields while in function_execute or function_awaiting_approval. */ +export type FunctionBatchTurnRecord = TurnStateRecordCore & { + state: FunctionBatchState; + last_assistant: AssistantMessage; + work: TurnWork; + awaiting_approval: AwaitingApprovalEntry[]; +}; + +/** Persisted shape while in assistant_streaming (last_assistant set mid-handler). */ +export type AssistantStreamingTurnRecord = TurnStateRecordCore & { + state: 'assistant_streaming'; + last_assistant?: AssistantMessage | null; + work?: TurnWork; + awaiting_approval?: AwaitingApprovalEntry[]; +}; + +/** Persisted shape while in steering_check (work cleared on entry from function batch). */ +export type SteeringCheckTurnRecord = TurnStateRecordCore & { + state: 'steering_check'; + last_assistant?: AssistantMessage | null; + work?: TurnWork; + awaiting_approval?: AwaitingApprovalEntry[]; +}; + +type OtherTurnState = Exclude; + +export type TurnStateRecord = + | FunctionBatchTurnRecord + | AssistantStreamingTurnRecord + | SteeringCheckTurnRecord + | (TurnStateRecordCore & { + state: OtherTurnState; + last_assistant?: AssistantMessage | null; + work?: TurnWork; + awaiting_approval?: AwaitingApprovalEntry[]; + }); + const TURN_STATES = [ 'provisioning', 'assistant_streaming', diff --git a/harness/src/turn-orchestrator/steering-check/process.ts b/harness/src/turn-orchestrator/steering-check/process.ts index 5b95b22b..7df1b7b4 100644 --- a/harness/src/turn-orchestrator/steering-check/process.ts +++ b/harness/src/turn-orchestrator/steering-check/process.ts @@ -3,119 +3,16 @@ */ import type { ISdk } from '../../runtime/iii.js'; -import type { AgentMessage } from 
'../../types/agent-message.js'; import { runTransition } from '../run-transition.js'; -import { TurnStepPayloadSchema, type TurnStepPayload } from '../schemas.js'; -import { syntheticAssistant } from '../synthetic-assistant.js'; -import { emitTurnEndOnce, resumeToAssistantStreaming } from '../state-runtime/turn-end.js'; -import { type TurnStateRecord } from '../state.js'; -import { createSteeringCheckPorts, type SteeringCheckPorts } from './ports.js'; - -export type SteeringRoute = 'steering' | 'followup' | 'continue_after_function' | 'end_turn'; - -export type SteeringCheckOutcome = - | { kind: 'max_turns_reached' } - | { kind: 'resume_with_inbox'; inbox: AgentMessage[] } - | { kind: 'continue_after_function' } - | { kind: 'end_turn' }; - -export function route( - has_steering: boolean, - has_followup: boolean, - has_function_results: boolean, -): SteeringRoute { - if (has_steering) return 'steering'; - if (has_followup) return 'followup'; - if (has_function_results) return 'continue_after_function'; - return 'end_turn'; -} - -function maxTurnsReached(rec: TurnStateRecord): boolean { - return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; -} - -async function endForMaxTurns(ports: SteeringCheckPorts, rec: TurnStateRecord): Promise { - const msg = syntheticAssistant({ - stop_reason: 'end', - text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, - }); - rec.last_assistant = msg; - await ports.appendMessages(rec.session_id, [msg]); - await ports.emit(rec.session_id, { - type: 'message_complete', - message: msg, - body_streamed: false, - }); - await emitTurnEndOnce(ports, rec, msg); - await ports.finishSession(rec); -} - -export async function processSteeringCheck( - ports: SteeringCheckPorts, - rec: TurnStateRecord, -): Promise { - const steering = await ports.drainInbox('steering', rec.session_id); - const followup = steering.length > 0 ? 
[] : await ports.drainInbox('followup', rec.session_id); - - const decision = route(steering.length > 0, followup.length > 0, rec.function_results.length > 0); - - if ( - (decision === 'steering' || - decision === 'followup' || - decision === 'continue_after_function') && - maxTurnsReached(rec) - ) { - return { kind: 'max_turns_reached' }; - } - - switch (decision) { - case 'steering': - return { kind: 'resume_with_inbox', inbox: steering }; - case 'followup': - return { kind: 'resume_with_inbox', inbox: followup }; - case 'continue_after_function': - return { kind: 'continue_after_function' }; - case 'end_turn': - return { kind: 'end_turn' }; - } -} - -export async function applySteeringCheckOutcome( - ports: SteeringCheckPorts, - rec: TurnStateRecord, - outcome: SteeringCheckOutcome, -): Promise { - switch (outcome.kind) { - case 'max_turns_reached': - await endForMaxTurns(ports, rec); - return; - case 'resume_with_inbox': { - await emitTurnEndOnce(ports, rec); - await ports.appendMessages(rec.session_id, outcome.inbox); - resumeToAssistantStreaming(rec); - return; - } - case 'continue_after_function': - resumeToAssistantStreaming(rec); - return; - case 'end_turn': - await emitTurnEndOnce(ports, rec); - await ports.finishSession(rec); - return; - } -} - -export async function runSteeringCheck( - ports: SteeringCheckPorts, - rec: TurnStateRecord, -): Promise { - const outcome = await processSteeringCheck(ports, rec); - await applySteeringCheckOutcome(ports, rec, outcome); -} +import { TurnStepPayloadSchema, parseSteeringCheckRecord, type TurnStepPayload } from '../schemas.js'; +import type { TurnStateRecord } from '../state.js'; +import { createSteeringCheckPorts } from './ports.js'; +import { runSteeringCheck } from './run.js'; export async function handleSteering(iii: ISdk, rec: TurnStateRecord): Promise { + const steering = parseSteeringCheckRecord(rec); const ports = createSteeringCheckPorts(iii); - await runSteeringCheck(ports, rec); + await 
runSteeringCheck(ports, steering); } export function register(iii: ISdk): void { diff --git a/harness/src/turn-orchestrator/steering-check/run.ts b/harness/src/turn-orchestrator/steering-check/run.ts new file mode 100644 index 00000000..b6777a3b --- /dev/null +++ b/harness/src/turn-orchestrator/steering-check/run.ts @@ -0,0 +1,111 @@ +/** + * Drain inboxes, route steering_check outcomes, and apply transitions. + */ + +import type { AgentMessage } from '../../types/agent-message.js'; +import { syntheticAssistant } from '../synthetic-assistant.js'; +import { emitTurnEndOnce, resumeToAssistantStreaming } from '../state-runtime/turn-end.js'; +import type { SteeringCheckTurnRecord } from '../state.js'; +import type { SteeringCheckPorts } from './ports.js'; + +export type SteeringRoute = 'steering' | 'followup' | 'continue_after_function' | 'end_turn'; + +export type SteeringCheckOutcome = + | { kind: 'max_turns_reached' } + | { kind: 'resume_with_inbox'; inbox: AgentMessage[] } + | { kind: 'continue_after_function' } + | { kind: 'end_turn' }; + +export function route( + has_steering: boolean, + has_followup: boolean, + has_function_results: boolean, +): SteeringRoute { + if (has_steering) return 'steering'; + if (has_followup) return 'followup'; + if (has_function_results) return 'continue_after_function'; + return 'end_turn'; +} + +function maxTurnsReached(rec: SteeringCheckTurnRecord): boolean { + return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; +} + +async function endForMaxTurns(ports: SteeringCheckPorts, rec: SteeringCheckTurnRecord): Promise { + const msg = syntheticAssistant({ + stop_reason: 'end', + text: `loop stopped: max_turns (${rec.max_turns ?? 
0}) reached`, + }); + rec.last_assistant = msg; + await ports.appendMessages(rec.session_id, [msg]); + await ports.emit(rec.session_id, { + type: 'message_complete', + message: msg, + body_streamed: false, + }); + await emitTurnEndOnce(ports, rec, msg); + await ports.finishSession(rec); +} + +export async function processSteeringCheck( + ports: SteeringCheckPorts, + rec: SteeringCheckTurnRecord, +): Promise { + const steering = await ports.drainInbox('steering', rec.session_id); + const followup = steering.length > 0 ? [] : await ports.drainInbox('followup', rec.session_id); + + const decision = route(steering.length > 0, followup.length > 0, rec.function_results.length > 0); + + if ( + (decision === 'steering' || + decision === 'followup' || + decision === 'continue_after_function') && + maxTurnsReached(rec) + ) { + return { kind: 'max_turns_reached' }; + } + + switch (decision) { + case 'steering': + return { kind: 'resume_with_inbox', inbox: steering }; + case 'followup': + return { kind: 'resume_with_inbox', inbox: followup }; + case 'continue_after_function': + return { kind: 'continue_after_function' }; + case 'end_turn': + return { kind: 'end_turn' }; + } +} + +export async function applySteeringCheckOutcome( + ports: SteeringCheckPorts, + rec: SteeringCheckTurnRecord, + outcome: SteeringCheckOutcome, +): Promise { + switch (outcome.kind) { + case 'max_turns_reached': + await endForMaxTurns(ports, rec); + return; + case 'resume_with_inbox': { + await emitTurnEndOnce(ports, rec); + await ports.appendMessages(rec.session_id, outcome.inbox); + resumeToAssistantStreaming(rec); + return; + } + case 'continue_after_function': + resumeToAssistantStreaming(rec); + return; + case 'end_turn': + await emitTurnEndOnce(ports, rec); + await ports.finishSession(rec); + return; + } +} + +export async function runSteeringCheck( + ports: SteeringCheckPorts, + rec: SteeringCheckTurnRecord, +): Promise { + const outcome = await processSteeringCheck(ports, rec); + await 
applySteeringCheckOutcome(ports, rec, outcome); +} diff --git a/harness/tests/approval-gate/_helpers/fakeIii.ts b/harness/tests/approval-gate/_helpers/fakeIii.ts index 22c159c4..d1fe0f96 100644 --- a/harness/tests/approval-gate/_helpers/fakeIii.ts +++ b/harness/tests/approval-gate/_helpers/fakeIii.ts @@ -7,7 +7,7 @@ import type { ISdk } from 'iii-sdk'; import { vi } from 'vitest'; -export type TriggerCall = { function_id: string; payload: unknown }; +export type TriggerCall = { function_id: string; payload: unknown; action?: unknown }; export type FakeIii = { iii: ISdk; @@ -20,8 +20,17 @@ export function fakeIii(): FakeIii { const streamSets: unknown[] = []; const iii = { - trigger: vi.fn(async ({ function_id, payload }: { function_id: string; payload: unknown }) => { - calls.push({ function_id, payload }); + trigger: vi.fn( + async ({ + function_id, + payload, + action, + }: { + function_id: string; + payload: unknown; + action?: unknown; + }) => { + calls.push({ function_id, payload, action }); if (function_id === 'stream::set') { streamSets.push(payload); } diff --git a/harness/tests/integration/parallel-approval-harness.ts b/harness/tests/integration/parallel-approval-harness.ts index da04849a..625cfa01 100644 --- a/harness/tests/integration/parallel-approval-harness.ts +++ b/harness/tests/integration/parallel-approval-harness.ts @@ -5,13 +5,13 @@ import { vi } from 'vitest'; import { handleResolveRequest } from '../../src/approval-gate/resolve.js'; -import { handleAwaitingApproval } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; +import { + handleApprovalStateWrite, + handleAwaitingApproval, +} from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; +import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; -import { 
- handleApprovalDecisionWrite, - isApprovalDecisionWrite, -} from '../../src/turn-orchestrator/on-approval.js'; import { TURN_STATE_SCOPE, newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; import type { ISdk } from '../../src/runtime/iii.js'; import type { AgentEvent } from '../../src/types/agent-event.js'; @@ -107,16 +107,16 @@ export function createParallelApprovalHarness(): ParallelApprovalHarness { : null; const new_value = structuredClone(p.value); stateStore.set(storeKey, new_value); - const event = { - event_type: old_value == null ? 'state:created' : 'state:updated', - scope: p.scope, - key: p.key, - old_value, - new_value, - message_type: 'state', - }; - if (p.scope === 'approvals' && isApprovalDecisionWrite(event)) { - await handleApprovalDecisionWrite(iii as unknown as ISdk, event); + if (p.scope === 'approvals') { + const event = { + event_type: old_value == null ? 'state:created' : 'state:updated', + scope: p.scope, + key: p.key, + old_value, + new_value, + message_type: 'state', + }; + await handleApprovalStateWrite(iii as unknown as ISdk, event); } return { old_value, new_value }; } @@ -163,8 +163,8 @@ export function createParallelApprovalHarness(): ParallelApprovalHarness { seedExecute(session_id: string, assistant: AssistantMessage): TurnStateRecord { const rec = newRecord(session_id); + enterFunctionExecute(rec, assistant); rec.state = 'function_execute'; - rec.last_assistant = assistant; stateStore.set(`${TURN_STATE_SCOPE}/${session_id}`, structuredClone(rec)); return rec; }, diff --git a/harness/tests/integration/parallel-approval.e2e.test.ts b/harness/tests/integration/parallel-approval.e2e.test.ts index 33da3ba6..73d112d2 100644 --- a/harness/tests/integration/parallel-approval.e2e.test.ts +++ b/harness/tests/integration/parallel-approval.e2e.test.ts @@ -155,7 +155,7 @@ describe('parallel approval e2e', () => { expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(endsAfterFirst); }); - 
it('persists the decision and wakes function_awaiting_approval via the reactive trigger', async () => { + it('persists the decision and wakes function_awaiting_approval via approval::resolve', async () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); diff --git a/harness/tests/turn-orchestrator/assistant-streaming.test.ts b/harness/tests/turn-orchestrator/assistant-streaming.test.ts index 2783e36e..ff55b9c1 100644 --- a/harness/tests/turn-orchestrator/assistant-streaming.test.ts +++ b/harness/tests/turn-orchestrator/assistant-streaming.test.ts @@ -5,7 +5,7 @@ import { resolveAssistantMessage, routeAssistantTurn, syntheticStreamReason, -} from '../../src/turn-orchestrator/assistant-streaming/process.js'; +} from '../../src/turn-orchestrator/assistant-streaming/run.js'; import { parseFunctionSchemas, type AssistantStreamingPorts, @@ -77,6 +77,7 @@ describe('prepareStreamContext', () => { runPreflight: vi.fn(async () => 'compacted'), }); const rec = newRecord('s1'); + rec.state = 'assistant_streaming'; const ctx = await prepareStreamContext(ports, rec); @@ -133,9 +134,10 @@ describe('finalizeAssistantTurn', () => { it('stops without persisting on error assistant', async () => { const ports = stubStreamingPorts(); const rec = newRecord('s1'); - rec.last_assistant = assistant({ stop_reason: 'error', error_message: 'auth failed' }); + rec.state = 'assistant_streaming'; + const asst = assistant({ stop_reason: 'error', error_message: 'auth failed' }); - await finalizeAssistantTurn(ports, rec); + await finalizeAssistantTurn(ports, rec, asst); expect(rec.state).toBe('stopped'); expect(rec.turn_end_emitted).toBe(true); @@ -145,15 +147,16 @@ describe('finalizeAssistantTurn', () => { it('persists and routes to function_execute when calls exist', async () => { const ports = stubStreamingPorts(); const rec = newRecord('s1'); - rec.last_assistant = assistant({ + rec.state = 
'assistant_streaming'; + const asst = assistant({ content: [{ type: 'function_call', id: 'fc-1', function_id: 'shell::run', arguments: {} }], }); - await finalizeAssistantTurn(ports, rec); + await finalizeAssistantTurn(ports, rec, asst); expect(ports.persistAssistantIfNew).toHaveBeenCalledOnce(); expect(rec.state).toBe('function_execute'); - expect(rec.work).toBeUndefined(); + expect(rec.work?.prepared).toHaveLength(1); expect(rec.function_results).toEqual([]); }); }); diff --git a/harness/tests/turn-orchestrator/assistant.test.ts b/harness/tests/turn-orchestrator/assistant.test.ts index b2e5cd21..b70e61e6 100644 --- a/harness/tests/turn-orchestrator/assistant.test.ts +++ b/harness/tests/turn-orchestrator/assistant.test.ts @@ -151,7 +151,7 @@ describe('handleStreaming', () => { expect(rec.state).toBe('function_execute'); expect(rec.last_assistant).toEqual(finalMsg); expect(rec.function_results).toEqual([]); - expect(rec.work).toBeUndefined(); + expect(rec.work?.prepared).toHaveLength(1); }); it('routes to steering_check when the assistant made no calls', async () => { diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts new file mode 100644 index 00000000..77675055 --- /dev/null +++ b/harness/tests/turn-orchestrator/function-awaiting-approval-state-trigger.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it, vi } from 'vitest'; +import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; +import { handleApprovalStateWrite } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; +import { ApprovalDecisionEventSchema } from '../../src/turn-orchestrator/schemas.js'; + +const matchingEvent = { + event_type: 'state:created' as const, + scope: 'approvals' as const, + key: 'sess-abc/fc-1', + old_value: null, + new_value: { decision: 'allow', reason: null }, + message_type: 'state', +}; + 
+describe('ApprovalDecisionEventSchema', () => { + it('extracts session_id from the / key', () => { + expect(ApprovalDecisionEventSchema.parse(matchingEvent)).toEqual({ session_id: 'sess-abc' }); + }); +}); + +describe('handleApprovalStateWrite', () => { + it('enqueues turn::function_awaiting_approval on a decision write', async () => { + const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; + const iii = { + trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { + triggers.push(req); + return null; + }), + } as unknown as ISdk; + + await handleApprovalStateWrite(iii, matchingEvent); + + expect(triggers).toHaveLength(1); + expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); + expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); + expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); + }); + + it('no-ops on a non-matching event', async () => { + const iii = { trigger: vi.fn() } as unknown as ISdk; + await handleApprovalStateWrite(iii, { ...matchingEvent, new_value: { reason: 'x' } }); + expect(iii.trigger).not.toHaveBeenCalled(); + }); +}); diff --git a/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts new file mode 100644 index 00000000..4368ea46 --- /dev/null +++ b/harness/tests/turn-orchestrator/function-awaiting-approval.test.ts @@ -0,0 +1,180 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import type { ISdk } from '../../src/runtime/iii.js'; +import * as events from '../../src/turn-orchestrator/events.js'; +import { installMockTurnStore } from './_helpers/mockTurnStore.js'; +import { + applyDecisionToPrepared, + denialResultFromDecision, +} from '../../src/turn-orchestrator/function-awaiting-approval/run.js'; +import { handleAwaitingApproval } from '../../src/turn-orchestrator/function-awaiting-approval/process.js'; 
+import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; +import type { FunctionBatchWork } from '../../src/turn-orchestrator/function-execute/types.js'; +import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; + +afterEach(() => { + vi.restoreAllMocks(); +}); + +function makeAssistant( + calls: Array<{ id: string; function_id: string; arguments?: unknown }> = [], +): AssistantMessage { + return { + role: 'assistant', + content: calls.map((c) => ({ + type: 'function_call' as const, + id: c.id, + function_id: c.function_id, + arguments: c.arguments ?? {}, + })), + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; +} + +function seedFunctionAwaitingApproval( + rec: TurnStateRecord, + work: FunctionBatchWork, + awaiting: Array<{ function_call_id: string; function_id: string; args?: unknown }>, + asst?: AssistantMessage, +): void { + enterFunctionExecute(rec, asst ?? makeAssistant()); + rec.work = work; + rec.awaiting_approval = awaiting.map((e) => ({ + function_call_id: e.function_call_id, + function_id: e.function_id, + args: e.args ?? {}, + })); + rec.state = 'function_awaiting_approval'; +} + +function makeIii(approvalStore: Map): ISdk { + return { + trigger: vi.fn(async (req: { function_id: string; payload: unknown }) => { + if (req.function_id === 'state::get') { + const p = req.payload as { scope: string; key: string }; + return approvalStore.get(`${p.scope}/${p.key}`) ?? 
null; + } + if (req.function_id === 'state::update') return { old_value: 0 }; + if (req.function_id === 'stream::set') return null; + if (req.function_id === 'shell::run') { + return { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }; + } + return null; + }), + } as unknown as ISdk; +} + +describe('applyDecisionToPrepared', () => { + const dispatchCall = { + route: 'dispatch' as const, + call: { id: 'fc-1', function_id: 'shell::run', arguments: {} }, + }; + + it('maps allow to pre_approved', () => { + expect(applyDecisionToPrepared(dispatchCall, { decision: 'allow', reason: null })).toEqual({ + route: 'pre_approved', + call: dispatchCall.call, + }); + }); + + it('maps deny to synthetic denial result', () => { + const resolved = applyDecisionToPrepared(dispatchCall, { decision: 'deny', reason: 'policy' }); + expect(resolved.route).toBe('synthetic'); + expect(resolved).toMatchObject({ + result: denialResultFromDecision({ decision: 'deny', reason: 'policy' }), + }); + }); +}); + +describe('handleAwaitingApproval', () => { + it('executes allow decision and finalizes when batch completes', async () => { + const approvalStore = new Map(); + approvalStore.set('approvals/s1/fc-1', { decision: 'allow', reason: null }); + const iii = makeIii(approvalStore); + const rec = newRecord('s1'); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }; + seedFunctionAwaitingApproval( + rec, + { prepared: [{ route: 'dispatch', call: fc }], executed: {} }, + [{ function_call_id: 'fc-1', function_id: 'shell::run' }], + ); + + installMockTurnStore({ + loadMessages: vi.fn(async () => []), + appendMessages: vi.fn(async () => {}), + }); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleAwaitingApproval(iii, rec); + + expect(rec.awaiting_approval).toEqual([]); + expect(rec.state).toBe('steering_check'); + expect(rec.work).toBeUndefined(); + expect(rec.function_results).toHaveLength(1); + }); + + 
it('leaves state parked when awaiting entries remain undecided', async () => { + const iii = makeIii(new Map()); + const rec = newRecord('s1'); + const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + seedFunctionAwaitingApproval( + rec, + { prepared: [{ route: 'dispatch', call: fc }], executed: {} }, + [{ function_call_id: 'fc-1', function_id: 'shell::run' }], + ); + installMockTurnStore(); + + await handleAwaitingApproval(iii, rec); + + expect(rec.state).toBe('function_awaiting_approval'); + expect(rec.awaiting_approval).toHaveLength(1); + }); + + it('returns to function_execute when approvals done but batch incomplete', async () => { + const approvalStore = new Map(); + approvalStore.set('approvals/s1/fc-2', { decision: 'deny', reason: null }); + const iii = makeIii(approvalStore); + const rec = newRecord('s1'); + const fc1 = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + const fc2 = { id: 'fc-2', function_id: 'shell::run', arguments: {} }; + const fc3 = { id: 'fc-3', function_id: 'shell::run', arguments: {} }; + seedFunctionAwaitingApproval( + rec, + { + prepared: [ + { route: 'dispatch', call: fc1 }, + { route: 'dispatch', call: fc2 }, + { route: 'dispatch', call: fc3 }, + ], + executed: { + 'fc-1': { + call: fc1, + result: { content: [{ type: 'text' as const, text: 'ok' }], details: {} }, + is_error: false, + duration_ms: 1, + }, + }, + }, + [{ function_call_id: 'fc-2', function_id: 'shell::run' }], + ); + installMockTurnStore(); + vi.spyOn(events, 'emit').mockResolvedValue(undefined); + + await handleAwaitingApproval(iii, rec); + + expect(rec.state).toBe('function_execute'); + expect(rec.awaiting_approval).toEqual([]); + expect(rec.work?.executed['fc-2']).toBeDefined(); + expect(rec.work?.executed['fc-3']).toBeUndefined(); + }); +}); diff --git a/harness/tests/turn-orchestrator/function-execute.test.ts b/harness/tests/turn-orchestrator/function-execute.test.ts index dddd75c3..fa6fc90b 100644 --- 
a/harness/tests/turn-orchestrator/function-execute.test.ts +++ b/harness/tests/turn-orchestrator/function-execute.test.ts @@ -4,10 +4,8 @@ import { unwrapAgentTrigger, } from '../../src/turn-orchestrator/agent-trigger.js'; import { + enterFunctionExecute, finalizeBatch, - FunctionExecuteInvariantError, - loadOrPlanWork, - planBatchFromAssistant, runOneCall, } from '../../src/turn-orchestrator/function-execute/run.js'; import { withRoutingEnvelope } from '../../src/turn-orchestrator/function-execute/ports.js'; @@ -60,9 +58,15 @@ function stubPorts(overrides: Partial = {}): FunctionExecu }; } -describe('planBatchFromAssistant', () => { +function preparedFromAssistant(asst: AssistantMessage) { + const rec = newRecord('s1'); + enterFunctionExecute(rec, asst); + return rec.work!.prepared; +} + +describe('batch planning from assistant', () => { it('unwraps agent_trigger and maps empty function_id to synthetic', () => { - const batch = planBatchFromAssistant( + const batch = preparedFromAssistant( makeAssistant([ { id: 'fc-1', @@ -88,7 +92,7 @@ describe('planBatchFromAssistant', () => { }); it('maps non-agent_trigger function_id to synthetic error', () => { - const batch = planBatchFromAssistant( + const batch = preparedFromAssistant( makeAssistant([{ id: 'fc-1', function_id: 'shell::run', arguments: { command: 'ls' } }]), ); expect(batch[0]).toMatchObject({ @@ -112,14 +116,6 @@ describe('withRoutingEnvelope', () => { }); }); -describe('loadOrPlanWork', () => { - it('throws when work and last_assistant are both missing', () => { - const rec = newRecord('s1'); - rec.state = 'function_execute'; - expect(() => loadOrPlanWork(rec)).toThrow(FunctionExecuteInvariantError); - }); -}); - describe('runOneCall', () => { it('replays end event only when call id is already executed', async () => { const ports = stubPorts(); @@ -159,10 +155,11 @@ describe('finalizeBatch', () => { it('routes to stopped when every result terminates', async () => { const ports = stubPorts(); const rec = 
newRecord('s1'); - rec.state = 'function_execute'; const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; + enterFunctionExecute(rec, makeAssistant([fc])); + rec.state = 'function_execute'; - await finalizeBatch(ports, rec, { + rec.work = { prepared: [{ route: 'dispatch', call: fc }], executed: { 'fc-1': { @@ -176,7 +173,8 @@ describe('finalizeBatch', () => { duration_ms: 1, }, }, - }); + }; + await finalizeBatch(ports, rec); expect(rec.state).toBe('stopped'); expect(ports.finishSession).toHaveBeenCalledOnce(); @@ -200,9 +198,10 @@ describe('finalizeBatch', () => { appendMessages, }); const rec = newRecord('s1'); + enterFunctionExecute(rec, makeAssistant([fc])); rec.state = 'function_execute'; - await finalizeBatch(ports, rec, { + rec.work = { prepared: [{ route: 'dispatch', call: fc }], executed: { 'fc-1': { @@ -212,7 +211,8 @@ describe('finalizeBatch', () => { duration_ms: 1, }, }, - }); + }; + await finalizeBatch(ports, rec); expect(appendMessages).not.toHaveBeenCalled(); expect(rec.state).toBe('steering_check'); diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 7ff99fa6..6f399928 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -8,6 +8,8 @@ import { newRecord } from '../../src/turn-orchestrator/state.js'; import * as agentTriggerModule from '../../src/turn-orchestrator/agent-trigger.js'; import { parseApprovalDecision } from '../../src/turn-orchestrator/function-awaiting-approval/ports.js'; import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; +import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; +import type { FunctionBatchWork } from '../../src/turn-orchestrator/function-execute/types.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; afterEach(() => { @@ -76,6 +78,13 @@ describe('parseApprovalDecision', 
() => { }); }); +/** Seed required function-batch invariants before handleExecute. */ +function seedFunctionExecute(rec: TurnStateRecord, work: FunctionBatchWork, asst?: AssistantMessage): void { + enterFunctionExecute(rec, asst ?? makeAssistant([])); + rec.work = work; + rec.state = 'function_execute'; +} + /** Wrap a target function id in the agent_trigger envelope (production shape). */ function agentTriggerCall( id: string, @@ -86,7 +95,7 @@ function agentTriggerCall( } describe('handleExecute new flow', () => { - it('builds work.prepared from last_assistant when work is absent', async () => { + it('runs the prepared batch from work', async () => { vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'result', result: { @@ -97,13 +106,11 @@ describe('handleExecute new flow', () => { }); const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); + enterFunctionExecute(rec, makeAssistant([agentTriggerCall('fc-1', 'shell::run', { command: 'ls' })])); rec.state = 'function_execute'; - rec.last_assistant = makeAssistant([agentTriggerCall('fc-1', 'shell::run', { command: 'ls' })]); mockFinalizePersistence(); await handleExecute(iii, rec); - - expect(rec.work).toBeUndefined(); expect(rec.state).toBe('steering_check'); expect(rec.function_results).toHaveLength(1); expect(rec.function_results[0]?.function_call_id).toBe('fc-1'); @@ -112,9 +119,8 @@ describe('handleExecute new flow', () => { it('finishes the session when every function result terminates', async () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; - rec.work = { + seedFunctionExecute(rec, { prepared: [{ route: 'dispatch', call: fc }], executed: { 'fc-1': { @@ -128,7 +134,7 @@ describe('handleExecute new flow', () => { duration_ms: 1, }, }, 
- }; + }); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -147,10 +153,9 @@ describe('handleExecute new flow', () => { }); const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; const fc1 = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; const fc2 = { id: 'fc-2', function_id: 'shell::run', arguments: {} }; - rec.work = { + seedFunctionExecute(rec, { prepared: [ { route: 'dispatch', call: fc1 }, { route: 'dispatch', call: fc2 }, @@ -167,7 +172,7 @@ describe('handleExecute new flow', () => { duration_ms: 5, }, }, - }; + }); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -186,8 +191,7 @@ describe('handleExecute new flow', () => { const triggerSpy = vi.fn().mockResolvedValue({ ok: true }); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; - rec.work = { + seedFunctionExecute(rec, { prepared: [ { route: 'pre_approved', @@ -195,7 +199,7 @@ describe('handleExecute new flow', () => { }, ], executed: {}, - }; + }); const consultBeforeSpy = vi.spyOn(hookModule, 'consultBefore'); mockFinalizePersistence(); @@ -217,8 +221,7 @@ describe('handleExecute new flow', () => { }); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; - rec.work = { + seedFunctionExecute(rec, { prepared: [ { route: 'pre_approved', @@ -230,7 +233,7 @@ describe('handleExecute new flow', () => { }, ], executed: {}, - }; + }); mockFinalizePersistence(); await expect(handleExecute(iii, rec)).resolves.toBeUndefined(); @@ -249,14 +252,12 @@ describe('handleExecute new flow', () => { const triggerSpy = vi.fn().mockResolvedValue(null); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); - rec.state = 'function_execute'; - const denial = { content: [{ type: 
'text' as const, text: 'denied' }], details: { approval_denied: true, decision: 'deny' as const }, terminate: false, }; - rec.work = { + seedFunctionExecute(rec, { prepared: [ { route: 'synthetic', @@ -265,7 +266,7 @@ describe('handleExecute new flow', () => { }, ], executed: {}, - }; + }); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -281,15 +282,13 @@ describe('handleExecute new flow', () => { const triggerSpy = vi.fn().mockResolvedValue(null); const iii = { trigger: triggerSpy } as unknown as ISdk; const rec = newRecord('s1'); - rec.state = 'function_execute'; - const existingResult = { content: [{ type: 'text' as const, text: 'cached' }], details: {}, terminate: false, }; const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; - rec.work = { + seedFunctionExecute(rec, { prepared: [{ route: 'dispatch', call: fc }], executed: { 'fc-1': { @@ -299,7 +298,7 @@ describe('handleExecute new flow', () => { duration_ms: 42, }, }, - }; + }); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -320,7 +319,7 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; - rec.last_assistant = makeAssistant([agentTriggerCall('fc-1', 'shell::run')]); + enterFunctionExecute(rec, makeAssistant([agentTriggerCall('fc-1', 'shell::run')])); mockFinalizePersistence(); await handleExecute(iii, rec); @@ -328,46 +327,10 @@ describe('handleExecute new flow', () => { expect(rec.state).toBe('steering_check'); }); - it('transitions to steering_check when last_assistant is missing after execute (with pre-populated work)', async () => { - const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; - const rec = newRecord('s1'); - rec.state = 'function_execute'; - rec.last_assistant = null; - - const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; - rec.work = { - prepared: [{ route: 'dispatch', call: fc 
}], - executed: { - 'fc-1': { - call: fc, - result: { - content: [{ type: 'text' as const, text: 'ok' }], - details: {}, - terminate: false, - }, - is_error: false, - duration_ms: 1, - }, - }, - }; - installMockTurnStore({ - loadMessages: vi.fn(async () => []), - appendMessages: vi.fn(async () => {}), - }); - const emitSpy = vi.spyOn(events, 'emit').mockResolvedValue(undefined); - - await handleExecute(iii, rec); - - expect(rec.state).toBe('steering_check'); - expect(rec.function_results).toHaveLength(1); - expect(emitSpy.mock.calls.some((call) => call[2]?.type === 'turn_end')).toBe(false); - }); - it('emits turn lifecycle and sets turn_end_emitted when last_assistant is present', async () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); - rec.state = 'function_execute'; - rec.last_assistant = { + const asst: AssistantMessage = { role: 'assistant', content: [{ type: 'text', text: 'done' }], stop_reason: 'end', @@ -378,23 +341,26 @@ describe('handleExecute new flow', () => { provider: 'p', timestamp: 1, }; - const fc = { id: 'fc-1', function_id: 'shell::run', arguments: {} }; - rec.work = { - prepared: [{ route: 'dispatch', call: fc }], - executed: { - 'fc-1': { - call: fc, - result: { - content: [{ type: 'text' as const, text: 'ok' }], - details: {}, - terminate: false, + seedFunctionExecute( + rec, + { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + 'fc-1': { + call: fc, + result: { + content: [{ type: 'text' as const, text: 'ok' }], + details: {}, + terminate: false, + }, + is_error: false, + duration_ms: 1, }, - is_error: false, - duration_ms: 1, }, }, - }; + asst, + ); installMockTurnStore({ loadMessages: vi.fn(async () => []), appendMessages: vi.fn(async () => {}), @@ -419,9 +385,10 @@ describe('handleExecute new flow', () => { const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec = newRecord('s1'); rec.state = 'function_execute'; - rec.last_assistant 
= makeAssistant([ - agentTriggerCall('toolu_01', 'shell::run', { command: 'ls' }), - ]); + enterFunctionExecute( + rec, + makeAssistant([agentTriggerCall('toolu_01', 'shell::run', { command: 'ls' })]), + ); let storedMessages: unknown[] = []; installMockTurnStore({ @@ -438,19 +405,23 @@ describe('handleExecute new flow', () => { await handleExecute(iii, rec); - rec.state = 'function_execute'; - rec.turn_end_emitted = false; - rec.work = { - prepared: [{ route: 'dispatch', call: fc }], - executed: { - toolu_01: { - call: fc, - result: existingResult, - is_error: false, - duration_ms: 5, + const asst = makeAssistant([agentTriggerCall('toolu_01', 'shell::run', { command: 'ls' })]); + seedFunctionExecute( + rec, + { + prepared: [{ route: 'dispatch', call: fc }], + executed: { + toolu_01: { + call: fc, + result: existingResult, + is_error: false, + duration_ms: 5, + }, }, }, - }; + asst, + ); + rec.turn_end_emitted = false; await handleExecute(iii, rec); diff --git a/harness/tests/turn-orchestrator/on-approval.test.ts b/harness/tests/turn-orchestrator/on-approval.test.ts deleted file mode 100644 index 44e96463..00000000 --- a/harness/tests/turn-orchestrator/on-approval.test.ts +++ /dev/null @@ -1,126 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; -import { TriggerAction, type ISdk } from '../../src/runtime/iii.js'; -import { - execute, - handleApprovalDecisionWrite, - isApprovalDecisionWrite, - parseApprovalDecisionWrite, -} from '../../src/turn-orchestrator/on-approval.js'; -import { ApprovalDecisionEventSchema } from '../../src/turn-orchestrator/schemas.js'; -import { newRecord } from '../../src/turn-orchestrator/state.js'; - -const matchingEvent = { - event_type: 'state:created' as const, - scope: 'approvals' as const, - key: 'sess-abc/fc-1', - old_value: null, - new_value: { decision: 'allow', reason: null }, - message_type: 'state', -}; - -describe('ApprovalDecisionEventSchema', () => { - it('extracts session_id from the / key', () => { - 
expect(ApprovalDecisionEventSchema.parse(matchingEvent)).toEqual({ session_id: 'sess-abc' }); - }); - - it('accepts deny and aborted decisions', () => { - expect( - ApprovalDecisionEventSchema.parse({ - ...matchingEvent, - new_value: { decision: 'deny', reason: 'policy' }, - }), - ).toEqual({ session_id: 'sess-abc' }); - expect( - ApprovalDecisionEventSchema.parse({ - ...matchingEvent, - new_value: { decision: 'aborted', reason: 'x' }, - }), - ).toEqual({ session_id: 'sess-abc' }); - }); - - it('rejects values without a decision', () => { - expect(() => - ApprovalDecisionEventSchema.parse({ ...matchingEvent, new_value: { reason: 'x' } }), - ).toThrow(); - }); - - it('rejects keys that are not / shaped', () => { - expect(() => - ApprovalDecisionEventSchema.parse({ ...matchingEvent, key: 'session/sess-abc/turn_state' }), - ).toThrow(); - expect(() => - ApprovalDecisionEventSchema.parse({ ...matchingEvent, key: 'no-slash' }), - ).toThrow(); - }); - - it('rejects state:deleted and nested wrappers', () => { - expect(() => - ApprovalDecisionEventSchema.parse({ ...matchingEvent, event_type: 'state:deleted' }), - ).toThrow(); - expect(() => ApprovalDecisionEventSchema.parse({ payload: matchingEvent })).toThrow(); - expect(() => ApprovalDecisionEventSchema.parse(null)).toThrow(); - }); -}); - -function mockIiiWithTurnState(rec: ReturnType): { - iii: ISdk; - triggers: Array<{ function_id: string; payload: unknown; action?: unknown }>; -} { - const triggers: Array<{ function_id: string; payload: unknown; action?: unknown }> = []; - const iii = { - trigger: vi.fn(async (req: { function_id: string; payload: unknown; action?: unknown }) => { - if (req.function_id === 'state::get') return rec; - triggers.push(req); - return null; - }), - } as unknown as ISdk; - return { iii, triggers }; -} - -describe('parseApprovalDecisionWrite condition', () => { - it('matches an approvals decision write and extracts session_id', () => { - 
expect(parseApprovalDecisionWrite(matchingEvent)).toEqual({ session_id: 'sess-abc' }); - expect(isApprovalDecisionWrite(matchingEvent)).toBe(true); - }); - - it('skips writes with no decision and non-/ keys', () => { - expect(parseApprovalDecisionWrite({ ...matchingEvent, new_value: { reason: 'x' } })).toBeNull(); - expect( - parseApprovalDecisionWrite({ ...matchingEvent, key: 'session/s/turn_state' }), - ).toBeNull(); - }); -}); - -describe('handleApprovalDecisionWrite', () => { - it('extracts session_id and enqueues turn::{state}', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'function_awaiting_approval'; - const { iii, triggers } = mockIiiWithTurnState(rec); - - await handleApprovalDecisionWrite(iii, matchingEvent); - - expect(triggers).toHaveLength(1); - expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); - expect(triggers[0]?.payload).toEqual({ session_id: 'sess-abc' }); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); - - it('no-ops on a non-matching event', async () => { - const iii = { trigger: vi.fn() } as unknown as ISdk; - await handleApprovalDecisionWrite(iii, { ...matchingEvent, new_value: { reason: 'x' } }); - expect(iii.trigger).not.toHaveBeenCalled(); - }); -}); - -describe('on-approval execute', () => { - it('enqueues turn::{state} on the turn-step queue', async () => { - const rec = newRecord('sess-abc'); - rec.state = 'function_awaiting_approval'; - const { iii, triggers } = mockIiiWithTurnState(rec); - - await execute(iii, { session_id: 'sess-abc' }); - - expect(triggers[0]?.function_id).toBe('turn::function_awaiting_approval'); - expect(triggers[0]?.action).toEqual(TriggerAction.Enqueue({ queue: 'turn-step' })); - }); -}); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index 1ad0315b..8bf8e9a1 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ 
b/harness/tests/turn-orchestrator/state.test.ts @@ -1,9 +1,9 @@ import { describe, expect, it } from 'vitest'; -import { - type TurnStateRecord, - newRecord, - transitionTo, -} from '../../src/turn-orchestrator/state.js'; +import { TurnStateInvariantError } from '../../src/turn-orchestrator/errors.js'; +import { parseAssistantStreamingRecord, parseFunctionBatchRecord, parseSteeringCheckRecord } from '../../src/turn-orchestrator/schemas.js'; +import { type TurnStateRecord, newRecord, transitionTo } from '../../src/turn-orchestrator/state.js'; +import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; +import type { AssistantMessage } from '../../src/types/agent-message.js'; describe('TurnStateRecord', () => { it('starts in provisioning with no work and the given max_turns', () => { @@ -25,3 +25,70 @@ describe('TurnStateRecord', () => { expect(rec.awaiting_approval).toBeUndefined(); }); }); + +describe('parseFunctionBatchRecord', () => { + const asst: AssistantMessage = { + role: 'assistant', + content: [], + stop_reason: 'function_call', + error_message: null, + error_kind: null, + usage: null, + model: 'm', + provider: 'p', + timestamp: 1, + }; + + it('returns a validated record when function-batch fields are present', () => { + const rec = newRecord('s1'); + enterFunctionExecute(rec, asst); + rec.state = 'function_execute'; + const batch = parseFunctionBatchRecord(rec); + expect(batch.work).toBeDefined(); + expect(batch.awaiting_approval).toEqual([]); + }); + + it('throws TurnStateInvariantError when last_assistant is missing', () => { + const rec = newRecord('s1'); + rec.state = 'function_execute'; + rec.work = { prepared: [], executed: {} }; + rec.awaiting_approval = []; + expect(() => parseFunctionBatchRecord(rec)).toThrow(TurnStateInvariantError); + }); +}); + +describe('parseAssistantStreamingRecord', () => { + it('returns a validated record for assistant_streaming', () => { + const rec = newRecord('s1'); + rec.state = 
'assistant_streaming'; + const streaming = parseAssistantStreamingRecord(rec); + expect(streaming.state).toBe('assistant_streaming'); + expect(streaming.function_results).toEqual([]); + }); + + it('throws TurnStateInvariantError when session_id is missing', () => { + const rec = { state: 'assistant_streaming' } as TurnStateRecord; + expect(() => parseAssistantStreamingRecord(rec)).toThrow(TurnStateInvariantError); + }); + + it('throws TurnStateInvariantError when state is wrong', () => { + const rec = newRecord('s1'); + rec.state = 'provisioning'; + expect(() => parseAssistantStreamingRecord(rec)).toThrow(TurnStateInvariantError); + }); +}); + +describe('parseSteeringCheckRecord', () => { + it('returns a validated record for steering_check', () => { + const rec = newRecord('s1'); + rec.state = 'steering_check'; + const steering = parseSteeringCheckRecord(rec); + expect(steering.state).toBe('steering_check'); + expect(steering.function_results).toEqual([]); + }); + + it('throws TurnStateInvariantError when session_id is missing', () => { + const rec = { state: 'steering_check' } as TurnStateRecord; + expect(() => parseSteeringCheckRecord(rec)).toThrow(TurnStateInvariantError); + }); +}); diff --git a/harness/tests/turn-orchestrator/steering-check-layer.test.ts b/harness/tests/turn-orchestrator/steering-check-layer.test.ts index e93acb6d..db2836d4 100644 --- a/harness/tests/turn-orchestrator/steering-check-layer.test.ts +++ b/harness/tests/turn-orchestrator/steering-check-layer.test.ts @@ -1,9 +1,8 @@ import { describe, expect, it, vi } from 'vitest'; import type { AgentMessage } from '../../src/types/agent-message.js'; -import { applySteeringCheckOutcome } from '../../src/turn-orchestrator/steering-check/process.js'; +import { applySteeringCheckOutcome, processSteeringCheck } from '../../src/turn-orchestrator/steering-check/run.js'; import { parseDrainItems } from '../../src/turn-orchestrator/steering-check/ports.js'; import type { SteeringCheckPorts } from 
'../../src/turn-orchestrator/steering-check/ports.js'; -import { processSteeringCheck } from '../../src/turn-orchestrator/steering-check/process.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; function userMessage(text: string): AgentMessage { diff --git a/harness/tests/turn-orchestrator/steering.test.ts b/harness/tests/turn-orchestrator/steering.test.ts index 22a76b3e..a1fb9a14 100644 --- a/harness/tests/turn-orchestrator/steering.test.ts +++ b/harness/tests/turn-orchestrator/steering.test.ts @@ -4,7 +4,8 @@ import type { AgentMessage } from '../../src/types/agent-message.js'; import * as events from '../../src/turn-orchestrator/events.js'; import { installMockTurnStore } from './_helpers/mockTurnStore.js'; import { newRecord, type TurnStateRecord } from '../../src/turn-orchestrator/state.js'; -import { handleSteering, route } from '../../src/turn-orchestrator/steering-check/process.js'; +import { handleSteering } from '../../src/turn-orchestrator/steering-check/process.js'; +import { route } from '../../src/turn-orchestrator/steering-check/run.js'; afterEach(() => { vi.restoreAllMocks(); From 25e089f2dbf4b8741a95626e1cac77118064e272 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Wed, 27 May 2026 07:50:32 -0300 Subject: [PATCH 38/41] chore(harness): apply biome 2.4.10 formatting Match the CI biome version on 11 files (3 src, 8 tests). 
--- .../turn-orchestrator/assistant-streaming/ports.ts | 1 - harness/src/turn-orchestrator/schemas.ts | 8 ++++++-- harness/src/turn-orchestrator/state.ts | 5 ++++- .../src/turn-orchestrator/steering-check/process.ts | 6 +++++- harness/src/turn-orchestrator/steering-check/run.ts | 5 ++++- harness/tests/approval-gate/_helpers/fakeIii.ts | 13 +++++++------ .../tests/integration/parallel-approval-harness.ts | 6 +++++- .../tests/integration/parallel-approval.e2e.test.ts | 9 ++++----- harness/tests/turn-orchestrator/functions.test.ts | 11 +++++++++-- harness/tests/turn-orchestrator/state.test.ts | 12 ++++++++++-- .../turn-orchestrator/steering-check-layer.test.ts | 5 ++++- 11 files changed, 58 insertions(+), 23 deletions(-) diff --git a/harness/src/turn-orchestrator/assistant-streaming/ports.ts b/harness/src/turn-orchestrator/assistant-streaming/ports.ts index 66bb329b..75688f29 100644 --- a/harness/src/turn-orchestrator/assistant-streaming/ports.ts +++ b/harness/src/turn-orchestrator/assistant-streaming/ports.ts @@ -48,7 +48,6 @@ export function isErrorOrAborted(asst: AssistantMessage): boolean { return asst.stop_reason === 'error' || asst.stop_reason === 'aborted'; } - export type DeltaHandler = ( partial: AssistantMessage, event: AssistantMessageEvent, diff --git a/harness/src/turn-orchestrator/schemas.ts b/harness/src/turn-orchestrator/schemas.ts index 1721f80b..2f187647 100644 --- a/harness/src/turn-orchestrator/schemas.ts +++ b/harness/src/turn-orchestrator/schemas.ts @@ -91,7 +91,9 @@ function formatZodIssues(error: z.ZodError): string { export function parseFunctionBatchRecord(rec: TurnStateRecord): FunctionBatchTurnRecord { const result = FunctionBatchTurnRecordSchema.safeParse(rec); if (!result.success) { - throw new TurnStateInvariantError(`invalid function batch turn record: ${formatZodIssues(result.error)}`); + throw new TurnStateInvariantError( + `invalid function batch turn record: ${formatZodIssues(result.error)}`, + ); } // Return the same object — 
handlers mutate turn_state in place before saveRecord. return rec as FunctionBatchTurnRecord; @@ -138,7 +140,9 @@ export const SteeringCheckTurnRecordSchema = z export function parseSteeringCheckRecord(rec: TurnStateRecord): SteeringCheckTurnRecord { const result = SteeringCheckTurnRecordSchema.safeParse(rec); if (!result.success) { - throw new TurnStateInvariantError(`invalid steering_check turn record: ${formatZodIssues(result.error)}`); + throw new TurnStateInvariantError( + `invalid steering_check turn record: ${formatZodIssues(result.error)}`, + ); } return rec as SteeringCheckTurnRecord; } diff --git a/harness/src/turn-orchestrator/state.ts b/harness/src/turn-orchestrator/state.ts index b4551f79..f8fc6995 100644 --- a/harness/src/turn-orchestrator/state.ts +++ b/harness/src/turn-orchestrator/state.ts @@ -74,7 +74,10 @@ export type SteeringCheckTurnRecord = TurnStateRecordCore & { awaiting_approval?: AwaitingApprovalEntry[]; }; -type OtherTurnState = Exclude; +type OtherTurnState = Exclude< + TurnState, + FunctionBatchState | 'assistant_streaming' | 'steering_check' +>; export type TurnStateRecord = | FunctionBatchTurnRecord diff --git a/harness/src/turn-orchestrator/steering-check/process.ts b/harness/src/turn-orchestrator/steering-check/process.ts index 7df1b7b4..e4624314 100644 --- a/harness/src/turn-orchestrator/steering-check/process.ts +++ b/harness/src/turn-orchestrator/steering-check/process.ts @@ -4,7 +4,11 @@ import type { ISdk } from '../../runtime/iii.js'; import { runTransition } from '../run-transition.js'; -import { TurnStepPayloadSchema, parseSteeringCheckRecord, type TurnStepPayload } from '../schemas.js'; +import { + TurnStepPayloadSchema, + parseSteeringCheckRecord, + type TurnStepPayload, +} from '../schemas.js'; import type { TurnStateRecord } from '../state.js'; import { createSteeringCheckPorts } from './ports.js'; import { runSteeringCheck } from './run.js'; diff --git a/harness/src/turn-orchestrator/steering-check/run.ts 
b/harness/src/turn-orchestrator/steering-check/run.ts index b6777a3b..e7e42ee3 100644 --- a/harness/src/turn-orchestrator/steering-check/run.ts +++ b/harness/src/turn-orchestrator/steering-check/run.ts @@ -31,7 +31,10 @@ function maxTurnsReached(rec: SteeringCheckTurnRecord): boolean { return rec.max_turns !== undefined && rec.turn_count >= rec.max_turns; } -async function endForMaxTurns(ports: SteeringCheckPorts, rec: SteeringCheckTurnRecord): Promise { +async function endForMaxTurns( + ports: SteeringCheckPorts, + rec: SteeringCheckTurnRecord, +): Promise { const msg = syntheticAssistant({ stop_reason: 'end', text: `loop stopped: max_turns (${rec.max_turns ?? 0}) reached`, diff --git a/harness/tests/approval-gate/_helpers/fakeIii.ts b/harness/tests/approval-gate/_helpers/fakeIii.ts index d1fe0f96..b5e9f7ff 100644 --- a/harness/tests/approval-gate/_helpers/fakeIii.ts +++ b/harness/tests/approval-gate/_helpers/fakeIii.ts @@ -30,12 +30,13 @@ export function fakeIii(): FakeIii { payload: unknown; action?: unknown; }) => { - calls.push({ function_id, payload, action }); - if (function_id === 'stream::set') { - streamSets.push(payload); - } - return null; - }), + calls.push({ function_id, payload, action }); + if (function_id === 'stream::set') { + streamSets.push(payload); + } + return null; + }, + ), } as unknown as ISdk; return { iii, calls, streamSets }; diff --git a/harness/tests/integration/parallel-approval-harness.ts b/harness/tests/integration/parallel-approval-harness.ts index 625cfa01..2fa5e5cc 100644 --- a/harness/tests/integration/parallel-approval-harness.ts +++ b/harness/tests/integration/parallel-approval-harness.ts @@ -12,7 +12,11 @@ import { import { handleExecute } from '../../src/turn-orchestrator/function-execute/process.js'; import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; import { runTransition } from '../../src/turn-orchestrator/run-transition.js'; -import { TURN_STATE_SCOPE, newRecord, type 
TurnStateRecord } from '../../src/turn-orchestrator/state.js'; +import { + TURN_STATE_SCOPE, + newRecord, + type TurnStateRecord, +} from '../../src/turn-orchestrator/state.js'; import type { ISdk } from '../../src/runtime/iii.js'; import type { AgentEvent } from '../../src/types/agent-event.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; diff --git a/harness/tests/integration/parallel-approval.e2e.test.ts b/harness/tests/integration/parallel-approval.e2e.test.ts index 73d112d2..a13c5435 100644 --- a/harness/tests/integration/parallel-approval.e2e.test.ts +++ b/harness/tests/integration/parallel-approval.e2e.test.ts @@ -139,10 +139,7 @@ describe('parallel approval e2e', () => { const h = createParallelApprovalHarness(); vi.spyOn(agentTriggerModule, 'dispatchWithHook').mockResolvedValueOnce({ kind: 'pending' }); - h.seedExecute( - 'sess-dup', - makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }]), - ); + h.seedExecute('sess-dup', makeAssistantWithCalls([{ id: 'fc-1', functionId: 'shell::run' }])); await h.runExecute('sess-dup'); await h.resolveApproval('sess-dup', 'fc-1', 'allow'); @@ -152,7 +149,9 @@ describe('parallel approval e2e', () => { const rec = h.loadTurnRecord('sess-dup'); expect(rec?.awaiting_approval).toEqual([]); - expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength(endsAfterFirst); + expect(executionEvents(h.emitted, 'function_execution_end', 'fc-1')).toHaveLength( + endsAfterFirst, + ); }); it('persists the decision and wakes function_awaiting_approval via approval::resolve', async () => { diff --git a/harness/tests/turn-orchestrator/functions.test.ts b/harness/tests/turn-orchestrator/functions.test.ts index 6f399928..54a36fe5 100644 --- a/harness/tests/turn-orchestrator/functions.test.ts +++ b/harness/tests/turn-orchestrator/functions.test.ts @@ -79,7 +79,11 @@ describe('parseApprovalDecision', () => { }); /** Seed required function-batch invariants before handleExecute. 
*/ -function seedFunctionExecute(rec: TurnStateRecord, work: FunctionBatchWork, asst?: AssistantMessage): void { +function seedFunctionExecute( + rec: TurnStateRecord, + work: FunctionBatchWork, + asst?: AssistantMessage, +): void { enterFunctionExecute(rec, asst ?? makeAssistant([])); rec.work = work; rec.state = 'function_execute'; @@ -106,7 +110,10 @@ describe('handleExecute new flow', () => { }); const iii = { trigger: vi.fn().mockResolvedValue(null) } as unknown as ISdk; const rec: TurnStateRecord = newRecord('s1'); - enterFunctionExecute(rec, makeAssistant([agentTriggerCall('fc-1', 'shell::run', { command: 'ls' })])); + enterFunctionExecute( + rec, + makeAssistant([agentTriggerCall('fc-1', 'shell::run', { command: 'ls' })]), + ); rec.state = 'function_execute'; mockFinalizePersistence(); diff --git a/harness/tests/turn-orchestrator/state.test.ts b/harness/tests/turn-orchestrator/state.test.ts index 8bf8e9a1..6d341a46 100644 --- a/harness/tests/turn-orchestrator/state.test.ts +++ b/harness/tests/turn-orchestrator/state.test.ts @@ -1,7 +1,15 @@ import { describe, expect, it } from 'vitest'; import { TurnStateInvariantError } from '../../src/turn-orchestrator/errors.js'; -import { parseAssistantStreamingRecord, parseFunctionBatchRecord, parseSteeringCheckRecord } from '../../src/turn-orchestrator/schemas.js'; -import { type TurnStateRecord, newRecord, transitionTo } from '../../src/turn-orchestrator/state.js'; +import { + parseAssistantStreamingRecord, + parseFunctionBatchRecord, + parseSteeringCheckRecord, +} from '../../src/turn-orchestrator/schemas.js'; +import { + type TurnStateRecord, + newRecord, + transitionTo, +} from '../../src/turn-orchestrator/state.js'; import { enterFunctionExecute } from '../../src/turn-orchestrator/function-execute/run.js'; import type { AssistantMessage } from '../../src/types/agent-message.js'; diff --git a/harness/tests/turn-orchestrator/steering-check-layer.test.ts b/harness/tests/turn-orchestrator/steering-check-layer.test.ts 
index db2836d4..78d99b67 100644 --- a/harness/tests/turn-orchestrator/steering-check-layer.test.ts +++ b/harness/tests/turn-orchestrator/steering-check-layer.test.ts @@ -1,6 +1,9 @@ import { describe, expect, it, vi } from 'vitest'; import type { AgentMessage } from '../../src/types/agent-message.js'; -import { applySteeringCheckOutcome, processSteeringCheck } from '../../src/turn-orchestrator/steering-check/run.js'; +import { + applySteeringCheckOutcome, + processSteeringCheck, +} from '../../src/turn-orchestrator/steering-check/run.js'; import { parseDrainItems } from '../../src/turn-orchestrator/steering-check/ports.js'; import type { SteeringCheckPorts } from '../../src/turn-orchestrator/steering-check/ports.js'; import { newRecord } from '../../src/turn-orchestrator/state.js'; From 842e706e68b7f0c858550c3e3e9b2e5e55b56459 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Wed, 27 May 2026 08:22:54 -0300 Subject: [PATCH 39/41] docs(context-compaction): note dedicated agent::turn_end subscription Header and registered-function blurb still said the subscriber fires on every agent::events message; the wire is agent::turn_end (one wake per turn). The function name kept its historical on_agent_event id. --- harness/docs/workers/context-compaction.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/harness/docs/workers/context-compaction.md b/harness/docs/workers/context-compaction.md index 1fdc3ce5..fb044682 100644 --- a/harness/docs/workers/context-compaction.md +++ b/harness/docs/workers/context-compaction.md @@ -1,9 +1,11 @@ # context-compaction -Out-of-band session-history compactor (v2). Watches `agent::events` for -`TurnEnd` frames and summarises older turns when the session approaches the -model's usable context limit. Also exposes a sync pre-turn path that the -turn-orchestrator calls to compact before a turn that would overflow. +Out-of-band session-history compactor (v2). 
Subscribes to the dedicated +`agent::turn_end` stream (mirrored by the event producer) and summarises older +turns when the session approaches the model's usable context limit — one wake +per turn instead of one per `agent::events` frame. Also exposes a sync +pre-turn path that the turn-orchestrator calls to compact before a turn that +would overflow. ## Purpose @@ -27,7 +29,8 @@ This worker is optional. Without it, sessions keep their full transcript. ### `context-compaction::on_agent_event` -Internal stream subscriber. Fires on every `agent::events` message. +Internal stream subscriber on `agent::turn_end` — fires once per turn (kept +under the historical `on_agent_event` name). **Payload** (camelCase or snake_case envelope): ``` From 201c6836b61c4ace18845e86e1cfbe63c623fb81 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Wed, 27 May 2026 08:28:57 -0300 Subject: [PATCH 40/41] docs: reconcile harness docs with code after FSM rewrite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - architecture.md: worker count 11 → 15; add provider-config + provider-llamacpp rows; note context-compaction rides agent::turn_end. - approval-gate.md: replace dangling on-approval.ts link with function-awaiting-approval/process.ts. - context-compaction.md: use update_parts (batched) in prose + dependencies table; list handler-pipeline.ts and flat-state.ts in source layout. - session.md: add the four session-tree functions the list was missing (compactions, append_synthetic, update_part, update_parts); 11 → 15. - turn-orchestrator.md: drop misleading "in parallel" from the function_execute description — the dispatch loop is sequential, just non-blocking on pending calls. 
--- harness/docs/architecture.md | 6 ++++-- harness/docs/workers/approval-gate.md | 2 +- harness/docs/workers/context-compaction.md | 6 ++++-- harness/docs/workers/session.md | 6 +++++- harness/docs/workers/turn-orchestrator.md | 2 +- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/harness/docs/architecture.md b/harness/docs/architecture.md index 821665d7..d390ec13 100644 --- a/harness/docs/architecture.md +++ b/harness/docs/architecture.md @@ -1,7 +1,7 @@ # harness architecture `harness` is the Node/TypeScript port of the iii harness stack. It ships -as one pnpm package containing 11 workers (one folder per worker, one feature +as one pnpm package containing 15 workers (one folder per worker, one feature per file) plus a shared `runtime/` SDK helper layer and a `types/` wire-type mirror of `harness/crates/harness-types`. Each worker is independently runnable as `pnpm dev:` (development) or `iii-` (production binary); @@ -26,11 +26,13 @@ workers. | hook-fanout | [src/hook-fanout/](harness/src/hook-fanout/) | Generic publish-and-collect primitive over a stream topic. | [workers/hook-fanout.md](harness/docs/workers/hook-fanout.md) | | auth-credentials | [src/auth-credentials/](harness/src/auth-credentials/) | File-backed multi-provider credential store. | [workers/auth-credentials.md](harness/docs/workers/auth-credentials.md) | | models-catalog | [src/models-catalog/](harness/src/models-catalog/) | Static model-capability catalogue (state-first, embedded fallback). | [workers/models-catalog.md](harness/docs/workers/models-catalog.md) | +| provider-config | [src/provider-config/](harness/src/provider-config/) | Runtime provider settings store on the iii bus (`provider_config::*` — base URL / max tokens overrides). | [workers/provider-config.md](harness/docs/workers/provider-config.md) | | provider-anthropic | [src/provider-anthropic/](harness/src/provider-anthropic/) | Anthropic Messages API SSE → channel writer. 
| [workers/provider-anthropic.md](harness/docs/workers/provider-anthropic.md) | | provider-openai | [src/provider-openai/](harness/src/provider-openai/) | OpenAI Chat Completions SSE → channel writer. | [workers/provider-openai.md](harness/docs/workers/provider-openai.md) | | provider-kimi | [src/provider-kimi/](harness/src/provider-kimi/) | Kimi Chat Completions SSE → channel writer. | [workers/provider-kimi.md](harness/docs/workers/provider-kimi.md) | | provider-lmstudio | [src/provider-lmstudio/](harness/src/provider-lmstudio/) | LM Studio (localhost) Chat Completions SSE → channel writer. | [workers/provider-lmstudio.md](harness/docs/workers/provider-lmstudio.md) | -| context-compaction | [src/context-compaction/](harness/src/context-compaction/) | Optional `agent::events` side-car that compacts session history when running token count crosses a threshold. | [workers/context-compaction.md](harness/docs/workers/context-compaction.md) | +| provider-llamacpp | [src/provider-llamacpp/](harness/src/provider-llamacpp/) | llama.cpp `llama-server` (localhost) Chat Completions SSE → channel writer. | [workers/provider-llamacpp.md](harness/docs/workers/provider-llamacpp.md) | +| context-compaction | [src/context-compaction/](harness/src/context-compaction/) | Optional `agent::turn_end` side-car that compacts session history when running token count crosses a threshold. | [workers/context-compaction.md](harness/docs/workers/context-compaction.md) | ## System diagram diff --git a/harness/docs/workers/approval-gate.md b/harness/docs/workers/approval-gate.md index 7695e14e..6f4e6a54 100644 --- a/harness/docs/workers/approval-gate.md +++ b/harness/docs/workers/approval-gate.md @@ -87,5 +87,5 @@ no explicit dependency block. | [src/approval-gate/iii.worker.yaml](harness/src/approval-gate/iii.worker.yaml) | Worker manifest. 
| Related orchestrator code: -[on-approval.ts](harness/src/turn-orchestrator/on-approval.ts), +[function-awaiting-approval/process.ts](harness/src/turn-orchestrator/function-awaiting-approval/process.ts) (registers `turn::on_approval`), [hook.ts](harness/src/turn-orchestrator/hook.ts). diff --git a/harness/docs/workers/context-compaction.md b/harness/docs/workers/context-compaction.md index fb044682..997ca07b 100644 --- a/harness/docs/workers/context-compaction.md +++ b/harness/docs/workers/context-compaction.md @@ -177,7 +177,7 @@ scratch, so the summary converges rather than growing without bound. `COMPACT_PRUNE_PROTECT` goes into the prune queue. 4. If the queue would free fewer than `COMPACT_PRUNE_MIN_FREE` tokens, it skips entirely (no-op). -5. Calls `session-tree::update_part` to null out each pruned output. +5. Calls `session-tree::update_parts` to null out each pruned output (batched, one load). Tools listed in `COMPACT_PRUNE_PROTECTED_TOOLS` are never pruned. @@ -262,7 +262,7 @@ outer `instrumentHandler` wrapper. | `session-tree::compact` | Append a Compaction entry (summary + `tail_start_id` + `tokens_before`). | | `session-tree::compactions` | Load existing Compaction entries for prior-summary anchor. | | `session-tree::append_synthetic` | Append the "Continue…" prompt after sync compaction. | -| `session-tree::update_part` | Null out pruned tool outputs in-place. | +| `session-tree::update_parts` | Null out pruned tool outputs in-place (batched). | | `models::get` | Resolve `context_window` / `max_output_tokens` for model-adaptive threshold. | Worker manifest deps (`iii.worker.yaml`): @@ -277,6 +277,8 @@ Worker manifest deps (`iii.worker.yaml`): | `src/context-compaction/config.ts` | Reads all `COMPACT_*` env vars. | | `src/context-compaction/handler-async.ts` | Async TurnEnd path: envelope decode, overflow check, lease, prune, summarise. 
| | `src/context-compaction/handler-sync.ts` | Sync pre-turn path: lease-with-wait, extract replay, prune, summarise, reinject. | +| `src/context-compaction/handler-pipeline.ts` | Shared prune → summarise → flat-state rewrite pipeline used by both handlers. | +| `src/context-compaction/flat-state.ts` | Rewrites scope `messages` after compaction so the next turn reads the new flat transcript. | | `src/context-compaction/model-resolver.ts` | Shared model-resolution helpers: `fetchModelLimit` (catalog lookup) and `resolveModelFromSession` (session-scan + catalog lookup). | | `src/context-compaction/prune.ts` | Tool-output pruning (`prune`). | | `src/context-compaction/summarize.ts` | `summarizeAndAppend`: load → select tail → summarise → append Compaction entry. | diff --git a/harness/docs/workers/session.md b/harness/docs/workers/session.md index 852f4006..7ac2feef 100644 --- a/harness/docs/workers/session.md +++ b/harness/docs/workers/session.md @@ -36,6 +36,10 @@ backend. - `session-tree::messages` — Load every AgentMessage on the active path of a session, paired with its entry_id, oldest first. - `session-tree::reconcile` — Mirror missing messages from a state-snapshot into session-tree. - `session-tree::list` — List sessions with optional pagination and ordering. +- `session-tree::compactions` — Return all Compaction entries for a session, sorted by timestamp ascending. +- `session-tree::append_synthetic` — Append a synthetic user-role message entry to a session (used by the context-compaction replay path). +- `session-tree::update_part` — Replace the content of a `function_result` message entry with compacted output. +- `session-tree::update_parts` — Batch variant of `update_part`; loads target entries once and rewrites all of them. ### `session-inbox::*` @@ -91,7 +95,7 @@ From [src/session/iii.worker.yaml](harness/src/session/iii.worker.yaml): | [src/session/main.ts](harness/src/session/main.ts) | Binary entry point (`iii-session`). 
| | [src/session/register.ts](harness/src/session/register.ts) | Picks the backend and wires both sub-surfaces. | | [src/session/config.ts](harness/src/session/config.ts) | Loads the `session` config section. | -| [src/session/tree/register.ts](harness/src/session/tree/register.ts) | Registers all 11 `session-tree::*` functions; exports `FUNCTION_IDS`. | +| [src/session/tree/register.ts](harness/src/session/tree/register.ts) | Registers all 15 `session-tree::*` functions; exports `FUNCTION_IDS`. | | [src/session/tree/operations.ts](harness/src/session/tree/operations.ts) | Pure tree algorithms: create, fork, clone, compact, active path, messages, reconcile, tree, export_html, list. | | [src/session/tree/store.ts](harness/src/session/tree/store.ts) | `SessionStore` interface + `InMemoryStore` + `IiiStateSessionStore`. | | [src/session/tree/types.ts](harness/src/session/tree/types.ts) | `SessionEntry` (`message` / `custom_message` / `branch_summary` / `compaction`, each with an explicit `timestamp`), `SessionMeta`, `TreeNode`, `ReconcileResult`, `SessionError`, plus the `entryTimestamp` helper used by the `(timestamp, id)` sort. | diff --git a/harness/docs/workers/turn-orchestrator.md b/harness/docs/workers/turn-orchestrator.md index 1ae3665a..5a164596 100644 --- a/harness/docs/workers/turn-orchestrator.md +++ b/harness/docs/workers/turn-orchestrator.md @@ -37,7 +37,7 @@ unreachable → deny with a `gate_unavailable` `DenialEnvelope`. `provisioning`, and wake the FSM via `saveRecord`. - `turn::provisioning` — FSM step: build system prompt + single `agent_trigger` schema, write enriched `run_request`, advance to `assistant_streaming`. - `turn::assistant_streaming` — FSM step: stream the turn over a provider channel; on completion emit `message_complete`, persist the assistant message (dup-guarded), route to `function_execute` / `steering_check` / `stopped` (via `finishSession`). 
-- `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call (skip already-executed and awaiting-approval ids), checkpoint per-call via `writeRecord`; if `pending` → append to `awaiting_approval` and continue the batch in parallel; park to `function_awaiting_approval` when any call awaits approval; finalize results into messages + emit `turn_end` when the batch completes → `steering_check` / `stopped` (via `finishSession`). +- `turn::function_execute` — FSM step: own the full function lifecycle via `rec.work`; build batch from `rec.last_assistant`, run each call (skip already-executed and awaiting-approval ids), checkpoint per-call via `writeRecord`; if `pending` → append to `awaiting_approval` and keep dispatching the remaining calls (pending does not block siblings); park to `function_awaiting_approval` when any call awaits approval; finalize results into messages + emit `turn_end` when the batch completes → `steering_check` / `stopped` (via `finishSession`). - `turn::function_awaiting_approval` — FSM step: on each wake, read decisions for individual `awaiting_approval[]` entries; execute each resolved call immediately (`allow` → dispatch pre-approved; `deny`/`aborted` → synthetic denial); remove resolved entries; stay parked while any remain; when none remain → `finalizeBatch` if complete else `function_execute`. - `turn::steering_check` — FSM step: drain `steering`/`followup` inboxes, enforce `max_turns` cap (emits synthetic `max_turns` message + `turn_end` → `stopped` via `finishSession`), route to `assistant_streaming` / `stopped`. - `turn::get_state` — One-shot reader returning a lean `TurnStateView` (from `schemas.ts:toView`) for a session. UI clients call this on reload to recover in-progress modals (e.g. `function_awaiting_approval`) without reading iii state directly. Returns `null` for unknown sessions. 
From 5b44565ca3977ae4a479a547a9e41b43b319cba6 Mon Sep 17 00:00:00 2001 From: Ytallo Layon Date: Wed, 27 May 2026 11:01:27 -0300 Subject: [PATCH 41/41] docs(shell): remove outdated isolation boundary warning from README The README.md file has been updated to remove the warning section which stated that host-targeted `shell::exec` is not an isolation boundary. This change aims to improve the documentation's accuracy and guidance for users regarding the use of shell execution in untrusted environments. --- shell/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/shell/README.md b/shell/README.md index 887adc26..31187961 100644 --- a/shell/README.md +++ b/shell/README.md @@ -4,10 +4,6 @@ Unix shell and filesystem worker on the iii bus. Every agent that needs to touch the OS (run a build, read a file, list a directory, call a CLI) goes through `shell::*` and `shell::fs::*`, so allowlists, timeouts, output caps, and a host-root jail live in one place. Both surfaces accept an optional `target` field that forwards the call into a live `iii-sandbox` microVM, so the same allowlist policy gates host and sandbox execution. - -Host-targeted `shell::exec` is not an isolation boundary. The denylist is a regex tripwire on `argv.join(" ")`. A caller running an allowlisted interpreter (`sh`, `node`, `python3`) can construct any forbidden token at runtime and bypass it. For untrusted input, pass `target: { kind: "sandbox", sandbox_id }` so the call forwards into a microVM. Prefer `shell::fs::ls`, `shell::fs::stat`, and `shell::fs::grep` over `exec`-ing the same tools; the fs backends stay in-process, respect the jail, and return structured results. - - ## Install ```bash